Fix PercentageRunnersBusy scaling delay (#1579)

* Use a dedicated pod label to say it is a runner pod

Follow-up for #1546

* Fix PercentageRunnersBusy scaling delay

Ref #1374
This commit is contained in:
Yusuke Kuoka 2022-06-29 20:49:21 +09:00 committed by GitHub
parent a9ac5a1cbf
commit 8161136cbd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 83 additions and 17 deletions

View File

@ -10,6 +10,8 @@ import (
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1" "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
"github.com/google/go-github/v45/github" "github.com/google/go-github/v45/github"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
) )
const ( const (
@ -314,22 +316,52 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
numRunners int numRunners int
numRunnersRegistered int numRunnersRegistered int
numRunnersBusy int numRunnersBusy int
numTerminatingBusy int
) )
numRunners = len(runnerMap) numRunners = len(runnerMap)
busyTerminatingRunnerPods := map[string]struct{}{}
kindLabel := LabelKeyRunnerDeploymentName
if hra.Spec.ScaleTargetRef.Kind == "RunnerSet" {
kindLabel = LabelKeyRunnerSetName
}
var runnerPodList corev1.PodList
if err := r.Client.List(ctx, &runnerPodList, client.InNamespace(hra.Namespace), client.MatchingLabels(map[string]string{
kindLabel: hra.Spec.ScaleTargetRef.Name,
})); err != nil {
return nil, err
}
for _, p := range runnerPodList.Items {
if p.Annotations[AnnotationKeyUnregistrationFailureMessage] != "" {
busyTerminatingRunnerPods[p.Name] = struct{}{}
}
}
for _, runner := range runners { for _, runner := range runners {
if _, ok := runnerMap[*runner.Name]; ok { if _, ok := runnerMap[*runner.Name]; ok {
numRunnersRegistered++ numRunnersRegistered++
if runner.GetBusy() { if runner.GetBusy() {
numRunnersBusy++ numRunnersBusy++
} else if _, ok := busyTerminatingRunnerPods[*runner.Name]; ok {
numTerminatingBusy++
} }
delete(busyTerminatingRunnerPods, *runner.Name)
} }
} }
// Remaining busyTerminatingRunnerPods are runners that were not on the ListRunners API response yet
for range busyTerminatingRunnerPods {
numTerminatingBusy++
}
var desiredReplicas int var desiredReplicas int
fractionBusy := float64(numRunnersBusy) / float64(desiredReplicasBefore) fractionBusy := float64(numRunnersBusy+numTerminatingBusy) / float64(desiredReplicasBefore)
if fractionBusy >= scaleUpThreshold { if fractionBusy >= scaleUpThreshold {
if scaleUpAdjustment > 0 { if scaleUpAdjustment > 0 {
desiredReplicas = desiredReplicasBefore + scaleUpAdjustment desiredReplicas = desiredReplicasBefore + scaleUpAdjustment
@ -358,6 +390,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
"num_runners", numRunners, "num_runners", numRunners,
"num_runners_registered", numRunnersRegistered, "num_runners_registered", numRunnersRegistered,
"num_runners_busy", numRunnersBusy, "num_runners_busy", numRunnersBusy,
"num_terminating_busy", numTerminatingBusy,
"namespace", hra.Namespace, "namespace", hra.Namespace,
"kind", st.kind, "kind", st.kind,
"name", st.st, "name", st.st,

View File

@ -4,6 +4,7 @@ import "time"
const ( const (
LabelKeyRunnerSetName = "runnerset-name" LabelKeyRunnerSetName = "runnerset-name"
LabelKeyRunner = "actions-runner"
) )
const ( const (
@ -16,6 +17,9 @@ const (
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time" AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
// AnnotationKeyUnregistrationFailureMessage is the annotation that is added onto the pod once it failed to be unregistered from GitHub due to e.g. 422 error
AnnotationKeyUnregistrationFailureMessage = annotationKeyPrefix + "unregistration-failure-message"
// AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed. // AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
AnnotationKeyUnregistrationCompleteTimestamp = annotationKeyPrefix + "unregistration-complete-timestamp" AnnotationKeyUnregistrationCompleteTimestamp = annotationKeyPrefix + "unregistration-complete-timestamp"

View File

@ -56,7 +56,7 @@ func TestNewRunnerPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{ Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true", "actions-runner-controller/inject-registration-token": "true",
"runnerset-name": "runner", "actions-runner": "",
}, },
}, },
Spec: corev1.PodSpec{ Spec: corev1.PodSpec{
@ -198,7 +198,7 @@ func TestNewRunnerPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{ Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true", "actions-runner-controller/inject-registration-token": "true",
"runnerset-name": "runner", "actions-runner": "",
}, },
}, },
Spec: corev1.PodSpec{ Spec: corev1.PodSpec{
@ -276,7 +276,7 @@ func TestNewRunnerPod(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{ Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true", "actions-runner-controller/inject-registration-token": "true",
"runnerset-name": "runner", "actions-runner": "",
}, },
}, },
Spec: corev1.PodSpec{ Spec: corev1.PodSpec{
@ -515,7 +515,7 @@ func TestNewRunnerPod(t *testing.T) {
for i := range testcases { for i := range testcases {
tc := testcases[i] tc := testcases[i]
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
got, err := newRunnerPod("runner", tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL) got, err := newRunnerPod(tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, tc.want, got) require.Equal(t, tc.want, got)
}) })
@ -546,7 +546,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
Labels: map[string]string{ Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true", "actions-runner-controller/inject-registration-token": "true",
"pod-template-hash": "8857b86c7", "pod-template-hash": "8857b86c7",
"runnerset-name": "runner", "actions-runner": "",
}, },
OwnerReferences: []metav1.OwnerReference{ OwnerReferences: []metav1.OwnerReference{
{ {
@ -703,7 +703,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
Labels: map[string]string{ Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true", "actions-runner-controller/inject-registration-token": "true",
"pod-template-hash": "8857b86c7", "pod-template-hash": "8857b86c7",
"runnerset-name": "runner", "actions-runner": "",
}, },
OwnerReferences: []metav1.OwnerReference{ OwnerReferences: []metav1.OwnerReference{
{ {
@ -800,7 +800,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
Labels: map[string]string{ Labels: map[string]string{
"actions-runner-controller/inject-registration-token": "true", "actions-runner-controller/inject-registration-token": "true",
"pod-template-hash": "8857b86c7", "pod-template-hash": "8857b86c7",
"runnerset-name": "runner", "actions-runner": "",
}, },
OwnerReferences: []metav1.OwnerReference{ OwnerReferences: []metav1.OwnerReference{
{ {

View File

@ -426,7 +426,7 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
} }
} }
pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, runner.Name, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL) pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
if err != nil { if err != nil {
return pod, err return pod, err
} }
@ -589,7 +589,7 @@ func runnerHookEnvs(pod *corev1.Pod) ([]corev1.EnvVar, error) {
}, nil }, nil
} }
func newRunnerPodWithContainerMode(containerMode string, runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) { func newRunnerPodWithContainerMode(containerMode string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
var ( var (
privileged bool = true privileged bool = true
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
@ -607,7 +607,7 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
template = *template.DeepCopy() template = *template.DeepCopy()
// This label selector is used by default when rd.Spec.Selector is empty. // This label selector is used by default when rd.Spec.Selector is empty.
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerName) template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunner, "")
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation) template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
workDir := runnerSpec.WorkDir workDir := runnerSpec.WorkDir
@ -962,8 +962,8 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
return *pod, nil return *pod, nil
} }
func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) { func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
return newRunnerPodWithContainerMode("", runnerName, template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL) return newRunnerPodWithContainerMode("", template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
} }
func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {

View File

@ -67,6 +67,25 @@ func annotatePodOnce(ctx context.Context, c client.Client, log logr.Logger, pod
return updated, nil return updated, nil
} }
func labelPod(ctx context.Context, c client.Client, log logr.Logger, pod *corev1.Pod, k, v string) (*corev1.Pod, error) {
if pod == nil {
return nil, nil
}
updated := pod.DeepCopy()
if updated.Labels == nil {
updated.Labels = map[string]string{}
}
updated.Labels[k] = v
if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil {
log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", k))
return nil, err
}
log.V(2).Info("Labeled pod", "key", k, "value", v)
return updated, nil
}
// If the first return value is nil, it's safe to delete the runner pod. // If the first return value is nil, it's safe to delete the runner pod.
func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) { func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
@ -151,7 +170,10 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
log.V(1).Info("Failed to unregister runner before deleting the pod.", "error", err) log.V(1).Info("Failed to unregister runner before deleting the pod.", "error", err)
var runnerBusy bool var (
runnerBusy bool
runnerUnregistrationFailureMessage string
)
errRes := &gogithub.ErrorResponse{} errRes := &gogithub.ErrorResponse{}
if errors.As(err, &errRes) { if errors.As(err, &errRes) {
@ -173,6 +195,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
} }
runnerBusy = errRes.Response.StatusCode == 422 runnerBusy = errRes.Response.StatusCode == 422
runnerUnregistrationFailureMessage = errRes.Message
if runnerBusy && code != nil { if runnerBusy && code != nil {
log.V(2).Info("Runner container has already stopped but the unregistration attempt failed. "+ log.V(2).Info("Runner container has already stopped but the unregistration attempt failed. "+
@ -187,6 +210,11 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
} }
if runnerBusy { if runnerBusy {
_, err := labelPod(ctx, c, log, pod, AnnotationKeyUnregistrationFailureMessage, runnerUnregistrationFailureMessage)
if err != nil {
return &ctrl.Result{}, err
}
// We want to prevent spamming the deletion attemps but returning ctrl.Result with RequeueAfter doesn't // We want to prevent spamming the deletion attemps but returning ctrl.Result with RequeueAfter doesn't
// work as the reconcilation can happen earlier due to pod status update. // work as the reconcilation can happen earlier due to pod status update.
// For ephemeral runners, we can expect it to stop and unregister itself on completion. // For ephemeral runners, we can expect it to stop and unregister itself on completion.

View File

@ -62,8 +62,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, client.IgnoreNotFound(err) return ctrl.Result{}, client.IgnoreNotFound(err)
} }
_, isRunnerPod := runnerPod.Labels[LabelKeyRunnerSetName] if _, isRunnerPod := runnerPod.Labels[LabelKeyRunner]; !isRunnerPod {
if !isRunnerPod {
return ctrl.Result{}, nil return ctrl.Result{}, nil
} }

View File

@ -219,7 +219,9 @@ func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*ap
template.Spec.ServiceAccountName = runnerSet.Spec.ServiceAccountName template.Spec.ServiceAccountName = runnerSet.Spec.ServiceAccountName
} }
pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, runnerSet.Name, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL) template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerSet.Name)
pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
if err != nil { if err != nil {
return nil, err return nil, err
} }