Fix PercentageRunnersBusy scaling delay (#1579)
* Use a dedicated pod label to say it is a runner pod Follow-up for #1546 * Fix PercentageRunnersBusy scaling delay Ref #1374
This commit is contained in:
parent
a9ac5a1cbf
commit
8161136cbd
|
|
@ -10,6 +10,8 @@ import (
|
||||||
|
|
||||||
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||||
"github.com/google/go-github/v45/github"
|
"github.com/google/go-github/v45/github"
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -314,22 +316,52 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
|
||||||
numRunners int
|
numRunners int
|
||||||
numRunnersRegistered int
|
numRunnersRegistered int
|
||||||
numRunnersBusy int
|
numRunnersBusy int
|
||||||
|
numTerminatingBusy int
|
||||||
)
|
)
|
||||||
|
|
||||||
numRunners = len(runnerMap)
|
numRunners = len(runnerMap)
|
||||||
|
|
||||||
|
busyTerminatingRunnerPods := map[string]struct{}{}
|
||||||
|
|
||||||
|
kindLabel := LabelKeyRunnerDeploymentName
|
||||||
|
if hra.Spec.ScaleTargetRef.Kind == "RunnerSet" {
|
||||||
|
kindLabel = LabelKeyRunnerSetName
|
||||||
|
}
|
||||||
|
|
||||||
|
var runnerPodList corev1.PodList
|
||||||
|
if err := r.Client.List(ctx, &runnerPodList, client.InNamespace(hra.Namespace), client.MatchingLabels(map[string]string{
|
||||||
|
kindLabel: hra.Spec.ScaleTargetRef.Name,
|
||||||
|
})); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, p := range runnerPodList.Items {
|
||||||
|
if p.Annotations[AnnotationKeyUnregistrationFailureMessage] != "" {
|
||||||
|
busyTerminatingRunnerPods[p.Name] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for _, runner := range runners {
|
for _, runner := range runners {
|
||||||
if _, ok := runnerMap[*runner.Name]; ok {
|
if _, ok := runnerMap[*runner.Name]; ok {
|
||||||
numRunnersRegistered++
|
numRunnersRegistered++
|
||||||
|
|
||||||
if runner.GetBusy() {
|
if runner.GetBusy() {
|
||||||
numRunnersBusy++
|
numRunnersBusy++
|
||||||
|
} else if _, ok := busyTerminatingRunnerPods[*runner.Name]; ok {
|
||||||
|
numTerminatingBusy++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
delete(busyTerminatingRunnerPods, *runner.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remaining busyTerminatingRunnerPods are runners that were not on the ListRunners API response yet
|
||||||
|
for range busyTerminatingRunnerPods {
|
||||||
|
numTerminatingBusy++
|
||||||
|
}
|
||||||
|
|
||||||
var desiredReplicas int
|
var desiredReplicas int
|
||||||
fractionBusy := float64(numRunnersBusy) / float64(desiredReplicasBefore)
|
fractionBusy := float64(numRunnersBusy+numTerminatingBusy) / float64(desiredReplicasBefore)
|
||||||
if fractionBusy >= scaleUpThreshold {
|
if fractionBusy >= scaleUpThreshold {
|
||||||
if scaleUpAdjustment > 0 {
|
if scaleUpAdjustment > 0 {
|
||||||
desiredReplicas = desiredReplicasBefore + scaleUpAdjustment
|
desiredReplicas = desiredReplicasBefore + scaleUpAdjustment
|
||||||
|
|
@ -358,6 +390,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
|
||||||
"num_runners", numRunners,
|
"num_runners", numRunners,
|
||||||
"num_runners_registered", numRunnersRegistered,
|
"num_runners_registered", numRunnersRegistered,
|
||||||
"num_runners_busy", numRunnersBusy,
|
"num_runners_busy", numRunnersBusy,
|
||||||
|
"num_terminating_busy", numTerminatingBusy,
|
||||||
"namespace", hra.Namespace,
|
"namespace", hra.Namespace,
|
||||||
"kind", st.kind,
|
"kind", st.kind,
|
||||||
"name", st.st,
|
"name", st.st,
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import "time"
|
||||||
|
|
||||||
const (
|
const (
|
||||||
LabelKeyRunnerSetName = "runnerset-name"
|
LabelKeyRunnerSetName = "runnerset-name"
|
||||||
|
LabelKeyRunner = "actions-runner"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -16,6 +17,9 @@ const (
|
||||||
|
|
||||||
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
|
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
|
||||||
|
|
||||||
|
// AnnotationKeyUnregistrationFailureMessage is the annotation that is added onto the pod once it failed to be unregistered from GitHub due to e.g. 422 error
|
||||||
|
AnnotationKeyUnregistrationFailureMessage = annotationKeyPrefix + "unregistration-failure-message"
|
||||||
|
|
||||||
// AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
|
// AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
|
||||||
AnnotationKeyUnregistrationCompleteTimestamp = annotationKeyPrefix + "unregistration-complete-timestamp"
|
AnnotationKeyUnregistrationCompleteTimestamp = annotationKeyPrefix + "unregistration-complete-timestamp"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ func TestNewRunnerPod(t *testing.T) {
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"actions-runner-controller/inject-registration-token": "true",
|
"actions-runner-controller/inject-registration-token": "true",
|
||||||
"runnerset-name": "runner",
|
"actions-runner": "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Spec: corev1.PodSpec{
|
Spec: corev1.PodSpec{
|
||||||
|
|
@ -198,7 +198,7 @@ func TestNewRunnerPod(t *testing.T) {
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"actions-runner-controller/inject-registration-token": "true",
|
"actions-runner-controller/inject-registration-token": "true",
|
||||||
"runnerset-name": "runner",
|
"actions-runner": "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Spec: corev1.PodSpec{
|
Spec: corev1.PodSpec{
|
||||||
|
|
@ -276,7 +276,7 @@ func TestNewRunnerPod(t *testing.T) {
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"actions-runner-controller/inject-registration-token": "true",
|
"actions-runner-controller/inject-registration-token": "true",
|
||||||
"runnerset-name": "runner",
|
"actions-runner": "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Spec: corev1.PodSpec{
|
Spec: corev1.PodSpec{
|
||||||
|
|
@ -515,7 +515,7 @@ func TestNewRunnerPod(t *testing.T) {
|
||||||
for i := range testcases {
|
for i := range testcases {
|
||||||
tc := testcases[i]
|
tc := testcases[i]
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
got, err := newRunnerPod("runner", tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
got, err := newRunnerPod(tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, tc.want, got)
|
require.Equal(t, tc.want, got)
|
||||||
})
|
})
|
||||||
|
|
@ -546,7 +546,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"actions-runner-controller/inject-registration-token": "true",
|
"actions-runner-controller/inject-registration-token": "true",
|
||||||
"pod-template-hash": "8857b86c7",
|
"pod-template-hash": "8857b86c7",
|
||||||
"runnerset-name": "runner",
|
"actions-runner": "",
|
||||||
},
|
},
|
||||||
OwnerReferences: []metav1.OwnerReference{
|
OwnerReferences: []metav1.OwnerReference{
|
||||||
{
|
{
|
||||||
|
|
@ -703,7 +703,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"actions-runner-controller/inject-registration-token": "true",
|
"actions-runner-controller/inject-registration-token": "true",
|
||||||
"pod-template-hash": "8857b86c7",
|
"pod-template-hash": "8857b86c7",
|
||||||
"runnerset-name": "runner",
|
"actions-runner": "",
|
||||||
},
|
},
|
||||||
OwnerReferences: []metav1.OwnerReference{
|
OwnerReferences: []metav1.OwnerReference{
|
||||||
{
|
{
|
||||||
|
|
@ -800,7 +800,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
||||||
Labels: map[string]string{
|
Labels: map[string]string{
|
||||||
"actions-runner-controller/inject-registration-token": "true",
|
"actions-runner-controller/inject-registration-token": "true",
|
||||||
"pod-template-hash": "8857b86c7",
|
"pod-template-hash": "8857b86c7",
|
||||||
"runnerset-name": "runner",
|
"actions-runner": "",
|
||||||
},
|
},
|
||||||
OwnerReferences: []metav1.OwnerReference{
|
OwnerReferences: []metav1.OwnerReference{
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -426,7 +426,7 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, runner.Name, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
|
pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return pod, err
|
return pod, err
|
||||||
}
|
}
|
||||||
|
|
@ -589,7 +589,7 @@ func runnerHookEnvs(pod *corev1.Pod) ([]corev1.EnvVar, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRunnerPodWithContainerMode(containerMode string, runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
func newRunnerPodWithContainerMode(containerMode string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
||||||
var (
|
var (
|
||||||
privileged bool = true
|
privileged bool = true
|
||||||
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
|
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
|
||||||
|
|
@ -607,7 +607,7 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
|
||||||
template = *template.DeepCopy()
|
template = *template.DeepCopy()
|
||||||
|
|
||||||
// This label selector is used by default when rd.Spec.Selector is empty.
|
// This label selector is used by default when rd.Spec.Selector is empty.
|
||||||
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerName)
|
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunner, "")
|
||||||
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
|
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
|
||||||
|
|
||||||
workDir := runnerSpec.WorkDir
|
workDir := runnerSpec.WorkDir
|
||||||
|
|
@ -962,8 +962,8 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
|
||||||
return *pod, nil
|
return *pod, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
||||||
return newRunnerPodWithContainerMode("", runnerName, template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
return newRunnerPodWithContainerMode("", template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
|
|
|
||||||
|
|
@ -67,6 +67,25 @@ func annotatePodOnce(ctx context.Context, c client.Client, log logr.Logger, pod
|
||||||
|
|
||||||
return updated, nil
|
return updated, nil
|
||||||
}
|
}
|
||||||
|
func labelPod(ctx context.Context, c client.Client, log logr.Logger, pod *corev1.Pod, k, v string) (*corev1.Pod, error) {
|
||||||
|
if pod == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
updated := pod.DeepCopy()
|
||||||
|
if updated.Labels == nil {
|
||||||
|
updated.Labels = map[string]string{}
|
||||||
|
}
|
||||||
|
updated.Labels[k] = v
|
||||||
|
if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil {
|
||||||
|
log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", k))
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.V(2).Info("Labeled pod", "key", k, "value", v)
|
||||||
|
|
||||||
|
return updated, nil
|
||||||
|
}
|
||||||
|
|
||||||
// If the first return value is nil, it's safe to delete the runner pod.
|
// If the first return value is nil, it's safe to delete the runner pod.
|
||||||
func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
|
func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
|
||||||
|
|
@ -151,7 +170,10 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
||||||
|
|
||||||
log.V(1).Info("Failed to unregister runner before deleting the pod.", "error", err)
|
log.V(1).Info("Failed to unregister runner before deleting the pod.", "error", err)
|
||||||
|
|
||||||
var runnerBusy bool
|
var (
|
||||||
|
runnerBusy bool
|
||||||
|
runnerUnregistrationFailureMessage string
|
||||||
|
)
|
||||||
|
|
||||||
errRes := &gogithub.ErrorResponse{}
|
errRes := &gogithub.ErrorResponse{}
|
||||||
if errors.As(err, &errRes) {
|
if errors.As(err, &errRes) {
|
||||||
|
|
@ -173,6 +195,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
||||||
}
|
}
|
||||||
|
|
||||||
runnerBusy = errRes.Response.StatusCode == 422
|
runnerBusy = errRes.Response.StatusCode == 422
|
||||||
|
runnerUnregistrationFailureMessage = errRes.Message
|
||||||
|
|
||||||
if runnerBusy && code != nil {
|
if runnerBusy && code != nil {
|
||||||
log.V(2).Info("Runner container has already stopped but the unregistration attempt failed. "+
|
log.V(2).Info("Runner container has already stopped but the unregistration attempt failed. "+
|
||||||
|
|
@ -187,6 +210,11 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
||||||
}
|
}
|
||||||
|
|
||||||
if runnerBusy {
|
if runnerBusy {
|
||||||
|
_, err := labelPod(ctx, c, log, pod, AnnotationKeyUnregistrationFailureMessage, runnerUnregistrationFailureMessage)
|
||||||
|
if err != nil {
|
||||||
|
return &ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
// We want to prevent spamming the deletion attemps but returning ctrl.Result with RequeueAfter doesn't
|
// We want to prevent spamming the deletion attemps but returning ctrl.Result with RequeueAfter doesn't
|
||||||
// work as the reconcilation can happen earlier due to pod status update.
|
// work as the reconcilation can happen earlier due to pod status update.
|
||||||
// For ephemeral runners, we can expect it to stop and unregister itself on completion.
|
// For ephemeral runners, we can expect it to stop and unregister itself on completion.
|
||||||
|
|
|
||||||
|
|
@ -62,8 +62,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, isRunnerPod := runnerPod.Labels[LabelKeyRunnerSetName]
|
if _, isRunnerPod := runnerPod.Labels[LabelKeyRunner]; !isRunnerPod {
|
||||||
if !isRunnerPod {
|
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -219,7 +219,9 @@ func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*ap
|
||||||
template.Spec.ServiceAccountName = runnerSet.Spec.ServiceAccountName
|
template.Spec.ServiceAccountName = runnerSet.Spec.ServiceAccountName
|
||||||
}
|
}
|
||||||
|
|
||||||
pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, runnerSet.Name, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
|
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerSet.Name)
|
||||||
|
|
||||||
|
pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue