Fix PercentageRunnersBusy scaling delay (#1579)
* Use a dedicated pod label to say it is a runner pod Follow-up for #1546 * Fix PercentageRunnersBusy scaling delay Ref #1374
This commit is contained in:
parent
a9ac5a1cbf
commit
8161136cbd
|
|
@ -10,6 +10,8 @@ import (
|
|||
|
||||
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
|
||||
"github.com/google/go-github/v45/github"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
@ -314,22 +316,52 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
|
|||
numRunners int
|
||||
numRunnersRegistered int
|
||||
numRunnersBusy int
|
||||
numTerminatingBusy int
|
||||
)
|
||||
|
||||
numRunners = len(runnerMap)
|
||||
|
||||
busyTerminatingRunnerPods := map[string]struct{}{}
|
||||
|
||||
kindLabel := LabelKeyRunnerDeploymentName
|
||||
if hra.Spec.ScaleTargetRef.Kind == "RunnerSet" {
|
||||
kindLabel = LabelKeyRunnerSetName
|
||||
}
|
||||
|
||||
var runnerPodList corev1.PodList
|
||||
if err := r.Client.List(ctx, &runnerPodList, client.InNamespace(hra.Namespace), client.MatchingLabels(map[string]string{
|
||||
kindLabel: hra.Spec.ScaleTargetRef.Name,
|
||||
})); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, p := range runnerPodList.Items {
|
||||
if p.Annotations[AnnotationKeyUnregistrationFailureMessage] != "" {
|
||||
busyTerminatingRunnerPods[p.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
for _, runner := range runners {
|
||||
if _, ok := runnerMap[*runner.Name]; ok {
|
||||
numRunnersRegistered++
|
||||
|
||||
if runner.GetBusy() {
|
||||
numRunnersBusy++
|
||||
} else if _, ok := busyTerminatingRunnerPods[*runner.Name]; ok {
|
||||
numTerminatingBusy++
|
||||
}
|
||||
|
||||
delete(busyTerminatingRunnerPods, *runner.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// Remaining busyTerminatingRunnerPods are runners that were not on the ListRunners API response yet
|
||||
for range busyTerminatingRunnerPods {
|
||||
numTerminatingBusy++
|
||||
}
|
||||
|
||||
var desiredReplicas int
|
||||
fractionBusy := float64(numRunnersBusy) / float64(desiredReplicasBefore)
|
||||
fractionBusy := float64(numRunnersBusy+numTerminatingBusy) / float64(desiredReplicasBefore)
|
||||
if fractionBusy >= scaleUpThreshold {
|
||||
if scaleUpAdjustment > 0 {
|
||||
desiredReplicas = desiredReplicasBefore + scaleUpAdjustment
|
||||
|
|
@ -358,6 +390,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) suggestReplicasByPercentageRunner
|
|||
"num_runners", numRunners,
|
||||
"num_runners_registered", numRunnersRegistered,
|
||||
"num_runners_busy", numRunnersBusy,
|
||||
"num_terminating_busy", numTerminatingBusy,
|
||||
"namespace", hra.Namespace,
|
||||
"kind", st.kind,
|
||||
"name", st.st,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import "time"
|
|||
|
||||
const (
|
||||
LabelKeyRunnerSetName = "runnerset-name"
|
||||
LabelKeyRunner = "actions-runner"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
@ -16,6 +17,9 @@ const (
|
|||
|
||||
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
|
||||
|
||||
// AnnotationKeyUnregistrationFailureMessage is the annotation that is added onto the pod once it failed to be unregistered from GitHub due to e.g. 422 error
|
||||
AnnotationKeyUnregistrationFailureMessage = annotationKeyPrefix + "unregistration-failure-message"
|
||||
|
||||
// AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
|
||||
AnnotationKeyUnregistrationCompleteTimestamp = annotationKeyPrefix + "unregistration-complete-timestamp"
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ func TestNewRunnerPod(t *testing.T) {
|
|||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"actions-runner-controller/inject-registration-token": "true",
|
||||
"runnerset-name": "runner",
|
||||
"actions-runner": "",
|
||||
},
|
||||
},
|
||||
Spec: corev1.PodSpec{
|
||||
|
|
@ -198,7 +198,7 @@ func TestNewRunnerPod(t *testing.T) {
|
|||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"actions-runner-controller/inject-registration-token": "true",
|
||||
"runnerset-name": "runner",
|
||||
"actions-runner": "",
|
||||
},
|
||||
},
|
||||
Spec: corev1.PodSpec{
|
||||
|
|
@ -276,7 +276,7 @@ func TestNewRunnerPod(t *testing.T) {
|
|||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"actions-runner-controller/inject-registration-token": "true",
|
||||
"runnerset-name": "runner",
|
||||
"actions-runner": "",
|
||||
},
|
||||
},
|
||||
Spec: corev1.PodSpec{
|
||||
|
|
@ -515,7 +515,7 @@ func TestNewRunnerPod(t *testing.T) {
|
|||
for i := range testcases {
|
||||
tc := testcases[i]
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
got, err := newRunnerPod("runner", tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
||||
got, err := newRunnerPod(tc.template, tc.config, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tc.want, got)
|
||||
})
|
||||
|
|
@ -546,7 +546,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
|||
Labels: map[string]string{
|
||||
"actions-runner-controller/inject-registration-token": "true",
|
||||
"pod-template-hash": "8857b86c7",
|
||||
"runnerset-name": "runner",
|
||||
"actions-runner": "",
|
||||
},
|
||||
OwnerReferences: []metav1.OwnerReference{
|
||||
{
|
||||
|
|
@ -703,7 +703,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
|||
Labels: map[string]string{
|
||||
"actions-runner-controller/inject-registration-token": "true",
|
||||
"pod-template-hash": "8857b86c7",
|
||||
"runnerset-name": "runner",
|
||||
"actions-runner": "",
|
||||
},
|
||||
OwnerReferences: []metav1.OwnerReference{
|
||||
{
|
||||
|
|
@ -800,7 +800,7 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
|
|||
Labels: map[string]string{
|
||||
"actions-runner-controller/inject-registration-token": "true",
|
||||
"pod-template-hash": "8857b86c7",
|
||||
"runnerset-name": "runner",
|
||||
"actions-runner": "",
|
||||
},
|
||||
OwnerReferences: []metav1.OwnerReference{
|
||||
{
|
||||
|
|
|
|||
|
|
@ -426,7 +426,7 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
|
|||
}
|
||||
}
|
||||
|
||||
pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, runner.Name, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
|
||||
pod, err := newRunnerPodWithContainerMode(runner.Spec.ContainerMode, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL)
|
||||
if err != nil {
|
||||
return pod, err
|
||||
}
|
||||
|
|
@ -589,7 +589,7 @@ func runnerHookEnvs(pod *corev1.Pod) ([]corev1.EnvVar, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func newRunnerPodWithContainerMode(containerMode string, runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
||||
func newRunnerPodWithContainerMode(containerMode string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
||||
var (
|
||||
privileged bool = true
|
||||
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
|
||||
|
|
@ -607,7 +607,7 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
|
|||
template = *template.DeepCopy()
|
||||
|
||||
// This label selector is used by default when rd.Spec.Selector is empty.
|
||||
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerName)
|
||||
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunner, "")
|
||||
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
|
||||
|
||||
workDir := runnerSpec.WorkDir
|
||||
|
|
@ -962,8 +962,8 @@ func newRunnerPodWithContainerMode(containerMode string, runnerName string, temp
|
|||
return *pod, nil
|
||||
}
|
||||
|
||||
func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
||||
return newRunnerPodWithContainerMode("", runnerName, template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
||||
func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string) (corev1.Pod, error) {
|
||||
return newRunnerPodWithContainerMode("", template, runnerSpec, defaultRunnerImage, defaultRunnerImagePullSecrets, defaultDockerImage, defaultDockerRegistryMirror, githubBaseURL)
|
||||
}
|
||||
|
||||
func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
|
|
|
|||
|
|
@ -67,6 +67,25 @@ func annotatePodOnce(ctx context.Context, c client.Client, log logr.Logger, pod
|
|||
|
||||
return updated, nil
|
||||
}
|
||||
func labelPod(ctx context.Context, c client.Client, log logr.Logger, pod *corev1.Pod, k, v string) (*corev1.Pod, error) {
|
||||
if pod == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
updated := pod.DeepCopy()
|
||||
if updated.Labels == nil {
|
||||
updated.Labels = map[string]string{}
|
||||
}
|
||||
updated.Labels[k] = v
|
||||
if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil {
|
||||
log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", k))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.V(2).Info("Labeled pod", "key", k, "value", v)
|
||||
|
||||
return updated, nil
|
||||
}
|
||||
|
||||
// If the first return value is nil, it's safe to delete the runner pod.
|
||||
func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
|
||||
|
|
@ -151,7 +170,10 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
|
||||
log.V(1).Info("Failed to unregister runner before deleting the pod.", "error", err)
|
||||
|
||||
var runnerBusy bool
|
||||
var (
|
||||
runnerBusy bool
|
||||
runnerUnregistrationFailureMessage string
|
||||
)
|
||||
|
||||
errRes := &gogithub.ErrorResponse{}
|
||||
if errors.As(err, &errRes) {
|
||||
|
|
@ -173,6 +195,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
}
|
||||
|
||||
runnerBusy = errRes.Response.StatusCode == 422
|
||||
runnerUnregistrationFailureMessage = errRes.Message
|
||||
|
||||
if runnerBusy && code != nil {
|
||||
log.V(2).Info("Runner container has already stopped but the unregistration attempt failed. "+
|
||||
|
|
@ -187,6 +210,11 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
}
|
||||
|
||||
if runnerBusy {
|
||||
_, err := labelPod(ctx, c, log, pod, AnnotationKeyUnregistrationFailureMessage, runnerUnregistrationFailureMessage)
|
||||
if err != nil {
|
||||
return &ctrl.Result{}, err
|
||||
}
|
||||
|
||||
// We want to prevent spamming the deletion attemps but returning ctrl.Result with RequeueAfter doesn't
|
||||
// work as the reconcilation can happen earlier due to pod status update.
|
||||
// For ephemeral runners, we can expect it to stop and unregister itself on completion.
|
||||
|
|
|
|||
|
|
@ -62,8 +62,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
|
||||
_, isRunnerPod := runnerPod.Labels[LabelKeyRunnerSetName]
|
||||
if !isRunnerPod {
|
||||
if _, isRunnerPod := runnerPod.Labels[LabelKeyRunner]; !isRunnerPod {
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -219,7 +219,9 @@ func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*ap
|
|||
template.Spec.ServiceAccountName = runnerSet.Spec.ServiceAccountName
|
||||
}
|
||||
|
||||
pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, runnerSet.Name, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
|
||||
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerSet.Name)
|
||||
|
||||
pod, err := newRunnerPodWithContainerMode(runnerSet.Spec.RunnerConfig.ContainerMode, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue