Make EphemeralRunnerReconciler create runner pods faster and earlier

This commit is contained in:
Yusuke Kuoka 2024-09-30 04:48:33 +00:00
parent 3778fb0f65
commit d72e6f60c9
1 changed files with 38 additions and 17 deletions

View File

@ -182,7 +182,9 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if ephemeralRunner.Status.RunnerId == 0 { if ephemeralRunner.Status.RunnerId == 0 {
log.Info("Creating new ephemeral runner registration and updating status with runner config") log.Info("Creating new ephemeral runner registration and updating status with runner config")
return r.updateStatusWithRunnerConfig(ctx, ephemeralRunner, log) if r, err := r.updateStatusWithRunnerConfig(ctx, ephemeralRunner, log); r != nil {
return *r, err
}
} }
secret := new(corev1.Secret) secret := new(corev1.Secret)
@ -193,7 +195,17 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
} }
// create secret if not created // create secret if not created
log.Info("Creating new ephemeral runner secret for jitconfig.") log.Info("Creating new ephemeral runner secret for jitconfig.")
return r.createSecret(ctx, ephemeralRunner, log) if r, err := r.createSecret(ctx, ephemeralRunner, log); r != nil {
return *r, err
}
// Retry to get the secret that was just created.
// Otherwise, even though we want to continue to create the pod,
// it fails due to the missing secret resulting in an invalid pod spec.
if err := r.Get(ctx, req.NamespacedName, secret); err != nil {
log.Error(err, "Failed to fetch secret")
return ctrl.Result{}, err
}
} }
pod := new(corev1.Pod) pod := new(corev1.Pod)
@ -563,7 +575,7 @@ func (r *EphemeralRunnerReconciler) deletePodAsFailed(ctx context.Context, ephem
// updateStatusWithRunnerConfig fetches runtime configuration needed by the runner // updateStatusWithRunnerConfig fetches runtime configuration needed by the runner
// This method should always set .status.runnerId and .status.runnerJITConfig // This method should always set .status.runnerId and .status.runnerJITConfig
func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (ctrl.Result, error) { func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
ctx, span := otel.Tracer("arc").Start(ctx, "EphemeralRunnerReconciler.updateStatusWithRunnerConfig") ctx, span := otel.Tracer("arc").Start(ctx, "EphemeralRunnerReconciler.updateStatusWithRunnerConfig")
defer span.End() defer span.End()
@ -571,7 +583,7 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
log.Info("Creating ephemeral runner JIT config") log.Info("Creating ephemeral runner JIT config")
actionsClient, err := r.actionsClientFor(ctx, ephemeralRunner) actionsClient, err := r.actionsClientFor(ctx, ephemeralRunner)
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get actions client for generating JIT config: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to get actions client for generating JIT config: %v", err)
} }
jitSettings := &actions.RunnerScaleSetJitRunnerSetting{ jitSettings := &actions.RunnerScaleSetJitRunnerSetting{
@ -581,12 +593,12 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
if err != nil { if err != nil {
actionsError := &actions.ActionsError{} actionsError := &actions.ActionsError{}
if !errors.As(err, &actionsError) { if !errors.As(err, &actionsError) {
return ctrl.Result{}, fmt.Errorf("failed to generate JIT config with generic error: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with generic error: %v", err)
} }
if actionsError.StatusCode != http.StatusConflict || if actionsError.StatusCode != http.StatusConflict ||
!actionsError.IsException("AgentExistsException") { !actionsError.IsException("AgentExistsException") {
return ctrl.Result{}, fmt.Errorf("failed to generate JIT config with Actions service error: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with Actions service error: %v", err)
} }
// If the runner with the name we want already exists it means: // If the runner with the name we want already exists it means:
@ -599,12 +611,12 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
log.Info("Getting runner jit config failed with conflict error, trying to get the runner by name", "runnerName", ephemeralRunner.Name) log.Info("Getting runner jit config failed with conflict error, trying to get the runner by name", "runnerName", ephemeralRunner.Name)
existingRunner, err := actionsClient.GetRunnerByName(ctx, ephemeralRunner.Name) existingRunner, err := actionsClient.GetRunnerByName(ctx, ephemeralRunner.Name)
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get runner by name: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to get runner by name: %v", err)
} }
if existingRunner == nil { if existingRunner == nil {
log.Info("Runner with the same name does not exist, re-queuing the reconciliation") log.Info("Runner with the same name does not exist, re-queuing the reconciliation")
return ctrl.Result{Requeue: true}, nil return &ctrl.Result{Requeue: true}, nil
} }
log.Info("Found the runner with the same name", "runnerId", existingRunner.Id, "runnerScaleSetId", existingRunner.RunnerScaleSetId) log.Info("Found the runner with the same name", "runnerId", existingRunner.Id, "runnerScaleSetId", existingRunner.RunnerScaleSetId)
@ -612,16 +624,16 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
log.Info("Removing the runner with the same name") log.Info("Removing the runner with the same name")
err := actionsClient.RemoveRunner(ctx, int64(existingRunner.Id)) err := actionsClient.RemoveRunner(ctx, int64(existingRunner.Id))
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to remove runner from the service: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to remove runner from the service: %v", err)
} }
log.Info("Removed the runner with the same name, re-queuing the reconciliation") log.Info("Removed the runner with the same name, re-queuing the reconciliation")
return ctrl.Result{Requeue: true}, nil return &ctrl.Result{Requeue: true}, nil
} }
// TODO: Do we want to mark the ephemeral runner as failed, and let EphemeralRunnerSet to clean it up, so we can recover from this situation? // TODO: Do we want to mark the ephemeral runner as failed, and let EphemeralRunnerSet to clean it up, so we can recover from this situation?
// The situation is that the EphemeralRunner's name is already used by something else to register a runner, and we can't take the control back. // The situation is that the EphemeralRunner's name is already used by something else to register a runner, and we can't take the control back.
return ctrl.Result{}, fmt.Errorf("runner with the same name but doesn't belong to this RunnerScaleSet: %v", err) return &ctrl.Result{}, fmt.Errorf("runner with the same name but doesn't belong to this RunnerScaleSet: %v", err)
} }
log.Info("Created ephemeral runner JIT config", "runnerId", jitConfig.Runner.Id) log.Info("Created ephemeral runner JIT config", "runnerId", jitConfig.Runner.Id)
@ -632,11 +644,20 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
obj.Status.RunnerJITConfig = jitConfig.EncodedJITConfig obj.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
}) })
if err != nil { if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %v", err)
} }
// We want to continue without a requeue for faster pod creation.
//
// To do so, we update the status in-place, so that both continuing the loop and
// and requeuing and skipping updateStatusWithRunnerConfig in the next loop, will
// have the same effect.
ephemeralRunner.Status.RunnerId = jitConfig.Runner.Id
ephemeralRunner.Status.RunnerName = jitConfig.Runner.Name
ephemeralRunner.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
log.Info("Updated ephemeral runner status with runnerId and runnerJITConfig") log.Info("Updated ephemeral runner status with runnerId and runnerJITConfig")
return ctrl.Result{}, nil return nil, nil
} }
func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, log logr.Logger) (ctrl.Result, error) { func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, log logr.Logger) (ctrl.Result, error) {
@ -715,7 +736,7 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
return ctrl.Result{}, nil return ctrl.Result{}, nil
} }
func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (ctrl.Result, error) { func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
ctx, span := otel.Tracer("arc").Start(ctx, "EphemeralRunnerReconciler.createSecret") ctx, span := otel.Tracer("arc").Start(ctx, "EphemeralRunnerReconciler.createSecret")
defer span.End() defer span.End()
@ -723,16 +744,16 @@ func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1
jitSecret := r.resourceBuilder.newEphemeralRunnerJitSecret(runner) jitSecret := r.resourceBuilder.newEphemeralRunnerJitSecret(runner)
if err := ctrl.SetControllerReference(runner, jitSecret, r.Scheme); err != nil { if err := ctrl.SetControllerReference(runner, jitSecret, r.Scheme); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to set controller reference: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to set controller reference: %v", err)
} }
log.Info("Created new secret spec for ephemeral runner") log.Info("Created new secret spec for ephemeral runner")
if err := r.Create(ctx, jitSecret); err != nil { if err := r.Create(ctx, jitSecret); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create jit secret: %v", err) return &ctrl.Result{}, fmt.Errorf("failed to create jit secret: %v", err)
} }
log.Info("Created ephemeral runner secret", "secretName", jitSecret.Name) log.Info("Created ephemeral runner secret", "secretName", jitSecret.Name)
return ctrl.Result{Requeue: true}, nil return nil, nil
} }
// updateRunStatusFromPod is responsible for updating non-exiting statuses. // updateRunStatusFromPod is responsible for updating non-exiting statuses.