From 3b5693eecb868e0192ae12d0747ad944df7beed3 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Fri, 27 Jun 2025 11:11:39 +0200 Subject: [PATCH] Remove check if runner exists after exit code 0 (#4142) --- .../ephemeralrunner_controller.go | 55 ++----------------- .../ephemeralrunner_controller_test.go | 47 ---------------- 2 files changed, 4 insertions(+), 98 deletions(-) diff --git a/controllers/actions.github.com/ephemeralrunner_controller.go b/controllers/actions.github.com/ephemeralrunner_controller.go index 9ca3f52e..5324c036 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller.go +++ b/controllers/actions.github.com/ephemeralrunner_controller.go @@ -293,28 +293,10 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ } return ctrl.Result{}, nil - default: - // pod succeeded. We double-check with the service if the runner exists. - // The reason is that image can potentially finish with status 0, but not pick up the job. - existsInService, err := r.runnerRegisteredWithService(ctx, ephemeralRunner.DeepCopy(), log) - if err != nil { - log.Error(err, "Failed to check if runner is registered with the service") - return ctrl.Result{}, err - } - if !existsInService { - // the runner does not exist in the service, so it must be done - log.Info("Ephemeral runner has finished since it does not exist in the service anymore") - if err := r.markAsFinished(ctx, ephemeralRunner, log); err != nil { - log.Error(err, "Failed to mark ephemeral runner as finished") - return ctrl.Result{}, err - } - return ctrl.Result{}, nil - } - - // The runner still exists. This can happen if the pod exited with 0 but fails to start - log.Info("Ephemeral runner pod has finished, but the runner still exists in the service. Deleting the pod to restart it.") - if err := r.deletePodAsFailed(ctx, ephemeralRunner, pod, log); err != nil { - log.Error(err, "failed to delete a pod that still exists in the service") + default: // succeeded + log.Info("Ephemeral runner has finished successfully") + if err := r.markAsFinished(ctx, ephemeralRunner, log); err != nil { + log.Error(err, "Failed to mark ephemeral runner as finished") return ctrl.Result{}, err } return ctrl.Result{}, nil @@ -752,35 +734,6 @@ func (r *EphemeralRunnerReconciler) updateRunStatusFromPod(ctx context.Context, return nil } -// runnerRegisteredWithService checks if the runner is still registered with the service -// Returns found=false and err=nil if ephemeral runner does not exist in GitHub service and should be deleted -func (r EphemeralRunnerReconciler) runnerRegisteredWithService(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (found bool, err error) { - actionsClient, err := r.GetActionsService(ctx, runner) - if err != nil { - return false, fmt.Errorf("failed to get Actions client for ScaleSet: %w", err) - } - - log.Info("Checking if runner exists in GitHub service", "runnerId", runner.Status.RunnerId) - _, err = actionsClient.GetRunner(ctx, int64(runner.Status.RunnerId)) - if err != nil { - actionsError := &actions.ActionsError{} - if !errors.As(err, &actionsError) { - return false, err - } - - if actionsError.StatusCode != http.StatusNotFound || - !actionsError.IsException("AgentNotFoundException") { - return false, fmt.Errorf("failed to check if runner exists in GitHub service: %w", err) - } - - log.Info("Runner does not exist in GitHub service", "runnerId", runner.Status.RunnerId) - return false, nil - } - - log.Info("Runner exists in GitHub service", "runnerId", runner.Status.RunnerId) - return true, nil -} - func (r *EphemeralRunnerReconciler) deleteRunnerFromService(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) error { client, err := r.GetActionsService(ctx, ephemeralRunner) if err != nil { diff --git a/controllers/actions.github.com/ephemeralrunner_controller_test.go b/controllers/actions.github.com/ephemeralrunner_controller_test.go index 26d66814..3aa6a8ea 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller_test.go +++ b/controllers/actions.github.com/ephemeralrunner_controller_test.go @@ -675,53 +675,6 @@ var _ = Describe("EphemeralRunner", func() { ).Should(BeEquivalentTo(true)) }) - It("It should re-create pod on exit status 0, but runner exists within the service", func() { - pod := new(corev1.Pod) - Eventually( - func() (bool, error) { - if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil { - return false, err - } - return true, nil - }, - ephemeralRunnerTimeout, - ephemeralRunnerInterval, - ).Should(BeEquivalentTo(true)) - - pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{ - Name: v1alpha1.EphemeralRunnerContainerName, - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - ExitCode: 0, - }, - }, - }) - err := k8sClient.Status().Update(ctx, pod) - Expect(err).To(BeNil(), "failed to update pod status") - - updated := new(v1alpha1.EphemeralRunner) - Eventually(func() (bool, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated) - if err != nil { - return false, err - } - return len(updated.Status.Failures) == 1, nil - }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true)) - - // should re-create after failure - Eventually( - func() (bool, error) { - pod := new(corev1.Pod) - if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil { - return false, err - } - return true, nil - }, - ephemeralRunnerTimeout, - ephemeralRunnerInterval, - ).Should(BeEquivalentTo(true)) - }) - It("It should not set the phase to succeeded without pod termination status", func() { pod := new(corev1.Pod) Eventually(