Remove check if runner exists after exit code 0 (#4142)

This commit is contained in:
Nikola Jokic 2025-06-27 11:11:39 +02:00 committed by GitHub
parent e6e621a50a
commit 3b5693eecb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 98 deletions

View File

@ -293,28 +293,10 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
return ctrl.Result{}, nil
default:
// pod succeeded. We double-check with the service if the runner exists.
// The reason is that image can potentially finish with status 0, but not pick up the job.
existsInService, err := r.runnerRegisteredWithService(ctx, ephemeralRunner.DeepCopy(), log)
if err != nil {
log.Error(err, "Failed to check if runner is registered with the service")
return ctrl.Result{}, err
}
if !existsInService {
// the runner does not exist in the service, so it must be done
log.Info("Ephemeral runner has finished since it does not exist in the service anymore")
if err := r.markAsFinished(ctx, ephemeralRunner, log); err != nil {
log.Error(err, "Failed to mark ephemeral runner as finished")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
// The runner still exists. This can happen if the pod exited with 0 but fails to start
log.Info("Ephemeral runner pod has finished, but the runner still exists in the service. Deleting the pod to restart it.")
if err := r.deletePodAsFailed(ctx, ephemeralRunner, pod, log); err != nil {
log.Error(err, "failed to delete a pod that still exists in the service")
default: // succeeded
log.Info("Ephemeral runner has finished successfully")
if err := r.markAsFinished(ctx, ephemeralRunner, log); err != nil {
log.Error(err, "Failed to mark ephemeral runner as finished")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
@ -752,35 +734,6 @@ func (r *EphemeralRunnerReconciler) updateRunStatusFromPod(ctx context.Context,
return nil
}
// runnerRegisteredWithService checks if the runner is still registered with the service
// Returns found=false and err=nil if ephemeral runner does not exist in GitHub service and should be deleted
func (r EphemeralRunnerReconciler) runnerRegisteredWithService(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (found bool, err error) {
actionsClient, err := r.GetActionsService(ctx, runner)
if err != nil {
return false, fmt.Errorf("failed to get Actions client for ScaleSet: %w", err)
}
log.Info("Checking if runner exists in GitHub service", "runnerId", runner.Status.RunnerId)
_, err = actionsClient.GetRunner(ctx, int64(runner.Status.RunnerId))
if err != nil {
actionsError := &actions.ActionsError{}
if !errors.As(err, &actionsError) {
return false, err
}
if actionsError.StatusCode != http.StatusNotFound ||
!actionsError.IsException("AgentNotFoundException") {
return false, fmt.Errorf("failed to check if runner exists in GitHub service: %w", err)
}
log.Info("Runner does not exist in GitHub service", "runnerId", runner.Status.RunnerId)
return false, nil
}
log.Info("Runner exists in GitHub service", "runnerId", runner.Status.RunnerId)
return true, nil
}
func (r *EphemeralRunnerReconciler) deleteRunnerFromService(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) error {
client, err := r.GetActionsService(ctx, ephemeralRunner)
if err != nil {

View File

@ -675,53 +675,6 @@ var _ = Describe("EphemeralRunner", func() {
).Should(BeEquivalentTo(true))
})
It("It should re-create pod on exit status 0, but runner exists within the service", func() {
pod := new(corev1.Pod)
Eventually(
func() (bool, error) {
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
return false, err
}
return true, nil
},
ephemeralRunnerTimeout,
ephemeralRunnerInterval,
).Should(BeEquivalentTo(true))
pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{
Name: v1alpha1.EphemeralRunnerContainerName,
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
ExitCode: 0,
},
},
})
err := k8sClient.Status().Update(ctx, pod)
Expect(err).To(BeNil(), "failed to update pod status")
updated := new(v1alpha1.EphemeralRunner)
Eventually(func() (bool, error) {
err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated)
if err != nil {
return false, err
}
return len(updated.Status.Failures) == 1, nil
}, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true))
// should re-create after failure
Eventually(
func() (bool, error) {
pod := new(corev1.Pod)
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
return false, err
}
return true, nil
},
ephemeralRunnerTimeout,
ephemeralRunnerInterval,
).Should(BeEquivalentTo(true))
})
It("It should not set the phase to succeeded without pod termination status", func() {
pod := new(corev1.Pod)
Eventually(