Update the ephemeral runner controller to retry even if the pod fails to be created

This commit is contained in:
Adam Furbee 2025-10-11 00:04:15 -05:00 committed by Adam Furbee
parent a0c30df25b
commit 8428fcd49b
2 changed files with 22 additions and 13 deletions

View File

@ -139,7 +139,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
return ctrl.Result{}, nil return ctrl.Result{}, nil
} }
if ephemeralRunner.IsDone() { if ephemeralRunner.IsDone() && ephemeralRunner.Status.Phase != corev1.PodFailed {
log.Info("Cleaning up resources after after ephemeral runner termination", "phase", ephemeralRunner.Status.Phase) log.Info("Cleaning up resources after after ephemeral runner termination", "phase", ephemeralRunner.Status.Phase)
err := r.cleanupResources(ctx, ephemeralRunner, log) err := r.cleanupResources(ctx, ephemeralRunner, log)
if err != nil { if err != nil {
@ -517,16 +517,14 @@ func (r *EphemeralRunnerReconciler) markAsFailed(ctx context.Context, ephemeralR
obj.Status.Phase = corev1.PodFailed obj.Status.Phase = corev1.PodFailed
obj.Status.Reason = reason obj.Status.Reason = reason
obj.Status.Message = errMessage obj.Status.Message = errMessage
if obj.Status.Failures == nil {
obj.Status.Failures = make(map[string]metav1.Time)
}
obj.Status.Failures[metav1.Now().GoString()] = metav1.Now()
}); err != nil { }); err != nil {
return fmt.Errorf("failed to update ephemeral runner status Phase/Message: %w", err) return fmt.Errorf("failed to update ephemeral runner status Phase/Message: %w", err)
} }
log.Info("EphemeralRunner is marked as Failed")
log.Info("Removing the runner from the service")
if err := r.deleteRunnerFromService(ctx, ephemeralRunner, log); err != nil {
return fmt.Errorf("failed to remove the runner from service: %w", err)
}
log.Info("EphemeralRunner is marked as Failed and deleted from the service")
return nil return nil
} }

View File

@ -261,11 +261,12 @@ var _ = Describe("EphemeralRunner", func() {
).Should(BeTrue(), "Ephemeral runner should eventually be deleted") ).Should(BeTrue(), "Ephemeral runner should eventually be deleted")
}) })
It("It should failed if a pod template is invalid", func() { It("It should failed and eventually retry if a pod template is invalid", func() {
invalideEphemeralRunner := newExampleRunner("invalid-ephemeral-runner", autoscalingNS.Name, configSecret.Name)
invalideEphemeralRunner.Spec.Spec.PriorityClassName = "notexist"
err := k8sClient.Create(ctx, invalideEphemeralRunner) invalidEphemeralRunner := newExampleRunner("invalid-ephemeral-runner", autoscalingNS.Name, configSecret.Name)
invalidEphemeralRunner.Spec.Spec.PriorityClassName = "notexist"
err := k8sClient.Create(ctx, invalidEphemeralRunner)
Expect(err).To(BeNil()) Expect(err).To(BeNil())
updated := new(v1alpha1.EphemeralRunner) updated := new(v1alpha1.EphemeralRunner)
@ -273,7 +274,7 @@ var _ = Describe("EphemeralRunner", func() {
func() (corev1.PodPhase, error) { func() (corev1.PodPhase, error) {
err := k8sClient.Get( err := k8sClient.Get(
ctx, ctx,
client.ObjectKey{Name: invalideEphemeralRunner.Name, Namespace: invalideEphemeralRunner.Namespace}, client.ObjectKey{Name: invalidEphemeralRunner.Name, Namespace: invalidEphemeralRunner.Namespace},
updated, updated,
) )
if err != nil { if err != nil {
@ -287,6 +288,16 @@ var _ = Describe("EphemeralRunner", func() {
Expect(updated.Status.Reason).Should(Equal("InvalidPod")) Expect(updated.Status.Reason).Should(Equal("InvalidPod"))
Expect(updated.Status.Message).Should(Equal("Failed to create the pod: pods \"invalid-ephemeral-runner\" is forbidden: no PriorityClass with name notexist was found")) Expect(updated.Status.Message).Should(Equal("Failed to create the pod: pods \"invalid-ephemeral-runner\" is forbidden: no PriorityClass with name notexist was found"))
er := new(v1alpha1.EphemeralRunner)
Eventually(
func() bool {
err := k8sClient.Get(ctx, client.ObjectKey{Name: invalidEphemeralRunner.Name, Namespace: invalidEphemeralRunner.Namespace}, er)
return kerrors.IsNotFound(err)
},
ephemeralRunnerTimeout,
ephemeralRunnerInterval,
).Should(BeTrue(), "Ephemeral runner should eventually be deleted")
}) })
It("It should clean up resources when deleted", func() { It("It should clean up resources when deleted", func() {