Record the error when the creation pod fails (#3112)
Signed-off-by: utam0k <k0ma@utam0k.jp>
This commit is contained in:
parent
0bfa57ac50
commit
b08d533105
|
|
@ -66,3 +66,9 @@ const DefaultScaleSetListenerLogFormat = string(logging.LogFormatText)
|
|||
|
||||
// ownerKey is field selector matching the owner name of a particular resource
|
||||
const resourceOwnerKey = ".metadata.controller"
|
||||
|
||||
// EphemeralRunner pod creation failure reasons
|
||||
const (
|
||||
ReasonTooManyPodFailures = "TooManyPodFailures"
|
||||
ReasonInvalidPodFailure = "InvalidPod"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
|
|||
case len(ephemeralRunner.Status.Failures) > 5:
|
||||
log.Info("EphemeralRunner has failed more than 5 times. Marking it as failed")
|
||||
errMessage := fmt.Sprintf("Pod has failed to start more than 5 times: %s", pod.Status.Message)
|
||||
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, log); err != nil {
|
||||
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonTooManyPodFailures, log); err != nil {
|
||||
log.Error(err, "Failed to set ephemeral runner to phase Failed")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
|
@ -201,7 +201,22 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
|
|||
default:
|
||||
// Pod was not found. Create if the pod has never been created
|
||||
log.Info("Creating new EphemeralRunner pod.")
|
||||
return r.createPod(ctx, ephemeralRunner, secret, log)
|
||||
result, err := r.createPod(ctx, ephemeralRunner, secret, log)
|
||||
switch {
|
||||
case err == nil:
|
||||
return result, nil
|
||||
case kerrors.IsInvalid(err) || kerrors.IsForbidden(err):
|
||||
log.Error(err, "Failed to create a pod due to unrecoverable failure")
|
||||
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
|
||||
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
|
||||
log.Error(err, "Failed to set ephemeral runner to phase Failed")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
return ctrl.Result{}, nil
|
||||
default:
|
||||
log.Error(err, "Failed to create the pod")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -424,11 +439,11 @@ func (r *EphemeralRunnerReconciler) cleanupRunnerLinkedSecrets(ctx context.Conte
|
|||
return false, multierr.Combine(errs...)
|
||||
}
|
||||
|
||||
func (r *EphemeralRunnerReconciler) markAsFailed(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, errMessage string, log logr.Logger) error {
|
||||
func (r *EphemeralRunnerReconciler) markAsFailed(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, errMessage string, reason string, log logr.Logger) error {
|
||||
log.Info("Updating ephemeral runner status to Failed")
|
||||
if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
|
||||
obj.Status.Phase = corev1.PodFailed
|
||||
obj.Status.Reason = "TooManyPodFailures"
|
||||
obj.Status.Reason = reason
|
||||
obj.Status.Message = errMessage
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to update ephemeral runner status Phase/Message: %v", err)
|
||||
|
|
|
|||
|
|
@ -189,6 +189,25 @@ var _ = Describe("EphemeralRunner", func() {
|
|||
).Should(BeEquivalentTo(true))
|
||||
})
|
||||
|
||||
It("It should failed if a pod template is invalid", func() {
|
||||
invalideEphemeralRunner := newExampleRunner("invalid-ephemeral-runner", autoscalingNS.Name, configSecret.Name)
|
||||
invalideEphemeralRunner.Spec.Spec.PriorityClassName = "notexist"
|
||||
|
||||
err := k8sClient.Create(ctx, invalideEphemeralRunner)
|
||||
Expect(err).To(BeNil())
|
||||
|
||||
updated := new(v1alpha1.EphemeralRunner)
|
||||
Eventually(func() (corev1.PodPhase, error) {
|
||||
err := k8sClient.Get(ctx, client.ObjectKey{Name: invalideEphemeralRunner.Name, Namespace: invalideEphemeralRunner.Namespace}, updated)
|
||||
if err != nil {
|
||||
return "", nil
|
||||
}
|
||||
return updated.Status.Phase, nil
|
||||
}, timeout, interval).Should(BeEquivalentTo(corev1.PodFailed))
|
||||
Expect(updated.Status.Reason).Should(Equal("InvalidPod"))
|
||||
Expect(updated.Status.Message).Should(Equal("Failed to create the pod: pods \"invalid-ephemeral-runner\" is forbidden: no PriorityClass with name notexist was found"))
|
||||
})
|
||||
|
||||
It("It should clean up resources when deleted", func() {
|
||||
// wait for pod to be created
|
||||
pod := new(corev1.Pod)
|
||||
|
|
|
|||
Loading…
Reference in New Issue