Handle resource quota on status forbidden by retrying (#4305)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
parent
3d73636407
commit
9f9409a4c1
|
|
@ -22,6 +22,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
|
||||||
|
|
@ -282,7 +283,34 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
|
||||||
case kerrors.IsAlreadyExists(err):
|
case kerrors.IsAlreadyExists(err):
|
||||||
log.Info("Runner pod already exists. Waiting for the pod event to be received")
|
log.Info("Runner pod already exists. Waiting for the pod event to be received")
|
||||||
return ctrl.Result{Requeue: true, RequeueAfter: 5 * time.Second}, nil
|
return ctrl.Result{Requeue: true, RequeueAfter: 5 * time.Second}, nil
|
||||||
case kerrors.IsInvalid(err) || kerrors.IsForbidden(err):
|
case kerrors.IsInvalid(err):
|
||||||
|
log.Error(err, "Failed to create a pod due to unrecoverable failure")
|
||||||
|
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
|
||||||
|
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
|
||||||
|
log.Error(err, "Failed to set ephemeral runner to phase Failed")
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
return ctrl.Result{}, nil
|
||||||
|
case kerrors.IsForbidden(err):
|
||||||
|
if status, ok := err.(kerrors.APIStatus); ok || errors.As(err, &status) {
|
||||||
|
isResourceQuotaExceeded := strings.Contains(status.Status().Message, "exceeded quota:")
|
||||||
|
isAboutToExpire := ephemeralRunner.CreationTimestamp.Time.Add(10 * time.Minute).Before(time.Now())
|
||||||
|
switch {
|
||||||
|
case isResourceQuotaExceeded && isAboutToExpire:
|
||||||
|
log.Error(err, "Failed to create a pod due to resource quota exceeded and the ephemeral runner is about to expire; re-creating the ephemeral runner")
|
||||||
|
if err := r.Delete(ctx, ephemeralRunner); err != nil {
|
||||||
|
log.Error(err, "Failed to delete the ephemeral runner")
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
return ctrl.Result{}, nil
|
||||||
|
case isResourceQuotaExceeded:
|
||||||
|
log.Error(err, "Resource quota is exceeded; requeue in 30s to retry pod creation")
|
||||||
|
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
|
||||||
|
default:
|
||||||
|
// other forbidden errors
|
||||||
|
// fallthrough to the default handling below
|
||||||
|
}
|
||||||
|
}
|
||||||
log.Error(err, "Failed to create a pod due to unrecoverable failure")
|
log.Error(err, "Failed to create a pod due to unrecoverable failure")
|
||||||
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
|
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
|
||||||
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
|
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue