fix: runner pods managed by RunnerSet to not stuck in Terminating (#1420)
This is intended to fix #1369 mostly for RunnerSet-managed runner pods. It is "mostly" because this fix might work well for RunnerDeployment in cases that #1395 does not work, like in a case that the user explicitly set the runner pod restart policy to anything other than "Never". Ref #1369
This commit is contained in:
parent
3a7e8c844b
commit
e46b90f758
|
|
@ -206,6 +206,24 @@ func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool {
|
|||
return stopped
|
||||
}
|
||||
|
||||
func ephemeralRunnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus {
|
||||
if getRunnerEnv(pod, "RUNNER_EPHEMERAL") != "true" {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, status := range pod.Status.ContainerStatuses {
|
||||
if status.Name != containerName {
|
||||
continue
|
||||
}
|
||||
|
||||
status := status
|
||||
|
||||
return &status
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger, pod *corev1.Pod) (reconcile.Result, error) {
|
||||
finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
|
||||
|
||||
|
|
|
|||
|
|
@ -113,9 +113,27 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
// Happens e.g. when dind is in runner and run completes
|
||||
log.Info("Runner pod has been stopped with a successful status.")
|
||||
} else if pod != nil && pod.Annotations[AnnotationKeyRunnerCompletionWaitStartTimestamp] != "" {
|
||||
log.Info("Runner pod is annotated to wait for completion")
|
||||
ct := ephemeralRunnerContainerStatus(pod)
|
||||
if ct == nil {
|
||||
log.Info("Runner pod is annotated to wait for completion, and the runner container is not ephemeral")
|
||||
|
||||
return &ctrl.Result{RequeueAfter: retryDelay}, nil
|
||||
}
|
||||
|
||||
lts := ct.LastTerminationState.Terminated
|
||||
if lts == nil {
|
||||
log.Info("Runner pod is annotated to wait for completion, and the runner container is not restarting")
|
||||
|
||||
return &ctrl.Result{RequeueAfter: retryDelay}, nil
|
||||
}
|
||||
|
||||
// Prevent runner pod from stucking in Terminating.
|
||||
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/1369
|
||||
log.Info("Deleting runner pod anyway because it has stopped prematurely. This may leave a dangling runner resource in GitHub Actions",
|
||||
"lastState.exitCode", lts.ExitCode,
|
||||
"lastState.message", lts.Message,
|
||||
"pod.phase", pod.Status.Phase,
|
||||
)
|
||||
} else if ok, err := unregisterRunner(ctx, ghClient, enterprise, organization, repository, runner, *runnerID); err != nil {
|
||||
if errors.Is(err, &gogithub.RateLimitError{}) {
|
||||
// We log the underlying error when we failed calling GitHub API to list or unregisters,
|
||||
|
|
|
|||
Loading…
Reference in New Issue