fix: prevent runner pods managed by RunnerSet from getting stuck in Terminating (#1420)
This is intended to fix #1369, mostly for RunnerSet-managed runner pods. It is "mostly" because this fix might also work for RunnerDeployment in cases where #1395 does not, such as when the user has explicitly set the runner pod restart policy to anything other than "Never". Ref #1369
This commit is contained in:
		
							parent
							
								
									3a7e8c844b
								
							
						
					
					
						commit
						e46b90f758
					
				| 
						 | 
					@ -206,6 +206,24 @@ func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool {
 | 
				
			||||||
	return stopped
 | 
						return stopped
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func ephemeralRunnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus {
 | 
				
			||||||
 | 
						if getRunnerEnv(pod, "RUNNER_EPHEMERAL") != "true" {
 | 
				
			||||||
 | 
							return nil
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for _, status := range pod.Status.ContainerStatuses {
 | 
				
			||||||
 | 
							if status.Name != containerName {
 | 
				
			||||||
 | 
								continue
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							status := status
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							return &status
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return nil
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger, pod *corev1.Pod) (reconcile.Result, error) {
 | 
					func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger, pod *corev1.Pod) (reconcile.Result, error) {
 | 
				
			||||||
	finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
 | 
						finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -113,9 +113,27 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
 | 
				
			||||||
		// Happens e.g. when dind is in runner and run completes
 | 
							// Happens e.g. when dind is in runner and run completes
 | 
				
			||||||
		log.Info("Runner pod has been stopped with a successful status.")
 | 
							log.Info("Runner pod has been stopped with a successful status.")
 | 
				
			||||||
	} else if pod != nil && pod.Annotations[AnnotationKeyRunnerCompletionWaitStartTimestamp] != "" {
 | 
						} else if pod != nil && pod.Annotations[AnnotationKeyRunnerCompletionWaitStartTimestamp] != "" {
 | 
				
			||||||
		log.Info("Runner pod is annotated to wait for completion")
 | 
							ct := ephemeralRunnerContainerStatus(pod)
 | 
				
			||||||
 | 
							if ct == nil {
 | 
				
			||||||
 | 
								log.Info("Runner pod is annotated to wait for completion, and the runner container is not ephemeral")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			return &ctrl.Result{RequeueAfter: retryDelay}, nil
 | 
								return &ctrl.Result{RequeueAfter: retryDelay}, nil
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							lts := ct.LastTerminationState.Terminated
 | 
				
			||||||
 | 
							if lts == nil {
 | 
				
			||||||
 | 
								log.Info("Runner pod is annotated to wait for completion, and the runner container is not restarting")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								return &ctrl.Result{RequeueAfter: retryDelay}, nil
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// Prevent the runner pod from getting stuck in Terminating.
 | 
				
			||||||
 | 
							// See https://github.com/actions-runner-controller/actions-runner-controller/issues/1369
 | 
				
			||||||
 | 
							log.Info("Deleting runner pod anyway because it has stopped prematurely. This may leave a dangling runner resource in GitHub Actions",
 | 
				
			||||||
 | 
								"lastState.exitCode", lts.ExitCode,
 | 
				
			||||||
 | 
								"lastState.message", lts.Message,
 | 
				
			||||||
 | 
								"pod.phase", pod.Status.Phase,
 | 
				
			||||||
 | 
							)
 | 
				
			||||||
	} else if ok, err := unregisterRunner(ctx, ghClient, enterprise, organization, repository, runner, *runnerID); err != nil {
 | 
						} else if ok, err := unregisterRunner(ctx, ghClient, enterprise, organization, repository, runner, *runnerID); err != nil {
 | 
				
			||||||
		if errors.Is(err, &gogithub.RateLimitError{}) {
 | 
							if errors.Is(err, &gogithub.RateLimitError{}) {
 | 
				
			||||||
			// We log the underlying error when we failed calling GitHub API to list or unregisters,
 | 
								// We log the underlying error when we failed calling GitHub API to list or unregisters,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue