Fix runner pod to not get stuck in Terminating when the runner got deleted before pod scheduling (#2043)
This fixes the issue described above, which I found while running a series of E2E tests for other features and pull requests I have recently contributed.
parent 877c93c5c3
commit 96a930bfd9
@@ -207,9 +207,35 @@ func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool {
 
 				if status.State.Terminated != nil {
 					stopped = true
+					break
 				}
 			}
 		}
+
+		if pod.DeletionTimestamp != nil && !pod.DeletionTimestamp.IsZero() && len(pod.Status.ContainerStatuses) == 0 {
+			// This falls into cases where the pod is stuck with pod status like the below:
+			//
+			// status:
+			//   conditions:
+			//   - lastProbeTime: null
+			//     lastTransitionTime: "2022-11-20T07:58:05Z"
+			//     message: 'binding rejected: running Bind plugin "DefaultBinder": Operation cannot
+			//       be fulfilled on pods/binding "org-runnerdeploy-l579v-qx5p2": pod org-runnerdeploy-l579v-qx5p2
+			//       is being deleted, cannot be assigned to a host'
+			//     reason: SchedulerError
+			//     status: "False"
+			//     type: PodScheduled
+			//   phase: Pending
+			//   qosClass: BestEffort
+			//
+			// ARC usually waits for the registration timeout to elapse when the pod is terminated before getting scheduled onto a node,
+			// assuming there can be a race condition between ARC and Kubernetes where Kubernetes schedules the pod while ARC is deleting the pod,
+			// which may end up with non-gracefully terminating the runner.
+			//
+			// However, Kubernetes seems to not schedule the pod after observing status like the above.
+			// This if-block is therefore needed to prevent ARC from unnecessarily waiting for the registration timeout to happen.
+			stopped = true
+		}
 	}
 
 	return stopped
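For context, the condition added above can be exercised with a minimal, self-contained sketch (illustrative code, not part of this commit; the podStuckBeforeScheduling helper is hypothetical and simply mirrors the new check): a pod that already carries a deletion timestamp but was never scheduled has an empty ContainerStatuses list, and with the fix ARC treats such a pod as stopped right away instead of waiting for the registration timeout.

package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// podStuckBeforeScheduling mirrors the condition added in the diff above:
// the pod is marked for deletion but was never scheduled, so it has no
// container statuses at all.
func podStuckBeforeScheduling(pod *corev1.Pod) bool {
	return pod.DeletionTimestamp != nil &&
		!pod.DeletionTimestamp.IsZero() &&
		len(pod.Status.ContainerStatuses) == 0
}

func main() {
	now := metav1.NewTime(time.Now())

	// A runner pod stuck in Terminating before it ever got scheduled:
	// deletion timestamp set, phase still Pending, no container statuses.
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:              "org-runnerdeploy-l579v-qx5p2",
			DeletionTimestamp: &now,
		},
		Status: corev1.PodStatus{
			Phase: corev1.PodPending,
		},
	}

	fmt.Println(podStuckBeforeScheduling(pod)) // true: treat the pod as stopped immediately
}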
@@ -102,7 +102,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
 		!podIsPending(pod) {
 
 		log.Info(
-			"Unregistration started before runner obtains ID. Waiting for the regisration timeout to elapse, or the runner to obtain ID, or the runner pod to stop",
+			"Unregistration started before runner obtains ID. Waiting for the registration timeout to elapse, or the runner to obtain ID, or the runner pod to stop",
 			"registrationTimeout", registrationTimeout,
 		)
 		return &ctrl.Result{RequeueAfter: retryDelay}, nil
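The unchanged context around this log message also shows how ARC waits in this situation: rather than blocking, it returns a result with RequeueAfter so that controller-runtime retries later. A rough, generic sketch of that requeue pattern (not ARC's actual reconciler; exampleReconciler and its retryDelay field are made up for illustration):

package example

import (
	"context"
	"time"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log"
)

type exampleReconciler struct {
	retryDelay time.Duration // e.g. a few seconds; the value is illustrative
}

func (r *exampleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	// Pretend we detected "unregistration started before the runner obtained an ID" here.
	logger.Info("waiting for the registration timeout to elapse, or the runner to obtain ID, or the runner pod to stop")

	// Ask the manager to reconcile this object again after retryDelay instead of blocking.
	return ctrl.Result{RequeueAfter: r.retryDelay}, nil
}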