Andy Reitz 2025-09-20 02:30:26 +05:30 committed by GitHub
commit f5ec131e92
3 changed files with 12 additions and 12 deletions


@@ -398,7 +398,7 @@ func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1a
// Without this we got a few errors like the below on new runner pod:
// 2021-03-16T00:23:10.116Z ERROR controller-runtime.controller Reconciler error {"controller": "runner-controller", "request": "default/example-runnerdeploy-b2g2g-j4mcp", "error": "pods \"example-runnerdeploy-b2g2g-j4mcp\" already exists"}
log.Info(
"Failed to create pod due to AlreadyExists error. Probably this pod has been already created in previous reconcilation but is still not in the informer cache. Will retry on pod created. If it doesn't repeat, there's no problem",
"Failed to create pod due to AlreadyExists error. Probably this pod has been already created in previous reconciliation but is still not in the informer cache. Will retry on pod created. If it doesn't repeat, there's no problem",
)
return ctrl.Result{}, nil
}
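The handling the comment describes is a common controller-runtime pattern: treat AlreadyExists as a benign race caused by informer cache lag and let the pod event drive the next reconciliation. A minimal sketch of that pattern, with hypothetical names for the helper, client, pod, and logger (this is not the controller's actual code):

import (
	"context"

	"github.com/go-logr/logr"
	corev1 "k8s.io/api/core/v1"
	kerrors "k8s.io/apimachinery/pkg/api/errors"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// createRunnerPod is a hypothetical helper showing the AlreadyExists handling.
func createRunnerPod(ctx context.Context, c client.Client, pod *corev1.Pod, log logr.Logger) (ctrl.Result, error) {
	if err := c.Create(ctx, pod); err != nil {
		if kerrors.IsAlreadyExists(err) {
			// The pod was created in a previous reconciliation but is not yet
			// visible in the informer cache; returning nil ends this loop and
			// lets the pod-created event trigger the next reconciliation.
			log.Info("Pod already exists; assuming informer cache lag")
			return ctrl.Result{}, nil
		}
		return ctrl.Result{}, err
	}
	return ctrl.Result{}, nil
}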
@@ -602,7 +602,7 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
runnerSpec := runner.Spec
if len(runnerSpec.VolumeMounts) != 0 {
// if operater provides a work volume mount, use that
// if the operator provides a work volume mount, use that
isPresent, _ := workVolumeMountPresent(runnerSpec.VolumeMounts)
if isPresent {
if runnerSpec.ContainerMode == "kubernetes" {
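For illustration, the presence check called just above could look roughly like this; the real workVolumeMountPresent may match on the mount path or a different name, so the "work" name below is an assumption:

import corev1 "k8s.io/api/core/v1"

// workVolumeMountPresent reports whether the user supplied a mount for the
// runner work directory and at which index it sits. Sketch only.
func workVolumeMountPresent(mounts []corev1.VolumeMount) (bool, int) {
	for i, m := range mounts {
		if m.Name == "work" { // assumed mount name
			return true, i
		}
	}
	return false, 0
}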
@@ -1250,7 +1250,7 @@ func newRunnerPodWithContainerMode(containerMode string, template corev1.Pod, ru
// the TLS key and the cert to be used by dockerd.
//
// The author of this prestop script encountered issues where the prestop hung for ten or more minutes on his cluster.
// He realized that the hang happened when a prestop hook is executed while the docker init is provioning the key and cert.
// He realized that the hang happened when a prestop hook is executed while the docker init is provisioning the key and cert.
// Assuming that this was because the SIGTERM sent by K8s after the prestop hook was ignored by the docker init at that time,
// and that K8s then had to wait for terminationGracePeriodSeconds to elapse before finally killing the container,
// he wrote this script so that it tries to delay SIGTERM until dockerd starts and becomes ready to process the signal.
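The behaviour described there can be pictured as a PreStop hook that polls dockerd before the stop signal is allowed to proceed. A sketch under assumptions (the 120-second timeout, the docker info probe, and the helper name are illustrative, not the controller's actual prestop script):

import corev1 "k8s.io/api/core/v1"

// dockerdAwarePreStop returns a lifecycle whose PreStop hook waits for dockerd,
// so SIGTERM only arrives once the docker init has finished provisioning the
// TLS key and cert and dockerd is ready to process the signal.
func dockerdAwarePreStop() *corev1.Lifecycle {
	return &corev1.Lifecycle{
		PreStop: &corev1.LifecycleHandler{ // named Handler in older k8s.io/api versions
			Exec: &corev1.ExecAction{
				Command: []string{"sh", "-c",
					"timeout 120 sh -c 'until docker info >/dev/null 2>&1; do sleep 1; done' || true",
				},
			},
		},
	}
}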


@@ -159,7 +159,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, err
}
// Otherwise the subsequent patch request can revive the removed finalizer and it will trigger a unnecessary reconcilation
// Otherwise the subsequent patch request can revive the removed finalizer and it will trigger a unnecessary reconciliation
runnerPod = *patchedPod
}
@@ -177,7 +177,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
patchedPod := updatedPod.DeepCopy()
patchedPod.Finalizers = finalizers
// We commit the removal of the finalizer so that Kuberenetes notices it and delete the pod resource from the cluster.
// We commit the removal of the finalizer so that Kubernetes notices it and delete the pod resource from the cluster.
if err := r.Patch(ctx, patchedPod, client.MergeFrom(&runnerPod)); err != nil {
log.Error(err, "Failed to update runner for finalizer removal")
return ctrl.Result{}, err
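The finalizers slice committed above is computed earlier in the function; a plausible shape for that step is a small filter helper along these lines (the helper name, the second return value, and the calling convention are assumptions):

// removeFinalizer returns finalizers without the given name, plus whether it
// was present at all, so the caller can skip the Patch when nothing changed.
func removeFinalizer(finalizers []string, name string) ([]string, bool) {
	out := make([]string, 0, len(finalizers))
	removed := false
	for _, f := range finalizers {
		if f == name {
			removed = true
			continue
		}
		out = append(out, f)
	}
	return out, removed
}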


@@ -238,17 +238,17 @@ type result struct {
currentObjects []*podsForOwner
}
// Why `create` must be a function rather than a client.Object? That's becase we use it to create one or more objects on scale up.
// Why `create` must be a function rather than a client.Object? That's because we use it to create one or more objects on scale up.
//
// We use client.Create to create a necessary number of client.Object. client.Create mutates the passed object on a successful creation.
// It seems to set .Revision at least, and the presence of .Revision makes client.Create fail, due to the K8s restriction that an object that is just being created
// can't have .Revision.
// Now, imagine that you are to add 2 runner replicas on scale up.
// We create one resource object per replica, which ends up in 2 client.Create calls.
// If we were reusing client.Object to be passed to client.Create calls, only the first call suceeeds.
// If we were reusing client.Object to be passed to client.Create calls, only the first call succeeds.
// The second call fails because the first call mutated the client.Object to have .Revision.
// Passing a factory function of client.Object and creating a brand-new client.Object per client.Create call resolves this issue,
// allowing us to create two or more replicas in one reconcilation loop without being rejected by K8s.
// allowing us to create two or more replicas in one reconciliation loop without being rejected by K8s.
func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger, effectiveTime *metav1.Time, newDesiredReplicas int, create func() client.Object, ephemeral bool, owners []client.Object) (*result, error) {
state, err := collectPodsForOwners(ctx, c, log, owners)
if err != nil || state == nil {
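The argument in that comment is easiest to see as code. A hedged sketch of the factory pattern it describes (not the actual body of syncRunnerPodsOwners): the first client.Create populates server-assigned metadata on the passed object (the ".Revision" the comment mentions presumably corresponds to metadata.resourceVersion), and the API server rejects creation requests that already carry it, so each replica needs a brand-new object.

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/client"
)

// scaleUp creates one brand-new object per replica. Calling create() inside
// the loop is the point: every client.Create call receives an object that has
// never been sent to the API server, so no server-assigned metadata is set yet.
func scaleUp(ctx context.Context, c client.Client, create func() client.Object, replicas int) error {
	for i := 0; i < replicas; i++ {
		obj := create()
		if err := c.Create(ctx, obj); err != nil {
			return err
		}
	}
	return nil
}

Reusing a single object across the loop would make every call after the first fail, which is exactly the failure mode the comment describes.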
@@ -351,7 +351,7 @@ func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger,
// This is our special handling of the situation for ephemeral runners only.
//
// Handling static runners this way results in scale-up not working at all,
// because then any scale up attempts for static runenrs fall within this condition, for two reasons.
// because then any scale up attempts for static runners fall within this condition, for two reasons.
// First, static(persistent) runners will never restart on their own.
// Second, we don't update EffectiveTime for static runners.
//
@@ -422,7 +422,7 @@ func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger,
for _, ss := range delete {
log := log.WithValues("owner", types.NamespacedName{Namespace: ss.owner.GetNamespace(), Name: ss.owner.GetName()})
// Statefulset termination process 1/4: Set unregistrationRequestTimestamp only after all the pods managed by the statefulset have
// started unregistreation process.
// started the unregistration process.
//
// NOTE: We just mark it instead of immediately starting the deletion process.
// Otherwise, the runner pod may hit terminationGracePeriod before the unregistration completes (the max terminationGracePeriod is limited to 1h by K8s and a job can run for longer than that),
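"Just mark it" here means recording a timestamp on the owner and letting a later step act on it, rather than issuing the Delete right away. A rough sketch of that marking step, assuming a hypothetical annotation key and helper name:

import (
	"context"
	"time"

	"sigs.k8s.io/controller-runtime/pkg/client"
)

// Assumed annotation key for illustration; the controller's real key may differ.
const annotationUnregistrationRequestTimestamp = "actions-runner-controller/unregistration-request-timestamp"

// markForUnregistration only annotates the owner; the actual deletion is
// deferred until unregistration has completed, so the runner pod never races
// terminationGracePeriod while a job is still running.
func markForUnregistration(ctx context.Context, c client.Client, owner client.Object) error {
	patched := owner.DeepCopyObject().(client.Object)
	annotations := patched.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	annotations[annotationUnregistrationRequestTimestamp] = time.Now().UTC().Format(time.RFC3339)
	patched.SetAnnotations(annotations)
	return c.Patch(ctx, patched, client.MergeFrom(owner))
}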
@@ -511,7 +511,7 @@ func collectPodsForOwners(ctx context.Context, c client.Client, log logr.Logger,
// Statefulset termination process 4/4: Let Kubernetes cascade-delete the statefulset and the pods.
//
// If the runner is already marked for deletion (i.e. has a non-zero deletion timestamp) by the runner controller (which can be caused by an ephemeral runner completion)
// or by this controller (in case it was deleted in the previous reconcilation loop),
// or by this controller (in case it was deleted in the previous reconciliation loop),
// we don't need to bother calling GitHub API to re-mark the runner for deletion.
// Just hold on, and runners will disappear as long as the runner controller is up and running.
if !res.owner.GetDeletionTimestamp().IsZero() {
@@ -588,7 +588,7 @@ func collectPodsForOwners(ctx context.Context, c client.Client, log logr.Logger,
}
if !res.synced {
log.V(1).Info("Skipped reconcilation because owner is not synced yet", "pods", res.pods)
log.V(1).Info("Skipped reconciliation because owner is not synced yet", "pods", res.pods)
return nil, nil
}