From 7f0e65cb73dd174dc93586154ba6f4de63ef9114 Mon Sep 17 00:00:00 2001 From: Yusuke Kuoka Date: Wed, 2 Mar 2022 02:31:35 +0000 Subject: [PATCH] refactor: Extract definitions of various annotation keys and other defaults to their own source --- controllers/constants.go | 40 +++++++++++++++++++++++++++ controllers/runner_graceful_stop.go | 41 +++++----------------------- controllers/runner_pod_controller.go | 10 +------ controllers/runnerset_controller.go | 24 +++++++--------- 4 files changed, 58 insertions(+), 57 deletions(-) create mode 100644 controllers/constants.go diff --git a/controllers/constants.go b/controllers/constants.go new file mode 100644 index 00000000..176036d6 --- /dev/null +++ b/controllers/constants.go @@ -0,0 +1,40 @@ +package controllers + +import "time" + +const ( + LabelKeyRunnerSetName = "runnerset-name" +) + +const ( + // This name requires at least one slash to work. + // See https://github.com/google/knative-gcp/issues/378 + runnerPodFinalizerName = "actions.summerwind.dev/runner-pod" + + AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time" + + // AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed. + AnnotationKeyUnregistrationCompleteTimestamp = "unregistration-complete-timestamp" + + // AnnotationKeyUnregistrationStartTimestamp is the annotation that contains the time that the requested unregistration process has been started. + AnnotationKeyUnregistrationStartTimestamp = "unregistration-start-timestamp" + + // AnnotationKeyUnregistrationRequestTimestamp is the annotation that contains the time that the unregistration has been requested. + // This doesn't immediately start the unregistration. Instead, ARC will first check if the runner has already been registered. + // If not, ARC will wait until the registration completes first, and only after that will it start the unregistration process. 
+ // This is crucial to avoid a race between ARC marking the runner pod for deletion while the actions-runner registers itself to GitHub, leaving the assigned job + hanging forever. + AnnotationKeyUnregistrationRequestTimestamp = "unregistration-request-timestamp" + + AnnotationKeyRunnerID = "runner-id" + + // DefaultUnregistrationTimeout is the duration until ARC gives up retrying the combo of ListRunners API (to detect the runner ID by name) + // and RemoveRunner API (to actually unregister the runner) calls. + // This needs to be longer than 60 seconds because a part of the combo, the ListRunners API, seems to use the Cache-Control header of max-age=60s + // and that instructs our cache library httpcache to cache responses for 60 seconds, which results in ARC unable to see the runner in the ListRunners response + // up to 60 seconds (or even more depending on the situation). + DefaultUnregistrationTimeout = 60 * time.Second + + // This can be any value but a larger value can make an unregistration timeout longer than configured in practice. + DefaultUnregistrationRetryDelay = 30 * time.Second +) diff --git a/controllers/runner_graceful_stop.go b/controllers/runner_graceful_stop.go index f533ba4e..3d71f5cc 100644 --- a/controllers/runner_graceful_stop.go +++ b/controllers/runner_graceful_stop.go @@ -16,33 +16,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -const ( - // unregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed. - unregistrationCompleteTimestamp = "unregistration-complete-timestamp" - - // unregistarionStartTimestamp is the annotation that contains the time that the requested unregistration process has been started - unregistrationStartTimestamp = "unregistration-start-timestamp" - - // unregistrationRequestTimestamp is the annotation that contains the time that the unregistration has been requested. 
- // This doesn't immediately start the unregistration. Instead, ARC will first check if the runner has already been registered. - // If not, ARC will hold on until the registration to complete first, and only after that it starts the unregistration process. - // This is crucial to avoid a race between ARC marking the runner pod for deletion while the actions-runner registers itself to GitHub, leaving the assigned job - // hang like forever. - unregistrationRequestTimestamp = "unregistration-request-timestamp" - - annotationKeyRunnerID = "runner-id" - - // DefaultUnregistrationTimeout is the duration until ARC gives up retrying the combo of ListRunners API (to detect the runner ID by name) - // and RemoveRunner API (to actually unregister the runner) calls. - // This needs to be longer than 60 seconds because a part of the combo, the ListRunners API, seems to use the Cache-Control header of max-age=60s - // and that instructs our cache library httpcache to cache responses for 60 seconds, which results in ARC unable to see the runner in the ListRunners response - // up to 60 seconds (or even more depending on the situation). - DefaultUnregistrationTimeout = 60 * time.Second - - // This can be any value but a larger value can make an unregistration timeout longer than configured in practice. - DefaultUnregistrationRetryDelay = 30 * time.Second -) - // tickRunnerGracefulStop reconciles the runner and the runner pod in a way so that // we can delete the runner pod without disrupting a workflow job. // @@ -54,7 +27,7 @@ const ( // When it wants to be retried later, the function returns a non-nil *ctrl.Result as the second return value, may or may not populating the error in the second return value. // The caller is expected to return the returned ctrl.Result and error to postpone the current reconcilation loop and trigger a scheduled retry. 
func tickRunnerGracefulStop(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) { - pod, err := annotatePodOnce(ctx, c, log, pod, unregistrationStartTimestamp, time.Now().Format(time.RFC3339)) + pod, err := annotatePodOnce(ctx, c, log, pod, AnnotationKeyUnregistrationStartTimestamp, time.Now().Format(time.RFC3339)) if err != nil { return nil, &ctrl.Result{}, err } @@ -63,7 +36,7 @@ func tickRunnerGracefulStop(ctx context.Context, unregistrationTimeout time.Dura return nil, res, err } - pod, err = annotatePodOnce(ctx, c, log, pod, unregistrationCompleteTimestamp, time.Now().Format(time.RFC3339)) + pod, err = annotatePodOnce(ctx, c, log, pod, AnnotationKeyUnregistrationCompleteTimestamp, time.Now().Format(time.RFC3339)) if err != nil { return nil, &ctrl.Result{}, err } @@ -99,7 +72,7 @@ func annotatePodOnce(ctx context.Context, c client.Client, log logr.Logger, pod func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) { var runnerID *int64 - if id, ok := getAnnotation(&pod.ObjectMeta, annotationKeyRunnerID); ok { + if id, ok := getAnnotation(&pod.ObjectMeta, AnnotationKeyRunnerID); ok { v, err := strconv.ParseInt(id, 10, 64) if err != nil { return &ctrl.Result{}, err @@ -162,7 +135,7 @@ func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time. // In that case we can safely assume that the runner will never be registered. 
log.Info("Runner was not found on GitHub and the runner pod was not found on Kuberntes.") - } else if pod.Annotations[unregistrationCompleteTimestamp] != "" { + } else if pod.Annotations[AnnotationKeyUnregistrationCompleteTimestamp] != "" { // If it's already unregistered in the previous reconcilation loop, // you can safely assume that it won't get registered again so it's safe to delete the runner pod. log.Info("Runner pod is marked as already unregistered.") @@ -174,7 +147,7 @@ func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time. // If pod has ended up succeeded we need to restart it // Happens e.g. when dind is in runner and run completes log.Info("Runner pod has been stopped with a successful status.") - } else if ts := pod.Annotations[unregistrationStartTimestamp]; ts != "" { + } else if ts := pod.Annotations[AnnotationKeyUnregistrationStartTimestamp]; ts != "" { t, err := time.Parse(time.RFC3339, ts) if err != nil { return &ctrl.Result{RequeueAfter: retryDelay}, err @@ -202,7 +175,7 @@ func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time. 
} func ensureRunnerPodRegistered(ctx context.Context, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) { - _, hasRunnerID := getAnnotation(&pod.ObjectMeta, annotationKeyRunnerID) + _, hasRunnerID := getAnnotation(&pod.ObjectMeta, AnnotationKeyRunnerID) if runnerPodOrContainerIsStopped(pod) || hasRunnerID { return pod, nil, nil } @@ -218,7 +191,7 @@ func ensureRunnerPodRegistered(ctx context.Context, log logr.Logger, ghClient *g id := *r.ID - updated, err := annotatePodOnce(ctx, c, log, pod, annotationKeyRunnerID, fmt.Sprintf("%d", id)) + updated, err := annotatePodOnce(ctx, c, log, pod, AnnotationKeyRunnerID, fmt.Sprintf("%d", id)) if err != nil { return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err } diff --git a/controllers/runner_pod_controller.go b/controllers/runner_pod_controller.go index 64874002..04002eac 100644 --- a/controllers/runner_pod_controller.go +++ b/controllers/runner_pod_controller.go @@ -52,14 +52,6 @@ type RunnerPodReconciler struct { UnregistrationRetryDelay time.Duration } -const ( - // This names requires at least one slash to work. 
- // See https://github.com/google/knative-gcp/issues/378 - runnerPodFinalizerName = "actions.summerwind.dev/runner-pod" - - AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time" -) - // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch;delete // +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch @@ -173,7 +165,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( runnerPod = *po - if _, unregistrationRequested := getAnnotation(&runnerPod.ObjectMeta, unregistrationRequestTimestamp); unregistrationRequested { + if _, unregistrationRequested := getAnnotation(&runnerPod.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp); unregistrationRequested { log.V(2).Info("Progressing unregistration because unregistration-request timestamp is set") // At this point we're sure that DeletionTimestamp is not set yet, but the unregistration process is triggered by an upstream controller like runnerset-controller. diff --git a/controllers/runnerset_controller.go b/controllers/runnerset_controller.go index 64d61867..85a38452 100644 --- a/controllers/runnerset_controller.go +++ b/controllers/runnerset_controller.go @@ -39,10 +39,6 @@ import ( "github.com/go-logr/logr" ) -const ( - LabelKeyRunnerSetName = "runnerset-name" -) - // RunnerSetReconciler reconciles a Runner object type RunnerSetReconciler struct { Name string @@ -161,7 +157,7 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } // Statefulset termination process 3/4: Set the deletionTimestamp to let Kubernetes start a cascade deletion of the statefulset and the pods. 
- if _, ok := getAnnotation(&res.statefulset.ObjectMeta, unregistrationCompleteTimestamp); ok { + if _, ok := getAnnotation(&res.statefulset.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp); ok { if err := r.Client.Delete(ctx, res.statefulset); err != nil { log.Error(err, "Failed to delete statefulset") return ctrl.Result{}, err @@ -171,10 +167,10 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Statefulset termination process 2/4: Set unregistrationCompleteTimestamp only if all the pods managed by the statefulset // have either unregistered or being deleted. - if _, ok := getAnnotation(&res.statefulset.ObjectMeta, unregistrationRequestTimestamp); ok { + if _, ok := getAnnotation(&res.statefulset.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp); ok { var deletionSafe int for _, po := range res.pods { - if _, ok := getAnnotation(&po.ObjectMeta, unregistrationCompleteTimestamp); ok { + if _, ok := getAnnotation(&po.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp); ok { deletionSafe++ } else if !po.DeletionTimestamp.IsZero() { deletionSafe++ @@ -184,12 +180,12 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( log.V(2).Info("Marking statefulset for unregistration completion", "deletionSafe", deletionSafe, "total", res.total) if deletionSafe == res.total { - if _, ok := getAnnotation(&res.statefulset.ObjectMeta, unregistrationCompleteTimestamp); !ok { + if _, ok := getAnnotation(&res.statefulset.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp); !ok { updated := res.statefulset.DeepCopy() - setAnnotation(&updated.ObjectMeta, unregistrationCompleteTimestamp, time.Now().Format(time.RFC3339)) + setAnnotation(&updated.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp, time.Now().Format(time.RFC3339)) if err := r.Client.Patch(ctx, updated, client.MergeFrom(res.statefulset)); err != nil { - log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s 
annotation", unregistrationCompleteTimestamp)) + log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s annotation", AnnotationKeyUnregistrationCompleteTimestamp)) return ctrl.Result{}, err } @@ -320,17 +316,17 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // We'd better unregister first and then start a pod deletion process. // The annotation works as a mark to start the pod unregistration and deletion process of ours. for _, po := range ss.pods { - if _, err := annotatePodOnce(ctx, r.Client, log, &po, unregistrationRequestTimestamp, time.Now().Format(time.RFC3339)); err != nil { + if _, err := annotatePodOnce(ctx, r.Client, log, &po, AnnotationKeyUnregistrationRequestTimestamp, time.Now().Format(time.RFC3339)); err != nil { return ctrl.Result{}, err } } - if _, ok := getAnnotation(&ss.statefulset.ObjectMeta, unregistrationRequestTimestamp); !ok { + if _, ok := getAnnotation(&ss.statefulset.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp); !ok { updated := ss.statefulset.DeepCopy() - setAnnotation(&updated.ObjectMeta, unregistrationRequestTimestamp, time.Now().Format(time.RFC3339)) + setAnnotation(&updated.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp, time.Now().Format(time.RFC3339)) if err := r.Client.Patch(ctx, updated, client.MergeFrom(ss.statefulset)); err != nil { - log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s annotation", unregistrationRequestTimestamp)) + log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s annotation", AnnotationKeyUnregistrationRequestTimestamp)) return ctrl.Result{}, err }