refactor: Extract definitions of various annotation keys and other defaults to their own source file
This commit is contained in:
parent
12a04b7f38
commit
7f0e65cb73
|
|
@ -0,0 +1,40 @@
|
||||||
|
package controllers
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// LabelKeyRunnerSetName is the key of the label whose value is the name of the
// owning RunnerSet. NOTE(review): usage sites are not visible in this diff —
// confirm it is applied to resources created per RunnerSet.
const LabelKeyRunnerSetName = "runnerset-name"
|
||||||
|
|
||||||
|
const (
|
||||||
|
// This names requires at least one slash to work.
|
||||||
|
// See https://github.com/google/knative-gcp/issues/378
|
||||||
|
runnerPodFinalizerName = "actions.summerwind.dev/runner-pod"
|
||||||
|
|
||||||
|
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
|
||||||
|
|
||||||
|
// AnnotationKeyUnregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
|
||||||
|
AnnotationKeyUnregistrationCompleteTimestamp = "unregistration-complete-timestamp"
|
||||||
|
|
||||||
|
// unregistarionStartTimestamp is the annotation that contains the time that the requested unregistration process has been started
|
||||||
|
AnnotationKeyUnregistrationStartTimestamp = "unregistration-start-timestamp"
|
||||||
|
|
||||||
|
// AnnotationKeyUnregistrationRequestTimestamp is the annotation that contains the time that the unregistration has been requested.
|
||||||
|
// This doesn't immediately start the unregistration. Instead, ARC will first check if the runner has already been registered.
|
||||||
|
// If not, ARC will hold on until the registration to complete first, and only after that it starts the unregistration process.
|
||||||
|
// This is crucial to avoid a race between ARC marking the runner pod for deletion while the actions-runner registers itself to GitHub, leaving the assigned job
|
||||||
|
// hang like forever.
|
||||||
|
AnnotationKeyUnregistrationRequestTimestamp = "unregistration-request-timestamp"
|
||||||
|
|
||||||
|
AnnotationKeyRunnerID = "runner-id"
|
||||||
|
|
||||||
|
// DefaultUnregistrationTimeout is the duration until ARC gives up retrying the combo of ListRunners API (to detect the runner ID by name)
|
||||||
|
// and RemoveRunner API (to actually unregister the runner) calls.
|
||||||
|
// This needs to be longer than 60 seconds because a part of the combo, the ListRunners API, seems to use the Cache-Control header of max-age=60s
|
||||||
|
// and that instructs our cache library httpcache to cache responses for 60 seconds, which results in ARC unable to see the runner in the ListRunners response
|
||||||
|
// up to 60 seconds (or even more depending on the situation).
|
||||||
|
DefaultUnregistrationTimeout = 60 * time.Second
|
||||||
|
|
||||||
|
// This can be any value but a larger value can make an unregistration timeout longer than configured in practice.
|
||||||
|
DefaultUnregistrationRetryDelay = 30 * time.Second
|
||||||
|
)
|
||||||
|
|
@ -16,33 +16,6 @@ import (
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
// unregistrationCompleteTimestamp is the annotation that is added onto the pod once the previously started unregistration process has been completed.
|
|
||||||
unregistrationCompleteTimestamp = "unregistration-complete-timestamp"
|
|
||||||
|
|
||||||
// unregistarionStartTimestamp is the annotation that contains the time that the requested unregistration process has been started
|
|
||||||
unregistrationStartTimestamp = "unregistration-start-timestamp"
|
|
||||||
|
|
||||||
// unregistrationRequestTimestamp is the annotation that contains the time that the unregistration has been requested.
|
|
||||||
// This doesn't immediately start the unregistration. Instead, ARC will first check if the runner has already been registered.
|
|
||||||
// If not, ARC will hold on until the registration to complete first, and only after that it starts the unregistration process.
|
|
||||||
// This is crucial to avoid a race between ARC marking the runner pod for deletion while the actions-runner registers itself to GitHub, leaving the assigned job
|
|
||||||
// hang like forever.
|
|
||||||
unregistrationRequestTimestamp = "unregistration-request-timestamp"
|
|
||||||
|
|
||||||
annotationKeyRunnerID = "runner-id"
|
|
||||||
|
|
||||||
// DefaultUnregistrationTimeout is the duration until ARC gives up retrying the combo of ListRunners API (to detect the runner ID by name)
|
|
||||||
// and RemoveRunner API (to actually unregister the runner) calls.
|
|
||||||
// This needs to be longer than 60 seconds because a part of the combo, the ListRunners API, seems to use the Cache-Control header of max-age=60s
|
|
||||||
// and that instructs our cache library httpcache to cache responses for 60 seconds, which results in ARC unable to see the runner in the ListRunners response
|
|
||||||
// up to 60 seconds (or even more depending on the situation).
|
|
||||||
DefaultUnregistrationTimeout = 60 * time.Second
|
|
||||||
|
|
||||||
// This can be any value but a larger value can make an unregistration timeout longer than configured in practice.
|
|
||||||
DefaultUnregistrationRetryDelay = 30 * time.Second
|
|
||||||
)
|
|
||||||
|
|
||||||
// tickRunnerGracefulStop reconciles the runner and the runner pod in a way so that
|
// tickRunnerGracefulStop reconciles the runner and the runner pod in a way so that
|
||||||
// we can delete the runner pod without disrupting a workflow job.
|
// we can delete the runner pod without disrupting a workflow job.
|
||||||
//
|
//
|
||||||
|
|
@ -54,7 +27,7 @@ const (
|
||||||
// When it wants to be retried later, the function returns a non-nil *ctrl.Result as the second return value, may or may not populating the error in the second return value.
|
// When it wants to be retried later, the function returns a non-nil *ctrl.Result as the second return value, may or may not populating the error in the second return value.
|
||||||
// The caller is expected to return the returned ctrl.Result and error to postpone the current reconcilation loop and trigger a scheduled retry.
|
// The caller is expected to return the returned ctrl.Result and error to postpone the current reconcilation loop and trigger a scheduled retry.
|
||||||
func tickRunnerGracefulStop(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
|
func tickRunnerGracefulStop(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
|
||||||
pod, err := annotatePodOnce(ctx, c, log, pod, unregistrationStartTimestamp, time.Now().Format(time.RFC3339))
|
pod, err := annotatePodOnce(ctx, c, log, pod, AnnotationKeyUnregistrationStartTimestamp, time.Now().Format(time.RFC3339))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, &ctrl.Result{}, err
|
return nil, &ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
@ -63,7 +36,7 @@ func tickRunnerGracefulStop(ctx context.Context, unregistrationTimeout time.Dura
|
||||||
return nil, res, err
|
return nil, res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
pod, err = annotatePodOnce(ctx, c, log, pod, unregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
|
pod, err = annotatePodOnce(ctx, c, log, pod, AnnotationKeyUnregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, &ctrl.Result{}, err
|
return nil, &ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
@ -99,7 +72,7 @@ func annotatePodOnce(ctx context.Context, c client.Client, log logr.Logger, pod
|
||||||
func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
|
func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
|
||||||
var runnerID *int64
|
var runnerID *int64
|
||||||
|
|
||||||
if id, ok := getAnnotation(&pod.ObjectMeta, annotationKeyRunnerID); ok {
|
if id, ok := getAnnotation(&pod.ObjectMeta, AnnotationKeyRunnerID); ok {
|
||||||
v, err := strconv.ParseInt(id, 10, 64)
|
v, err := strconv.ParseInt(id, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return &ctrl.Result{}, err
|
return &ctrl.Result{}, err
|
||||||
|
|
@ -162,7 +135,7 @@ func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.
|
||||||
// In that case we can safely assume that the runner will never be registered.
|
// In that case we can safely assume that the runner will never be registered.
|
||||||
|
|
||||||
log.Info("Runner was not found on GitHub and the runner pod was not found on Kuberntes.")
|
log.Info("Runner was not found on GitHub and the runner pod was not found on Kuberntes.")
|
||||||
} else if pod.Annotations[unregistrationCompleteTimestamp] != "" {
|
} else if pod.Annotations[AnnotationKeyUnregistrationCompleteTimestamp] != "" {
|
||||||
// If it's already unregistered in the previous reconcilation loop,
|
// If it's already unregistered in the previous reconcilation loop,
|
||||||
// you can safely assume that it won't get registered again so it's safe to delete the runner pod.
|
// you can safely assume that it won't get registered again so it's safe to delete the runner pod.
|
||||||
log.Info("Runner pod is marked as already unregistered.")
|
log.Info("Runner pod is marked as already unregistered.")
|
||||||
|
|
@ -174,7 +147,7 @@ func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.
|
||||||
// If pod has ended up succeeded we need to restart it
|
// If pod has ended up succeeded we need to restart it
|
||||||
// Happens e.g. when dind is in runner and run completes
|
// Happens e.g. when dind is in runner and run completes
|
||||||
log.Info("Runner pod has been stopped with a successful status.")
|
log.Info("Runner pod has been stopped with a successful status.")
|
||||||
} else if ts := pod.Annotations[unregistrationStartTimestamp]; ts != "" {
|
} else if ts := pod.Annotations[AnnotationKeyUnregistrationStartTimestamp]; ts != "" {
|
||||||
t, err := time.Parse(time.RFC3339, ts)
|
t, err := time.Parse(time.RFC3339, ts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return &ctrl.Result{RequeueAfter: retryDelay}, err
|
return &ctrl.Result{RequeueAfter: retryDelay}, err
|
||||||
|
|
@ -202,7 +175,7 @@ func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.
|
||||||
}
|
}
|
||||||
|
|
||||||
func ensureRunnerPodRegistered(ctx context.Context, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
|
func ensureRunnerPodRegistered(ctx context.Context, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
|
||||||
_, hasRunnerID := getAnnotation(&pod.ObjectMeta, annotationKeyRunnerID)
|
_, hasRunnerID := getAnnotation(&pod.ObjectMeta, AnnotationKeyRunnerID)
|
||||||
if runnerPodOrContainerIsStopped(pod) || hasRunnerID {
|
if runnerPodOrContainerIsStopped(pod) || hasRunnerID {
|
||||||
return pod, nil, nil
|
return pod, nil, nil
|
||||||
}
|
}
|
||||||
|
|
@ -218,7 +191,7 @@ func ensureRunnerPodRegistered(ctx context.Context, log logr.Logger, ghClient *g
|
||||||
|
|
||||||
id := *r.ID
|
id := *r.ID
|
||||||
|
|
||||||
updated, err := annotatePodOnce(ctx, c, log, pod, annotationKeyRunnerID, fmt.Sprintf("%d", id))
|
updated, err := annotatePodOnce(ctx, c, log, pod, AnnotationKeyRunnerID, fmt.Sprintf("%d", id))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err
|
return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -52,14 +52,6 @@ type RunnerPodReconciler struct {
|
||||||
UnregistrationRetryDelay time.Duration
|
UnregistrationRetryDelay time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
|
||||||
// This names requires at least one slash to work.
|
|
||||||
// See https://github.com/google/knative-gcp/issues/378
|
|
||||||
runnerPodFinalizerName = "actions.summerwind.dev/runner-pod"
|
|
||||||
|
|
||||||
AnnotationKeyLastRegistrationCheckTime = "actions-runner-controller/last-registration-check-time"
|
|
||||||
)
|
|
||||||
|
|
||||||
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch;delete
|
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;update;patch;delete
|
||||||
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
||||||
|
|
||||||
|
|
@ -173,7 +165,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||||
|
|
||||||
runnerPod = *po
|
runnerPod = *po
|
||||||
|
|
||||||
if _, unregistrationRequested := getAnnotation(&runnerPod.ObjectMeta, unregistrationRequestTimestamp); unregistrationRequested {
|
if _, unregistrationRequested := getAnnotation(&runnerPod.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp); unregistrationRequested {
|
||||||
log.V(2).Info("Progressing unregistration because unregistration-request timestamp is set")
|
log.V(2).Info("Progressing unregistration because unregistration-request timestamp is set")
|
||||||
|
|
||||||
// At this point we're sure that DeletionTimestamp is not set yet, but the unregistration process is triggered by an upstream controller like runnerset-controller.
|
// At this point we're sure that DeletionTimestamp is not set yet, but the unregistration process is triggered by an upstream controller like runnerset-controller.
|
||||||
|
|
|
||||||
|
|
@ -39,10 +39,6 @@ import (
|
||||||
"github.com/go-logr/logr"
|
"github.com/go-logr/logr"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
LabelKeyRunnerSetName = "runnerset-name"
|
|
||||||
)
|
|
||||||
|
|
||||||
// RunnerSetReconciler reconciles a Runner object
|
// RunnerSetReconciler reconciles a Runner object
|
||||||
type RunnerSetReconciler struct {
|
type RunnerSetReconciler struct {
|
||||||
Name string
|
Name string
|
||||||
|
|
@ -161,7 +157,7 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||||
}
|
}
|
||||||
|
|
||||||
// Statefulset termination process 3/4: Set the deletionTimestamp to let Kubernetes start a cascade deletion of the statefulset and the pods.
|
// Statefulset termination process 3/4: Set the deletionTimestamp to let Kubernetes start a cascade deletion of the statefulset and the pods.
|
||||||
if _, ok := getAnnotation(&res.statefulset.ObjectMeta, unregistrationCompleteTimestamp); ok {
|
if _, ok := getAnnotation(&res.statefulset.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp); ok {
|
||||||
if err := r.Client.Delete(ctx, res.statefulset); err != nil {
|
if err := r.Client.Delete(ctx, res.statefulset); err != nil {
|
||||||
log.Error(err, "Failed to delete statefulset")
|
log.Error(err, "Failed to delete statefulset")
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
|
|
@ -171,10 +167,10 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||||
|
|
||||||
// Statefulset termination process 2/4: Set unregistrationCompleteTimestamp only if all the pods managed by the statefulset
|
// Statefulset termination process 2/4: Set unregistrationCompleteTimestamp only if all the pods managed by the statefulset
|
||||||
// have either unregistered or being deleted.
|
// have either unregistered or being deleted.
|
||||||
if _, ok := getAnnotation(&res.statefulset.ObjectMeta, unregistrationRequestTimestamp); ok {
|
if _, ok := getAnnotation(&res.statefulset.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp); ok {
|
||||||
var deletionSafe int
|
var deletionSafe int
|
||||||
for _, po := range res.pods {
|
for _, po := range res.pods {
|
||||||
if _, ok := getAnnotation(&po.ObjectMeta, unregistrationCompleteTimestamp); ok {
|
if _, ok := getAnnotation(&po.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp); ok {
|
||||||
deletionSafe++
|
deletionSafe++
|
||||||
} else if !po.DeletionTimestamp.IsZero() {
|
} else if !po.DeletionTimestamp.IsZero() {
|
||||||
deletionSafe++
|
deletionSafe++
|
||||||
|
|
@ -184,12 +180,12 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||||
log.V(2).Info("Marking statefulset for unregistration completion", "deletionSafe", deletionSafe, "total", res.total)
|
log.V(2).Info("Marking statefulset for unregistration completion", "deletionSafe", deletionSafe, "total", res.total)
|
||||||
|
|
||||||
if deletionSafe == res.total {
|
if deletionSafe == res.total {
|
||||||
if _, ok := getAnnotation(&res.statefulset.ObjectMeta, unregistrationCompleteTimestamp); !ok {
|
if _, ok := getAnnotation(&res.statefulset.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp); !ok {
|
||||||
updated := res.statefulset.DeepCopy()
|
updated := res.statefulset.DeepCopy()
|
||||||
setAnnotation(&updated.ObjectMeta, unregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
|
setAnnotation(&updated.ObjectMeta, AnnotationKeyUnregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
|
||||||
|
|
||||||
if err := r.Client.Patch(ctx, updated, client.MergeFrom(res.statefulset)); err != nil {
|
if err := r.Client.Patch(ctx, updated, client.MergeFrom(res.statefulset)); err != nil {
|
||||||
log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s annotation", unregistrationCompleteTimestamp))
|
log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s annotation", AnnotationKeyUnregistrationCompleteTimestamp))
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -320,17 +316,17 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||||
// We'd better unregister first and then start a pod deletion process.
|
// We'd better unregister first and then start a pod deletion process.
|
||||||
// The annotation works as a mark to start the pod unregistration and deletion process of ours.
|
// The annotation works as a mark to start the pod unregistration and deletion process of ours.
|
||||||
for _, po := range ss.pods {
|
for _, po := range ss.pods {
|
||||||
if _, err := annotatePodOnce(ctx, r.Client, log, &po, unregistrationRequestTimestamp, time.Now().Format(time.RFC3339)); err != nil {
|
if _, err := annotatePodOnce(ctx, r.Client, log, &po, AnnotationKeyUnregistrationRequestTimestamp, time.Now().Format(time.RFC3339)); err != nil {
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, ok := getAnnotation(&ss.statefulset.ObjectMeta, unregistrationRequestTimestamp); !ok {
|
if _, ok := getAnnotation(&ss.statefulset.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp); !ok {
|
||||||
updated := ss.statefulset.DeepCopy()
|
updated := ss.statefulset.DeepCopy()
|
||||||
setAnnotation(&updated.ObjectMeta, unregistrationRequestTimestamp, time.Now().Format(time.RFC3339))
|
setAnnotation(&updated.ObjectMeta, AnnotationKeyUnregistrationRequestTimestamp, time.Now().Format(time.RFC3339))
|
||||||
|
|
||||||
if err := r.Client.Patch(ctx, updated, client.MergeFrom(ss.statefulset)); err != nil {
|
if err := r.Client.Patch(ctx, updated, client.MergeFrom(ss.statefulset)); err != nil {
|
||||||
log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s annotation", unregistrationRequestTimestamp))
|
log.Error(err, fmt.Sprintf("Failed to patch statefulset to have %s annotation", AnnotationKeyUnregistrationRequestTimestamp))
|
||||||
return ctrl.Result{}, err
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue