From c424215044086b14286f4060e93c93bde5470110 Mon Sep 17 00:00:00 2001 From: Yusuke Kuoka Date: Fri, 19 Mar 2021 11:02:47 +0900 Subject: [PATCH] Do recheck runner registration timely (#405) Since #392, the runner controller could have taken unexpectedly long time until it finally notices that the runner has been registered to GitHub. This patch fixes the issue, so that the controller will notice the successful registration in approximately 1 minute(hard-coded). More concretely, let's say you had configured a long sync-period of like 10m, the runner controller could have taken approx 10m to notice the successful registration. The original expectation was 1m, because it was intended to recheck every 1m as implemented in #392. It wasn't working as such due to my misunderstanding in how requeueing work. --- controllers/runner_controller.go | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/controllers/runner_controller.go b/controllers/runner_controller.go index 1dc9bb50..62e92546 100644 --- a/controllers/runner_controller.go +++ b/controllers/runner_controller.go @@ -255,15 +255,25 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { // achieves that. if lastCheckTime := runner.Status.LastRegistrationCheckTime; lastCheckTime != nil { nextCheckTime := lastCheckTime.Add(registrationCheckInterval) - if nextCheckTime.After(time.Now()) { + now := time.Now() + if nextCheckTime.After(now) { + requeueAfter := nextCheckTime.Sub(now) + log.Info( - fmt.Sprintf("Skipping registration check because it's deferred until %s", nextCheckTime), + fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter), + "lastRegistrationCheckTime", lastCheckTime, + "registrationCheckInterval", registrationCheckInterval, ) - // Note that we don't need to explicitly requeue on this reconcilation because - // the requeue should have been already scheduled previsouly - // (with `return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil` as noted above and coded below) - return ctrl.Result{}, nil + // Without RequeueAfter, the controller may not retry on scheduled. Instead, it must wait until the + // next sync period passes, which can be too much later than nextCheckTime. + // + // We need to requeue on this reconcilation even though we have already scheduled the initial + // requeue previously with `return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil`. + // Apparently, the workqueue used by controller-runtime seems to deduplicate and resets the delay on + // other requeues- so the initial scheduled requeue may have been reset due to requeue on + // spec/status change. + return ctrl.Result{RequeueAfter: requeueAfter}, nil } }