From a5ed6bd2630ca45c589d63a36f003b88c65de169 Mon Sep 17 00:00:00 2001 From: Yusuke Kuoka Date: Sat, 19 Feb 2022 21:19:37 +0900 Subject: [PATCH] Fix RunerSet managed runner pods to terminate more gracefully (#1126) Make RunnerSet-managed runners as reliable as RunnerDeployment-managed runners. Ref https://github.com/actions-runner-controller/actions-runner-controller/issues/911#issuecomment-1042404460 --- controllers/runner_controller.go | 33 +++++-------------- controllers/runner_pod_controller.go | 37 +-------------------- controllers/unregister.go | 49 ++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 61 deletions(-) create mode 100644 controllers/unregister.go diff --git a/controllers/runner_controller.go b/controllers/runner_controller.go index ebe06adc..17976231 100644 --- a/controllers/runner_controller.go +++ b/controllers/runner_controller.go @@ -531,32 +531,15 @@ func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1a return ctrl.Result{}, nil } +// unregisterRunner unregisters the runner from GitHub Actions by name. +// +// This function returns: +// - (true, nil) when it has successfully unregistered the runner. +// - (false, nil) when the runner has been already unregistered. +// - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to +// an error returned by GitHub API. func (r *RunnerReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) { - runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo) - if err != nil { - return false, err - } - - id := int64(0) - for _, runner := range runners { - if runner.GetName() == name { - if runner.GetBusy() { - return false, fmt.Errorf("runner is busy") - } - id = runner.GetID() - break - } - } - - if id == int64(0) { - return false, nil - } - - if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { - return false, err - } - - return true, nil + return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name) } func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v1alpha1.Runner) (bool, error) { diff --git a/controllers/runner_pod_controller.go b/controllers/runner_pod_controller.go index ac1705c3..d43eea37 100644 --- a/controllers/runner_pod_controller.go +++ b/controllers/runner_pod_controller.go @@ -378,42 +378,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } func (r *RunnerPodReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) { - runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo) - if err != nil { - return false, err - } - - var busy bool - - id := int64(0) - for _, runner := range runners { - if runner.GetName() == name { - // Sometimes a runner can stuck "busy" even though it is already "offline". - // Thus removing the condition on status can block the runner pod from being terminated forever. - busy = runner.GetBusy() - if runner.GetStatus() != "offline" && busy { - r.Log.Info("This runner will delay the runner pod deletion and the runner deregistration until it becomes either offline or non-busy", "name", runner.GetName(), "status", runner.GetStatus(), "busy", runner.GetBusy()) - return false, fmt.Errorf("runner is busy") - } - id = runner.GetID() - break - } - } - - if id == int64(0) { - return false, nil - } - - // Sometimes a runner can stuck "busy" even though it is already "offline". - // Trying to remove the offline but busy runner can result in errors like the following: - // failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" [] - if !busy { - if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { - return false, err - } - } - - return true, nil + return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name) } func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error { diff --git a/controllers/unregister.go b/controllers/unregister.go new file mode 100644 index 00000000..9c139be0 --- /dev/null +++ b/controllers/unregister.go @@ -0,0 +1,49 @@ +package controllers + +import ( + "context" + "fmt" + + "github.com/actions-runner-controller/actions-runner-controller/github" +) + +// unregisterRunner unregisters the runner from GitHub Actions by name. +// +// This function returns: +// - (true, nil) when it has successfully unregistered the runner. +// - (false, nil) when the runner has been already unregistered. +// - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to +// an error returned by GitHub API. +func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (bool, error) { + runners, err := client.ListRunners(ctx, enterprise, org, repo) + if err != nil { + return false, err + } + + id := int64(0) + for _, runner := range runners { + if runner.GetName() == name { + // Note that sometimes a runner can stuck "busy" even though it is already "offline". + // But we assume that it's not actually offline and still running a job. + if runner.GetBusy() { + return false, fmt.Errorf("runner is busy") + } + id = runner.GetID() + break + } + } + + if id == int64(0) { + return false, nil + } + + // Trying to remove a busy runner can result in errors like the following: + // failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" [] + // + // TODO: Probably we can just remove the runner by ID without seeing if the runner is busy, by treating it as busy when a remove-runner call failed with 422? + if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { + return false, err + } + + return true, nil +}