Fix RunerSet managed runner pods to terminate more gracefully (#1126)
Make RunnerSet-managed runners as reliable as RunnerDeployment-managed runners. Ref https://github.com/actions-runner-controller/actions-runner-controller/issues/911#issuecomment-1042404460
This commit is contained in:
parent
921f547200
commit
a5ed6bd263
|
|
@ -531,32 +531,15 @@ func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1a
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// unregisterRunner unregisters the runner from GitHub Actions by name.
|
||||||
|
//
|
||||||
|
// This function returns:
|
||||||
|
// - (true, nil) when it has successfully unregistered the runner.
|
||||||
|
// - (false, nil) when the runner has been already unregistered.
|
||||||
|
// - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
|
||||||
|
// an error returned by GitHub API.
|
||||||
func (r *RunnerReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
|
func (r *RunnerReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
|
||||||
runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo)
|
return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name)
|
||||||
if err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
|
|
||||||
id := int64(0)
|
|
||||||
for _, runner := range runners {
|
|
||||||
if runner.GetName() == name {
|
|
||||||
if runner.GetBusy() {
|
|
||||||
return false, fmt.Errorf("runner is busy")
|
|
||||||
}
|
|
||||||
id = runner.GetID()
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if id == int64(0) {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return true, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v1alpha1.Runner) (bool, error) {
|
func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v1alpha1.Runner) (bool, error) {
|
||||||
|
|
|
||||||
|
|
@ -378,42 +378,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerPodReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
|
func (r *RunnerPodReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
|
||||||
runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo)
|
return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name)
|
||||||
if err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var busy bool
|
|
||||||
|
|
||||||
id := int64(0)
|
|
||||||
for _, runner := range runners {
|
|
||||||
if runner.GetName() == name {
|
|
||||||
// Sometimes a runner can stuck "busy" even though it is already "offline".
|
|
||||||
// Thus removing the condition on status can block the runner pod from being terminated forever.
|
|
||||||
busy = runner.GetBusy()
|
|
||||||
if runner.GetStatus() != "offline" && busy {
|
|
||||||
r.Log.Info("This runner will delay the runner pod deletion and the runner deregistration until it becomes either offline or non-busy", "name", runner.GetName(), "status", runner.GetStatus(), "busy", runner.GetBusy())
|
|
||||||
return false, fmt.Errorf("runner is busy")
|
|
||||||
}
|
|
||||||
id = runner.GetID()
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if id == int64(0) {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sometimes a runner can stuck "busy" even though it is already "offline".
|
|
||||||
// Trying to remove the offline but busy runner can result in errors like the following:
|
|
||||||
// failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
|
|
||||||
if !busy {
|
|
||||||
if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
package controllers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/actions-runner-controller/actions-runner-controller/github"
|
||||||
|
)
|
||||||
|
|
||||||
|
// unregisterRunner unregisters the runner from GitHub Actions by name.
|
||||||
|
//
|
||||||
|
// This function returns:
|
||||||
|
// - (true, nil) when it has successfully unregistered the runner.
|
||||||
|
// - (false, nil) when the runner has been already unregistered.
|
||||||
|
// - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
|
||||||
|
// an error returned by GitHub API.
|
||||||
|
func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (bool, error) {
|
||||||
|
runners, err := client.ListRunners(ctx, enterprise, org, repo)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
id := int64(0)
|
||||||
|
for _, runner := range runners {
|
||||||
|
if runner.GetName() == name {
|
||||||
|
// Note that sometimes a runner can stuck "busy" even though it is already "offline".
|
||||||
|
// But we assume that it's not actually offline and still running a job.
|
||||||
|
if runner.GetBusy() {
|
||||||
|
return false, fmt.Errorf("runner is busy")
|
||||||
|
}
|
||||||
|
id = runner.GetID()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if id == int64(0) {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trying to remove a busy runner can result in errors like the following:
|
||||||
|
// failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
|
||||||
|
//
|
||||||
|
// TODO: Probably we can just remove the runner by ID without seeing if the runner is busy, by treating it as busy when a remove-runner call failed with 422?
|
||||||
|
if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue