Fix RunerSet managed runner pods to terminate more gracefully (#1126)
Make RunnerSet-managed runners as reliable as RunnerDeployment-managed runners. Ref https://github.com/actions-runner-controller/actions-runner-controller/issues/911#issuecomment-1042404460
This commit is contained in:
		
							parent
							
								
									921f547200
								
							
						
					
					
						commit
						a5ed6bd263
					
				|  | @ -531,32 +531,15 @@ func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1a | |||
| 	return ctrl.Result{}, nil | ||||
| } | ||||
| 
 | ||||
| // unregisterRunner unregisters the runner from GitHub Actions by name.
 | ||||
| //
 | ||||
| // This function returns:
 | ||||
| // - (true, nil) when it has successfully unregistered the runner.
 | ||||
| // - (false, nil) when the runner has been already unregistered.
 | ||||
| // - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
 | ||||
| //   an error returned by GitHub API.
 | ||||
| func (r *RunnerReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) { | ||||
| 	runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 
 | ||||
| 	id := int64(0) | ||||
| 	for _, runner := range runners { | ||||
| 		if runner.GetName() == name { | ||||
| 			if runner.GetBusy() { | ||||
| 				return false, fmt.Errorf("runner is busy") | ||||
| 			} | ||||
| 			id = runner.GetID() | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if id == int64(0) { | ||||
| 		return false, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 
 | ||||
| 	return true, nil | ||||
| 	return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name) | ||||
| } | ||||
| 
 | ||||
| func (r *RunnerReconciler) updateRegistrationToken(ctx context.Context, runner v1alpha1.Runner) (bool, error) { | ||||
|  |  | |||
|  | @ -378,42 +378,7 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( | |||
| } | ||||
| 
 | ||||
| func (r *RunnerPodReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) { | ||||
| 	runners, err := r.GitHubClient.ListRunners(ctx, enterprise, org, repo) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 
 | ||||
| 	var busy bool | ||||
| 
 | ||||
| 	id := int64(0) | ||||
| 	for _, runner := range runners { | ||||
| 		if runner.GetName() == name { | ||||
| 			// Sometimes a runner can stuck "busy" even though it is already "offline".
 | ||||
| 			// Thus removing the condition on status can block the runner pod from being terminated forever.
 | ||||
| 			busy = runner.GetBusy() | ||||
| 			if runner.GetStatus() != "offline" && busy { | ||||
| 				r.Log.Info("This runner will delay the runner pod deletion and the runner deregistration until it becomes either offline or non-busy", "name", runner.GetName(), "status", runner.GetStatus(), "busy", runner.GetBusy()) | ||||
| 				return false, fmt.Errorf("runner is busy") | ||||
| 			} | ||||
| 			id = runner.GetID() | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if id == int64(0) { | ||||
| 		return false, nil | ||||
| 	} | ||||
| 
 | ||||
| 	// Sometimes a runner can stuck "busy" even though it is already "offline".
 | ||||
| 	// Trying to remove the offline but busy runner can result in errors like the following:
 | ||||
| 	//    failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
 | ||||
| 	if !busy { | ||||
| 		if err := r.GitHubClient.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { | ||||
| 			return false, err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return true, nil | ||||
| 	return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name) | ||||
| } | ||||
| 
 | ||||
| func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error { | ||||
|  |  | |||
|  | @ -0,0 +1,49 @@ | |||
| package controllers | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"github.com/actions-runner-controller/actions-runner-controller/github" | ||||
| ) | ||||
| 
 | ||||
| // unregisterRunner unregisters the runner from GitHub Actions by name.
 | ||||
| //
 | ||||
| // This function returns:
 | ||||
| // - (true, nil) when it has successfully unregistered the runner.
 | ||||
| // - (false, nil) when the runner has been already unregistered.
 | ||||
| // - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
 | ||||
| //   an error returned by GitHub API.
 | ||||
| func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (bool, error) { | ||||
| 	runners, err := client.ListRunners(ctx, enterprise, org, repo) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 
 | ||||
| 	id := int64(0) | ||||
| 	for _, runner := range runners { | ||||
| 		if runner.GetName() == name { | ||||
| 			// Note that sometimes a runner can stuck "busy" even though it is already "offline".
 | ||||
| 			// But we assume that it's not actually offline and still running a job.
 | ||||
| 			if runner.GetBusy() { | ||||
| 				return false, fmt.Errorf("runner is busy") | ||||
| 			} | ||||
| 			id = runner.GetID() | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if id == int64(0) { | ||||
| 		return false, nil | ||||
| 	} | ||||
| 
 | ||||
| 	// Trying to remove a busy runner can result in errors like the following:
 | ||||
| 	//    failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
 | ||||
| 	//
 | ||||
| 	// TODO: Probably we can just remove the runner by ID without seeing if the runner is busy, by treating it as busy when a remove-runner call failed with 422?
 | ||||
| 	if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 
 | ||||
| 	return true, nil | ||||
| } | ||||
		Loading…
	
		Reference in New Issue