Fix rate limit and runner registration logic (#309)

* errors.Is compares all members of a struct to return true which never 
happened
* switched to type check instead of exact value check
* notRegistered was using double negation in if statement which lead to 
unregistering runners after the registration timeout
This commit is contained in:
Johannes Nicolai 2021-02-15 01:36:49 +01:00 committed by GitHub
parent 34c6c3d9cd
commit bc8bc70f69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 30 deletions

View File

@ -244,16 +244,18 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
return ctrl.Result{}, err
}
var notRegistered bool
notRegistered := false
runnerBusy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
if err != nil {
if errors.Is(err, github.RunnerNotFound{}) {
var e *github.RunnerNotFound
if errors.As(err, &e) {
log.Error(err, "Failed to check if runner is busy. Probably this runner has never been successfully registered to GitHub.")
notRegistered = true
} else {
if errors.Is(err, &gogithub.RateLimitError{}) {
var e *gogithub.RateLimitError
if errors.As(err, &e) {
// We log the underlying error when we failed calling GitHub API to list or unregisters,
// or the runner is still busy.
log.Error(
@ -284,7 +286,7 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
currentTime := time.Now()
registrationDidTimeout := currentTime.Sub(pod.CreationTimestamp.Add(registrationTimeout)) > 0
if !notRegistered && registrationDidTimeout {
if notRegistered && registrationDidTimeout {
log.Info(
"Runner failed to register itself to GitHub in timely manner. "+
"Recreating the pod to see if it resolves the issue. "+

View File

@ -107,39 +107,36 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
for _, runner := range myRunners {
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
if err != nil {
log.Error(err, "Failed to check if runner is busy. Probably this runner has never been successfully registered to GitHub, and therefore we prioritize it for deletion", "runnerName", runner.Name)
notRegistered := false
var notRegistered bool
var e *github.RunnerNotFound
if errors.As(err, &e) {
log.Error(err, "Failed to check if runner is busy. Probably this runner has never been successfully registered to GitHub, and therefore we prioritize it for deletion", "runnerName", runner.Name)
notRegistered = true
} else {
var e *gogithub.RateLimitError
if errors.As(err, &e) {
// We log the underlying error when we failed calling GitHub API to list or unregisters,
// or the runner is still busy.
log.Error(
err,
fmt.Sprintf(
"Failed to check if runner is busy due to GitHub API rate limit. Retrying in %s to avoid excessive GitHub API calls",
retryDelayOnGitHubAPIRateLimitError,
),
)
if err != nil {
if errors.Is(err, github.RunnerNotFound{}) {
log.Error(err, "Failed to check if runner is busy. Probably this runner has never been successfully registered to GitHub.")
notRegistered = true
} else {
if errors.Is(err, &gogithub.RateLimitError{}) {
// We log the underlying error when we failed calling GitHub API to list or unregisters,
// or the runner is still busy.
log.Error(
err,
fmt.Sprintf(
"Failed to check if runner is busy due to GitHub API rate limit. Retrying in %s to avoid excessive GitHub API calls",
retryDelayOnGitHubAPIRateLimitError,
),
)
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
}
return ctrl.Result{}, err
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
}
return ctrl.Result{}, err
}
registrationTimeout := 15 * time.Minute
currentTime := time.Now()
registrationDidTimeout := currentTime.Sub(runner.CreationTimestamp.Add(registrationTimeout)) > 0
if !notRegistered && registrationDidTimeout {
if notRegistered && registrationDidTimeout {
log.Info(
"Runner failed to register itself to GitHub in timely manner. "+
"Recreating the pod to see if it resolves the issue. "+

View File

@ -287,7 +287,7 @@ type RunnerNotFound struct {
runnerName string
}
func (e RunnerNotFound) Error() string {
func (e *RunnerNotFound) Error() string {
return fmt.Sprintf("runner %q not found", e.runnerName)
}
@ -303,5 +303,5 @@ func (r *Client) IsRunnerBusy(ctx context.Context, enterprise, org, repo, name s
}
}
return false, RunnerNotFound{runnerName: name}
return false, &RunnerNotFound{runnerName: name}
}