Use patch instead of update to alleviate HRA conflict on webhook (#358)

We sometimes see the integration test fail because runner replicas do not reach the expected number in a timely manner. After some investigation, this turned out to be caused by conflicting HRA updates between the webhook-based autoscaler and the HRA controller. This change switches the controllers to Patch instead of Update to make such conflicts less likely.

I have also updated the HRA controller to use Patch when updating the RunnerDeployment.

Overall, these changes should make webhook-based autoscaling more reliable thanks to fewer conflicts.
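
For context, both controllers now follow the same pattern: mutate a DeepCopy of the object, then write it back with Patch(ctx, copy, client.MergeFrom(original)) instead of Update(ctx, copy). Below is a minimal sketch of that pattern against a controller-runtime client; the helper names (updateHRA, patchHRA, mutate) and the api/v1alpha1 import path are illustrative assumptions, not part of this diff.

package sketch

import (
	"context"
	"fmt"

	actionsv1alpha1 "github.com/summerwind/actions-runner-controller/api/v1alpha1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// updateHRA writes the whole object back together with its resourceVersion,
// so the API server rejects the write with a conflict whenever another writer
// has modified the HRA since it was read.
func updateHRA(ctx context.Context, c client.Client, hra *actionsv1alpha1.HorizontalRunnerAutoscaler, mutate func(*actionsv1alpha1.HorizontalRunnerAutoscaler)) error {
	copy := hra.DeepCopy()
	mutate(copy)

	if err := c.Update(ctx, copy); err != nil {
		return fmt.Errorf("updating horizontalrunnerautoscaler: %w", err)
	}

	return nil
}

// patchHRA sends only the difference between the original object and the
// mutated copy as a merge patch. client.MergeFrom does not add a
// resourceVersion precondition by default, so concurrent writers touching
// other fields no longer collide.
func patchHRA(ctx context.Context, c client.Client, hra *actionsv1alpha1.HorizontalRunnerAutoscaler, mutate func(*actionsv1alpha1.HorizontalRunnerAutoscaler)) error {
	copy := hra.DeepCopy()
	mutate(copy)

	if err := c.Patch(ctx, copy, client.MergeFrom(hra)); err != nil {
		return fmt.Errorf("patching horizontalrunnerautoscaler: %w", err)
	}

	return nil
}

If the optimistic-concurrency check is still wanted for a particular write, the patch can instead be built with client.MergeFromWithOptions and the MergeFromWithOptimisticLock option.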
Yusuke Kuoka 2021-02-26 10:17:09 +09:00 committed by GitHub
parent d18884a0b9
commit 584590e97c
3 changed files with 23 additions and 14 deletions


@@ -371,8 +371,6 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScaleUp(ctx contex
 		return nil
 	}
-	log := autoscaler.Log.WithValues("horizontalrunnerautoscaler", target.HorizontalRunnerAutoscaler.Name)
 	copy := target.HorizontalRunnerAutoscaler.DeepCopy()
 	amount := 1
@@ -388,10 +386,8 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScaleUp(ctx contex
 		Replicas: amount,
 	})
-	if err := autoscaler.Client.Update(ctx, copy); err != nil {
-		log.Error(err, "Failed to update horizontalrunnerautoscaler resource")
-		return err
+	if err := autoscaler.Client.Patch(ctx, copy, client.MergeFrom(&target.HorizontalRunnerAutoscaler)); err != nil {
+		return fmt.Errorf("patching horizontalrunnerautoscaler to add capacity reservation: %w", err)
 	}
 	return nil


@@ -18,6 +18,7 @@ package controllers
 import (
 	"context"
+	"fmt"
 	"time"

 	"github.com/summerwind/actions-runner-controller/github"
@@ -123,10 +124,8 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 		copy := rd.DeepCopy()
 		copy.Spec.Replicas = &newDesiredReplicas
-		if err := r.Client.Update(ctx, copy); err != nil {
-			log.Error(err, "Failed to update runnerderployment resource")
-			return ctrl.Result{}, err
+		if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
+			return ctrl.Result{}, fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
 		}
 	}
@@ -167,10 +166,8 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl
 	}
 	if updated != nil {
-		if err := r.Status().Update(ctx, updated); err != nil {
-			log.Error(err, "Failed to update horizontalrunnerautoscaler status")
-			return ctrl.Result{}, err
+		if err := r.Status().Patch(ctx, updated, client.MergeFrom(&hra)); err != nil {
+			return ctrl.Result{}, fmt.Errorf("patching horizontalrunnerautoscaler status to add cache entry: %w", err)
 		}
 	}


@@ -244,6 +244,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
 			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
 			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3)
+			ExpectHRAStatusCacheEntryLengthEventuallyEquals(ctx, ns.Name, name, 1)
 		}
 		{
@@ -601,6 +602,21 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
 	})
 })
+func ExpectHRAStatusCacheEntryLengthEventuallyEquals(ctx context.Context, ns string, name string, value int, optionalDescriptions ...interface{}) {
+	EventuallyWithOffset(
+		1,
+		func() int {
+			var hra actionsv1alpha1.HorizontalRunnerAutoscaler
+			err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns, Name: name}, &hra)
+			ExpectWithOffset(1, err).NotTo(HaveOccurred(), "failed to get test HRA resource")
+			return len(hra.Status.CacheEntries)
+		},
+		time.Second*5, time.Millisecond*500).Should(Equal(value), optionalDescriptions...)
+}
 func ExpectHRADesiredReplicasEquals(ctx context.Context, ns, name string, desired int, optionalDescriptions ...interface{}) {
 	var rd actionsv1alpha1.HorizontalRunnerAutoscaler