diff --git a/.github/workflows/gha-e2e-tests.yaml b/.github/workflows/gha-e2e-tests.yaml index 9b6861b5..713db048 100644 --- a/.github/workflows/gha-e2e-tests.yaml +++ b/.github/workflows/gha-e2e-tests.yaml @@ -880,3 +880,98 @@ jobs: helm uninstall "${{ steps.install_arc.outputs.ARC_NAME }}" --namespace "arc-runners" --debug kubectl wait --timeout=10s --for=delete AutoScalingRunnerSet -n "${{ steps.install_arc.outputs.ARC_NAME }}" -l app.kubernetes.io/instance="${{ steps.install_arc.outputs.ARC_NAME }}" kubectl logs deployment/arc-gha-rs-controller -n "arc-systems" + + init-with-min-runners: + runs-on: ubuntu-latest + timeout-minutes: 20 + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id + env: + WORKFLOW_FILE: arc-test-workflow.yaml + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.head_ref }} + + - uses: ./.github/actions/setup-arc-e2e + id: setup + with: + app-id: ${{secrets.E2E_TESTS_ACCESS_APP_ID}} + app-pk: ${{secrets.E2E_TESTS_ACCESS_PK}} + image-name: ${{env.IMAGE_NAME}} + image-tag: ${{env.IMAGE_VERSION}} + target-org: ${{env.TARGET_ORG}} + + - name: Install gha-runner-scale-set-controller + id: install_arc_controller + run: | + helm install arc \ + --namespace "arc-systems" \ + --create-namespace \ + --set image.repository=${{ env.IMAGE_NAME }} \ + --set image.tag=${{ env.IMAGE_VERSION }} \ + --set flags.updateStrategy="eventual" \ + ./charts/gha-runner-scale-set-controller \ + --debug + count=0 + while true; do + POD_NAME=$(kubectl get pods -n arc-systems -l app.kubernetes.io/name=gha-rs-controller -o name) + if [ -n "$POD_NAME" ]; then + echo "Pod found: $POD_NAME" + break + fi + if [ "$count" -ge 60 ]; then + echo "Timeout waiting for controller pod with label app.kubernetes.io/name=gha-rs-controller" + exit 1 + fi + sleep 1 + count=$((count+1)) + done + kubectl wait --timeout=30s --for=condition=ready pod -n arc-systems -l app.kubernetes.io/name=gha-rs-controller + kubectl get pod 
-n arc-systems + kubectl describe deployment arc-gha-rs-controller -n arc-systems + + - name: Install gha-runner-scale-set + id: install_arc + run: | + ARC_NAME=${{github.job}}-$(date +'%M%S')$((($RANDOM + 100) % 100 + 1)) + helm install "$ARC_NAME" \ + --namespace "arc-runners" \ + --create-namespace \ + --set githubConfigUrl="https://github.com/${{ env.TARGET_ORG }}/${{env.TARGET_REPO}}" \ + --set githubConfigSecret.github_token="${{ steps.setup.outputs.token }}" \ + --set minRunners=5 \ + ./charts/gha-runner-scale-set \ + --debug + echo "ARC_NAME=$ARC_NAME" >> $GITHUB_OUTPUT + count=0 + while true; do + POD_NAME=$(kubectl get pods -n arc-systems -l actions.github.com/scale-set-name=$ARC_NAME -o name) + if [ -n "$POD_NAME" ]; then + echo "Pod found: $POD_NAME" + break + fi + if [ "$count" -ge 60 ]; then + echo "Timeout waiting for listener pod with label actions.github.com/scale-set-name=$ARC_NAME" + exit 1 + fi + sleep 1 + count=$((count+1)) + done + kubectl wait --timeout=30s --for=condition=ready pod -n arc-systems -l actions.github.com/scale-set-name=$ARC_NAME + kubectl get pod -n arc-systems + - name: Ensure 5 runners are up + run: | + count=0 + while true; do + pod_count=$(kubectl get pods -n arc-runners --no-headers | wc -l) + if [[ "$pod_count" = 5 ]]; then + echo "5 pods are up!" + break + fi + if [[ "$count" -ge 12 ]]; then + echo "Timeout waiting for 5 pods to be created" + exit 1 + fi + sleep 1 + count=$((count+1)) + done diff --git a/charts/gha-runner-scale-set/values.yaml b/charts/gha-runner-scale-set/values.yaml index cedb44be..021fecb9 100644 --- a/charts/gha-runner-scale-set/values.yaml +++ b/charts/gha-runner-scale-set/values.yaml @@ -39,7 +39,8 @@ githubConfigSecret: ## maxRunners is the max number of runners the autoscaling runner set will scale up to. # maxRunners: 5 -## minRunners is the min number of runners the autoscaling runner set will scale down to. +## minRunners is the min number of idle runners. 
The target number of runners created will be +## calculated as the sum of minRunners and the number of jobs assigned to the scale set, capped at maxRunners. # minRunners: 0 # runnerGroup: "default" diff --git a/cmd/ghalistener/worker/worker.go b/cmd/ghalistener/worker/worker.go index 12fd4d79..f9d7b7db 100644 --- a/cmd/ghalistener/worker/worker.go +++ b/cmd/ghalistener/worker/worker.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "math" "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" "github.com/actions/actions-runner-controller/cmd/ghalistener/listener" @@ -158,7 +157,9 @@ func (w *Worker) HandleJobStarted(ctx context.Context, jobInfo *actions.JobStart // Finally, it logs the scaled ephemeral runner set details and returns nil if successful. // If any error occurs during the process, it returns an error with a descriptive message. func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) error { - targetRunnerCount := int(math.Max(math.Min(float64(w.config.MaxRunners), float64(count)), float64(w.config.MinRunners))) + // Max runners should always be set by the resource builder either to the configured value, + // or the maximum int32 (resourcebuilder.newAutoScalingListener()). 
+ targetRunnerCount := min(w.config.MinRunners+count, w.config.MaxRunners) logValues := []any{ "assigned job", count, @@ -187,7 +188,7 @@ func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) error patch, err := json.Marshal( &v1alpha1.EphemeralRunnerSet{ Spec: v1alpha1.EphemeralRunnerSetSpec{ - Replicas: count, + Replicas: targetRunnerCount, }, }, ) diff --git a/cmd/githubrunnerscalesetlistener/autoScalerService.go b/cmd/githubrunnerscalesetlistener/autoScalerService.go index f76adf7b..b8e14521 100644 --- a/cmd/githubrunnerscalesetlistener/autoScalerService.go +++ b/cmd/githubrunnerscalesetlistener/autoScalerService.go @@ -5,7 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "math" "strings" "github.com/actions/actions-runner-controller/cmd/githubrunnerscalesetlistener/config" @@ -206,7 +205,9 @@ func (s *Service) processMessage(message *actions.RunnerScaleSetMessage) error { } func (s *Service) scaleForAssignedJobCount(count int) error { - targetRunnerCount := int(math.Max(math.Min(float64(s.settings.MaxRunners), float64(count)), float64(s.settings.MinRunners))) + // Max runners should always be set by the resource builder either to the configured value, + // or the maximum int32 (resourcebuilder.newAutoScalingListener()). 
+ targetRunnerCount := min(s.settings.MinRunners+count, s.settings.MaxRunners) s.metricsExporter.publishDesiredRunners(targetRunnerCount) if targetRunnerCount != s.currentRunnerCount { s.logger.Info("try scale runner request up/down base on assigned job count", diff --git a/cmd/githubrunnerscalesetlistener/autoScalerService_test.go b/cmd/githubrunnerscalesetlistener/autoScalerService_test.go index 23db17cd..d0e54545 100644 --- a/cmd/githubrunnerscalesetlistener/autoScalerService_test.go +++ b/cmd/githubrunnerscalesetlistener/autoScalerService_test.go @@ -397,7 +397,7 @@ func TestProcessMessage_MultipleMessages(t *testing.T) { require.NoError(t, err) mockRsClient.On("AcquireJobsForRunnerScaleSet", ctx, mock.MatchedBy(func(ids []int64) bool { return ids[0] == 3 && ids[1] == 4 })).Return(nil).Once() - mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Run(func(args mock.Arguments) { cancel() }).Return(nil).Once() + mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 3).Run(func(args mock.Arguments) { cancel() }).Return(nil).Once() err = service.processMessage(&actions.RunnerScaleSetMessage{ MessageId: 1, @@ -523,9 +523,9 @@ func TestScaleForAssignedJobCount_ScaleWithinMinMax(t *testing.T) { require.NoError(t, err) mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 1).Return(nil).Once() - mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 3).Return(nil).Once() + mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 4).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 5).Return(nil).Once() - mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 
1).Return(nil).Once() + mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 5).Return(nil).Once() err = service.scaleForAssignedJobCount(0) @@ -569,7 +569,7 @@ func TestScaleForAssignedJobCount_ScaleFailed(t *testing.T) { ) require.NoError(t, err) - mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Return(fmt.Errorf("error")) + mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 3).Return(fmt.Errorf("error")) err = service.scaleForAssignedJobCount(2) @@ -605,8 +605,23 @@ func TestProcessMessage_JobStartedMessage(t *testing.T) { service.currentRunnerCount = 1 - mockKubeManager.On("UpdateEphemeralRunnerWithJobInfo", ctx, service.settings.Namespace, "runner1", "owner1", "repo1", ".github/workflows/ci.yaml", "job1", int64(100), int64(3)).Run(func(args mock.Arguments) { cancel() }).Return(nil).Once() + mockKubeManager.On( + "UpdateEphemeralRunnerWithJobInfo", + ctx, + service.settings.Namespace, + "runner1", + "owner1", + "repo1", + ".github/workflows/ci.yaml", + "job1", + int64(100), + int64(3), + ).Run( + func(_ mock.Arguments) { cancel() }, + ).Return(nil).Once() + mockRsClient.On("AcquireJobsForRunnerScaleSet", ctx, mock.MatchedBy(func(ids []int64) bool { return len(ids) == 0 })).Return(nil).Once() + mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Return(nil) err = service.processMessage(&actions.RunnerScaleSetMessage{ MessageId: 1, diff --git a/docs/adrs/2023-11-02-min-runners-strategy.md b/docs/adrs/2023-11-02-min-runners-semantics.md similarity index 98% rename from docs/adrs/2023-11-02-min-runners-strategy.md rename to docs/adrs/2023-11-02-min-runners-semantics.md index ace4061f..6dc482db 
100644 --- a/docs/adrs/2023-11-02-min-runners-strategy.md +++ b/docs/adrs/2023-11-02-min-runners-semantics.md @@ -1,6 +1,6 @@ # Changing semantics of the `minRunners` field -**Status**: Proposed +**Status**: Accepted ## Context