Change minRunners behavior and fix the new listener min runners (#3139)

This commit is contained in:
Nikola Jokic 2023-12-13 19:39:21 +01:00 committed by GitHub
parent 0fd8eac305
commit f7eb88ce9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 125 additions and 12 deletions

View File

@ -880,3 +880,98 @@ jobs:
helm uninstall "${{ steps.install_arc.outputs.ARC_NAME }}" --namespace "arc-runners" --debug helm uninstall "${{ steps.install_arc.outputs.ARC_NAME }}" --namespace "arc-runners" --debug
kubectl wait --timeout=10s --for=delete AutoScalingRunnerSet -n "${{ steps.install_arc.outputs.ARC_NAME }}" -l app.kubernetes.io/instance="${{ steps.install_arc.outputs.ARC_NAME }}" kubectl wait --timeout=10s --for=delete AutoScalingRunnerSet -n "${{ steps.install_arc.outputs.ARC_NAME }}" -l app.kubernetes.io/instance="${{ steps.install_arc.outputs.ARC_NAME }}"
kubectl logs deployment/arc-gha-rs-controller -n "arc-systems" kubectl logs deployment/arc-gha-rs-controller -n "arc-systems"
init-with-min-runners:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
env:
WORKFLOW_FILE: arc-test-workflow.yaml
steps:
- uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
- uses: ./.github/actions/setup-arc-e2e
id: setup
with:
app-id: ${{secrets.E2E_TESTS_ACCESS_APP_ID}}
app-pk: ${{secrets.E2E_TESTS_ACCESS_PK}}
image-name: ${{env.IMAGE_NAME}}
image-tag: ${{env.IMAGE_VERSION}}
target-org: ${{env.TARGET_ORG}}
- name: Install gha-runner-scale-set-controller
id: install_arc_controller
run: |
helm install arc \
--namespace "arc-systems" \
--create-namespace \
--set image.repository=${{ env.IMAGE_NAME }} \
--set image.tag=${{ env.IMAGE_VERSION }} \
--set flags.updateStrategy="eventual" \
./charts/gha-runner-scale-set-controller \
--debug
count=0
while true; do
POD_NAME=$(kubectl get pods -n arc-systems -l app.kubernetes.io/name=gha-rs-controller -o name)
if [ -n "$POD_NAME" ]; then
echo "Pod found: $POD_NAME"
break
fi
if [ "$count" -ge 60 ]; then
echo "Timeout waiting for controller pod with label app.kubernetes.io/name=gha-rs-controller"
exit 1
fi
sleep 1
count=$((count+1))
done
kubectl wait --timeout=30s --for=condition=ready pod -n arc-systems -l app.kubernetes.io/name=gha-rs-controller
kubectl get pod -n arc-systems
kubectl describe deployment arc-gha-rs-controller -n arc-systems
- name: Install gha-runner-scale-set
id: install_arc
run: |
ARC_NAME=${{github.job}}-$(date +'%M%S')$((($RANDOM + 100) % 100 + 1))
helm install "$ARC_NAME" \
--namespace "arc-runners" \
--create-namespace \
--set githubConfigUrl="https://github.com/${{ env.TARGET_ORG }}/${{env.TARGET_REPO}}" \
--set githubConfigSecret.github_token="${{ steps.setup.outputs.token }}" \
--set minRunners=5 \
./charts/gha-runner-scale-set \
--debug
echo "ARC_NAME=$ARC_NAME" >> $GITHUB_OUTPUT
count=0
while true; do
POD_NAME=$(kubectl get pods -n arc-systems -l actions.github.com/scale-set-name=$ARC_NAME -o name)
if [ -n "$POD_NAME" ]; then
echo "Pod found: $POD_NAME"
break
fi
if [ "$count" -ge 60 ]; then
echo "Timeout waiting for listener pod with label actions.github.com/scale-set-name=$ARC_NAME"
exit 1
fi
sleep 1
count=$((count+1))
done
kubectl wait --timeout=30s --for=condition=ready pod -n arc-systems -l actions.github.com/scale-set-name=$ARC_NAME
kubectl get pod -n arc-systems
- name: Ensure 5 runners are up
run: |
count=0
while true; do
pod_count=$(kubectl get pods -n arc-runners --no-headers | wc -l)
if [[ "$pod_count" = 5 ]]; then
echo "5 pods are up!"
break
fi
if [[ "$count" -ge 12 ]]; then
echo "Timeout waiting for 5 pods to be created"
exit 1
fi
sleep 1
count=$((count+1))
done

View File

@ -39,7 +39,8 @@ githubConfigSecret:
## maxRunners is the max number of runners the autoscaling runner set will scale up to. ## maxRunners is the max number of runners the autoscaling runner set will scale up to.
# maxRunners: 5 # maxRunners: 5
## minRunners is the min number of runners the autoscaling runner set will scale down to. ## minRunners is the min number of idle runners. The target number of runners created will be
## calculated as a sum of minRunners and the number of jobs assigned to the scale set.
# minRunners: 0 # minRunners: 0
# runnerGroup: "default" # runnerGroup: "default"

View File

@ -4,7 +4,6 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/cmd/ghalistener/listener" "github.com/actions/actions-runner-controller/cmd/ghalistener/listener"
@ -158,7 +157,9 @@ func (w *Worker) HandleJobStarted(ctx context.Context, jobInfo *actions.JobStart
// Finally, it logs the scaled ephemeral runner set details and returns nil if successful. // Finally, it logs the scaled ephemeral runner set details and returns nil if successful.
// If any error occurs during the process, it returns an error with a descriptive message. // If any error occurs during the process, it returns an error with a descriptive message.
func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) error { func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) error {
targetRunnerCount := int(math.Max(math.Min(float64(w.config.MaxRunners), float64(count)), float64(w.config.MinRunners))) // Max runners should always be set by the resource builder either to the configured value,
// or the maximum int32 (resourcebuilder.newAutoScalingListener()).
targetRunnerCount := min(w.config.MinRunners+count, w.config.MaxRunners)
logValues := []any{ logValues := []any{
"assigned job", count, "assigned job", count,
@ -187,7 +188,7 @@ func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count int) error
patch, err := json.Marshal( patch, err := json.Marshal(
&v1alpha1.EphemeralRunnerSet{ &v1alpha1.EphemeralRunnerSet{
Spec: v1alpha1.EphemeralRunnerSetSpec{ Spec: v1alpha1.EphemeralRunnerSetSpec{
Replicas: count, Replicas: targetRunnerCount,
}, },
}, },
) )

View File

@ -5,7 +5,6 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"math"
"strings" "strings"
"github.com/actions/actions-runner-controller/cmd/githubrunnerscalesetlistener/config" "github.com/actions/actions-runner-controller/cmd/githubrunnerscalesetlistener/config"
@ -206,7 +205,9 @@ func (s *Service) processMessage(message *actions.RunnerScaleSetMessage) error {
} }
func (s *Service) scaleForAssignedJobCount(count int) error { func (s *Service) scaleForAssignedJobCount(count int) error {
targetRunnerCount := int(math.Max(math.Min(float64(s.settings.MaxRunners), float64(count)), float64(s.settings.MinRunners))) // Max runners should always be set by the resource builder either to the configured value,
// or the maximum int32 (resourcebuilder.newAutoScalingListener()).
targetRunnerCount := min(s.settings.MinRunners+count, s.settings.MaxRunners)
s.metricsExporter.publishDesiredRunners(targetRunnerCount) s.metricsExporter.publishDesiredRunners(targetRunnerCount)
if targetRunnerCount != s.currentRunnerCount { if targetRunnerCount != s.currentRunnerCount {
s.logger.Info("try scale runner request up/down base on assigned job count", s.logger.Info("try scale runner request up/down base on assigned job count",

View File

@ -397,7 +397,7 @@ func TestProcessMessage_MultipleMessages(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
mockRsClient.On("AcquireJobsForRunnerScaleSet", ctx, mock.MatchedBy(func(ids []int64) bool { return ids[0] == 3 && ids[1] == 4 })).Return(nil).Once() mockRsClient.On("AcquireJobsForRunnerScaleSet", ctx, mock.MatchedBy(func(ids []int64) bool { return ids[0] == 3 && ids[1] == 4 })).Return(nil).Once()
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Run(func(args mock.Arguments) { cancel() }).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 3).Run(func(args mock.Arguments) { cancel() }).Return(nil).Once()
err = service.processMessage(&actions.RunnerScaleSetMessage{ err = service.processMessage(&actions.RunnerScaleSetMessage{
MessageId: 1, MessageId: 1,
@ -523,9 +523,9 @@ func TestScaleForAssignedJobCount_ScaleWithinMinMax(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 1).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 1).Return(nil).Once()
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 3).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 4).Return(nil).Once()
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 5).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 5).Return(nil).Once()
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 1).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Return(nil).Once()
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 5).Return(nil).Once() mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 5).Return(nil).Once()
err = service.scaleForAssignedJobCount(0) err = service.scaleForAssignedJobCount(0)
@ -569,7 +569,7 @@ func TestScaleForAssignedJobCount_ScaleFailed(t *testing.T) {
) )
require.NoError(t, err) require.NoError(t, err)
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Return(fmt.Errorf("error")) mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 3).Return(fmt.Errorf("error"))
err = service.scaleForAssignedJobCount(2) err = service.scaleForAssignedJobCount(2)
@ -605,8 +605,23 @@ func TestProcessMessage_JobStartedMessage(t *testing.T) {
service.currentRunnerCount = 1 service.currentRunnerCount = 1
mockKubeManager.On("UpdateEphemeralRunnerWithJobInfo", ctx, service.settings.Namespace, "runner1", "owner1", "repo1", ".github/workflows/ci.yaml", "job1", int64(100), int64(3)).Run(func(args mock.Arguments) { cancel() }).Return(nil).Once() mockKubeManager.On(
"UpdateEphemeralRunnerWithJobInfo",
ctx,
service.settings.Namespace,
"runner1",
"owner1",
"repo1",
".github/workflows/ci.yaml",
"job1",
int64(100),
int64(3),
).Run(
func(_ mock.Arguments) { cancel() },
).Return(nil).Once()
mockRsClient.On("AcquireJobsForRunnerScaleSet", ctx, mock.MatchedBy(func(ids []int64) bool { return len(ids) == 0 })).Return(nil).Once() mockRsClient.On("AcquireJobsForRunnerScaleSet", ctx, mock.MatchedBy(func(ids []int64) bool { return len(ids) == 0 })).Return(nil).Once()
mockKubeManager.On("ScaleEphemeralRunnerSet", ctx, service.settings.Namespace, service.settings.ResourceName, 2).Return(nil)
err = service.processMessage(&actions.RunnerScaleSetMessage{ err = service.processMessage(&actions.RunnerScaleSetMessage{
MessageId: 1, MessageId: 1,

View File

@ -1,6 +1,6 @@
# Changing semantics of the `minRunners` field # Changing semantics of the `minRunners` field
**Status**: Proposed **Status**: Accepted
## Context ## Context