Add "PercentageRunnersBusy" horizontal runner autoscaler metric type (#223)

* hpa scheme based off busy runners * running make manifests Co-authored-by: Zachary Benamram <zacharybenamram@blend.com>
2020-12-12 15:48:19 -08:00 · 2020-12-12 15:48:19 -08:00 · 466b30728d
parent c13704d7e2
commit 466b30728d
5 changed files with 180 additions and 8 deletions
--- a/api/v1alpha1/horizontalrunnerautoscaler_types.go
+++ b/api/v1alpha1/horizontalrunnerautoscaler_types.go
@ -56,6 +56,26 @@ type MetricSpec struct {
 	// For example, a repository name is the REPO part of `github.com/USER/REPO`.
 	// +optional
 	RepositoryNames []string `json:"repositoryNames,omitempty"`
 	// ScaleUpThreshold is the percentage of busy runners greater than which will
 	// trigger the hpa to scale runners up.
 	// +optional
 	ScaleUpThreshold string `json:"scaleUpThreshold,omitempty"`
 	// ScaleDownThreshold is the percentage of busy runners less than which will
 	// trigger the hpa to scale the runners down.
 	// +optional
 	ScaleDownThreshold string `json:"scaleDownThreshold,omitempty"`
 	// ScaleUpFactor is the multiplicative factor applied to the current number of runners used
 	// to determine how many pods should be added.
 	// +optional
 	ScaleUpFactor string `json:"scaleUpFactor,omitempty"`
 	// ScaleDownFactor is the multiplicative factor applied to the current number of runners used
 	// to determine how many pods should be removed.
 	// +optional
 	ScaleDownFactor string `json:"scaleDownFactor,omitempty"`
 }
 type HorizontalRunnerAutoscalerStatus struct {
--- a/api/v1alpha1/runnerdeployment_types.go
+++ b/api/v1alpha1/runnerdeployment_types.go
@ -22,6 +22,7 @@ import (
 // Metric type identifiers accepted in a horizontal runner autoscaler metric's
 // Type field. The autoscaler dispatches on these values to pick the replica
 // calculation strategy.
 const (
 	// AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns selects
 	// scaling based on queued and in-progress workflow runs. It is also the
 	// strategy used when no metric is configured.
 	AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns"
 	// AutoscalingMetricTypePercentageRunnersBusy selects scaling based on the
 	// fraction of runners that are currently busy.
 	AutoscalingMetricTypePercentageRunnersBusy                        = "PercentageRunnersBusy"
 )
 // RunnerReplicaSetSpec defines the desired state of RunnerDeployment
--- a/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
+++ b/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
@ -64,6 +64,24 @@ spec:
                    items:
                      type: string
                    type: array
                  scaleDownFactor:
                    description: ScaleDownFactor is the multiplicative factor applied
                      to the current number of runners used to determine how many
                      pods should be removed.
                    type: string
                  scaleDownThreshold:
                    description: ScaleDownThreshold is the percentage of busy runners
                      less than which will trigger the hpa to scale the runners down.
                    type: string
                  scaleUpFactor:
                    description: ScaleUpFactor is the multiplicative factor applied
                      to the current number of runners used to determine how many
                      pods should be added.
                    type: string
                  scaleUpThreshold:
                    description: ScaleUpThreshold is the percentage of busy runners
                      greater than which will trigger the hpa to scale runners up.
                    type: string
                  type:
                     description: Type is the type of metric to be used for autoscaling.
                       Supported Types are TotalNumberOfQueuedAndInProgressWorkflowRuns
                       and PercentageRunnersBusy.
--- a/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
+++ b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
@ -64,6 +64,24 @@ spec:
                    items:
                      type: string
                    type: array
                  scaleDownFactor:
                    description: ScaleDownFactor is the multiplicative factor applied
                      to the current number of runners used to determine how many
                      pods should be removed.
                    type: string
                  scaleDownThreshold:
                    description: ScaleDownThreshold is the percentage of busy runners
                      less than which will trigger the hpa to scale the runners down.
                    type: string
                  scaleUpFactor:
                    description: ScaleUpFactor is the multiplicative factor applied
                      to the current number of runners used to determine how many
                      pods should be added.
                    type: string
                  scaleUpThreshold:
                    description: ScaleUpThreshold is the percentage of busy runners
                      greater than which will trigger the hpa to scale runners up.
                    type: string
                  type:
                     description: Type is the type of metric to be used for autoscaling.
                       Supported Types are TotalNumberOfQueuedAndInProgressWorkflowRuns
                       and PercentageRunnersBusy.
--- a/controllers/autoscaling.go
+++ b/controllers/autoscaling.go
@ -4,9 +4,18 @@ import (
 	"context"
 	"errors"
 	"fmt"
 	"strconv"
 	"strings"
 	"github.com/summerwind/actions-runner-controller/api/v1alpha1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 // Defaults for the PercentageRunnersBusy metric, used when the corresponding
 // field of the metric spec is left empty.
 const (
 	// defaultScaleUpThreshold is the busy fraction (busy runners / runners) at
 	// or above which the runner count is scaled up.
 	defaultScaleUpThreshold   = 0.8
 	// defaultScaleDownThreshold is the busy fraction below which the runner
 	// count is scaled down.
 	defaultScaleDownThreshold = 0.3
 	// defaultScaleUpFactor multiplies the current number of runners to obtain
 	// the scaled-up replica count.
 	defaultScaleUpFactor      = 1.3
 	// defaultScaleDownFactor multiplies the current number of runners to obtain
 	// the scaled-down replica count.
 	defaultScaleDownFactor    = 0.7
 )
 func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
@ -16,8 +25,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
 		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
 	}
-	var repos [][]string
+	metrics := hra.Spec.Metrics
 	if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
 		return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
 	} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy {
 		return r.calculateReplicasByPercentageRunnersBusy(rd, hra)
 	} else {
 		return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type)
 	}
 }
 func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
 	var repos [][]string
 	metrics := hra.Spec.Metrics
 	repoID := rd.Spec.Template.Spec.Repository
 	if repoID == "" {
 		orgName := rd.Spec.Template.Spec.Organization
@ -25,13 +46,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
 			return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path")
 		}
-		metrics := hra.Spec.Metrics
+		if len(metrics[0].RepositoryNames) == 0 {
 		if len(metrics) == 0 {
 			return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
 		} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
 			return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns)
 		} else if len(metrics[0].RepositoryNames) == 0 {
 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
 		}
@ -135,3 +150,103 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
 	return &replicas, nil
 }
 // calculateReplicasByPercentageRunnersBusy computes the desired replica count
 // for rd from the fraction of runners that GitHub reports as busy.
 //
 // The busy fraction is (busy runners known to both the cluster namespace and
 // GitHub) / (runners in the namespace). When it is at or above the scale-up
 // threshold, the runner count is multiplied by the scale-up factor (rounded
 // half up); when it is below the scale-down threshold, the runner count is
 // multiplied by the scale-down factor (truncated). Otherwise the current
 // replica count is kept. Thresholds and factors come from the first metric of
 // hra and fall back to the package defaults when left empty. The result is
 // always clamped into [minReplicas, maxReplicas].
 //
 // It returns an error when minReplicas, maxReplicas or the metric is missing,
 // when a threshold/factor cannot be parsed as a float64, or when listing
 // runners from the cluster or from GitHub fails.
 func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
 	// Guard the pointer/slice accesses below so this method is safe even when
 	// it is not reached through a pre-validating caller.
 	if hra.Spec.MinReplicas == nil {
 		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name)
 	}
 	if hra.Spec.MaxReplicas == nil {
 		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
 	}
 	if len(hra.Spec.Metrics) == 0 {
 		return nil, errors.New("validating autoscaling metrics: one or more metrics is required")
 	}

 	minReplicas := *hra.Spec.MinReplicas
 	maxReplicas := *hra.Spec.MaxReplicas
 	metrics := hra.Spec.Metrics[0]

 	// parse converts an optional string setting into a float64, using fallback
 	// when the setting is empty and keeping the parse error in the chain.
 	parse := func(field, raw string, fallback float64) (float64, error) {
 		if raw == "" {
 			return fallback, nil
 		}
 		v, err := strconv.ParseFloat(raw, 64)
 		if err != nil {
 			return 0, fmt.Errorf("validating autoscaling metrics: spec.autoscaling.metrics[].%s cannot be parsed into a float64: %w", field, err)
 		}
 		return v, nil
 	}

 	scaleUpThreshold, err := parse("scaleUpThreshold", metrics.ScaleUpThreshold, defaultScaleUpThreshold)
 	if err != nil {
 		return nil, err
 	}
 	scaleDownThreshold, err := parse("scaleDownThreshold", metrics.ScaleDownThreshold, defaultScaleDownThreshold)
 	if err != nil {
 		return nil, err
 	}
 	scaleUpFactor, err := parse("scaleUpFactor", metrics.ScaleUpFactor, defaultScaleUpFactor)
 	if err != nil {
 		return nil, err
 	}
 	scaleDownFactor, err := parse("scaleDownFactor", metrics.ScaleDownFactor, defaultScaleDownFactor)
 	if err != nil {
 		return nil, err
 	}

 	ctx := context.Background()

 	// List the runners in the deployment's namespace. The autoscaler should
 	// only be responsible for scaling resources in its own namespace.
 	var runnerList v1alpha1.RunnerList
 	if err := r.List(ctx, &runnerList, client.InNamespace(rd.Namespace)); err != nil {
 		return nil, fmt.Errorf("listing runners in namespace %q: %w", rd.Namespace, err)
 	}

 	numRunners := len(runnerList.Items)
 	runnerNames := make(map[string]struct{}, numRunners)
 	for i := range runnerList.Items {
 		runnerNames[runnerList.Items[i].Name] = struct{}{}
 	}

 	// ListRunners returns all runners managed by GitHub - not restricted to the
 	// namespace - so the result is intersected with runnerNames below.
 	// The repository is passed along with the organization so that
 	// repository-scoped deployments (empty organization) are queried too.
 	// NOTE(review): assumes ListRunners resolves the owner from whichever of
 	// org/repo is non-empty - confirm against the GitHub client.
 	runners, err := r.GitHubClient.ListRunners(ctx, rd.Spec.Template.Spec.Organization, rd.Spec.Template.Spec.Repository)
 	if err != nil {
 		return nil, fmt.Errorf("listing runners from GitHub: %w", err)
 	}

 	numRunnersBusy := 0
 	for _, runner := range runners {
 		// GetName/GetBusy are nil-safe, unlike dereferencing runner.Name.
 		if _, ok := runnerNames[runner.GetName()]; ok && runner.GetBusy() {
 			numRunnersBusy++
 		}
 	}

 	// The replica count to hold when no scaling is warranted. Replicas is
 	// optional on the deployment spec, so fall back to the observed runners.
 	currentReplicas := numRunners
 	if rd.Spec.Replicas != nil {
 		currentReplicas = *rd.Spec.Replicas
 	}

 	desiredReplicas := currentReplicas
 	// Without runners the busy fraction is 0/0 (NaN); hold the current count
 	// instead of comparing against NaN.
 	if numRunners > 0 {
 		fractionBusy := float64(numRunnersBusy) / float64(numRunners)
 		switch {
 		case fractionBusy >= scaleUpThreshold:
 			desiredReplicas = int(float64(numRunners)*scaleUpFactor + 0.5)
 		case fractionBusy < scaleDownThreshold:
 			desiredReplicas = int(float64(numRunners) * scaleDownFactor)
 		}
 	}

 	// Enforce both bounds regardless of which branch produced the value.
 	if desiredReplicas < minReplicas {
 		desiredReplicas = minReplicas
 	}
 	if desiredReplicas > maxReplicas {
 		desiredReplicas = maxReplicas
 	}

 	r.Log.V(1).Info(
 		"Calculated desired replicas",
 		"computed_replicas_desired", desiredReplicas,
 		"spec_replicas_min", minReplicas,
 		"spec_replicas_max", maxReplicas,
 		"current_replicas", currentReplicas,
 		"num_runners", numRunners,
 		"num_runners_busy", numRunnersBusy,
 	)

 	// rd is passed by value, so writing the result into rd.Status would not be
 	// visible to the caller; the desired count is conveyed only via the return.
 	return &desiredReplicas, nil
 }