Add "PercentageRunnersBusy" horizontal runner autoscaler metric type (#223)

* hpa scheme based off busy runners * running make manifests Co-authored-by: Zachary Benamram <zacharybenamram@blend.com>
2020-12-12 15:48:19 -08:00 · 2020-12-12 15:48:19 -08:00 · 466b30728d
parent c13704d7e2
commit 466b30728d
5 changed files with 180 additions and 8 deletions
--- a/api/v1alpha1/horizontalrunnerautoscaler_types.go
+++ b/api/v1alpha1/horizontalrunnerautoscaler_types.go
@ -56,6 +56,26 @@ type MetricSpec struct {
 	// For example, a repository name is the REPO part of `github.com/USER/REPO`.
 	// +optional
 	RepositoryNames []string `json:"repositoryNames,omitempty"`
+
+	// ScaleUpThreshold is the percentage of busy runners greater than which will
+	// trigger the hpa to scale runners up.
+	// +optional
+	ScaleUpThreshold string `json:"scaleUpThreshold,omitempty"`
+
+	// ScaleDownThreshold is the percentage of busy runners less than which will
+	// trigger the hpa to scale the runners down.
+	// +optional
+	ScaleDownThreshold string `json:"scaleDownThreshold,omitempty"`
+
+	// ScaleUpFactor is the multiplicative factor applied to the current number of runners used
+	// to determine how many pods should be added.
+	// +optional
+	ScaleUpFactor string `json:"scaleUpFactor,omitempty"`
+
+	// ScaleDownFactor is the multiplicative factor applied to the current number of runners used
+	// to determine how many pods should be removed.
+	// +optional
+	ScaleDownFactor string `json:"scaleDownFactor,omitempty"`
 }

 type HorizontalRunnerAutoscalerStatus struct {
--- a/api/v1alpha1/runnerdeployment_types.go
+++ b/api/v1alpha1/runnerdeployment_types.go
@ -22,6 +22,7 @@ import (

+// Metric type names accepted in a HorizontalRunnerAutoscaler metric's Type field.
+// The autoscaling controller picks its replica calculation from this value:
+// TotalNumberOfQueuedAndInProgressWorkflowRuns (also used when no metric is
+// configured) sizes by workflow runs, while PercentageRunnersBusy sizes by the
+// fraction of runners currently reported busy.
 const (
 	AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns"
+	AutoscalingMetricTypePercentageRunnersBusy                        = "PercentageRunnersBusy"
 )

 // RunnerReplicaSetSpec defines the desired state of RunnerDeployment
--- a/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
+++ b/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
@ -64,6 +64,24 @@ spec:
                    items:
                      type: string
                    type: array
+                  scaleDownFactor:
+                    description: ScaleDownFactor is the multiplicative factor applied
+                      to the current number of runners used to determine how many
+                      pods should be removed.
+                    type: string
+                  scaleDownThreshold:
+                    description: ScaleDownThreshold is the percentage of busy runners
+                      less than which will trigger the hpa to scale the runners down.
+                    type: string
+                  scaleUpFactor:
+                    description: ScaleUpFactor is the multiplicative factor applied
+                      to the current number of runners used to determine how many
+                      pods should be added.
+                    type: string
+                  scaleUpThreshold:
+                    description: ScaleUpThreshold is the percentage of busy runners
+                      greater than which will trigger the hpa to scale runners up.
+                    type: string
                  type:
                    description: Type is the type of metric to be used for autoscaling.
                      The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
--- a/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
+++ b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
@ -64,6 +64,24 @@ spec:
                    items:
                      type: string
                    type: array
+                  scaleDownFactor:
+                    description: ScaleDownFactor is the multiplicative factor applied
+                      to the current number of runners used to determine how many
+                      pods should be removed.
+                    type: string
+                  scaleDownThreshold:
+                    description: ScaleDownThreshold is the percentage of busy runners
+                      less than which will trigger the hpa to scale the runners down.
+                    type: string
+                  scaleUpFactor:
+                    description: ScaleUpFactor is the multiplicative factor applied
+                      to the current number of runners used to determine how many
+                      pods should be added.
+                    type: string
+                  scaleUpThreshold:
+                    description: ScaleUpThreshold is the percentage of busy runners
+                      greater than which will trigger the hpa to scale runners up.
+                    type: string
                  type:
                    description: Type is the type of metric to be used for autoscaling.
                      The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
--- a/controllers/autoscaling.go
+++ b/controllers/autoscaling.go
@ -4,9 +4,18 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"strconv"
 	"strings"

 	"github.com/summerwind/actions-runner-controller/api/v1alpha1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// Defaults for the PercentageRunnersBusy metric, applied when the matching
+// MetricSpec string field (scaleUpThreshold, scaleDownThreshold, scaleUpFactor,
+// scaleDownFactor) is left empty. Thresholds are compared against the busy
+// fraction (busy runners / runners), so they are fractions rather than 0-100
+// percentages; factors are multiplied with the current number of runners.
+const (
+	defaultScaleUpThreshold   = 0.8
+	defaultScaleDownThreshold = 0.3
+	defaultScaleUpFactor      = 1.3
+	defaultScaleDownFactor    = 0.7
 )

 func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
@ -16,8 +25,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
 		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
 	}

-	var repos [][]string
+	metrics := hra.Spec.Metrics
+	if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
+		return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
+	} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy {
+		return r.calculateReplicasByPercentageRunnersBusy(rd, hra)
+	} else {
+		return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type)
+	}
+}

+func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
+
+	var repos [][]string
+	metrics := hra.Spec.Metrics
 	repoID := rd.Spec.Template.Spec.Repository
 	if repoID == "" {
 		orgName := rd.Spec.Template.Spec.Organization
@ -25,13 +46,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
 			return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path")
 		}

-		metrics := hra.Spec.Metrics
-
-		if len(metrics) == 0 {
-			return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
-		} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
-			return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns)
-		} else if len(metrics[0].RepositoryNames) == 0 {
+		if len(metrics[0].RepositoryNames) == 0 {
 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
 		}

@ -135,3 +150,103 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp

 	return &replicas, nil
 }
+
+// parseMetricFloat returns def when raw is empty and otherwise raw parsed as a
+// float64. field is the JSON name of the MetricSpec field and is only used to
+// build the error message; the strconv error is wrapped so the cause is kept.
+func parseMetricFloat(raw, field string, def float64) (float64, error) {
+	if raw == "" {
+		return def, nil
+	}
+	v, err := strconv.ParseFloat(raw, 64)
+	if err != nil {
+		return 0, fmt.Errorf("validating autoscaling metrics: spec.autoscaling.metrics[].%s cannot be parsed into a float64: %w", field, err)
+	}
+	return v, nil
+}
+
+// calculateReplicasByPercentageRunnersBusy computes the desired replica count
+// for rd from the fraction of runners that GitHub reports as busy.
+//
+// The fraction is (busy runners whose name matches a Runner resource in rd's
+// namespace) / (Runner resources in rd's namespace):
+//   - fraction >= scaleUpThreshold: runners * scaleUpFactor, rounded half up,
+//     capped at maxReplicas;
+//   - fraction < scaleDownThreshold: runners * scaleDownFactor, truncated,
+//     floored at minReplicas;
+//   - otherwise, or when there are no runners to measure: the current replica
+//     count is kept.
+//
+// Thresholds and factors come from the first metric of hra and fall back to
+// the package defaults when empty. An error is returned when one of them is
+// not a valid float, or when listing Runner resources or GitHub runners fails.
+//
+// The caller must have verified that hra has at least one metric.
+// NOTE(review): MinReplicas/MaxReplicas are dereferenced unconditionally here;
+// determineDesiredReplicas appears to reject nil values first - confirm.
+func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
+	ctx := context.Background()
+	orgName := rd.Spec.Template.Spec.Organization
+	minReplicas := *hra.Spec.MinReplicas
+	maxReplicas := *hra.Spec.MaxReplicas
+	metrics := hra.Spec.Metrics[0]
+
+	scaleUpThreshold, err := parseMetricFloat(metrics.ScaleUpThreshold, "scaleUpThreshold", defaultScaleUpThreshold)
+	if err != nil {
+		return nil, err
+	}
+	scaleDownThreshold, err := parseMetricFloat(metrics.ScaleDownThreshold, "scaleDownThreshold", defaultScaleDownThreshold)
+	if err != nil {
+		return nil, err
+	}
+	scaleUpFactor, err := parseMetricFloat(metrics.ScaleUpFactor, "scaleUpFactor", defaultScaleUpFactor)
+	if err != nil {
+		return nil, err
+	}
+	scaleDownFactor, err := parseMetricFloat(metrics.ScaleDownFactor, "scaleDownFactor", defaultScaleDownFactor)
+	if err != nil {
+		return nil, err
+	}
+
+	// Only Runner resources in the deployment's own namespace are considered:
+	// the autoscaler should only be responsible for scaling resources in its own ns.
+	var runnerList v1alpha1.RunnerList
+	if err := r.List(ctx, &runnerList, client.InNamespace(rd.Namespace)); err != nil {
+		return nil, err
+	}
+	numRunners := len(runnerList.Items)
+	runnerMap := make(map[string]struct{}, numRunners)
+	for i := range runnerList.Items {
+		runnerMap[runnerList.Items[i].Name] = struct{}{}
+	}
+
+	// ListRunners returns all runners managed by GitHub - not restricted to ns -
+	// so the result is filtered through runnerMap below.
+	runners, err := r.GitHubClient.ListRunners(ctx, orgName, "")
+	if err != nil {
+		return nil, err
+	}
+	numRunnersBusy := 0
+	for _, runner := range runners {
+		// Name is a pointer; a runner without a name cannot be matched to a
+		// Runner resource, so skip it instead of dereferencing nil.
+		if runner.Name == nil {
+			continue
+		}
+		if _, ok := runnerMap[*runner.Name]; ok && runner.GetBusy() {
+			numRunnersBusy++
+		}
+	}
+
+	// Replica count to keep when no scaling decision is made. spec.replicas may
+	// be unset; in that case fall back to the observed number of runners, kept
+	// within the configured bounds, rather than panicking on a nil pointer.
+	currentReplicas := numRunners
+	switch {
+	case rd.Spec.Replicas != nil:
+		currentReplicas = *rd.Spec.Replicas
+	case currentReplicas < minReplicas:
+		currentReplicas = minReplicas
+	case currentReplicas > maxReplicas:
+		currentReplicas = maxReplicas
+	}
+
+	var desiredReplicas int
+	if numRunners == 0 {
+		// The busy fraction is undefined (0/0) without runners, so there is
+		// nothing to base a scaling decision on: keep the current size.
+		desiredReplicas = currentReplicas
+	} else {
+		fractionBusy := float64(numRunnersBusy) / float64(numRunners)
+		switch {
+		case fractionBusy >= scaleUpThreshold:
+			// Round half up, then cap at maxReplicas.
+			// NOTE(review): for a single runner and the default factor this
+			// yields int(1.3+0.5) == 1, i.e. no growth - confirm whether
+			// rounding up (ceil) is what is wanted here.
+			desiredReplicas = int(float64(numRunners)*scaleUpFactor + 0.5)
+			if desiredReplicas > maxReplicas {
+				desiredReplicas = maxReplicas
+			}
+		case fractionBusy < scaleDownThreshold:
+			// Truncate, then floor at minReplicas.
+			desiredReplicas = int(float64(numRunners) * scaleDownFactor)
+			if desiredReplicas < minReplicas {
+				desiredReplicas = minReplicas
+			}
+		default:
+			desiredReplicas = currentReplicas
+		}
+	}
+
+	r.Log.V(1).Info(
+		"Calculated desired replicas",
+		"computed_replicas_desired", desiredReplicas,
+		"spec_replicas_min", minReplicas,
+		"spec_replicas_max", maxReplicas,
+		"current_replicas", currentReplicas,
+		"num_runners", numRunners,
+		"num_runners_busy", numRunnersBusy,
+	)
+
+	// NOTE(review): rd is received by value, so this assignment presumably never
+	// reaches the caller's object - confirm whether it can be dropped.
+	rd.Status.Replicas = &desiredReplicas
+	replicas := desiredReplicas
+
+	return &replicas, nil
+}