Add "PercentageRunnersBusy" horizontal runner autoscaler metric type (#223)

* hpa scheme based off busy runners

* running make manifests

Co-authored-by: Zachary Benamram <zacharybenamram@blend.com>
This commit is contained in:
ZacharyBenamram 2020-12-12 15:48:19 -08:00 committed by GitHub
parent c13704d7e2
commit 466b30728d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 180 additions and 8 deletions

View File

@ -56,6 +56,26 @@ type MetricSpec struct {
// For example, a repository name is the REPO part of `github.com/USER/REPO`.
// +optional
RepositoryNames []string `json:"repositoryNames,omitempty"`
// ScaleUpThreshold is the percentage of busy runners greater than which will
// trigger the hpa to scale runners up.
// +optional
ScaleUpThreshold string `json:"scaleUpThreshold,omitempty"`
// ScaleDownThreshold is the percentage of busy runners less than which will
// trigger the hpa to scale the runners down.
// +optional
ScaleDownThreshold string `json:"scaleDownThreshold,omitempty"`
// ScaleUpFactor is the multiplicative factor applied to the current number of runners used
// to determine how many pods should be added.
// +optional
ScaleUpFactor string `json:"scaleUpFactor,omitempty"`
// ScaleDownFactor is the multiplicative factor applied to the current number of runners used
// to determine how many pods should be removed.
// +optional
ScaleDownFactor string `json:"scaleDownFactor,omitempty"`
}
type HorizontalRunnerAutoscalerStatus struct {

View File

@ -22,6 +22,7 @@ import (
// Metric type names accepted in a HorizontalRunnerAutoscaler's metrics list.
// The autoscaler inspects the Type of the first metric entry to pick the
// replica calculation strategy.
const (
// AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns selects
// scaling based on the number of queued and in-progress workflow runs. It is
// also the strategy used when no metrics are configured at all.
AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns"
// AutoscalingMetricTypePercentageRunnersBusy selects scaling based on the
// fraction of runners that are currently busy.
AutoscalingMetricTypePercentageRunnersBusy = "PercentageRunnersBusy"
)
// RunnerReplicaSetSpec defines the desired state of RunnerDeployment

View File

@ -64,6 +64,24 @@ spec:
items:
type: string
type: array
scaleDownFactor:
description: ScaleDownFactor is the multiplicative factor applied
to the current number of runners used to determine how many
pods should be removed.
type: string
scaleDownThreshold:
description: ScaleDownThreshold is the percentage of busy runners
less than which will trigger the hpa to scale the runners down.
type: string
scaleUpFactor:
description: ScaleUpFactor is the multiplicative factor applied
to the current number of runners used to determine how many
pods should be added.
type: string
scaleUpThreshold:
description: ScaleUpThreshold is the percentage of busy runners
greater than which will trigger the hpa to scale runners up.
type: string
type:
description: Type is the type of metric to be used for autoscaling.
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns

View File

@ -64,6 +64,24 @@ spec:
items:
type: string
type: array
scaleDownFactor:
description: ScaleDownFactor is the multiplicative factor applied
to the current number of runners used to determine how many
pods should be removed.
type: string
scaleDownThreshold:
description: ScaleDownThreshold is the percentage of busy runners
less than which will trigger the hpa to scale the runners down.
type: string
scaleUpFactor:
description: ScaleUpFactor is the multiplicative factor applied
to the current number of runners used to determine how many
pods should be added.
type: string
scaleUpThreshold:
description: ScaleUpThreshold is the percentage of busy runners
greater than which will trigger the hpa to scale runners up.
type: string
type:
description: Type is the type of metric to be used for autoscaling.
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns

View File

@ -4,9 +4,18 @@ import (
"context"
"errors"
"fmt"
"strconv"
"strings"
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
// Fallback tuning values for the PercentageRunnersBusy metric. Each one is used
// when the corresponding string field of the metric spec is left empty.
const (
// defaultScaleUpThreshold is the busy fraction at or above which runners are scaled up.
defaultScaleUpThreshold = 0.8
// defaultScaleDownThreshold is the busy fraction below which runners are scaled down.
defaultScaleDownThreshold = 0.3
// defaultScaleUpFactor is multiplied by the current runner count to get the scaled-up replica count.
defaultScaleUpFactor = 1.3
// defaultScaleDownFactor is multiplied by the current runner count to get the scaled-down replica count.
defaultScaleDownFactor = 0.7
)
func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
@ -16,8 +25,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
}
var repos [][]string
metrics := hra.Spec.Metrics
if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy {
return r.calculateReplicasByPercentageRunnersBusy(rd, hra)
} else {
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type)
}
}
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
var repos [][]string
metrics := hra.Spec.Metrics
repoID := rd.Spec.Template.Spec.Repository
if repoID == "" {
orgName := rd.Spec.Template.Spec.Organization
@ -25,13 +46,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path")
}
metrics := hra.Spec.Metrics
if len(metrics) == 0 {
return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns)
} else if len(metrics[0].RepositoryNames) == 0 {
if len(metrics[0].RepositoryNames) == 0 {
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
}
@ -135,3 +150,103 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
return &replicas, nil
}
// calculateReplicasByPercentageRunnersBusy computes the desired replica count
// for rd from the fraction of runners that GitHub reports as busy.
//
// The runner population is every Runner resource in rd's namespace; a runner
// counts as busy when GitHub lists a runner with the same name whose busy flag
// is set. With N runners and busy fraction f:
//
//   - f >= scaleUpThreshold:  desired = round(N * scaleUpFactor)
//   - f <  scaleDownThreshold: desired = floor(N * scaleDownFactor)
//   - otherwise, or when N is zero: desired = the current replica count
//
// The result is always clamped to [minReplicas, maxReplicas]. Thresholds and
// factors come from the first metric entry and fall back to the package
// defaults when left empty.
//
// It returns an error when minReplicas, maxReplicas or the metric entry is
// missing, when a threshold/factor cannot be parsed as a float, or when listing
// runners (from the cluster or from GitHub) fails.
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
	// Guard every dereference below so a direct call can never panic.
	if hra.Spec.MinReplicas == nil {
		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name)
	}
	if hra.Spec.MaxReplicas == nil {
		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
	}
	if len(hra.Spec.Metrics) == 0 {
		return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
	}

	ctx := context.Background()
	minReplicas := *hra.Spec.MinReplicas
	maxReplicas := *hra.Spec.MaxReplicas
	metric := hra.Spec.Metrics[0]

	scaleUpThreshold, err := parseMetricFloat(metric.ScaleUpThreshold, "scaleUpThreshold", defaultScaleUpThreshold)
	if err != nil {
		return nil, err
	}
	scaleDownThreshold, err := parseMetricFloat(metric.ScaleDownThreshold, "scaleDownThreshold", defaultScaleDownThreshold)
	if err != nil {
		return nil, err
	}
	scaleUpFactor, err := parseMetricFloat(metric.ScaleUpFactor, "scaleUpFactor", defaultScaleUpFactor)
	if err != nil {
		return nil, err
	}
	scaleDownFactor, err := parseMetricFloat(metric.ScaleDownFactor, "scaleDownFactor", defaultScaleDownFactor)
	if err != nil {
		return nil, err
	}

	// List the runners in the deployment's namespace: the autoscaler should
	// only be responsible for scaling resources in its own namespace.
	// NOTE(review): this matches every Runner in the namespace, not only those
	// owned by rd — confirm that is intended when several deployments share a
	// namespace.
	var runnerList v1alpha1.RunnerList
	if err := r.List(ctx, &runnerList, client.InNamespace(rd.Namespace)); err != nil {
		return nil, err
	}
	runnerNames := make(map[string]struct{}, len(runnerList.Items))
	for i := range runnerList.Items {
		runnerNames[runnerList.Items[i].Name] = struct{}{}
	}

	// ListRunners returns all runners managed by GitHub, not restricted to the
	// namespace, so the result is filtered against runnerNames below.
	// The repository is passed along so repository-scoped deployments (which
	// have an empty organization) can be resolved too.
	// NOTE(review): assumes the third ListRunners argument is the repository — confirm.
	runners, err := r.GitHubClient.ListRunners(ctx, rd.Spec.Template.Spec.Organization, rd.Spec.Template.Spec.Repository)
	if err != nil {
		return nil, err
	}

	numRunners := len(runnerList.Items)
	numRunnersBusy := 0
	for _, runner := range runners {
		// GetName is nil-safe, unlike dereferencing runner.Name directly.
		if _, ok := runnerNames[runner.GetName()]; ok && runner.GetBusy() {
			numRunnersBusy++
		}
	}

	// The current replica count is what we hold at when no scaling is needed.
	// Fall back to the observed runner count if the spec leaves it unset.
	currentReplicas := numRunners
	if rd.Spec.Replicas != nil {
		currentReplicas = *rd.Spec.Replicas
	}

	desiredReplicas := currentReplicas
	// With zero runners the busy fraction is undefined (0/0), so hold steady
	// instead of comparing NaN against the thresholds.
	if numRunners > 0 {
		fractionBusy := float64(numRunnersBusy) / float64(numRunners)
		switch {
		case fractionBusy >= scaleUpThreshold:
			// +0.5 rounds to the nearest integer so small fleets can still grow.
			desiredReplicas = int(float64(numRunners)*scaleUpFactor + 0.5)
		case fractionBusy < scaleDownThreshold:
			desiredReplicas = int(float64(numRunners) * scaleDownFactor)
		}
	}

	// Enforce the configured bounds on every path, including the hold path.
	if desiredReplicas > maxReplicas {
		desiredReplicas = maxReplicas
	}
	if desiredReplicas < minReplicas {
		desiredReplicas = minReplicas
	}

	r.Log.V(1).Info(
		"Calculated desired replicas",
		"computed_replicas_desired", desiredReplicas,
		"spec_replicas_min", minReplicas,
		"spec_replicas_max", maxReplicas,
		"current_replicas", currentReplicas,
		"num_runners", numRunners,
		"num_runners_busy", numRunnersBusy,
	)

	// NOTE(review): rd is received by value, so this assignment presumably does
	// not reach the caller's object; kept to preserve the original behavior.
	rd.Status.Replicas = &desiredReplicas
	// Return a pointer to a separate copy so it does not alias the status field.
	replicas := desiredReplicas

	return &replicas, nil
}

// parseMetricFloat parses an optional autoscaling metric field as a float64.
// An empty value yields fallback; field names the spec field in the error.
func parseMetricFloat(value, field string, fallback float64) (float64, error) {
	if value == "" {
		return fallback, nil
	}
	parsed, err := strconv.ParseFloat(value, 64)
	if err != nil {
		return 0, fmt.Errorf("validating autoscaling metrics: spec.autoscaling.metrics[].%s cannot be parsed into a float64: %v", field, err)
	}
	return parsed, nil
}