Add "PercentageRunnersBusy" horizontal runner autoscaler metric type (#223)
* hpa scheme based off busy runners * running make manifests Co-authored-by: Zachary Benamram <zacharybenamram@blend.com>
This commit is contained in:
parent
c13704d7e2
commit
466b30728d
|
|
@ -56,6 +56,26 @@ type MetricSpec struct {
|
|||
// For example, a repository name is the REPO part of `github.com/USER/REPO`.
|
||||
// +optional
|
||||
RepositoryNames []string `json:"repositoryNames,omitempty"`
|
||||
|
||||
// ScaleUpThreshold is the percentage of busy runners greater than which will
|
||||
// trigger the hpa to scale runners up.
|
||||
// +optional
|
||||
ScaleUpThreshold string `json:"scaleUpThreshold,omitempty"`
|
||||
|
||||
// ScaleDownThreshold is the percentage of busy runners less than which will
|
||||
// trigger the hpa to scale the runners down.
|
||||
// +optional
|
||||
ScaleDownThreshold string `json:"scaleDownThreshold,omitempty"`
|
||||
|
||||
// ScaleUpFactor is the multiplicative factor applied to the current number of runners used
|
||||
// to determine how many pods should be added.
|
||||
// +optional
|
||||
ScaleUpFactor string `json:"scaleUpFactor,omitempty"`
|
||||
|
||||
// ScaleDownFactor is the multiplicative factor applied to the current number of runners used
|
||||
// to determine how many pods should be removed.
|
||||
// +optional
|
||||
ScaleDownFactor string `json:"scaleDownFactor,omitempty"`
|
||||
}
|
||||
|
||||
type HorizontalRunnerAutoscalerStatus struct {
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import (
|
|||
|
||||
// Supported values for the Type field of an autoscaling metric.
const (
	// AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns selects
	// autoscaling driven by the number of queued and in-progress workflow runs.
	AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns"
	// AutoscalingMetricTypePercentageRunnersBusy selects autoscaling driven by
	// the share of runners that are currently busy.
	AutoscalingMetricTypePercentageRunnersBusy = "PercentageRunnersBusy"
)
|
||||
|
||||
// RunnerReplicaSetSpec defines the desired state of RunnerDeployment
|
||||
|
|
|
|||
|
|
@ -64,6 +64,24 @@ spec:
|
|||
items:
|
||||
type: string
|
||||
type: array
|
||||
scaleDownFactor:
|
||||
description: ScaleDownFactor is the multiplicative factor applied
|
||||
to the current number of runners used to determine how many
|
||||
pods should be removed.
|
||||
type: string
|
||||
scaleDownThreshold:
|
||||
description: ScaleDownThreshold is the percentage of busy runners
|
||||
less than which will trigger the hpa to scale the runners down.
|
||||
type: string
|
||||
scaleUpFactor:
|
||||
description: ScaleUpFactor is the multiplicative factor applied
|
||||
to the current number of runners used to determine how many
|
||||
pods should be added.
|
||||
type: string
|
||||
scaleUpThreshold:
|
||||
description: ScaleUpThreshold is the percentage of busy runners
|
||||
greater than which will trigger the hpa to scale runners up.
|
||||
type: string
|
||||
type:
|
||||
description: Type is the type of metric to be used for autoscaling.
|
||||
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
|
||||
|
|
|
|||
|
|
@ -64,6 +64,24 @@ spec:
|
|||
items:
|
||||
type: string
|
||||
type: array
|
||||
scaleDownFactor:
|
||||
description: ScaleDownFactor is the multiplicative factor applied
|
||||
to the current number of runners used to determine how many
|
||||
pods should be removed.
|
||||
type: string
|
||||
scaleDownThreshold:
|
||||
description: ScaleDownThreshold is the percentage of busy runners
|
||||
less than which will trigger the hpa to scale the runners down.
|
||||
type: string
|
||||
scaleUpFactor:
|
||||
description: ScaleUpFactor is the multiplicative factor applied
|
||||
to the current number of runners used to determine how many
|
||||
pods should be added.
|
||||
type: string
|
||||
scaleUpThreshold:
|
||||
description: ScaleUpThreshold is the percentage of busy runners
|
||||
greater than which will trigger the hpa to scale runners up.
|
||||
type: string
|
||||
type:
|
||||
description: Type is the type of metric to be used for autoscaling.
|
||||
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
|
||||
|
|
|
|||
|
|
@ -4,9 +4,18 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
)
|
||||
|
||||
// Defaults used by the PercentageRunnersBusy metric when the corresponding
// field of the metric spec is left empty.
//
// The thresholds are compared against busy/total, so they are fractions in
// the range 0..1 (0.8 means 80% of runners busy), not values in 0..100.
const (
	// defaultScaleUpThreshold is the busy fraction at or above which runners
	// are scaled up.
	defaultScaleUpThreshold = 0.8
	// defaultScaleDownThreshold is the busy fraction below which runners are
	// scaled down.
	defaultScaleDownThreshold = 0.3
	// defaultScaleUpFactor multiplies the current runner count when scaling up.
	defaultScaleUpFactor = 1.3
	// defaultScaleDownFactor multiplies the current runner count when scaling
	// down.
	defaultScaleDownFactor = 0.7
)
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
|
|
@ -16,8 +25,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
|
|||
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
|
||||
}
|
||||
|
||||
var repos [][]string
|
||||
metrics := hra.Spec.Metrics
|
||||
if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
|
||||
return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
|
||||
} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy {
|
||||
return r.calculateReplicasByPercentageRunnersBusy(rd, hra)
|
||||
} else {
|
||||
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
|
||||
var repos [][]string
|
||||
metrics := hra.Spec.Metrics
|
||||
repoID := rd.Spec.Template.Spec.Repository
|
||||
if repoID == "" {
|
||||
orgName := rd.Spec.Template.Spec.Organization
|
||||
|
|
@ -25,13 +46,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
|
|||
return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path")
|
||||
}
|
||||
|
||||
metrics := hra.Spec.Metrics
|
||||
|
||||
if len(metrics) == 0 {
|
||||
return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
|
||||
} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
|
||||
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns)
|
||||
} else if len(metrics[0].RepositoryNames) == 0 {
|
||||
if len(metrics[0].RepositoryNames) == 0 {
|
||||
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
|
||||
}
|
||||
|
||||
|
|
@ -135,3 +150,103 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
|
|||
|
||||
return &replicas, nil
|
||||
}
|
||||
|
||||
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||
ctx := context.Background()
|
||||
orgName := rd.Spec.Template.Spec.Organization
|
||||
minReplicas := *hra.Spec.MinReplicas
|
||||
maxReplicas := *hra.Spec.MaxReplicas
|
||||
metrics := hra.Spec.Metrics[0]
|
||||
scaleUpThreshold := defaultScaleUpThreshold
|
||||
scaleDownThreshold := defaultScaleDownThreshold
|
||||
scaleUpFactor := defaultScaleUpFactor
|
||||
scaleDownFactor := defaultScaleDownFactor
|
||||
|
||||
if metrics.ScaleUpThreshold != "" {
|
||||
sut, err := strconv.ParseFloat(metrics.ScaleUpThreshold, 64)
|
||||
if err != nil {
|
||||
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleUpThreshold cannot be parsed into a float64")
|
||||
}
|
||||
scaleUpThreshold = sut
|
||||
}
|
||||
if metrics.ScaleDownThreshold != "" {
|
||||
sdt, err := strconv.ParseFloat(metrics.ScaleDownThreshold, 64)
|
||||
if err != nil {
|
||||
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleDownThreshold cannot be parsed into a float64")
|
||||
}
|
||||
|
||||
scaleDownThreshold = sdt
|
||||
}
|
||||
if metrics.ScaleUpFactor != "" {
|
||||
suf, err := strconv.ParseFloat(metrics.ScaleUpFactor, 64)
|
||||
if err != nil {
|
||||
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleUpFactor cannot be parsed into a float64")
|
||||
}
|
||||
scaleUpFactor = suf
|
||||
}
|
||||
if metrics.ScaleDownFactor != "" {
|
||||
sdf, err := strconv.ParseFloat(metrics.ScaleDownFactor, 64)
|
||||
if err != nil {
|
||||
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleDownFactor cannot be parsed into a float64")
|
||||
}
|
||||
scaleDownFactor = sdf
|
||||
}
|
||||
|
||||
// return the list of runners in namespace. Horizontal Runner Autoscaler should only be responsible for scaling resources in its own ns.
|
||||
var runnerList v1alpha1.RunnerList
|
||||
if err := r.List(ctx, &runnerList, client.InNamespace(rd.Namespace)); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
runnerMap := make(map[string]struct{})
|
||||
for _, items := range runnerList.Items {
|
||||
runnerMap[items.Name] = struct{}{}
|
||||
}
|
||||
|
||||
// ListRunners will return all runners managed by GitHub - not restricted to ns
|
||||
runners, err := r.GitHubClient.ListRunners(ctx, orgName, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
numRunners := len(runnerList.Items)
|
||||
numRunnersBusy := 0
|
||||
for _, runner := range runners {
|
||||
if _, ok := runnerMap[*runner.Name]; ok && runner.GetBusy() {
|
||||
numRunnersBusy++
|
||||
}
|
||||
}
|
||||
|
||||
var desiredReplicas int
|
||||
fractionBusy := float64(numRunnersBusy) / float64(numRunners)
|
||||
if fractionBusy >= scaleUpThreshold {
|
||||
scaleUpReplicas := int(float64(numRunners)*scaleUpFactor + 0.5)
|
||||
if scaleUpReplicas > maxReplicas {
|
||||
desiredReplicas = maxReplicas
|
||||
} else {
|
||||
desiredReplicas = scaleUpReplicas
|
||||
}
|
||||
} else if fractionBusy < scaleDownThreshold {
|
||||
scaleDownReplicas := int(float64(numRunners) * scaleDownFactor)
|
||||
if scaleDownReplicas < minReplicas {
|
||||
desiredReplicas = minReplicas
|
||||
} else {
|
||||
desiredReplicas = scaleDownReplicas
|
||||
}
|
||||
} else {
|
||||
desiredReplicas = *rd.Spec.Replicas
|
||||
}
|
||||
|
||||
r.Log.V(1).Info(
|
||||
"Calculated desired replicas",
|
||||
"computed_replicas_desired", desiredReplicas,
|
||||
"spec_replicas_min", minReplicas,
|
||||
"spec_replicas_max", maxReplicas,
|
||||
"current_replicas", rd.Spec.Replicas,
|
||||
"num_runners", numRunners,
|
||||
"num_runners_busy", numRunnersBusy,
|
||||
)
|
||||
|
||||
rd.Status.Replicas = &desiredReplicas
|
||||
replicas := desiredReplicas
|
||||
|
||||
return &replicas, nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue