Add "PercentageRunnersBusy" horizontal runner autoscaler metric type (#223)
* hpa scheme based off busy runners * running make manifests Co-authored-by: Zachary Benamram <zacharybenamram@blend.com>
This commit is contained in:
parent
c13704d7e2
commit
466b30728d
|
|
@ -56,6 +56,26 @@ type MetricSpec struct {
|
||||||
// For example, a repository name is the REPO part of `github.com/USER/REPO`.
|
// For example, a repository name is the REPO part of `github.com/USER/REPO`.
|
||||||
// +optional
|
// +optional
|
||||||
RepositoryNames []string `json:"repositoryNames,omitempty"`
|
RepositoryNames []string `json:"repositoryNames,omitempty"`
|
||||||
|
|
||||||
|
// ScaleUpThreshold is the percentage of busy runners greater than which will
|
||||||
|
// trigger the hpa to scale runners up.
|
||||||
|
// +optional
|
||||||
|
ScaleUpThreshold string `json:"scaleUpThreshold,omitempty"`
|
||||||
|
|
||||||
|
// ScaleDownThreshold is the percentage of busy runners less than which will
|
||||||
|
// trigger the hpa to scale the runners down.
|
||||||
|
// +optional
|
||||||
|
ScaleDownThreshold string `json:"scaleDownThreshold,omitempty"`
|
||||||
|
|
||||||
|
// ScaleUpFactor is the multiplicative factor applied to the current number of runners used
|
||||||
|
// to determine how many pods should be added.
|
||||||
|
// +optional
|
||||||
|
ScaleUpFactor string `json:"scaleUpFactor,omitempty"`
|
||||||
|
|
||||||
|
// ScaleDownFactor is the multiplicative factor applied to the current number of runners used
|
||||||
|
// to determine how many pods should be removed.
|
||||||
|
// +optional
|
||||||
|
ScaleDownFactor string `json:"scaleDownFactor,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type HorizontalRunnerAutoscalerStatus struct {
|
type HorizontalRunnerAutoscalerStatus struct {
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ import (
|
||||||
|
|
||||||
const (
|
const (
|
||||||
AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns"
|
AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns"
|
||||||
|
AutoscalingMetricTypePercentageRunnersBusy = "PercentageRunnersBusy"
|
||||||
)
|
)
|
||||||
|
|
||||||
// RunnerReplicaSetSpec defines the desired state of RunnerDeployment
|
// RunnerReplicaSetSpec defines the desired state of RunnerDeployment
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,24 @@ spec:
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
|
scaleDownFactor:
|
||||||
|
description: ScaleDownFactor is the multiplicative factor applied
|
||||||
|
to the current number of runners used to determine how many
|
||||||
|
pods should be removed.
|
||||||
|
type: string
|
||||||
|
scaleDownThreshold:
|
||||||
|
description: ScaleDownThreshold is the percentage of busy runners
|
||||||
|
less than which will trigger the hpa to scale the runners down.
|
||||||
|
type: string
|
||||||
|
scaleUpFactor:
|
||||||
|
description: ScaleUpFactor is the multiplicative factor applied
|
||||||
|
to the current number of runners used to determine how many
|
||||||
|
pods should be added.
|
||||||
|
type: string
|
||||||
|
scaleUpThreshold:
|
||||||
|
description: ScaleUpThreshold is the percentage of busy runners
|
||||||
|
greater than which will trigger the hpa to scale runners up.
|
||||||
|
type: string
|
||||||
type:
|
type:
|
||||||
description: Type is the type of metric to be used for autoscaling.
|
description: Type is the type of metric to be used for autoscaling.
|
||||||
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
|
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,24 @@ spec:
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
|
scaleDownFactor:
|
||||||
|
description: ScaleDownFactor is the multiplicative factor applied
|
||||||
|
to the current number of runners used to determine how many
|
||||||
|
pods should be removed.
|
||||||
|
type: string
|
||||||
|
scaleDownThreshold:
|
||||||
|
description: ScaleDownThreshold is the percentage of busy runners
|
||||||
|
less than which will trigger the hpa to scale the runners down.
|
||||||
|
type: string
|
||||||
|
scaleUpFactor:
|
||||||
|
description: ScaleUpFactor is the multiplicative factor applied
|
||||||
|
to the current number of runners used to determine how many
|
||||||
|
pods should be added.
|
||||||
|
type: string
|
||||||
|
scaleUpThreshold:
|
||||||
|
description: ScaleUpThreshold is the percentage of busy runners
|
||||||
|
greater than which will trigger the hpa to scale runners up.
|
||||||
|
type: string
|
||||||
type:
|
type:
|
||||||
description: Type is the type of metric to be used for autoscaling.
|
description: Type is the type of metric to be used for autoscaling.
|
||||||
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
|
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns
|
||||||
|
|
|
||||||
|
|
@ -4,9 +4,18 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
||||||
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultScaleUpThreshold = 0.8
|
||||||
|
defaultScaleDownThreshold = 0.3
|
||||||
|
defaultScaleUpFactor = 1.3
|
||||||
|
defaultScaleDownFactor = 0.7
|
||||||
)
|
)
|
||||||
|
|
||||||
func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||||
|
|
@ -16,8 +25,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
|
||||||
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
|
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
var repos [][]string
|
metrics := hra.Spec.Metrics
|
||||||
|
if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
|
||||||
|
return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra)
|
||||||
|
} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy {
|
||||||
|
return r.calculateReplicasByPercentageRunnersBusy(rd, hra)
|
||||||
|
} else {
|
||||||
|
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||||
|
|
||||||
|
var repos [][]string
|
||||||
|
metrics := hra.Spec.Metrics
|
||||||
repoID := rd.Spec.Template.Spec.Repository
|
repoID := rd.Spec.Template.Spec.Repository
|
||||||
if repoID == "" {
|
if repoID == "" {
|
||||||
orgName := rd.Spec.Template.Spec.Organization
|
orgName := rd.Spec.Template.Spec.Organization
|
||||||
|
|
@ -25,13 +46,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
|
||||||
return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path")
|
return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path")
|
||||||
}
|
}
|
||||||
|
|
||||||
metrics := hra.Spec.Metrics
|
if len(metrics[0].RepositoryNames) == 0 {
|
||||||
|
|
||||||
if len(metrics) == 0 {
|
|
||||||
return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
|
|
||||||
} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
|
|
||||||
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns)
|
|
||||||
} else if len(metrics[0].RepositoryNames) == 0 {
|
|
||||||
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
|
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -135,3 +150,103 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
|
||||||
|
|
||||||
return &replicas, nil
|
return &replicas, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
|
||||||
|
ctx := context.Background()
|
||||||
|
orgName := rd.Spec.Template.Spec.Organization
|
||||||
|
minReplicas := *hra.Spec.MinReplicas
|
||||||
|
maxReplicas := *hra.Spec.MaxReplicas
|
||||||
|
metrics := hra.Spec.Metrics[0]
|
||||||
|
scaleUpThreshold := defaultScaleUpThreshold
|
||||||
|
scaleDownThreshold := defaultScaleDownThreshold
|
||||||
|
scaleUpFactor := defaultScaleUpFactor
|
||||||
|
scaleDownFactor := defaultScaleDownFactor
|
||||||
|
|
||||||
|
if metrics.ScaleUpThreshold != "" {
|
||||||
|
sut, err := strconv.ParseFloat(metrics.ScaleUpThreshold, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleUpThreshold cannot be parsed into a float64")
|
||||||
|
}
|
||||||
|
scaleUpThreshold = sut
|
||||||
|
}
|
||||||
|
if metrics.ScaleDownThreshold != "" {
|
||||||
|
sdt, err := strconv.ParseFloat(metrics.ScaleDownThreshold, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleDownThreshold cannot be parsed into a float64")
|
||||||
|
}
|
||||||
|
|
||||||
|
scaleDownThreshold = sdt
|
||||||
|
}
|
||||||
|
if metrics.ScaleUpFactor != "" {
|
||||||
|
suf, err := strconv.ParseFloat(metrics.ScaleUpFactor, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleUpFactor cannot be parsed into a float64")
|
||||||
|
}
|
||||||
|
scaleUpFactor = suf
|
||||||
|
}
|
||||||
|
if metrics.ScaleDownFactor != "" {
|
||||||
|
sdf, err := strconv.ParseFloat(metrics.ScaleDownFactor, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleDownFactor cannot be parsed into a float64")
|
||||||
|
}
|
||||||
|
scaleDownFactor = sdf
|
||||||
|
}
|
||||||
|
|
||||||
|
// return the list of runners in namespace. Horizontal Runner Autoscaler should only be responsible for scaling resources in its own ns.
|
||||||
|
var runnerList v1alpha1.RunnerList
|
||||||
|
if err := r.List(ctx, &runnerList, client.InNamespace(rd.Namespace)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
runnerMap := make(map[string]struct{})
|
||||||
|
for _, items := range runnerList.Items {
|
||||||
|
runnerMap[items.Name] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListRunners will return all runners managed by GitHub - not restricted to ns
|
||||||
|
runners, err := r.GitHubClient.ListRunners(ctx, orgName, "")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
numRunners := len(runnerList.Items)
|
||||||
|
numRunnersBusy := 0
|
||||||
|
for _, runner := range runners {
|
||||||
|
if _, ok := runnerMap[*runner.Name]; ok && runner.GetBusy() {
|
||||||
|
numRunnersBusy++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var desiredReplicas int
|
||||||
|
fractionBusy := float64(numRunnersBusy) / float64(numRunners)
|
||||||
|
if fractionBusy >= scaleUpThreshold {
|
||||||
|
scaleUpReplicas := int(float64(numRunners)*scaleUpFactor + 0.5)
|
||||||
|
if scaleUpReplicas > maxReplicas {
|
||||||
|
desiredReplicas = maxReplicas
|
||||||
|
} else {
|
||||||
|
desiredReplicas = scaleUpReplicas
|
||||||
|
}
|
||||||
|
} else if fractionBusy < scaleDownThreshold {
|
||||||
|
scaleDownReplicas := int(float64(numRunners) * scaleDownFactor)
|
||||||
|
if scaleDownReplicas < minReplicas {
|
||||||
|
desiredReplicas = minReplicas
|
||||||
|
} else {
|
||||||
|
desiredReplicas = scaleDownReplicas
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
desiredReplicas = *rd.Spec.Replicas
|
||||||
|
}
|
||||||
|
|
||||||
|
r.Log.V(1).Info(
|
||||||
|
"Calculated desired replicas",
|
||||||
|
"computed_replicas_desired", desiredReplicas,
|
||||||
|
"spec_replicas_min", minReplicas,
|
||||||
|
"spec_replicas_max", maxReplicas,
|
||||||
|
"current_replicas", rd.Spec.Replicas,
|
||||||
|
"num_runners", numRunners,
|
||||||
|
"num_runners_busy", numRunnersBusy,
|
||||||
|
)
|
||||||
|
|
||||||
|
rd.Status.Replicas = &desiredReplicas
|
||||||
|
replicas := desiredReplicas
|
||||||
|
|
||||||
|
return &replicas, nil
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue