diff --git a/api/v1alpha1/horizontalrunnerautoscaler_types.go b/api/v1alpha1/horizontalrunnerautoscaler_types.go index b45e0ccf..0f920f4e 100644 --- a/api/v1alpha1/horizontalrunnerautoscaler_types.go +++ b/api/v1alpha1/horizontalrunnerautoscaler_types.go @@ -56,6 +56,26 @@ type MetricSpec struct { // For example, a repository name is the REPO part of `github.com/USER/REPO`. // +optional RepositoryNames []string `json:"repositoryNames,omitempty"` + + // ScaleUpThreshold is the percentage of busy runners greater than which will + // trigger the hpa to scale runners up. + // +optional + ScaleUpThreshold string `json:"scaleUpThreshold,omitempty"` + + // ScaleDownThreshold is the percentage of busy runners less than which will + // trigger the hpa to scale the runners down. + // +optional + ScaleDownThreshold string `json:"scaleDownThreshold,omitempty"` + + // ScaleUpFactor is the multiplicative factor applied to the current number of runners used + // to determine how many pods should be added. + // +optional + ScaleUpFactor string `json:"scaleUpFactor,omitempty"` + + // ScaleDownFactor is the multiplicative factor applied to the current number of runners used + // to determine how many pods should be removed. 
+ // +optional + ScaleDownFactor string `json:"scaleDownFactor,omitempty"` } type HorizontalRunnerAutoscalerStatus struct { diff --git a/api/v1alpha1/runnerdeployment_types.go b/api/v1alpha1/runnerdeployment_types.go index 8a79d306..ba6085bd 100644 --- a/api/v1alpha1/runnerdeployment_types.go +++ b/api/v1alpha1/runnerdeployment_types.go @@ -22,6 +22,7 @@ import ( const ( AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns" + AutoscalingMetricTypePercentageRunnersBusy = "PercentageRunnersBusy" ) // RunnerReplicaSetSpec defines the desired state of RunnerDeployment diff --git a/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml b/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml index 0baebf49..b7a9f13c 100644 --- a/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml +++ b/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml @@ -64,6 +64,24 @@ spec: items: type: string type: array + scaleDownFactor: + description: ScaleDownFactor is the multiplicative factor applied + to the current number of runners used to determine how many + pods should be removed. + type: string + scaleDownThreshold: + description: ScaleDownThreshold is the percentage of busy runners + less than which will trigger the hpa to scale the runners down. + type: string + scaleUpFactor: + description: ScaleUpFactor is the multiplicative factor applied + to the current number of runners used to determine how many + pods should be added. + type: string + scaleUpThreshold: + description: ScaleUpThreshold is the percentage of busy runners + greater than which will trigger the hpa to scale runners up. + type: string type: description: Type is the type of metric to be used for autoscaling. 
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns diff --git a/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml index 0baebf49..b7a9f13c 100644 --- a/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml +++ b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml @@ -64,6 +64,24 @@ spec: items: type: string type: array + scaleDownFactor: + description: ScaleDownFactor is the multiplicative factor applied + to the current number of runners used to determine how many + pods should be removed. + type: string + scaleDownThreshold: + description: ScaleDownThreshold is the percentage of busy runners + less than which will trigger the hpa to scale the runners down. + type: string + scaleUpFactor: + description: ScaleUpFactor is the multiplicative factor applied + to the current number of runners used to determine how many + pods should be added. + type: string + scaleUpThreshold: + description: ScaleUpThreshold is the percentage of busy runners + greater than which will trigger the hpa to scale runners up. + type: string type: description: Type is the type of metric to be used for autoscaling. 
The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns diff --git a/controllers/autoscaling.go b/controllers/autoscaling.go index 95e9dfed..da351692 100644 --- a/controllers/autoscaling.go +++ b/controllers/autoscaling.go @@ -4,9 +4,18 @@ import ( "context" "errors" "fmt" + "strconv" "strings" "github.com/summerwind/actions-runner-controller/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + defaultScaleUpThreshold = 0.8 + defaultScaleDownThreshold = 0.3 + defaultScaleUpFactor = 1.3 + defaultScaleDownFactor = 0.7 ) func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { @@ -16,8 +25,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name) } - var repos [][]string + metrics := hra.Spec.Metrics + if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns { + return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra) + } else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy { + return r.calculateReplicasByPercentageRunnersBusy(rd, hra) + } else { + return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type) + } +} +func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { + + var repos [][]string + metrics := hra.Spec.Metrics repoID := rd.Spec.Template.Spec.Repository if repoID == "" { orgName := rd.Spec.Template.Spec.Organization @@ -25,13 +46,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should 
not be empty on this code path")
 		}
 
-		metrics := hra.Spec.Metrics
-
-		if len(metrics) == 0 {
-			return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
-		} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
-			return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns)
-		} else if len(metrics[0].RepositoryNames) == 0 {
+		if len(metrics) == 0 || len(metrics[0].RepositoryNames) == 0 {
 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
 		}
 
@@ -135,3 +150,125 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp
 
 	return &replicas, nil
 }
+
+// parseScalingFloat returns fallback when raw is empty; otherwise it parses raw as a
+// float64. field names the spec field and is only used to build the error message.
+func parseScalingFloat(field, raw string, fallback float64) (float64, error) {
+	if raw == "" {
+		return fallback, nil
+	}
+	v, err := strconv.ParseFloat(raw, 64)
+	if err != nil {
+		return 0, fmt.Errorf("validating autoscaling metrics: spec.autoscaling.metrics[].%s cannot be parsed into a float64: %w", field, err)
+	}
+	return v, nil
+}
+
+// calculateReplicasByPercentageRunnersBusy derives the desired replica count of rd from the
+// fraction of runners in rd's namespace that GitHub reports as busy.
+//
+// At or above the scale-up threshold the runner count is multiplied by the scale-up factor,
+// below the scale-down threshold it is multiplied by the scale-down factor, and in between
+// the current replica count is kept. Thresholds and factors are read from the first metric
+// of hra and fall back to the default* constants when unset. The result is always clamped
+// to [minReplicas, maxReplicas].
+//
+// It returns an error when minReplicas/maxReplicas are unset, a threshold or factor is not
+// a valid float, or listing runners from the cluster or from GitHub fails.
+func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
+	if hra.Spec.MinReplicas == nil || hra.Spec.MaxReplicas == nil {
+		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas or maxReplicas", hra.Namespace, hra.Name)
+	}
+	ctx := context.Background()
+	minReplicas := *hra.Spec.MinReplicas
+	maxReplicas := *hra.Spec.MaxReplicas
+	metrics := hra.Spec.Metrics[0]
+
+	scaleUpThreshold, err := parseScalingFloat("scaleUpThreshold", metrics.ScaleUpThreshold, defaultScaleUpThreshold)
+	if err != nil {
+		return nil, err
+	}
+	scaleDownThreshold, err := parseScalingFloat("scaleDownThreshold", metrics.ScaleDownThreshold, defaultScaleDownThreshold)
+	if err != nil {
+		return nil, err
+	}
+	scaleUpFactor, err := parseScalingFloat("scaleUpFactor", metrics.ScaleUpFactor, defaultScaleUpFactor)
+	if err != nil {
+		return nil, err
+	}
+	scaleDownFactor, err := parseScalingFloat("scaleDownFactor", metrics.ScaleDownFactor, defaultScaleDownFactor)
+	if err != nil {
+		return nil, err
+	}
+
+	// Only runners in the deployment's own namespace are counted; the autoscaler should only
+	// be responsible for scaling resources in its own namespace.
+	// NOTE(review): this counts every Runner in the namespace, not only those owned by rd.
+	var runnerList v1alpha1.RunnerList
+	if err := r.List(ctx, &runnerList, client.InNamespace(rd.Namespace)); err != nil {
+		return nil, fmt.Errorf("listing runners in namespace %q: %w", rd.Namespace, err)
+	}
+	numRunners := len(runnerList.Items)
+	runnerNames := make(map[string]struct{}, numRunners)
+	for i := range runnerList.Items {
+		runnerNames[runnerList.Items[i].Name] = struct{}{}
+	}
+
+	// GitHub returns runners regardless of namespace, so they are matched by name against
+	// runnerNames. The repository is passed too so repository-scoped deployments (empty
+	// organization) are covered as well.
+	runners, err := r.GitHubClient.ListRunners(ctx, rd.Spec.Template.Spec.Organization, rd.Spec.Template.Spec.Repository)
+	if err != nil {
+		return nil, fmt.Errorf("listing runners from GitHub: %w", err)
+	}
+	numRunnersBusy := 0
+	for _, runner := range runners {
+		// GetName is nil-safe, unlike dereferencing runner.Name directly.
+		if _, ok := runnerNames[runner.GetName()]; ok && runner.GetBusy() {
+			numRunnersBusy++
+		}
+	}
+
+	// rd.Spec.Replicas is a pointer and may be nil; fall back to the observed runner count.
+	currentReplicas := numRunners
+	if rd.Spec.Replicas != nil {
+		currentReplicas = *rd.Spec.Replicas
+	}
+
+	desiredReplicas := currentReplicas
+	// Without runners there is no utilisation signal (0/0 is NaN), so keep the current count.
+	if numRunners > 0 {
+		fractionBusy := float64(numRunnersBusy) / float64(numRunners)
+		switch {
+		case fractionBusy >= scaleUpThreshold:
+			desiredReplicas = int(float64(numRunners)*scaleUpFactor + 0.5)
+			// Rounding can swallow the whole increase for small fleets (1 * 1.3 rounds
+			// to 1), so grow by at least one runner.
+			if scaleUpFactor > 1 && desiredReplicas <= numRunners {
+				desiredReplicas = numRunners + 1
+			}
+		case fractionBusy < scaleDownThreshold:
+			desiredReplicas = int(float64(numRunners) * scaleDownFactor)
+		}
+	}
+
+	// Clamp on every path, including the one that keeps the current count.
+	if desiredReplicas < minReplicas {
+		desiredReplicas = minReplicas
+	}
+	if desiredReplicas > maxReplicas {
+		desiredReplicas = maxReplicas
+	}
+
+	r.Log.V(1).Info(
+		"Calculated desired replicas",
+		"computed_replicas_desired", desiredReplicas,
+		"spec_replicas_min", minReplicas,
+		"spec_replicas_max", maxReplicas,
+		"current_replicas", currentReplicas,
+		"num_runners", numRunners,
+		"num_runners_busy", numRunnersBusy,
+	)
+
+	return &desiredReplicas, nil
+}