Add "PercentageRunnersBusy" horizontal runner autoscaler metric type (#223)
* hpa scheme based off busy runners * running make manifests Co-authored-by: Zachary Benamram <zacharybenamram@blend.com>
This commit is contained in:
		
							parent
							
								
									c13704d7e2
								
							
						
					
					
						commit
						466b30728d
					
				|  | @ -56,6 +56,26 @@ type MetricSpec struct { | ||||||
| 	// For example, a repository name is the REPO part of `github.com/USER/REPO`.
 | 	// For example, a repository name is the REPO part of `github.com/USER/REPO`.
 | ||||||
| 	// +optional
 | 	// +optional
 | ||||||
| 	RepositoryNames []string `json:"repositoryNames,omitempty"` | 	RepositoryNames []string `json:"repositoryNames,omitempty"` | ||||||
|  | 
 | ||||||
|  | 	// ScaleUpThreshold is the percentage of busy runners greater than which will
 | ||||||
|  | 	// trigger the hpa to scale runners up.
 | ||||||
|  | 	// +optional
 | ||||||
|  | 	ScaleUpThreshold string `json:"scaleUpThreshold,omitempty"` | ||||||
|  | 
 | ||||||
|  | 	// ScaleDownThreshold is the percentage of busy runners less than which will
 | ||||||
|  | 	// trigger the hpa to scale the runners down.
 | ||||||
|  | 	// +optional
 | ||||||
|  | 	ScaleDownThreshold string `json:"scaleDownThreshold,omitempty"` | ||||||
|  | 
 | ||||||
|  | 	// ScaleUpFactor is the multiplicative factor applied to the current number of runners used
 | ||||||
|  | 	// to determine how many pods should be added.
 | ||||||
|  | 	// +optional
 | ||||||
|  | 	ScaleUpFactor string `json:"scaleUpFactor,omitempty"` | ||||||
|  | 
 | ||||||
|  | 	// ScaleDownFactor is the multiplicative factor applied to the current number of runners used
 | ||||||
|  | 	// to determine how many pods should be removed.
 | ||||||
|  | 	// +optional
 | ||||||
|  | 	ScaleDownFactor string `json:"scaleDownFactor,omitempty"` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| type HorizontalRunnerAutoscalerStatus struct { | type HorizontalRunnerAutoscalerStatus struct { | ||||||
|  |  | ||||||
|  | @ -22,6 +22,7 @@ import ( | ||||||
| 
 | 
 | ||||||
| const ( | const ( | ||||||
| 	AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns" | 	AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns" | ||||||
|  | 	AutoscalingMetricTypePercentageRunnersBusy                        = "PercentageRunnersBusy" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| // RunnerReplicaSetSpec defines the desired state of RunnerDeployment
 | // RunnerReplicaSetSpec defines the desired state of RunnerDeployment
 | ||||||
|  |  | ||||||
|  | @ -64,6 +64,24 @@ spec: | ||||||
|                     items: |                     items: | ||||||
|                       type: string |                       type: string | ||||||
|                     type: array |                     type: array | ||||||
|  |                   scaleDownFactor: | ||||||
|  |                     description: ScaleDownFactor is the multiplicative factor applied | ||||||
|  |                       to the current number of runners used to determine how many | ||||||
|  |                       pods should be removed. | ||||||
|  |                     type: string | ||||||
|  |                   scaleDownThreshold: | ||||||
|  |                     description: ScaleDownThreshold is the percentage of busy runners | ||||||
|  |                       less than which will trigger the hpa to scale the runners down. | ||||||
|  |                     type: string | ||||||
|  |                   scaleUpFactor: | ||||||
|  |                     description: ScaleUpFactor is the multiplicative factor applied | ||||||
|  |                       to the current number of runners used to determine how many | ||||||
|  |                       pods should be added. | ||||||
|  |                     type: string | ||||||
|  |                   scaleUpThreshold: | ||||||
|  |                     description: ScaleUpThreshold is the percentage of busy runners | ||||||
|  |                       greater than which will trigger the hpa to scale runners up. | ||||||
|  |                     type: string | ||||||
|                   type: |                   type: | ||||||
|                     description: Type is the type of metric to be used for autoscaling. |                     description: Type is the type of metric to be used for autoscaling. | ||||||
|                       The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns |                       The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns | ||||||
|  |  | ||||||
|  | @ -64,6 +64,24 @@ spec: | ||||||
|                     items: |                     items: | ||||||
|                       type: string |                       type: string | ||||||
|                     type: array |                     type: array | ||||||
|  |                   scaleDownFactor: | ||||||
|  |                     description: ScaleDownFactor is the multiplicative factor applied | ||||||
|  |                       to the current number of runners used to determine how many | ||||||
|  |                       pods should be removed. | ||||||
|  |                     type: string | ||||||
|  |                   scaleDownThreshold: | ||||||
|  |                     description: ScaleDownThreshold is the percentage of busy runners | ||||||
|  |                       less than which will trigger the hpa to scale the runners down. | ||||||
|  |                     type: string | ||||||
|  |                   scaleUpFactor: | ||||||
|  |                     description: ScaleUpFactor is the multiplicative factor applied | ||||||
|  |                       to the current number of runners used to determine how many | ||||||
|  |                       pods should be added. | ||||||
|  |                     type: string | ||||||
|  |                   scaleUpThreshold: | ||||||
|  |                     description: ScaleUpThreshold is the percentage of busy runners | ||||||
|  |                       greater than which will trigger the hpa to scale runners up. | ||||||
|  |                     type: string | ||||||
|                   type: |                   type: | ||||||
|                     description: Type is the type of metric to be used for autoscaling. |                     description: Type is the type of metric to be used for autoscaling. | ||||||
|                       The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns |                       The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns | ||||||
|  |  | ||||||
|  | @ -4,9 +4,18 @@ import ( | ||||||
| 	"context" | 	"context" | ||||||
| 	"errors" | 	"errors" | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"strconv" | ||||||
| 	"strings" | 	"strings" | ||||||
| 
 | 
 | ||||||
| 	"github.com/summerwind/actions-runner-controller/api/v1alpha1" | 	"github.com/summerwind/actions-runner-controller/api/v1alpha1" | ||||||
|  | 	"sigs.k8s.io/controller-runtime/pkg/client" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | const ( | ||||||
|  | 	defaultScaleUpThreshold   = 0.8 | ||||||
|  | 	defaultScaleDownThreshold = 0.3 | ||||||
|  | 	defaultScaleUpFactor      = 1.3 | ||||||
|  | 	defaultScaleDownFactor    = 0.7 | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { | func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { | ||||||
|  | @ -16,8 +25,20 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp | ||||||
| 		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name) | 		return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	var repos [][]string | 	metrics := hra.Spec.Metrics | ||||||
|  | 	if len(metrics) == 0 || metrics[0].Type == v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns { | ||||||
|  | 		return r.calculateReplicasByQueuedAndInProgressWorkflowRuns(rd, hra) | ||||||
|  | 	} else if metrics[0].Type == v1alpha1.AutoscalingMetricTypePercentageRunnersBusy { | ||||||
|  | 		return r.calculateReplicasByPercentageRunnersBusy(rd, hra) | ||||||
|  | 	} else { | ||||||
|  | 		return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q", metrics[0].Type) | ||||||
|  | 	} | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
|  | func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByQueuedAndInProgressWorkflowRuns(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { | ||||||
|  | 
 | ||||||
|  | 	var repos [][]string | ||||||
|  | 	metrics := hra.Spec.Metrics | ||||||
| 	repoID := rd.Spec.Template.Spec.Repository | 	repoID := rd.Spec.Template.Spec.Repository | ||||||
| 	if repoID == "" { | 	if repoID == "" { | ||||||
| 		orgName := rd.Spec.Template.Spec.Organization | 		orgName := rd.Spec.Template.Spec.Organization | ||||||
|  | @ -25,13 +46,7 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp | ||||||
| 			return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path") | 			return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path") | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		metrics := hra.Spec.Metrics | 		if len(metrics[0].RepositoryNames) == 0 { | ||||||
| 
 |  | ||||||
| 		if len(metrics) == 0 { |  | ||||||
| 			return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required") |  | ||||||
| 		} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns { |  | ||||||
| 			return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns) |  | ||||||
| 		} else if len(metrics[0].RepositoryNames) == 0 { |  | ||||||
| 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment") | 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment") | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | @ -135,3 +150,103 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp | ||||||
| 
 | 
 | ||||||
| 	return &replicas, nil | 	return &replicas, nil | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | func (r *HorizontalRunnerAutoscalerReconciler) calculateReplicasByPercentageRunnersBusy(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { | ||||||
|  | 	ctx := context.Background() | ||||||
|  | 	orgName := rd.Spec.Template.Spec.Organization | ||||||
|  | 	minReplicas := *hra.Spec.MinReplicas | ||||||
|  | 	maxReplicas := *hra.Spec.MaxReplicas | ||||||
|  | 	metrics := hra.Spec.Metrics[0] | ||||||
|  | 	scaleUpThreshold := defaultScaleUpThreshold | ||||||
|  | 	scaleDownThreshold := defaultScaleDownThreshold | ||||||
|  | 	scaleUpFactor := defaultScaleUpFactor | ||||||
|  | 	scaleDownFactor := defaultScaleDownFactor | ||||||
|  | 
 | ||||||
|  | 	if metrics.ScaleUpThreshold != "" { | ||||||
|  | 		sut, err := strconv.ParseFloat(metrics.ScaleUpThreshold, 64) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleUpThreshold cannot be parsed into a float64") | ||||||
|  | 		} | ||||||
|  | 		scaleUpThreshold = sut | ||||||
|  | 	} | ||||||
|  | 	if metrics.ScaleDownThreshold != "" { | ||||||
|  | 		sdt, err := strconv.ParseFloat(metrics.ScaleDownThreshold, 64) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleDownThreshold cannot be parsed into a float64") | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		scaleDownThreshold = sdt | ||||||
|  | 	} | ||||||
|  | 	if metrics.ScaleUpFactor != "" { | ||||||
|  | 		suf, err := strconv.ParseFloat(metrics.ScaleUpFactor, 64) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleUpFactor cannot be parsed into a float64") | ||||||
|  | 		} | ||||||
|  | 		scaleUpFactor = suf | ||||||
|  | 	} | ||||||
|  | 	if metrics.ScaleDownFactor != "" { | ||||||
|  | 		sdf, err := strconv.ParseFloat(metrics.ScaleDownFactor, 64) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].scaleDownFactor cannot be parsed into a float64") | ||||||
|  | 		} | ||||||
|  | 		scaleDownFactor = sdf | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// return the list of runners in namespace. Horizontal Runner Autoscaler should only be responsible for scaling resources in its own ns.
 | ||||||
|  | 	var runnerList v1alpha1.RunnerList | ||||||
|  | 	if err := r.List(ctx, &runnerList, client.InNamespace(rd.Namespace)); err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	runnerMap := make(map[string]struct{}) | ||||||
|  | 	for _, items := range runnerList.Items { | ||||||
|  | 		runnerMap[items.Name] = struct{}{} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// ListRunners will return all runners managed by GitHub - not restricted to ns
 | ||||||
|  | 	runners, err := r.GitHubClient.ListRunners(ctx, orgName, "") | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	numRunners := len(runnerList.Items) | ||||||
|  | 	numRunnersBusy := 0 | ||||||
|  | 	for _, runner := range runners { | ||||||
|  | 		if _, ok := runnerMap[*runner.Name]; ok && runner.GetBusy() { | ||||||
|  | 			numRunnersBusy++ | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	var desiredReplicas int | ||||||
|  | 	fractionBusy := float64(numRunnersBusy) / float64(numRunners) | ||||||
|  | 	if fractionBusy >= scaleUpThreshold { | ||||||
|  | 		scaleUpReplicas := int(float64(numRunners)*scaleUpFactor + 0.5) | ||||||
|  | 		if scaleUpReplicas > maxReplicas { | ||||||
|  | 			desiredReplicas = maxReplicas | ||||||
|  | 		} else { | ||||||
|  | 			desiredReplicas = scaleUpReplicas | ||||||
|  | 		} | ||||||
|  | 	} else if fractionBusy < scaleDownThreshold { | ||||||
|  | 		scaleDownReplicas := int(float64(numRunners) * scaleDownFactor) | ||||||
|  | 		if scaleDownReplicas < minReplicas { | ||||||
|  | 			desiredReplicas = minReplicas | ||||||
|  | 		} else { | ||||||
|  | 			desiredReplicas = scaleDownReplicas | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		desiredReplicas = *rd.Spec.Replicas | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	r.Log.V(1).Info( | ||||||
|  | 		"Calculated desired replicas", | ||||||
|  | 		"computed_replicas_desired", desiredReplicas, | ||||||
|  | 		"spec_replicas_min", minReplicas, | ||||||
|  | 		"spec_replicas_max", maxReplicas, | ||||||
|  | 		"current_replicas", rd.Spec.Replicas, | ||||||
|  | 		"num_runners", numRunners, | ||||||
|  | 		"num_runners_busy", numRunnersBusy, | ||||||
|  | 	) | ||||||
|  | 
 | ||||||
|  | 	rd.Status.Replicas = &desiredReplicas | ||||||
|  | 	replicas := desiredReplicas | ||||||
|  | 
 | ||||||
|  | 	return &replicas, nil | ||||||
|  | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue