diff --git a/Makefile b/Makefile index 404ceb54..134f2927 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ KUBE_RBAC_PROXY_VERSION ?= v0.11.0 SHELLCHECK_VERSION ?= 0.8.0 # Produce CRDs that work back to Kubernetes 1.11 (no version conversion) -CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true" +CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true,allowDangerousTypes=true" # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) ifeq (,$(shell go env GOBIN)) diff --git a/apis/actions.github.com/v1alpha1/autoscalinglistener_types.go b/apis/actions.github.com/v1alpha1/autoscalinglistener_types.go index 5c35b88b..57363dba 100644 --- a/apis/actions.github.com/v1alpha1/autoscalinglistener_types.go +++ b/apis/actions.github.com/v1alpha1/autoscalinglistener_types.go @@ -61,6 +61,9 @@ type AutoscalingListenerSpec struct { // +optional GitHubServerTLS *GitHubServerTLSConfig `json:"githubServerTLS,omitempty"` + // +optional + Metrics *MetricsConfig `json:"metrics,omitempty"` + // +optional Template *corev1.PodTemplateSpec `json:"template,omitempty"` } diff --git a/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go b/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go index 55644d6b..2d7946f9 100644 --- a/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go +++ b/apis/actions.github.com/v1alpha1/autoscalingrunnerset_types.go @@ -74,6 +74,9 @@ type AutoscalingRunnerSetSpec struct { // Required Template corev1.PodTemplateSpec `json:"template,omitempty"` + // +optional + ListenerMetrics *MetricsConfig `json:"listenerMetrics,omitempty"` + // +optional ListenerTemplate *corev1.PodTemplateSpec `json:"listenerTemplate,omitempty"` @@ -232,6 +235,32 @@ type ProxyServerConfig struct { CredentialSecretRef string `json:"credentialSecretRef,omitempty"` } +// MetricsConfig holds configuration parameters for each metric type +type MetricsConfig struct { + // +optional + Counters map[string]*CounterMetric 
`json:"counters,omitempty"` + // +optional + Gauges map[string]*GaugeMetric `json:"gauges,omitempty"` + // +optional + Histograms map[string]*HistogramMetric `json:"histograms,omitempty"` +} + +// CounterMetric holds configuration of a single metric of type Counter +type CounterMetric struct { + Labels []string `json:"labels"` +} + +// GaugeMetric holds configuration of a single metric of type Gauge +type GaugeMetric struct { + Labels []string `json:"labels"` +} + +// HistogramMetric holds configuration of a single metric of type Histogram +type HistogramMetric struct { + Labels []string `json:"labels"` + Buckets []float64 `json:"buckets,omitempty"` +} + // AutoscalingRunnerSetStatus defines the observed state of AutoscalingRunnerSet type AutoscalingRunnerSetStatus struct { // +optional diff --git a/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go b/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go index 2640710b..dd7553f0 100644 --- a/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go +++ b/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go @@ -102,6 +102,11 @@ func (in *AutoscalingListenerSpec) DeepCopyInto(out *AutoscalingListenerSpec) { *out = new(GitHubServerTLSConfig) (*in).DeepCopyInto(*out) } + if in.Metrics != nil { + in, out := &in.Metrics, &out.Metrics + *out = new(MetricsConfig) + (*in).DeepCopyInto(*out) + } if in.Template != nil { in, out := &in.Template, &out.Template *out = new(v1.PodTemplateSpec) @@ -207,6 +212,11 @@ func (in *AutoscalingRunnerSetSpec) DeepCopyInto(out *AutoscalingRunnerSetSpec) (*in).DeepCopyInto(*out) } in.Template.DeepCopyInto(&out.Template) + if in.ListenerMetrics != nil { + in, out := &in.ListenerMetrics, &out.ListenerMetrics + *out = new(MetricsConfig) + (*in).DeepCopyInto(*out) + } if in.ListenerTemplate != nil { in, out := &in.ListenerTemplate, &out.ListenerTemplate *out = new(v1.PodTemplateSpec) @@ -249,6 +259,26 @@ func (in *AutoscalingRunnerSetStatus) DeepCopy() *AutoscalingRunnerSetStatus 
{ return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CounterMetric) DeepCopyInto(out *CounterMetric) { + *out = *in + if in.Labels != nil { + in, out := &in.Labels, &out.Labels + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CounterMetric. +func (in *CounterMetric) DeepCopy() *CounterMetric { + if in == nil { + return nil + } + out := new(CounterMetric) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EphemeralRunner) DeepCopyInto(out *EphemeralRunner) { *out = *in @@ -446,6 +476,26 @@ func (in *EphemeralRunnerStatus) DeepCopy() *EphemeralRunnerStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GaugeMetric) DeepCopyInto(out *GaugeMetric) { + *out = *in + if in.Labels != nil { + in, out := &in.Labels, &out.Labels + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GaugeMetric. +func (in *GaugeMetric) DeepCopy() *GaugeMetric { + if in == nil { + return nil + } + out := new(GaugeMetric) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GitHubServerTLSConfig) DeepCopyInto(out *GitHubServerTLSConfig) { *out = *in @@ -466,6 +516,94 @@ func (in *GitHubServerTLSConfig) DeepCopy() *GitHubServerTLSConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *HistogramMetric) DeepCopyInto(out *HistogramMetric) { + *out = *in + if in.Labels != nil { + in, out := &in.Labels, &out.Labels + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Buckets != nil { + in, out := &in.Buckets, &out.Buckets + *out = make([]float64, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HistogramMetric. +func (in *HistogramMetric) DeepCopy() *HistogramMetric { + if in == nil { + return nil + } + out := new(HistogramMetric) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricsConfig) DeepCopyInto(out *MetricsConfig) { + *out = *in + if in.Counters != nil { + in, out := &in.Counters, &out.Counters + *out = make(map[string]*CounterMetric, len(*in)) + for key, val := range *in { + var outVal *CounterMetric + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = new(CounterMetric) + (*in).DeepCopyInto(*out) + } + (*out)[key] = outVal + } + } + if in.Gauges != nil { + in, out := &in.Gauges, &out.Gauges + *out = make(map[string]*GaugeMetric, len(*in)) + for key, val := range *in { + var outVal *GaugeMetric + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = new(GaugeMetric) + (*in).DeepCopyInto(*out) + } + (*out)[key] = outVal + } + } + if in.Histograms != nil { + in, out := &in.Histograms, &out.Histograms + *out = make(map[string]*HistogramMetric, len(*in)) + for key, val := range *in { + var outVal *HistogramMetric + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = new(HistogramMetric) + (*in).DeepCopyInto(*out) + } + (*out)[key] = outVal + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricsConfig. 
+func (in *MetricsConfig) DeepCopy() *MetricsConfig { + if in == nil { + return nil + } + out := new(MetricsConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ProxyConfig) DeepCopyInto(out *ProxyConfig) { *out = *in diff --git a/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalinglisteners.yaml b/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalinglisteners.yaml index 5edb826d..7af045bd 100644 --- a/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalinglisteners.yaml +++ b/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalinglisteners.yaml @@ -119,6 +119,50 @@ spec: description: Required minimum: 0 type: integer + metrics: + description: MetricsConfig holds configuration parameters for each metric type + properties: + counters: + additionalProperties: + description: CounterMetric holds configuration of a single metric of type Counter + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + gauges: + additionalProperties: + description: GaugeMetric holds configuration of a single metric of type Gauge + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + histograms: + additionalProperties: + description: HistogramMetric holds configuration of a single metric of type Histogram + properties: + buckets: + items: + type: number + type: array + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + type: object minRunners: description: Required minimum: 0 diff --git a/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml b/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml index 33782f42..bf6f60cc 100644 --- 
a/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml +++ b/charts/gha-runner-scale-set-controller/crds/actions.github.com_autoscalingrunnersets.yaml @@ -99,6 +99,50 @@ spec: x-kubernetes-map-type: atomic type: object type: object + listenerMetrics: + description: MetricsConfig holds configuration parameters for each metric type + properties: + counters: + additionalProperties: + description: CounterMetric holds configuration of a single metric of type Counter + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + gauges: + additionalProperties: + description: GaugeMetric holds configuration of a single metric of type Gauge + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + histograms: + additionalProperties: + description: HistogramMetric holds configuration of a single metric of type Histogram + properties: + buckets: + items: + type: number + type: array + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + type: object listenerTemplate: description: PodTemplateSpec describes the data a pod should have when created from a template properties: diff --git a/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml b/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml index 276c8640..f2f094d1 100644 --- a/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml +++ b/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml @@ -106,11 +106,16 @@ spec: minRunners: {{ .Values.minRunners | int }} {{- end }} - {{- with .Values.listenerTemplate}} + {{- with .Values.listenerTemplate }} listenerTemplate: {{- toYaml . | nindent 4}} {{- end }} + {{- with .Values.listenerMetrics }} + listenerMetrics: + {{- toYaml . 
| nindent 4 }} + {{- end }} + template: {{- with .Values.template.metadata }} metadata: diff --git a/charts/gha-runner-scale-set/values.yaml b/charts/gha-runner-scale-set/values.yaml index 145058c7..e3f69992 100644 --- a/charts/gha-runner-scale-set/values.yaml +++ b/charts/gha-runner-scale-set/values.yaml @@ -119,6 +119,156 @@ githubConfigSecret: # - name: side-car # image: example-sidecar +## listenerMetrics are configurable metrics applied to the listener. +## In order to avoid helm merging these fields, we left the metrics commented out. +## When configuring metrics, please uncomment the listenerMetrics object below. +## You can modify the configuration to remove the label or specify custom buckets for histogram. +## +## If the buckets field is not specified, the default buckets will be applied. Default buckets are +## provided here for documentation purposes +# listenerMetrics: +# counters: +# gha_started_jobs_total: +# labels: +# ["repository", "organization", "enterprise", "job_name", "event_name"] +# gha_completed_jobs_total: +# labels: +# [ +# "repository", +# "organization", +# "enterprise", +# "job_name", +# "event_name", +# "job_result", +# ] +# gauges: +# gha_assigned_jobs: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# gha_running_jobs: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# gha_registered_runners: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# gha_busy_runners: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# gha_min_runners: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# gha_max_runners: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# gha_desired_runners: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# gha_idle_runners: +# labels: ["name", "namespace", "repository", "organization", "enterprise"] +# histograms: +# 
gha_job_startup_duration_seconds: +# labels: +# ["repository", "organization", "enterprise", "job_name", "event_name"] +# buckets: +# [ +# 0.01, +# 0.05, +# 0.1, +# 0.5, +# 1.0, +# 2.0, +# 3.0, +# 4.0, +# 5.0, +# 6.0, +# 7.0, +# 8.0, +# 9.0, +# 10.0, +# 12.0, +# 15.0, +# 18.0, +# 20.0, +# 25.0, +# 30.0, +# 40.0, +# 50.0, +# 60.0, +# 70.0, +# 80.0, +# 90.0, +# 100.0, +# 110.0, +# 120.0, +# 150.0, +# 180.0, +# 210.0, +# 240.0, +# 300.0, +# 360.0, +# 420.0, +# 480.0, +# 540.0, +# 600.0, +# 900.0, +# 1200.0, +# 1800.0, +# 2400.0, +# 3000.0, +# 3600.0, +# ] +# gha_job_execution_duration_seconds: +# labels: +# [ +# "repository", +# "organization", +# "enterprise", +# "job_name", +# "event_name", +# "job_result", +# ] +# buckets: +# [ +# 0.01, +# 0.05, +# 0.1, +# 0.5, +# 1.0, +# 2.0, +# 3.0, +# 4.0, +# 5.0, +# 6.0, +# 7.0, +# 8.0, +# 9.0, +# 10.0, +# 12.0, +# 15.0, +# 18.0, +# 20.0, +# 25.0, +# 30.0, +# 40.0, +# 50.0, +# 60.0, +# 70.0, +# 80.0, +# 90.0, +# 100.0, +# 110.0, +# 120.0, +# 150.0, +# 180.0, +# 210.0, +# 240.0, +# 300.0, +# 360.0, +# 420.0, +# 480.0, +# 540.0, +# 600.0, +# 900.0, +# 1200.0, +# 1800.0, +# 2400.0, +# 3000.0, +# 3600.0, +# ] + ## template is the PodSpec for each runner Pod ## For reference: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec template: @@ -205,7 +355,6 @@ template: - name: runner image: ghcr.io/actions/actions-runner:latest command: ["/home/runner/run.sh"] - ## Optional controller service account that needs to have required Role and RoleBinding ## to operate this gha-runner-scale-set installation. ## The helm chart will try to find the controller deployment and its service account at installation time. 
diff --git a/cmd/ghalistener/app/app.go b/cmd/ghalistener/app/app.go index e21703c9..529b5ba3 100644 --- a/cmd/ghalistener/app/app.go +++ b/cmd/ghalistener/app/app.go @@ -23,7 +23,7 @@ type App struct { // initialized fields listener Listener worker Worker - metrics metrics.ServerPublisher + metrics metrics.ServerExporter } //go:generate mockery --name Listener --output ./mocks --outpkg mocks --case underscore @@ -69,6 +69,8 @@ func New(config config.Config) (*App, error) { Repository: ghConfig.Repository, ServerAddr: config.MetricsAddr, ServerEndpoint: config.MetricsEndpoint, + Logger: app.logger.WithName("metrics exporter"), + Metrics: *config.Metrics, }) } diff --git a/cmd/ghalistener/config/config.go b/cmd/ghalistener/config/config.go index 838eb3fe..b2fa0acd 100644 --- a/cmd/ghalistener/config/config.go +++ b/cmd/ghalistener/config/config.go @@ -8,6 +8,7 @@ import ( "net/url" "os" + "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" "github.com/actions/actions-runner-controller/build" "github.com/actions/actions-runner-controller/github/actions" "github.com/actions/actions-runner-controller/logging" @@ -16,22 +17,23 @@ import ( ) type Config struct { - ConfigureUrl string `json:"configure_url"` - AppID int64 `json:"app_id"` - AppInstallationID int64 `json:"app_installation_id"` - AppPrivateKey string `json:"app_private_key"` - Token string `json:"token"` - EphemeralRunnerSetNamespace string `json:"ephemeral_runner_set_namespace"` - EphemeralRunnerSetName string `json:"ephemeral_runner_set_name"` - MaxRunners int `json:"max_runners"` - MinRunners int `json:"min_runners"` - RunnerScaleSetId int `json:"runner_scale_set_id"` - RunnerScaleSetName string `json:"runner_scale_set_name"` - ServerRootCA string `json:"server_root_ca"` - LogLevel string `json:"log_level"` - LogFormat string `json:"log_format"` - MetricsAddr string `json:"metrics_addr"` - MetricsEndpoint string `json:"metrics_endpoint"` + ConfigureUrl string 
`json:"configure_url"` + AppID int64 `json:"app_id"` + AppInstallationID int64 `json:"app_installation_id"` + AppPrivateKey string `json:"app_private_key"` + Token string `json:"token"` + EphemeralRunnerSetNamespace string `json:"ephemeral_runner_set_namespace"` + EphemeralRunnerSetName string `json:"ephemeral_runner_set_name"` + MaxRunners int `json:"max_runners"` + MinRunners int `json:"min_runners"` + RunnerScaleSetId int `json:"runner_scale_set_id"` + RunnerScaleSetName string `json:"runner_scale_set_name"` + ServerRootCA string `json:"server_root_ca"` + LogLevel string `json:"log_level"` + LogFormat string `json:"log_format"` + MetricsAddr string `json:"metrics_addr"` + MetricsEndpoint string `json:"metrics_endpoint"` + Metrics *v1alpha1.MetricsConfig `json:"metrics"` } func Read(path string) (Config, error) { diff --git a/cmd/ghalistener/metrics/metrics.go b/cmd/ghalistener/metrics/metrics.go index 14717589..e4f4798f 100644 --- a/cmd/ghalistener/metrics/metrics.go +++ b/cmd/ghalistener/metrics/metrics.go @@ -2,9 +2,12 @@ package metrics import ( "context" + "errors" "net/http" + "strings" "time" + "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" "github.com/actions/actions-runner-controller/github/actions" "github.com/go-logr/logr" "github.com/prometheus/client_golang/prometheus" @@ -22,145 +25,345 @@ const ( labelKeyJobResult = "job_result" ) -const githubScaleSetSubsystem = "gha" - -// labels -var ( - scaleSetLabels = []string{ - labelKeyRunnerScaleSetName, - labelKeyRepository, - labelKeyOrganization, - labelKeyEnterprise, - labelKeyRunnerScaleSetNamespace, - } - - jobLabels = []string{ - labelKeyRepository, - labelKeyOrganization, - labelKeyEnterprise, - labelKeyJobName, - labelKeyEventName, - } - - completedJobsTotalLabels = append(jobLabels, labelKeyJobResult) - jobExecutionDurationLabels = append(jobLabels, labelKeyJobResult) - startedJobsTotalLabels = jobLabels - jobStartupDurationLabels = jobLabels +const ( + 
githubScaleSetSubsystem = "gha" + githubScaleSetSubsystemPrefix = "gha_" ) -var ( - assignedJobs = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "assigned_jobs", - Help: "Number of jobs assigned to this scale set.", - }, - scaleSetLabels, - ) - - runningJobs = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "running_jobs", - Help: "Number of jobs running (or about to be run).", - }, - scaleSetLabels, - ) - - registeredRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "registered_runners", - Help: "Number of runners registered by the scale set.", - }, - scaleSetLabels, - ) - - busyRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "busy_runners", - Help: "Number of registered runners running a job.", - }, - scaleSetLabels, - ) - - minRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "min_runners", - Help: "Minimum number of runners.", - }, - scaleSetLabels, - ) - - maxRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "max_runners", - Help: "Maximum number of runners.", - }, - scaleSetLabels, - ) - - desiredRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "desired_runners", - Help: "Number of runners desired by the scale set.", - }, - scaleSetLabels, - ) - - idleRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "idle_runners", - Help: "Number of registered runners not running a job.", - }, - scaleSetLabels, - ) - - startedJobsTotal = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "started_jobs_total", - Help: "Total number of jobs started.", - }, - startedJobsTotalLabels, - ) - - completedJobsTotal = 
prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "completed_jobs_total", - Help: "Total number of jobs completed.", - Subsystem: githubScaleSetSubsystem, - }, - completedJobsTotalLabels, - ) - - jobStartupDurationSeconds = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "job_startup_duration_seconds", - Help: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).", - Buckets: runtimeBuckets, - }, - jobStartupDurationLabels, - ) - - jobExecutionDurationSeconds = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "job_execution_duration_seconds", - Help: "Time spent executing workflow jobs by the scale set (in seconds).", - Buckets: runtimeBuckets, - }, - jobExecutionDurationLabels, - ) +// Names of all metrics available on the listener +const ( + MetricAssignedJobs = "gha_assigned_jobs" + MetricRunningJobs = "gha_running_jobs" + MetricRegisteredRunners = "gha_registered_runners" + MetricBusyRunners = "gha_busy_runners" + MetricMinRunners = "gha_min_runners" + MetricMaxRunners = "gha_max_runners" + MetricDesiredRunners = "gha_desired_runners" + MetricIdleRunners = "gha_idle_runners" + MetricStartedJobsTotal = "gha_started_jobs_total" + MetricCompletedJobsTotal = "gha_completed_jobs_total" + MetricJobStartupDurationSeconds = "gha_job_startup_duration_seconds" + MetricJobExecutionDurationSeconds = "gha_job_execution_duration_seconds" ) -var runtimeBuckets []float64 = []float64{ +type metricsHelpRegistry struct { + counters map[string]string + gauges map[string]string + histograms map[string]string +} + +var metricsHelp = metricsHelpRegistry{ + counters: map[string]string{ + MetricStartedJobsTotal: "Total number of jobs started.", + MetricCompletedJobsTotal: "Total number of jobs completed.", + }, + gauges: map[string]string{ + MetricAssignedJobs: "Number of jobs assigned to this scale set.", + 
MetricRunningJobs: "Number of jobs running (or about to be run).", + MetricRegisteredRunners: "Number of runners registered by the scale set.", + MetricBusyRunners: "Number of registered runners running a job.", + MetricMinRunners: "Minimum number of runners.", + MetricMaxRunners: "Maximum number of runners.", + MetricDesiredRunners: "Number of runners desired by the scale set.", + MetricIdleRunners: "Number of registered runners not running a job.", + }, + histograms: map[string]string{ + MetricJobStartupDurationSeconds: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).", + MetricJobExecutionDurationSeconds: "Time spent executing workflow jobs by the scale set (in seconds).", + }, +} + +func (e *exporter) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels { + return prometheus.Labels{ + labelKeyEnterprise: e.scaleSetLabels[labelKeyEnterprise], + labelKeyOrganization: jobBase.OwnerName, + labelKeyRepository: jobBase.RepositoryName, + labelKeyJobName: jobBase.JobDisplayName, + labelKeyEventName: jobBase.EventName, + } +} + +func (e *exporter) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels { + l := e.jobLabels(&msg.JobMessageBase) + l[labelKeyJobResult] = msg.Result + return l +} + +func (e *exporter) startedJobLabels(msg *actions.JobStarted) prometheus.Labels { + return e.jobLabels(&msg.JobMessageBase) +} + +//go:generate mockery --name Publisher --output ./mocks --outpkg mocks --case underscore +type Publisher interface { + PublishStatic(min, max int) + PublishStatistics(stats *actions.RunnerScaleSetStatistic) + PublishJobStarted(msg *actions.JobStarted) + PublishJobCompleted(msg *actions.JobCompleted) + PublishDesiredRunners(count int) +} + +//go:generate mockery --name ServerExporter --output ./mocks --outpkg mocks --case underscore +type ServerExporter interface { + Publisher + ListenAndServe(ctx context.Context) error +} + +var ( + _ Publisher = &discard{} + _ ServerExporter = 
&exporter{} +) + +var Discard Publisher = &discard{} + +type exporter struct { + logger logr.Logger + scaleSetLabels prometheus.Labels + *metrics + srv *http.Server +} + +type metrics struct { + counters map[string]*counterMetric + gauges map[string]*gaugeMetric + histograms map[string]*histogramMetric +} + +type counterMetric struct { + counter *prometheus.CounterVec + config *v1alpha1.CounterMetric +} + +type gaugeMetric struct { + gauge *prometheus.GaugeVec + config *v1alpha1.GaugeMetric +} + +type histogramMetric struct { + histogram *prometheus.HistogramVec + config *v1alpha1.HistogramMetric +} + +type ExporterConfig struct { + ScaleSetName string + ScaleSetNamespace string + Enterprise string + Organization string + Repository string + ServerAddr string + ServerEndpoint string + Logger logr.Logger + Metrics v1alpha1.MetricsConfig +} + +func NewExporter(config ExporterConfig) ServerExporter { + reg := prometheus.NewRegistry() + + metrics := installMetrics(config.Metrics, reg, config.Logger) + + mux := http.NewServeMux() + mux.Handle( + config.ServerEndpoint, + promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}), + ) + + return &exporter{ + logger: config.Logger.WithName("metrics"), + scaleSetLabels: prometheus.Labels{ + labelKeyRunnerScaleSetName: config.ScaleSetName, + labelKeyRunnerScaleSetNamespace: config.ScaleSetNamespace, + labelKeyEnterprise: config.Enterprise, + labelKeyOrganization: config.Organization, + labelKeyRepository: config.Repository, + }, + metrics: metrics, + srv: &http.Server{ + Addr: config.ServerAddr, + Handler: mux, + }, + } +} + +var errUnknownMetricName = errors.New("unknown metric name") + +func installMetrics(config v1alpha1.MetricsConfig, reg *prometheus.Registry, logger logr.Logger) *metrics { + logger.Info( + "Registering metrics", + "gauges", + config.Gauges, + "counters", + config.Counters, + "histograms", + config.Histograms, + ) + metrics := &metrics{ + counters: make(map[string]*counterMetric, 
len(config.Counters)), + gauges: make(map[string]*gaugeMetric, len(config.Gauges)), + histograms: make(map[string]*histogramMetric, len(config.Histograms)), + } + for name, cfg := range config.Gauges { + help, ok := metricsHelp.gauges[name] + if !ok { + logger.Error(errUnknownMetricName, "skipping metric registration", "name", name, "kind", "gauge") + continue + } + + g := prometheus.V2.NewGaugeVec(prometheus.GaugeVecOpts{ + GaugeOpts: prometheus.GaugeOpts{ + Subsystem: githubScaleSetSubsystem, + Name: strings.TrimPrefix(name, githubScaleSetSubsystemPrefix), + Help: help, + }, + VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels), + }) + reg.MustRegister(g) + metrics.gauges[name] = &gaugeMetric{ + gauge: g, + config: cfg, + } + } + + for name, cfg := range config.Counters { + help, ok := metricsHelp.counters[name] + if !ok { + logger.Error(errUnknownMetricName, "skipping metric registration", "name", name, "kind", "counter") + continue + } + c := prometheus.V2.NewCounterVec(prometheus.CounterVecOpts{ + CounterOpts: prometheus.CounterOpts{ + Subsystem: githubScaleSetSubsystem, + Name: strings.TrimPrefix(name, githubScaleSetSubsystemPrefix), + Help: help, + }, + VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels), + }) + reg.MustRegister(c) + metrics.counters[name] = &counterMetric{ + counter: c, + config: cfg, + } + } + + for name, cfg := range config.Histograms { + help, ok := metricsHelp.histograms[name] + if !ok { + logger.Error(errUnknownMetricName, "skipping metric registration", "name", name, "kind", "histogram") + continue + } + + buckets := defaultRuntimeBuckets + if len(cfg.Buckets) > 0 { + buckets = cfg.Buckets + } + h := prometheus.V2.NewHistogramVec(prometheus.HistogramVecOpts{ + HistogramOpts: prometheus.HistogramOpts{ + Subsystem: githubScaleSetSubsystem, + Name: strings.TrimPrefix(name, githubScaleSetSubsystemPrefix), + Help: help, + Buckets: buckets, + }, + VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels), + }) + cfg.Buckets = buckets + reg.MustRegister(h) + metrics.histograms[name] = &histogramMetric{ + histogram: h, + 
config: cfg, + } + } + + return metrics +} + +func (e *exporter) ListenAndServe(ctx context.Context) error { + e.logger.Info("starting metrics server", "addr", e.srv.Addr) + go func() { + <-ctx.Done() + e.logger.Info("stopping metrics server", "err", ctx.Err()) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + e.srv.Shutdown(ctx) + }() + return e.srv.ListenAndServe() +} + +func (e *exporter) setGauge(name string, allLabels prometheus.Labels, val float64) { + m, ok := e.metrics.gauges[name] + if !ok { + return + } + labels := make(prometheus.Labels, len(m.config.Labels)) + for _, label := range m.config.Labels { + labels[label] = allLabels[label] + } + m.gauge.With(labels).Set(val) +} + +func (e *exporter) incCounter(name string, allLabels prometheus.Labels) { + m, ok := e.metrics.counters[name] + if !ok { + return + } + labels := make(prometheus.Labels, len(m.config.Labels)) + for _, label := range m.config.Labels { + labels[label] = allLabels[label] + } + m.counter.With(labels).Inc() +} + +func (e *exporter) observeHistogram(name string, allLabels prometheus.Labels, val float64) { + m, ok := e.metrics.histograms[name] + if !ok { + return + } + labels := make(prometheus.Labels, len(m.config.Labels)) + for _, label := range m.config.Labels { + labels[label] = allLabels[label] + } + m.histogram.With(labels).Observe(val) +} + +func (e *exporter) PublishStatic(min, max int) { + e.setGauge(MetricMaxRunners, e.scaleSetLabels, float64(max)) + e.setGauge(MetricMinRunners, e.scaleSetLabels, float64(min)) +} + +func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) { + e.setGauge(MetricAssignedJobs, e.scaleSetLabels, float64(stats.TotalAssignedJobs)) + e.setGauge(MetricRunningJobs, e.scaleSetLabels, float64(stats.TotalRunningJobs)) + e.setGauge(MetricRegisteredRunners, e.scaleSetLabels, float64(stats.TotalRegisteredRunners)) + e.setGauge(MetricBusyRunners, e.scaleSetLabels, 
float64(stats.TotalBusyRunners)) + e.setGauge(MetricIdleRunners, e.scaleSetLabels, float64(stats.TotalIdleRunners)) +} + +func (e *exporter) PublishJobStarted(msg *actions.JobStarted) { + l := e.startedJobLabels(msg) + e.incCounter(MetricStartedJobsTotal, l) + + startupDuration := msg.JobMessageBase.RunnerAssignTime.Unix() - msg.JobMessageBase.ScaleSetAssignTime.Unix() + e.observeHistogram(MetricJobStartupDurationSeconds, l, float64(startupDuration)) +} + +func (e *exporter) PublishJobCompleted(msg *actions.JobCompleted) { + l := e.completedJobLabels(msg) + e.incCounter(MetricCompletedJobsTotal, l) + + executionDuration := msg.JobMessageBase.FinishTime.Unix() - msg.JobMessageBase.RunnerAssignTime.Unix() + e.observeHistogram(MetricJobExecutionDurationSeconds, l, float64(executionDuration)) +} + +func (e *exporter) PublishDesiredRunners(count int) { + e.setGauge(MetricDesiredRunners, e.scaleSetLabels, float64(count)) +} + +type discard struct{} + +func (*discard) PublishStatic(int, int) {} +func (*discard) PublishStatistics(*actions.RunnerScaleSetStatistic) {} +func (*discard) PublishJobStarted(*actions.JobStarted) {} +func (*discard) PublishJobCompleted(*actions.JobCompleted) {} +func (*discard) PublishDesiredRunners(int) {} + +var defaultRuntimeBuckets []float64 = []float64{ 0.01, 0.05, 0.1, @@ -207,176 +410,3 @@ var runtimeBuckets []float64 = []float64{ 3000, 3600, } - -type baseLabels struct { - scaleSetName string - scaleSetNamespace string - enterprise string - organization string - repository string -} - -func (b *baseLabels) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels { - return prometheus.Labels{ - labelKeyEnterprise: b.enterprise, - labelKeyOrganization: jobBase.OwnerName, - labelKeyRepository: jobBase.RepositoryName, - labelKeyJobName: jobBase.JobDisplayName, - labelKeyEventName: jobBase.EventName, - } -} - -func (b *baseLabels) scaleSetLabels() prometheus.Labels { - return prometheus.Labels{ - labelKeyRunnerScaleSetName: 
b.scaleSetName, - labelKeyRunnerScaleSetNamespace: b.scaleSetNamespace, - labelKeyEnterprise: b.enterprise, - labelKeyOrganization: b.organization, - labelKeyRepository: b.repository, - } -} - -func (b *baseLabels) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels { - l := b.jobLabels(&msg.JobMessageBase) - l[labelKeyJobResult] = msg.Result - return l -} - -func (b *baseLabels) startedJobLabels(msg *actions.JobStarted) prometheus.Labels { - l := b.jobLabels(&msg.JobMessageBase) - return l -} - -//go:generate mockery --name Publisher --output ./mocks --outpkg mocks --case underscore -type Publisher interface { - PublishStatic(min, max int) - PublishStatistics(stats *actions.RunnerScaleSetStatistic) - PublishJobStarted(msg *actions.JobStarted) - PublishJobCompleted(msg *actions.JobCompleted) - PublishDesiredRunners(count int) -} - -//go:generate mockery --name ServerPublisher --output ./mocks --outpkg mocks --case underscore -type ServerPublisher interface { - Publisher - ListenAndServe(ctx context.Context) error -} - -var ( - _ Publisher = &discard{} - _ ServerPublisher = &exporter{} -) - -var Discard Publisher = &discard{} - -type exporter struct { - logger logr.Logger - baseLabels - srv *http.Server -} - -type ExporterConfig struct { - ScaleSetName string - ScaleSetNamespace string - Enterprise string - Organization string - Repository string - ServerAddr string - ServerEndpoint string - Logger logr.Logger -} - -func NewExporter(config ExporterConfig) ServerPublisher { - reg := prometheus.NewRegistry() - reg.MustRegister( - assignedJobs, - runningJobs, - registeredRunners, - busyRunners, - minRunners, - maxRunners, - desiredRunners, - idleRunners, - startedJobsTotal, - completedJobsTotal, - jobStartupDurationSeconds, - jobExecutionDurationSeconds, - ) - - mux := http.NewServeMux() - mux.Handle( - config.ServerEndpoint, - promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}), - ) - - return &exporter{ - logger: 
config.Logger.WithName("metrics"), - baseLabels: baseLabels{ - scaleSetName: config.ScaleSetName, - scaleSetNamespace: config.ScaleSetNamespace, - enterprise: config.Enterprise, - organization: config.Organization, - repository: config.Repository, - }, - srv: &http.Server{ - Addr: config.ServerAddr, - Handler: mux, - }, - } -} - -func (e *exporter) ListenAndServe(ctx context.Context) error { - e.logger.Info("starting metrics server", "addr", e.srv.Addr) - go func() { - <-ctx.Done() - e.logger.Info("stopping metrics server", "err", ctx.Err()) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - e.srv.Shutdown(ctx) - }() - return e.srv.ListenAndServe() -} - -func (m *exporter) PublishStatic(min, max int) { - l := m.scaleSetLabels() - maxRunners.With(l).Set(float64(max)) - minRunners.With(l).Set(float64(min)) -} - -func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) { - l := e.scaleSetLabels() - - assignedJobs.With(l).Set(float64(stats.TotalAssignedJobs)) - runningJobs.With(l).Set(float64(stats.TotalRunningJobs)) - registeredRunners.With(l).Set(float64(stats.TotalRegisteredRunners)) - busyRunners.With(l).Set(float64(stats.TotalBusyRunners)) - idleRunners.With(l).Set(float64(stats.TotalIdleRunners)) -} - -func (e *exporter) PublishJobStarted(msg *actions.JobStarted) { - l := e.startedJobLabels(msg) - startedJobsTotal.With(l).Inc() - - startupDuration := msg.JobMessageBase.RunnerAssignTime.Unix() - msg.JobMessageBase.ScaleSetAssignTime.Unix() - jobStartupDurationSeconds.With(l).Observe(float64(startupDuration)) -} - -func (e *exporter) PublishJobCompleted(msg *actions.JobCompleted) { - l := e.completedJobLabels(msg) - completedJobsTotal.With(l).Inc() - - executionDuration := msg.JobMessageBase.FinishTime.Unix() - msg.JobMessageBase.RunnerAssignTime.Unix() - jobExecutionDurationSeconds.With(l).Observe(float64(executionDuration)) -} - -func (m *exporter) PublishDesiredRunners(count int) { - 
desiredRunners.With(m.scaleSetLabels()).Set(float64(count)) -} - -type discard struct{} - -func (*discard) PublishStatic(int, int) {} -func (*discard) PublishStatistics(*actions.RunnerScaleSetStatistic) {} -func (*discard) PublishJobStarted(*actions.JobStarted) {} -func (*discard) PublishJobCompleted(*actions.JobCompleted) {} -func (*discard) PublishDesiredRunners(int) {} diff --git a/cmd/ghalistener/metrics/metrics_test.go b/cmd/ghalistener/metrics/metrics_test.go new file mode 100644 index 00000000..e808bfc2 --- /dev/null +++ b/cmd/ghalistener/metrics/metrics_test.go @@ -0,0 +1,88 @@ +package metrics + +import ( + "testing" + + "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" +) + +func TestInstallMetrics(t *testing.T) { + metricsConfig := v1alpha1.MetricsConfig{ + Counters: map[string]*v1alpha1.CounterMetric{ + // unknown metric shouldn't be registered + "gha_unknown": { + Labels: []string{labelKeyRepository}, + }, + // gauge metric shouldn't be registered from this section + MetricAssignedJobs: { + Labels: []string{labelKeyRepository}, + }, + // histogram metric shouldn't be registered from this section + MetricJobStartupDurationSeconds: { + Labels: []string{labelKeyRepository}, + }, + // counter metric should be registered + MetricStartedJobsTotal: { + Labels: []string{labelKeyRepository}, + }, + }, + Gauges: map[string]*v1alpha1.GaugeMetric{ + // unknown metric shouldn't be registered + "gha_unknown": { + Labels: []string{labelKeyRepository}, + }, + // counter metric shouldn't be registered from this section + MetricStartedJobsTotal: { + Labels: []string{labelKeyRepository}, + }, + // histogram metric shouldn't be registered from this section + MetricJobStartupDurationSeconds: { + Labels: []string{labelKeyRepository}, + }, + // gauge metric should be registered + MetricAssignedJobs: { + Labels: 
[]string{labelKeyRepository}, + }, + }, + Histograms: map[string]*v1alpha1.HistogramMetric{ + // unknown metric shouldn't be registered + "gha_unknown": { + Labels: []string{labelKeyRepository}, + }, + // counter metric shouldn't be registered from this section + MetricStartedJobsTotal: { + Labels: []string{labelKeyRepository}, + }, + // gauge metric shouldn't be registered from this section + MetricAssignedJobs: { + Labels: []string{labelKeyRepository}, + }, + // histogram metric should be registered + MetricJobExecutionDurationSeconds: { + Labels: []string{labelKeyRepository}, + Buckets: []float64{0.1, 1}, + }, + // histogram metric should be registered with default runtime buckets + MetricJobStartupDurationSeconds: { + Labels: []string{labelKeyRepository}, + }, + }, + } + reg := prometheus.NewRegistry() + + got := installMetrics(metricsConfig, reg, logr.Discard()) + assert.Len(t, got.counters, 1) + assert.Len(t, got.gauges, 1) + assert.Len(t, got.histograms, 2) + + assert.Equal(t, got.counters[MetricStartedJobsTotal].config, metricsConfig.Counters[MetricStartedJobsTotal]) + assert.Equal(t, got.gauges[MetricAssignedJobs].config, metricsConfig.Gauges[MetricAssignedJobs]) + assert.Equal(t, got.histograms[MetricJobExecutionDurationSeconds].config, metricsConfig.Histograms[MetricJobExecutionDurationSeconds]) + + duration := got.histograms[MetricJobStartupDurationSeconds] + assert.Equal(t, duration.config.Labels, metricsConfig.Histograms[MetricJobStartupDurationSeconds].Labels) + assert.Equal(t, duration.config.Buckets, defaultRuntimeBuckets) +} diff --git a/config/crd/bases/actions.github.com_autoscalinglisteners.yaml b/config/crd/bases/actions.github.com_autoscalinglisteners.yaml index 5edb826d..7af045bd 100644 --- a/config/crd/bases/actions.github.com_autoscalinglisteners.yaml +++ b/config/crd/bases/actions.github.com_autoscalinglisteners.yaml @@ -119,6 +119,50 @@ spec: description: Required minimum: 0 type: integer + metrics: + description: MetricsConfig holds 
configuration parameters for each metric type + properties: + counters: + additionalProperties: + description: CounterMetric holds configuration of a single metric of type Counter + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + gauges: + additionalProperties: + description: GaugeMetric holds configuration of a single metric of type Gauge + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + histograms: + additionalProperties: + description: HistogramMetric holds configuration of a single metric of type Histogram + properties: + buckets: + items: + type: number + type: array + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + type: object minRunners: description: Required minimum: 0 diff --git a/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml b/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml index 33782f42..bf6f60cc 100644 --- a/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml +++ b/config/crd/bases/actions.github.com_autoscalingrunnersets.yaml @@ -99,6 +99,50 @@ spec: x-kubernetes-map-type: atomic type: object type: object + listenerMetrics: + description: MetricsConfig holds configuration parameters for each metric type + properties: + counters: + additionalProperties: + description: CounterMetric holds configuration of a single metric of type Counter + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + gauges: + additionalProperties: + description: GaugeMetric holds configuration of a single metric of type Gauge + properties: + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + histograms: + additionalProperties: + description: HistogramMetric holds configuration of a single metric of type Histogram + properties: + buckets: + 
items: + type: number + type: array + labels: + items: + type: string + type: array + required: + - labels + type: object + type: object + type: object listenerTemplate: description: PodTemplateSpec describes the data a pod should have when created from a template properties: diff --git a/controllers/actions.github.com/resourcebuilder.go b/controllers/actions.github.com/resourcebuilder.go index d91d2697..2b7c9030 100644 --- a/controllers/actions.github.com/resourcebuilder.go +++ b/controllers/actions.github.com/resourcebuilder.go @@ -130,6 +130,7 @@ func (b *ResourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1. ImagePullSecrets: imagePullSecrets, Proxy: autoscalingRunnerSet.Spec.Proxy, GitHubServerTLS: autoscalingRunnerSet.Spec.GitHubServerTLS, + Metrics: autoscalingRunnerSet.Spec.ListenerMetrics, Template: autoscalingRunnerSet.Spec.ListenerTemplate, }, } @@ -203,6 +204,7 @@ func (b *ResourceBuilder) newScaleSetListenerConfig(autoscalingListener *v1alpha LogFormat: scaleSetListenerLogFormat, MetricsAddr: metricsAddr, MetricsEndpoint: metricsEndpoint, + Metrics: autoscalingListener.Spec.Metrics, } var buf bytes.Buffer