Create configurable metrics (#3975)

This commit is contained in:
Nikola Jokic 2025-03-24 15:27:42 +01:00 committed by GitHub
parent 7033e299cd
commit 5a960b5ebb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 952 additions and 328 deletions

View File

@ -23,7 +23,7 @@ KUBE_RBAC_PROXY_VERSION ?= v0.11.0
SHELLCHECK_VERSION ?= 0.8.0 SHELLCHECK_VERSION ?= 0.8.0
# Produce CRDs that work back to Kubernetes 1.11 (no version conversion) # Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true" CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true,allowDangerousTypes=true"
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN)) ifeq (,$(shell go env GOBIN))

View File

@ -61,6 +61,9 @@ type AutoscalingListenerSpec struct {
// +optional // +optional
GitHubServerTLS *GitHubServerTLSConfig `json:"githubServerTLS,omitempty"` GitHubServerTLS *GitHubServerTLSConfig `json:"githubServerTLS,omitempty"`
// +optional
Metrics *MetricsConfig `json:"metrics,omitempty"`
// +optional // +optional
Template *corev1.PodTemplateSpec `json:"template,omitempty"` Template *corev1.PodTemplateSpec `json:"template,omitempty"`
} }

View File

@ -74,6 +74,9 @@ type AutoscalingRunnerSetSpec struct {
// Required // Required
Template corev1.PodTemplateSpec `json:"template,omitempty"` Template corev1.PodTemplateSpec `json:"template,omitempty"`
// +optional
ListenerMetrics *MetricsConfig `json:"listenerMetrics,omitempty"`
// +optional // +optional
ListenerTemplate *corev1.PodTemplateSpec `json:"listenerTemplate,omitempty"` ListenerTemplate *corev1.PodTemplateSpec `json:"listenerTemplate,omitempty"`
@ -232,6 +235,32 @@ type ProxyServerConfig struct {
CredentialSecretRef string `json:"credentialSecretRef,omitempty"` CredentialSecretRef string `json:"credentialSecretRef,omitempty"`
} }
// MetricsConfig holds configuration parameters for each metric type.
// Map keys are full metric names (e.g. "gha_started_jobs_total"); names the
// listener does not recognize are logged and skipped at registration time.
type MetricsConfig struct {
	// Counters configures metrics of type Counter, keyed by metric name.
	// +optional
	Counters map[string]*CounterMetric `json:"counters,omitempty"`
	// Gauges configures metrics of type Gauge, keyed by metric name.
	// +optional
	Gauges map[string]*GaugeMetric `json:"gauges,omitempty"`
	// Histograms configures metrics of type Histogram, keyed by metric name.
	// +optional
	Histograms map[string]*HistogramMetric `json:"histograms,omitempty"`
}

// CounterMetric holds configuration of a single metric of type Counter.
type CounterMetric struct {
	// Labels is the list of label names attached to this counter.
	Labels []string `json:"labels"`
}

// GaugeMetric holds configuration of a single metric of type Gauge.
type GaugeMetric struct {
	// Labels is the list of label names attached to this gauge.
	Labels []string `json:"labels"`
}

// HistogramMetric holds configuration of a single metric of type Histogram.
type HistogramMetric struct {
	// Labels is the list of label names attached to this histogram.
	Labels []string `json:"labels"`
	// Buckets overrides the histogram buckets; when empty, the listener's
	// default runtime buckets are applied.
	Buckets []float64 `json:"buckets,omitempty"`
}
// AutoscalingRunnerSetStatus defines the observed state of AutoscalingRunnerSet // AutoscalingRunnerSetStatus defines the observed state of AutoscalingRunnerSet
type AutoscalingRunnerSetStatus struct { type AutoscalingRunnerSetStatus struct {
// +optional // +optional

View File

@ -102,6 +102,11 @@ func (in *AutoscalingListenerSpec) DeepCopyInto(out *AutoscalingListenerSpec) {
*out = new(GitHubServerTLSConfig) *out = new(GitHubServerTLSConfig)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.Metrics != nil {
in, out := &in.Metrics, &out.Metrics
*out = new(MetricsConfig)
(*in).DeepCopyInto(*out)
}
if in.Template != nil { if in.Template != nil {
in, out := &in.Template, &out.Template in, out := &in.Template, &out.Template
*out = new(v1.PodTemplateSpec) *out = new(v1.PodTemplateSpec)
@ -207,6 +212,11 @@ func (in *AutoscalingRunnerSetSpec) DeepCopyInto(out *AutoscalingRunnerSetSpec)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
in.Template.DeepCopyInto(&out.Template) in.Template.DeepCopyInto(&out.Template)
if in.ListenerMetrics != nil {
in, out := &in.ListenerMetrics, &out.ListenerMetrics
*out = new(MetricsConfig)
(*in).DeepCopyInto(*out)
}
if in.ListenerTemplate != nil { if in.ListenerTemplate != nil {
in, out := &in.ListenerTemplate, &out.ListenerTemplate in, out := &in.ListenerTemplate, &out.ListenerTemplate
*out = new(v1.PodTemplateSpec) *out = new(v1.PodTemplateSpec)
@ -249,6 +259,26 @@ func (in *AutoscalingRunnerSetStatus) DeepCopy() *AutoscalingRunnerSetStatus {
return out return out
} }
// NOTE: autogenerated by controller-gen (`make generate`); do not hand-edit.

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *CounterMetric) DeepCopyInto(out *CounterMetric) {
	*out = *in
	if in.Labels != nil {
		in, out := &in.Labels, &out.Labels
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CounterMetric.
func (in *CounterMetric) DeepCopy() *CounterMetric {
	if in == nil {
		return nil
	}
	out := new(CounterMetric)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *EphemeralRunner) DeepCopyInto(out *EphemeralRunner) { func (in *EphemeralRunner) DeepCopyInto(out *EphemeralRunner) {
*out = *in *out = *in
@ -446,6 +476,26 @@ func (in *EphemeralRunnerStatus) DeepCopy() *EphemeralRunnerStatus {
return out return out
} }
// NOTE: autogenerated by controller-gen (`make generate`); do not hand-edit.

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *GaugeMetric) DeepCopyInto(out *GaugeMetric) {
	*out = *in
	if in.Labels != nil {
		in, out := &in.Labels, &out.Labels
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GaugeMetric.
func (in *GaugeMetric) DeepCopy() *GaugeMetric {
	if in == nil {
		return nil
	}
	out := new(GaugeMetric)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *GitHubServerTLSConfig) DeepCopyInto(out *GitHubServerTLSConfig) { func (in *GitHubServerTLSConfig) DeepCopyInto(out *GitHubServerTLSConfig) {
*out = *in *out = *in
@ -466,6 +516,94 @@ func (in *GitHubServerTLSConfig) DeepCopy() *GitHubServerTLSConfig {
return out return out
} }
// NOTE: autogenerated by controller-gen (`make generate`); do not hand-edit.

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HistogramMetric) DeepCopyInto(out *HistogramMetric) {
	*out = *in
	if in.Labels != nil {
		in, out := &in.Labels, &out.Labels
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	if in.Buckets != nil {
		in, out := &in.Buckets, &out.Buckets
		*out = make([]float64, len(*in))
		copy(*out, *in)
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HistogramMetric.
func (in *HistogramMetric) DeepCopy() *HistogramMetric {
	if in == nil {
		return nil
	}
	out := new(HistogramMetric)
	in.DeepCopyInto(out)
	return out
}
// NOTE: autogenerated by controller-gen (`make generate`); do not hand-edit.
// Map values are pointers, so each non-nil entry is deep-copied individually.

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetricsConfig) DeepCopyInto(out *MetricsConfig) {
	*out = *in
	if in.Counters != nil {
		in, out := &in.Counters, &out.Counters
		*out = make(map[string]*CounterMetric, len(*in))
		for key, val := range *in {
			var outVal *CounterMetric
			if val == nil {
				(*out)[key] = nil
			} else {
				inVal := (*in)[key]
				in, out := &inVal, &outVal
				*out = new(CounterMetric)
				(*in).DeepCopyInto(*out)
			}
			(*out)[key] = outVal
		}
	}
	if in.Gauges != nil {
		in, out := &in.Gauges, &out.Gauges
		*out = make(map[string]*GaugeMetric, len(*in))
		for key, val := range *in {
			var outVal *GaugeMetric
			if val == nil {
				(*out)[key] = nil
			} else {
				inVal := (*in)[key]
				in, out := &inVal, &outVal
				*out = new(GaugeMetric)
				(*in).DeepCopyInto(*out)
			}
			(*out)[key] = outVal
		}
	}
	if in.Histograms != nil {
		in, out := &in.Histograms, &out.Histograms
		*out = make(map[string]*HistogramMetric, len(*in))
		for key, val := range *in {
			var outVal *HistogramMetric
			if val == nil {
				(*out)[key] = nil
			} else {
				inVal := (*in)[key]
				in, out := &inVal, &outVal
				*out = new(HistogramMetric)
				(*in).DeepCopyInto(*out)
			}
			(*out)[key] = outVal
		}
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricsConfig.
func (in *MetricsConfig) DeepCopy() *MetricsConfig {
	if in == nil {
		return nil
	}
	out := new(MetricsConfig)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ProxyConfig) DeepCopyInto(out *ProxyConfig) { func (in *ProxyConfig) DeepCopyInto(out *ProxyConfig) {
*out = *in *out = *in

View File

@ -119,6 +119,50 @@ spec:
description: Required description: Required
minimum: 0 minimum: 0
type: integer type: integer
metrics:
description: MetricsConfig holds configuration parameters for each metric type
properties:
counters:
additionalProperties:
description: CounterMetric holds configuration of a single metric of type Counter
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
gauges:
additionalProperties:
description: GaugeMetric holds configuration of a single metric of type Gauge
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
histograms:
additionalProperties:
description: HistogramMetric holds configuration of a single metric of type Histogram
properties:
buckets:
items:
type: number
type: array
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
type: object
minRunners: minRunners:
description: Required description: Required
minimum: 0 minimum: 0

View File

@ -99,6 +99,50 @@ spec:
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
type: object type: object
type: object type: object
listenerMetrics:
description: MetricsConfig holds configuration parameters for each metric type
properties:
counters:
additionalProperties:
description: CounterMetric holds configuration of a single metric of type Counter
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
gauges:
additionalProperties:
description: GaugeMetric holds configuration of a single metric of type Gauge
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
histograms:
additionalProperties:
description: HistogramMetric holds configuration of a single metric of type Histogram
properties:
buckets:
items:
type: number
type: array
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
type: object
listenerTemplate: listenerTemplate:
description: PodTemplateSpec describes the data a pod should have when created from a template description: PodTemplateSpec describes the data a pod should have when created from a template
properties: properties:

View File

@ -106,11 +106,16 @@ spec:
minRunners: {{ .Values.minRunners | int }} minRunners: {{ .Values.minRunners | int }}
{{- end }} {{- end }}
{{- with .Values.listenerTemplate}} {{- with .Values.listenerTemplate }}
listenerTemplate: listenerTemplate:
{{- toYaml . | nindent 4}} {{- toYaml . | nindent 4}}
{{- end }} {{- end }}
{{- with .Values.listenerMetrics }}
listenerMetrics:
{{- toYaml . | nindent 4 }}
{{- end }}
template: template:
{{- with .Values.template.metadata }} {{- with .Values.template.metadata }}
metadata: metadata:

View File

@ -119,6 +119,156 @@ githubConfigSecret:
# - name: side-car # - name: side-car
# image: example-sidecar # image: example-sidecar
## listenerMetrics are configurable metrics applied to the listener.
## In order to avoid helm merging these fields, we left the metrics commented out.
## When configuring metrics, please uncomment the listenerMetrics object below.
## You can modify the configuration to remove labels or to specify custom buckets for histograms.
##
## If the buckets field is not specified, the default buckets will be applied. Default buckets are
## provided here for documentation purposes.
# listenerMetrics:
# counters:
# gha_started_jobs_total:
# labels:
# ["repository", "organization", "enterprise", "job_name", "event_name"]
# gha_completed_jobs_total:
# labels:
# [
# "repository",
# "organization",
# "enterprise",
# "job_name",
# "event_name",
# "job_result",
# ]
# gauges:
# gha_assigned_jobs:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# gha_running_jobs:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# gha_registered_runners:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# gha_busy_runners:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# gha_min_runners:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# gha_max_runners:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# gha_desired_runners:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# gha_idle_runners:
# labels: ["name", "namespace", "repository", "organization", "enterprise"]
# histograms:
# gha_job_startup_duration_seconds:
# labels:
# ["repository", "organization", "enterprise", "job_name", "event_name"]
# buckets:
# [
# 0.01,
# 0.05,
# 0.1,
# 0.5,
# 1.0,
# 2.0,
# 3.0,
# 4.0,
# 5.0,
# 6.0,
# 7.0,
# 8.0,
# 9.0,
# 10.0,
# 12.0,
# 15.0,
# 18.0,
# 20.0,
# 25.0,
# 30.0,
# 40.0,
# 50.0,
# 60.0,
# 70.0,
# 80.0,
# 90.0,
# 100.0,
# 110.0,
# 120.0,
# 150.0,
# 180.0,
# 210.0,
# 240.0,
# 300.0,
# 360.0,
# 420.0,
# 480.0,
# 540.0,
# 600.0,
# 900.0,
# 1200.0,
# 1800.0,
# 2400.0,
# 3000.0,
# 3600.0,
# ]
# gha_job_execution_duration_seconds:
# labels:
# [
# "repository",
# "organization",
# "enterprise",
# "job_name",
# "event_name",
# "job_result",
# ]
# buckets:
# [
# 0.01,
# 0.05,
# 0.1,
# 0.5,
# 1.0,
# 2.0,
# 3.0,
# 4.0,
# 5.0,
# 6.0,
# 7.0,
# 8.0,
# 9.0,
# 10.0,
# 12.0,
# 15.0,
# 18.0,
# 20.0,
# 25.0,
# 30.0,
# 40.0,
# 50.0,
# 60.0,
# 70.0,
# 80.0,
# 90.0,
# 100.0,
# 110.0,
# 120.0,
# 150.0,
# 180.0,
# 210.0,
# 240.0,
# 300.0,
# 360.0,
# 420.0,
# 480.0,
# 540.0,
# 600.0,
# 900.0,
# 1200.0,
# 1800.0,
# 2400.0,
# 3000.0,
# 3600.0,
# ]
## template is the PodSpec for each runner Pod ## template is the PodSpec for each runner Pod
## For reference: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec ## For reference: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec
template: template:
@ -205,7 +355,6 @@ template:
- name: runner - name: runner
image: ghcr.io/actions/actions-runner:latest image: ghcr.io/actions/actions-runner:latest
command: ["/home/runner/run.sh"] command: ["/home/runner/run.sh"]
## Optional controller service account that needs to have required Role and RoleBinding ## Optional controller service account that needs to have required Role and RoleBinding
## to operate this gha-runner-scale-set installation. ## to operate this gha-runner-scale-set installation.
## The helm chart will try to find the controller deployment and its service account at installation time. ## The helm chart will try to find the controller deployment and its service account at installation time.

View File

@ -23,7 +23,7 @@ type App struct {
// initialized fields // initialized fields
listener Listener listener Listener
worker Worker worker Worker
metrics metrics.ServerPublisher metrics metrics.ServerExporter
} }
//go:generate mockery --name Listener --output ./mocks --outpkg mocks --case underscore //go:generate mockery --name Listener --output ./mocks --outpkg mocks --case underscore
@ -69,6 +69,8 @@ func New(config config.Config) (*App, error) {
Repository: ghConfig.Repository, Repository: ghConfig.Repository,
ServerAddr: config.MetricsAddr, ServerAddr: config.MetricsAddr,
ServerEndpoint: config.MetricsEndpoint, ServerEndpoint: config.MetricsEndpoint,
Logger: app.logger.WithName("metrics exporter"),
Metrics: *config.Metrics,
}) })
} }

View File

@ -8,6 +8,7 @@ import (
"net/url" "net/url"
"os" "os"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/build" "github.com/actions/actions-runner-controller/build"
"github.com/actions/actions-runner-controller/github/actions" "github.com/actions/actions-runner-controller/github/actions"
"github.com/actions/actions-runner-controller/logging" "github.com/actions/actions-runner-controller/logging"
@ -32,6 +33,7 @@ type Config struct {
LogFormat string `json:"log_format"` LogFormat string `json:"log_format"`
MetricsAddr string `json:"metrics_addr"` MetricsAddr string `json:"metrics_addr"`
MetricsEndpoint string `json:"metrics_endpoint"` MetricsEndpoint string `json:"metrics_endpoint"`
Metrics *v1alpha1.MetricsConfig `json:"metrics"`
} }
func Read(path string) (Config, error) { func Read(path string) (Config, error) {

View File

@ -2,9 +2,12 @@ package metrics
import ( import (
"context" "context"
"errors"
"net/http" "net/http"
"strings"
"time" "time"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/github/actions" "github.com/actions/actions-runner-controller/github/actions"
"github.com/go-logr/logr" "github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
@ -22,145 +25,345 @@ const (
labelKeyJobResult = "job_result" labelKeyJobResult = "job_result"
) )
const githubScaleSetSubsystem = "gha" const (
githubScaleSetSubsystem = "gha"
// labels githubScaleSetSubsystemPrefix = "gha_"
var (
scaleSetLabels = []string{
labelKeyRunnerScaleSetName,
labelKeyRepository,
labelKeyOrganization,
labelKeyEnterprise,
labelKeyRunnerScaleSetNamespace,
}
jobLabels = []string{
labelKeyRepository,
labelKeyOrganization,
labelKeyEnterprise,
labelKeyJobName,
labelKeyEventName,
}
completedJobsTotalLabels = append(jobLabels, labelKeyJobResult)
jobExecutionDurationLabels = append(jobLabels, labelKeyJobResult)
startedJobsTotalLabels = jobLabels
jobStartupDurationLabels = jobLabels
) )
var ( // Names of all metrics available on the listener
assignedJobs = prometheus.NewGaugeVec( const (
prometheus.GaugeOpts{ MetricAssignedJobs = "gha_assigned_jobs"
Subsystem: githubScaleSetSubsystem, MetricRunningJobs = "gha_running_jobs"
Name: "assigned_jobs", MetricRegisteredRunners = "gha_registered_runners"
Help: "Number of jobs assigned to this scale set.", MetricBusyRunners = "gha_busy_runners"
}, MetricMinRunners = "gha_min_runners"
scaleSetLabels, MetricMaxRunners = "gha_max_runners"
) MetricDesiredRunners = "gha_desired_runners"
MetricIdleRunners = "gha_idle_runners"
runningJobs = prometheus.NewGaugeVec( MetricStartedJobsTotal = "gha_started_jobs_total"
prometheus.GaugeOpts{ MetricCompletedJobsTotal = "gha_completed_jobs_total"
Subsystem: githubScaleSetSubsystem, MetricJobStartupDurationSeconds = "gha_job_startup_duration_seconds"
Name: "running_jobs", MetricJobExecutionDurationSeconds = "gha_job_execution_duration_seconds"
Help: "Number of jobs running (or about to be run).",
},
scaleSetLabels,
)
registeredRunners = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: githubScaleSetSubsystem,
Name: "registered_runners",
Help: "Number of runners registered by the scale set.",
},
scaleSetLabels,
)
busyRunners = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: githubScaleSetSubsystem,
Name: "busy_runners",
Help: "Number of registered runners running a job.",
},
scaleSetLabels,
)
minRunners = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: githubScaleSetSubsystem,
Name: "min_runners",
Help: "Minimum number of runners.",
},
scaleSetLabels,
)
maxRunners = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: githubScaleSetSubsystem,
Name: "max_runners",
Help: "Maximum number of runners.",
},
scaleSetLabels,
)
desiredRunners = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: githubScaleSetSubsystem,
Name: "desired_runners",
Help: "Number of runners desired by the scale set.",
},
scaleSetLabels,
)
idleRunners = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: githubScaleSetSubsystem,
Name: "idle_runners",
Help: "Number of registered runners not running a job.",
},
scaleSetLabels,
)
startedJobsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: githubScaleSetSubsystem,
Name: "started_jobs_total",
Help: "Total number of jobs started.",
},
startedJobsTotalLabels,
)
completedJobsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "completed_jobs_total",
Help: "Total number of jobs completed.",
Subsystem: githubScaleSetSubsystem,
},
completedJobsTotalLabels,
)
jobStartupDurationSeconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: githubScaleSetSubsystem,
Name: "job_startup_duration_seconds",
Help: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).",
Buckets: runtimeBuckets,
},
jobStartupDurationLabels,
)
jobExecutionDurationSeconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: githubScaleSetSubsystem,
Name: "job_execution_duration_seconds",
Help: "Time spent executing workflow jobs by the scale set (in seconds).",
Buckets: runtimeBuckets,
},
jobExecutionDurationLabels,
)
) )
var runtimeBuckets []float64 = []float64{ type metricsHelpRegistry struct {
counters map[string]string
gauges map[string]string
histograms map[string]string
}
// metricsHelp maps each supported metric name to its prometheus help string.
// It doubles as the allow-list of metric names the listener understands:
// installMetrics skips any configured name that is absent from these maps.
var metricsHelp = metricsHelpRegistry{
	counters: map[string]string{
		MetricStartedJobsTotal:   "Total number of jobs started.",
		MetricCompletedJobsTotal: "Total number of jobs completed.",
	},
	gauges: map[string]string{
		MetricAssignedJobs:      "Number of jobs assigned to this scale set.",
		MetricRunningJobs:       "Number of jobs running (or about to be run).",
		MetricRegisteredRunners: "Number of runners registered by the scale set.",
		MetricBusyRunners:       "Number of registered runners running a job.",
		MetricMinRunners:        "Minimum number of runners.",
		MetricMaxRunners:        "Maximum number of runners.",
		MetricDesiredRunners:    "Number of runners desired by the scale set.",
		MetricIdleRunners:       "Number of registered runners not running a job.",
	},
	histograms: map[string]string{
		MetricJobStartupDurationSeconds:   "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).",
		MetricJobExecutionDurationSeconds: "Time spent executing workflow jobs by the scale set (in seconds).",
	},
}
// jobLabels derives the per-job label values for a job message: the
// enterprise comes from the exporter's constant scale-set labels, the
// remaining values from the message itself.
func (e *exporter) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels {
	labels := make(prometheus.Labels, 5)
	labels[labelKeyEnterprise] = e.scaleSetLabels[labelKeyEnterprise]
	labels[labelKeyOrganization] = jobBase.OwnerName
	labels[labelKeyRepository] = jobBase.RepositoryName
	labels[labelKeyJobName] = jobBase.JobDisplayName
	labels[labelKeyEventName] = jobBase.EventName
	return labels
}
// completedJobLabels extends the common job labels with the job result.
func (e *exporter) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels {
	labels := e.jobLabels(&msg.JobMessageBase)
	labels[labelKeyJobResult] = msg.Result
	return labels
}
// startedJobLabels returns the label values for a job-started message;
// started jobs carry exactly the common job labels.
func (e *exporter) startedJobLabels(msg *actions.JobStarted) prometheus.Labels {
	labels := e.jobLabels(&msg.JobMessageBase)
	return labels
}
// Publisher receives listener events and records them as metrics.
//
//go:generate mockery --name Publisher --output ./mocks --outpkg mocks --case underscore
type Publisher interface {
	// PublishStatic records the configured min/max runner counts.
	PublishStatic(min, max int)
	// PublishStatistics records a snapshot of scale set statistics.
	PublishStatistics(stats *actions.RunnerScaleSetStatistic)
	// PublishJobStarted records a job-started message.
	PublishJobStarted(msg *actions.JobStarted)
	// PublishJobCompleted records a job-completed message.
	PublishJobCompleted(msg *actions.JobCompleted)
	// PublishDesiredRunners records the desired runner count.
	PublishDesiredRunners(count int)
}
// ServerExporter is a Publisher that additionally serves the collected
// metrics over HTTP until the supplied context is canceled.
//
// BUG FIX: the go:generate directive still named the old ServerPublisher
// interface after the rename to ServerExporter, so mockery would target a
// type that no longer exists. Point it at ServerExporter.
//
//go:generate mockery --name ServerExporter --output ./mocks --outpkg mocks --case underscore
type ServerExporter interface {
	Publisher
	ListenAndServe(ctx context.Context) error
}
// Compile-time checks that the concrete types satisfy the interfaces.
var (
	_ Publisher      = &discard{}
	_ ServerExporter = &exporter{}
)

// Discard is a Publisher that ignores every event.
var Discard Publisher = &discard{}
// exporter implements ServerExporter: it owns the configured metric vectors
// (via the embedded *metrics) and the HTTP server that exposes them.
type exporter struct {
	logger logr.Logger
	// scaleSetLabels holds the constant label values identifying this scale set.
	scaleSetLabels prometheus.Labels
	*metrics
	srv *http.Server
}

// metrics holds the registered metric vectors, keyed by full metric name.
type metrics struct {
	counters   map[string]*counterMetric
	gauges     map[string]*gaugeMetric
	histograms map[string]*histogramMetric
}

// counterMetric pairs a registered counter vector with its user configuration.
type counterMetric struct {
	counter *prometheus.CounterVec
	config  *v1alpha1.CounterMetric
}

// gaugeMetric pairs a registered gauge vector with its user configuration.
type gaugeMetric struct {
	gauge  *prometheus.GaugeVec
	config *v1alpha1.GaugeMetric
}

// histogramMetric pairs a registered histogram vector with its user configuration.
type histogramMetric struct {
	histogram *prometheus.HistogramVec
	config    *v1alpha1.HistogramMetric
}
// ExporterConfig carries everything NewExporter needs: the scale set
// identity (used as constant label values), the metrics HTTP server
// address and endpoint, a logger, and the metrics selected by the user.
type ExporterConfig struct {
	ScaleSetName      string
	ScaleSetNamespace string
	Enterprise        string
	Organization      string
	Repository        string
	// ServerAddr is the listen address of the metrics HTTP server.
	ServerAddr string
	// ServerEndpoint is the HTTP path the metrics handler is mounted on.
	ServerEndpoint string
	Logger         logr.Logger
	// Metrics selects which metrics to register, their labels, and
	// optional histogram buckets.
	Metrics v1alpha1.MetricsConfig
}
// NewExporter registers the configured metrics on a fresh registry and
// returns a ServerExporter that serves them on config.ServerEndpoint at
// config.ServerAddr. The server is not started until ListenAndServe.
func NewExporter(config ExporterConfig) ServerExporter {
	reg := prometheus.NewRegistry()
	metrics := installMetrics(config.Metrics, reg, config.Logger)
	mux := http.NewServeMux()
	mux.Handle(
		config.ServerEndpoint,
		promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}),
	)
	return &exporter{
		logger: config.Logger.WithName("metrics"),
		// Constant label values applied to every scale-set-scoped metric.
		scaleSetLabels: prometheus.Labels{
			labelKeyRunnerScaleSetName:      config.ScaleSetName,
			labelKeyRunnerScaleSetNamespace: config.ScaleSetNamespace,
			labelKeyEnterprise:              config.Enterprise,
			labelKeyOrganization:            config.Organization,
			labelKeyRepository:              config.Repository,
		},
		metrics: metrics,
		// NOTE(review): http.Server has no Read/Write timeouts here — consider
		// adding ReadHeaderTimeout; confirm exposure before changing behavior.
		srv: &http.Server{
			Addr:    config.ServerAddr,
			Handler: mux,
		},
	}
}
var errUnknownMetricName = errors.New("unknown metric name")

// installMetrics registers the metrics selected in config on reg and returns
// the lookup tables the exporter consults at publish time. Configured names
// that are not present in metricsHelp are logged and skipped, never registered.
func installMetrics(config v1alpha1.MetricsConfig, reg *prometheus.Registry, logger logr.Logger) *metrics {
	logger.Info(
		"Registering metrics",
		"gauges",
		config.Gauges,
		"counters",
		config.Counters,
		"histograms",
		config.Histograms,
	)
	metrics := &metrics{
		counters:   make(map[string]*counterMetric, len(config.Counters)),
		gauges:     make(map[string]*gaugeMetric, len(config.Gauges)),
		histograms: make(map[string]*histogramMetric, len(config.Histograms)),
	}
	for name, cfg := range config.Gauges {
		help, ok := metricsHelp.gauges[name]
		if !ok {
			// BUG FIX: logr's Error signature is (err, msg, keysAndValues...).
			// The original passed "name" as the message, which left an
			// odd-length key/value list. Supply a proper message instead.
			logger.Error(errUnknownMetricName, "unknown gauge metric", "name", name, "kind", "gauge")
			continue
		}
		g := prometheus.V2.NewGaugeVec(prometheus.GaugeVecOpts{
			GaugeOpts: prometheus.GaugeOpts{
				Subsystem: githubScaleSetSubsystem,
				// The subsystem is re-applied by prometheus, so strip "gha_".
				Name: strings.TrimPrefix(name, githubScaleSetSubsystemPrefix),
				Help: help,
			},
			VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
		})
		reg.MustRegister(g)
		metrics.gauges[name] = &gaugeMetric{
			gauge:  g,
			config: cfg,
		}
	}
	for name, cfg := range config.Counters {
		help, ok := metricsHelp.counters[name]
		if !ok {
			// BUG FIX: see gauge loop — give logr.Error a real message.
			logger.Error(errUnknownMetricName, "unknown counter metric", "name", name, "kind", "counter")
			continue
		}
		c := prometheus.V2.NewCounterVec(prometheus.CounterVecOpts{
			CounterOpts: prometheus.CounterOpts{
				Subsystem: githubScaleSetSubsystem,
				Name:      strings.TrimPrefix(name, githubScaleSetSubsystemPrefix),
				Help:      help,
			},
			VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
		})
		reg.MustRegister(c)
		metrics.counters[name] = &counterMetric{
			counter: c,
			config:  cfg,
		}
	}
	for name, cfg := range config.Histograms {
		help, ok := metricsHelp.histograms[name]
		if !ok {
			// BUG FIX: see gauge loop — give logr.Error a real message.
			logger.Error(errUnknownMetricName, "unknown histogram metric", "name", name, "kind", "histogram")
			continue
		}
		// Fall back to the listener's default buckets when none configured.
		buckets := defaultRuntimeBuckets
		if len(cfg.Buckets) > 0 {
			buckets = cfg.Buckets
		}
		h := prometheus.V2.NewHistogramVec(prometheus.HistogramVecOpts{
			HistogramOpts: prometheus.HistogramOpts{
				Subsystem: githubScaleSetSubsystem,
				Name:      strings.TrimPrefix(name, githubScaleSetSubsystemPrefix),
				Help:      help,
				Buckets:   buckets,
			},
			VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
		})
		// Record the effective buckets on the stored config (note: this
		// mutates the caller-supplied *HistogramMetric).
		cfg.Buckets = buckets
		reg.MustRegister(h)
		metrics.histograms[name] = &histogramMetric{
			histogram: h,
			config:    cfg,
		}
	}
	return metrics
}
// ListenAndServe serves the metrics endpoint until ctx is canceled, then
// shuts the HTTP server down with a 5-second grace period. It returns the
// error from http.Server.ListenAndServe (http.ErrServerClosed after a
// graceful shutdown).
func (e *exporter) ListenAndServe(ctx context.Context) error {
	e.logger.Info("starting metrics server", "addr", e.srv.Addr)
	go func() {
		<-ctx.Done()
		e.logger.Info("stopping metrics server", "err", ctx.Err())
		// Use a distinct name to avoid shadowing the (already canceled) ctx.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		// BUG FIX: the Shutdown error was silently discarded; surface it.
		if err := e.srv.Shutdown(shutdownCtx); err != nil {
			e.logger.Error(err, "metrics server shutdown failed")
		}
	}()
	return e.srv.ListenAndServe()
}
// setGauge updates the named gauge, if it was configured, projecting
// allLabels down to the label names the gauge was registered with.
// Unconfigured metrics are a silent no-op.
func (e *exporter) setGauge(name string, allLabels prometheus.Labels, val float64) {
	metric, registered := e.metrics.gauges[name]
	if !registered {
		return
	}
	subset := make(prometheus.Labels, len(metric.config.Labels))
	for _, key := range metric.config.Labels {
		subset[key] = allLabels[key]
	}
	metric.gauge.With(subset).Set(val)
}
// incCounter increments the named counter, if it was configured, projecting
// allLabels down to the label names the counter was registered with.
// Unconfigured metrics are a silent no-op.
func (e *exporter) incCounter(name string, allLabels prometheus.Labels) {
	metric, registered := e.metrics.counters[name]
	if !registered {
		return
	}
	subset := make(prometheus.Labels, len(metric.config.Labels))
	for _, key := range metric.config.Labels {
		subset[key] = allLabels[key]
	}
	metric.counter.With(subset).Inc()
}
// observeHistogram records val on the named histogram, if it was configured,
// projecting allLabels down to the label names the histogram was registered
// with. Unconfigured metrics are a silent no-op.
func (e *exporter) observeHistogram(name string, allLabels prometheus.Labels, val float64) {
	metric, registered := e.metrics.histograms[name]
	if !registered {
		return
	}
	subset := make(prometheus.Labels, len(metric.config.Labels))
	for _, key := range metric.config.Labels {
		subset[key] = allLabels[key]
	}
	metric.histogram.With(subset).Observe(val)
}
// PublishStatic reports the scale set's static min/max runner configuration.
func (e *exporter) PublishStatic(min, max int) {
	e.setGauge(MetricMinRunners, e.scaleSetLabels, float64(min))
	e.setGauge(MetricMaxRunners, e.scaleSetLabels, float64(max))
}
// PublishStatistics reports the current job/runner statistics for the scale set.
func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) {
	e.setGauge(MetricAssignedJobs, e.scaleSetLabels, float64(stats.TotalAssignedJobs))
	e.setGauge(MetricRunningJobs, e.scaleSetLabels, float64(stats.TotalRunningJobs))
	e.setGauge(MetricRegisteredRunners, e.scaleSetLabels, float64(stats.TotalRegisteredRunners))
	// BUG FIX: gha_busy_runners was fed stats.TotalRegisteredRunners (with a
	// redundant double float64 conversion), so it always mirrored the
	// registered-runner gauge. Report the busy-runner count instead.
	e.setGauge(MetricBusyRunners, e.scaleSetLabels, float64(stats.TotalBusyRunners))
	e.setGauge(MetricIdleRunners, e.scaleSetLabels, float64(stats.TotalIdleRunners))
}
// PublishJobStarted increments the started-jobs counter and records the
// startup latency (time between scale-set assignment and runner assignment).
func (e *exporter) PublishJobStarted(msg *actions.JobStarted) {
	l := e.startedJobLabels(msg)
	e.incCounter(MetricStartedJobsTotal, l)
	// BUG FIX: subtracting Unix() timestamps truncates to whole seconds,
	// leaving the sub-second histogram buckets (0.01..0.5) permanently empty.
	// time.Time.Sub preserves sub-second precision.
	startupDuration := msg.JobMessageBase.RunnerAssignTime.Sub(msg.JobMessageBase.ScaleSetAssignTime).Seconds()
	e.observeHistogram(MetricJobStartupDurationSeconds, l, startupDuration)
}
// PublishJobCompleted increments the completed-jobs counter and records the
// execution duration (time between runner assignment and job finish).
func (e *exporter) PublishJobCompleted(msg *actions.JobCompleted) {
	l := e.completedJobLabels(msg)
	e.incCounter(MetricCompletedJobsTotal, l)
	// BUG FIX: subtracting Unix() timestamps truncates to whole seconds,
	// leaving the sub-second histogram buckets permanently empty.
	// time.Time.Sub preserves sub-second precision.
	executionDuration := msg.JobMessageBase.FinishTime.Sub(msg.JobMessageBase.RunnerAssignTime).Seconds()
	e.observeHistogram(MetricJobExecutionDurationSeconds, l, executionDuration)
}
// PublishDesiredRunners reports how many runners the scale set currently wants.
func (e *exporter) PublishDesiredRunners(count int) {
	desired := float64(count)
	e.setGauge(MetricDesiredRunners, e.scaleSetLabels, desired)
}
// discard is the no-op Publisher behind the exported Discard variable:
// every publish method drops its input.
type discard struct{}

func (*discard) PublishStatic(int, int)                          {}
func (*discard) PublishStatistics(*actions.RunnerScaleSetStatistic) {}
func (*discard) PublishJobStarted(*actions.JobStarted)           {}
func (*discard) PublishJobCompleted(*actions.JobCompleted)       {}
func (*discard) PublishDesiredRunners(int)                       {}
var defaultRuntimeBuckets []float64 = []float64{
0.01, 0.01,
0.05, 0.05,
0.1, 0.1,
@ -207,176 +410,3 @@ var runtimeBuckets []float64 = []float64{
3000, 3000,
3600, 3600,
} }
type baseLabels struct {
scaleSetName string
scaleSetNamespace string
enterprise string
organization string
repository string
}
func (b *baseLabels) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels {
return prometheus.Labels{
labelKeyEnterprise: b.enterprise,
labelKeyOrganization: jobBase.OwnerName,
labelKeyRepository: jobBase.RepositoryName,
labelKeyJobName: jobBase.JobDisplayName,
labelKeyEventName: jobBase.EventName,
}
}
func (b *baseLabels) scaleSetLabels() prometheus.Labels {
return prometheus.Labels{
labelKeyRunnerScaleSetName: b.scaleSetName,
labelKeyRunnerScaleSetNamespace: b.scaleSetNamespace,
labelKeyEnterprise: b.enterprise,
labelKeyOrganization: b.organization,
labelKeyRepository: b.repository,
}
}
func (b *baseLabels) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels {
l := b.jobLabels(&msg.JobMessageBase)
l[labelKeyJobResult] = msg.Result
return l
}
func (b *baseLabels) startedJobLabels(msg *actions.JobStarted) prometheus.Labels {
l := b.jobLabels(&msg.JobMessageBase)
return l
}
//go:generate mockery --name Publisher --output ./mocks --outpkg mocks --case underscore
type Publisher interface {
PublishStatic(min, max int)
PublishStatistics(stats *actions.RunnerScaleSetStatistic)
PublishJobStarted(msg *actions.JobStarted)
PublishJobCompleted(msg *actions.JobCompleted)
PublishDesiredRunners(count int)
}
//go:generate mockery --name ServerPublisher --output ./mocks --outpkg mocks --case underscore

// ServerPublisher is a Publisher that also serves its metrics over HTTP
// until the given context is cancelled.
type ServerPublisher interface {
	Publisher
	ListenAndServe(ctx context.Context) error
}
// Compile-time checks that the implementations satisfy the interfaces.
var (
	_ Publisher       = &discard{}
	_ ServerPublisher = &exporter{}
)

// Discard is a Publisher that drops every event; use it when metrics
// publishing is disabled.
var Discard Publisher = &discard{}
// exporter publishes listener metrics to Prometheus and serves them
// over the embedded HTTP server. The embedded baseLabels provide the
// label values attached to every metric.
type exporter struct {
	logger logr.Logger
	baseLabels
	// srv serves the /metrics endpoint; shut down via ListenAndServe's ctx.
	srv *http.Server
}
// ExporterConfig holds everything needed to construct an exporter:
// the scale set identity used as metric labels, and the HTTP server
// address/endpoint the metrics are served on.
type ExporterConfig struct {
	ScaleSetName      string
	ScaleSetNamespace string
	Enterprise        string
	Organization      string
	Repository        string
	// ServerAddr is the listen address, e.g. ":8080".
	ServerAddr string
	// ServerEndpoint is the HTTP path the metrics handler is mounted on.
	ServerEndpoint string
	Logger         logr.Logger
}
// NewExporter builds a ServerPublisher that registers every listener
// metric on a fresh Prometheus registry and serves them over HTTP at
// the configured address and endpoint.
func NewExporter(config ExporterConfig) ServerPublisher {
	registry := prometheus.NewRegistry()
	registry.MustRegister(
		assignedJobs,
		runningJobs,
		registeredRunners,
		busyRunners,
		minRunners,
		maxRunners,
		desiredRunners,
		idleRunners,
		startedJobsTotal,
		completedJobsTotal,
		jobStartupDurationSeconds,
		jobExecutionDurationSeconds,
	)

	// Mount the Prometheus handler on the configured endpoint only.
	handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{Registry: registry})
	mux := http.NewServeMux()
	mux.Handle(config.ServerEndpoint, handler)

	labels := baseLabels{
		scaleSetName:      config.ScaleSetName,
		scaleSetNamespace: config.ScaleSetNamespace,
		enterprise:        config.Enterprise,
		organization:      config.Organization,
		repository:        config.Repository,
	}

	return &exporter{
		logger:     config.Logger.WithName("metrics"),
		baseLabels: labels,
		srv: &http.Server{
			Addr:    config.ServerAddr,
			Handler: mux,
		},
	}
}
// ListenAndServe starts the metrics HTTP server and blocks until it
// stops. When ctx is cancelled, the server is shut down gracefully with
// a 5-second deadline. The return value is whatever the underlying
// http.Server reports (http.ErrServerClosed after graceful shutdown).
func (e *exporter) ListenAndServe(ctx context.Context) error {
	e.logger.Info("starting metrics server", "addr", e.srv.Addr)
	go func() {
		<-ctx.Done()
		e.logger.Info("stopping metrics server", "err", ctx.Err())
		// The parent ctx is already cancelled, so shutdown needs its
		// own context. Use a distinct name to avoid shadowing ctx.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()
		// Fix: the Shutdown error was previously discarded, hiding
		// failed graceful shutdowns (e.g. the deadline expiring).
		if err := e.srv.Shutdown(shutdownCtx); err != nil {
			e.logger.Error(err, "failed to shut down metrics server gracefully")
		}
	}()
	return e.srv.ListenAndServe()
}
// PublishStatic records the scale set's configured min and max runner
// counts as gauges.
//
// Fix: receiver renamed m -> e for consistency with every other
// exporter method (Go convention: one receiver name per type).
func (e *exporter) PublishStatic(min, max int) {
	l := e.scaleSetLabels()
	maxRunners.With(l).Set(float64(max))
	minRunners.With(l).Set(float64(min))
}
// PublishStatistics updates the scale-set gauges from a statistics
// snapshot delivered by the Actions service.
func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) {
	labels := e.scaleSetLabels()
	assignedJobs.With(labels).Set(float64(stats.TotalAssignedJobs))
	runningJobs.With(labels).Set(float64(stats.TotalRunningJobs))
	registeredRunners.With(labels).Set(float64(stats.TotalRegisteredRunners))
	busyRunners.With(labels).Set(float64(stats.TotalBusyRunners))
	idleRunners.With(labels).Set(float64(stats.TotalIdleRunners))
}
// PublishJobStarted increments the started-jobs counter and observes
// how long the job waited between being assigned to the scale set and
// being assigned a runner.
func (e *exporter) PublishJobStarted(msg *actions.JobStarted) {
	l := e.startedJobLabels(msg)
	startedJobsTotal.With(l).Inc()

	// NOTE(review): computed via Unix(), so resolution is whole seconds;
	// sub-second startups observe as 0. Confirm that is acceptable.
	startupDuration := msg.JobMessageBase.RunnerAssignTime.Unix() - msg.JobMessageBase.ScaleSetAssignTime.Unix()
	jobStartupDurationSeconds.With(l).Observe(float64(startupDuration))
}
// PublishJobCompleted increments the completed-jobs counter (labeled
// with the job result) and observes the job's execution time from
// runner assignment to finish.
func (e *exporter) PublishJobCompleted(msg *actions.JobCompleted) {
	l := e.completedJobLabels(msg)
	completedJobsTotal.With(l).Inc()

	// NOTE(review): whole-second resolution, same as job startup duration.
	executionDuration := msg.JobMessageBase.FinishTime.Unix() - msg.JobMessageBase.RunnerAssignTime.Unix()
	jobExecutionDurationSeconds.With(l).Observe(float64(executionDuration))
}
// PublishDesiredRunners records the number of runners the scale set
// currently wants, as computed by the listener scaling logic.
//
// Fix: receiver renamed m -> e for consistency with every other
// exporter method (Go convention: one receiver name per type).
func (e *exporter) PublishDesiredRunners(count int) {
	desiredRunners.With(e.scaleSetLabels()).Set(float64(count))
}
// discard is a no-op Publisher: every publish call is dropped. It is
// used when metrics publishing is disabled.
type discard struct{}

func (*discard) PublishStatic(int, int)                             {}
func (*discard) PublishStatistics(*actions.RunnerScaleSetStatistic) {}
func (*discard) PublishJobStarted(*actions.JobStarted)              {}
func (*discard) PublishJobCompleted(*actions.JobCompleted)          {}
func (*discard) PublishDesiredRunners(int)                          {}

View File

@ -0,0 +1,88 @@
package metrics
import (
"testing"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
)
// TestInstallMetrics verifies that installMetrics registers a configured
// metric only under the config section matching its declared type:
// counters only from Counters, gauges only from Gauges, histograms only
// from Histograms. Unknown metric names are ignored, and a histogram
// configured without explicit buckets falls back to defaultRuntimeBuckets.
func TestInstallMetrics(t *testing.T) {
	metricsConfig := v1alpha1.MetricsConfig{
		Counters: map[string]*v1alpha1.CounterMetric{
			// unknown metric shouldn't be registered
			"gha_unknown": {
				Labels: []string{labelKeyRepository},
			},
			// gauge metric shouldn't be registered from this section
			MetricAssignedJobs: {
				Labels: []string{labelKeyRepository},
			},
			// histogram metric shouldn't be registered from this section
			MetricJobStartupDurationSeconds: {
				Labels: []string{labelKeyRepository},
			},
			// counter metric should be registered
			MetricStartedJobsTotal: {
				Labels: []string{labelKeyRepository},
			},
		},
		Gauges: map[string]*v1alpha1.GaugeMetric{
			// unknown metric shouldn't be registered
			"gha_unknown": {
				Labels: []string{labelKeyRepository},
			},
			// counter metric shouldn't be registered from this section
			MetricStartedJobsTotal: {
				Labels: []string{labelKeyRepository},
			},
			// histogram metric shouldn't be registered from this section
			MetricJobStartupDurationSeconds: {
				Labels: []string{labelKeyRepository},
			},
			// gauge metric should be registered
			MetricAssignedJobs: {
				Labels: []string{labelKeyRepository},
			},
		},
		Histograms: map[string]*v1alpha1.HistogramMetric{
			// unknown metric shouldn't be registered
			"gha_unknown": {
				Labels: []string{labelKeyRepository},
			},
			// counter metric shouldn't be registered from this section
			MetricStartedJobsTotal: {
				Labels: []string{labelKeyRepository},
			},
			// gauge metric shouldn't be registered from this section
			MetricAssignedJobs: {
				Labels: []string{labelKeyRepository},
			},
			// histogram metric should be registered
			MetricJobExecutionDurationSeconds: {
				Labels:  []string{labelKeyRepository},
				Buckets: []float64{0.1, 1},
			},
			// histogram metric should be registered with default runtime buckets
			MetricJobStartupDurationSeconds: {
				Labels: []string{labelKeyRepository},
			},
		},
	}

	reg := prometheus.NewRegistry()
	got := installMetrics(metricsConfig, reg, logr.Discard())

	// Exactly one valid entry per section survives (plus the extra
	// valid histogram), and each retains its original config.
	assert.Len(t, got.counters, 1)
	assert.Len(t, got.gauges, 1)
	assert.Len(t, got.histograms, 2)
	assert.Equal(t, got.counters[MetricStartedJobsTotal].config, metricsConfig.Counters[MetricStartedJobsTotal])
	assert.Equal(t, got.gauges[MetricAssignedJobs].config, metricsConfig.Gauges[MetricAssignedJobs])
	assert.Equal(t, got.histograms[MetricJobExecutionDurationSeconds].config, metricsConfig.Histograms[MetricJobExecutionDurationSeconds])

	// No buckets configured: defaults are applied.
	duration := got.histograms[MetricJobStartupDurationSeconds]
	assert.Equal(t, duration.config.Labels, metricsConfig.Histograms[MetricJobStartupDurationSeconds].Labels)
	assert.Equal(t, duration.config.Buckets, defaultRuntimeBuckets)
}

View File

@ -119,6 +119,50 @@ spec:
description: Required description: Required
minimum: 0 minimum: 0
type: integer type: integer
metrics:
description: MetricsConfig holds configuration parameters for each metric type
properties:
counters:
additionalProperties:
description: CounterMetric holds configuration of a single metric of type Counter
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
gauges:
additionalProperties:
description: GaugeMetric holds configuration of a single metric of type Gauge
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
histograms:
additionalProperties:
description: HistogramMetric holds configuration of a single metric of type Histogram
properties:
buckets:
items:
type: number
type: array
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
type: object
minRunners: minRunners:
description: Required description: Required
minimum: 0 minimum: 0

View File

@ -99,6 +99,50 @@ spec:
x-kubernetes-map-type: atomic x-kubernetes-map-type: atomic
type: object type: object
type: object type: object
listenerMetrics:
description: MetricsConfig holds configuration parameters for each metric type
properties:
counters:
additionalProperties:
description: CounterMetric holds configuration of a single metric of type Counter
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
gauges:
additionalProperties:
description: GaugeMetric holds configuration of a single metric of type Gauge
properties:
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
histograms:
additionalProperties:
description: HistogramMetric holds configuration of a single metric of type Histogram
properties:
buckets:
items:
type: number
type: array
labels:
items:
type: string
type: array
required:
- labels
type: object
type: object
type: object
listenerTemplate: listenerTemplate:
description: PodTemplateSpec describes the data a pod should have when created from a template description: PodTemplateSpec describes the data a pod should have when created from a template
properties: properties:

View File

@ -130,6 +130,7 @@ func (b *ResourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1.
ImagePullSecrets: imagePullSecrets, ImagePullSecrets: imagePullSecrets,
Proxy: autoscalingRunnerSet.Spec.Proxy, Proxy: autoscalingRunnerSet.Spec.Proxy,
GitHubServerTLS: autoscalingRunnerSet.Spec.GitHubServerTLS, GitHubServerTLS: autoscalingRunnerSet.Spec.GitHubServerTLS,
Metrics: autoscalingRunnerSet.Spec.ListenerMetrics,
Template: autoscalingRunnerSet.Spec.ListenerTemplate, Template: autoscalingRunnerSet.Spec.ListenerTemplate,
}, },
} }
@ -203,6 +204,7 @@ func (b *ResourceBuilder) newScaleSetListenerConfig(autoscalingListener *v1alpha
LogFormat: scaleSetListenerLogFormat, LogFormat: scaleSetListenerLogFormat,
MetricsAddr: metricsAddr, MetricsAddr: metricsAddr,
MetricsEndpoint: metricsEndpoint, MetricsEndpoint: metricsEndpoint,
Metrics: autoscalingListener.Spec.Metrics,
} }
var buf bytes.Buffer var buf bytes.Buffer