diff --git a/charts/gha-runner-scale-set/values.yaml b/charts/gha-runner-scale-set/values.yaml index 2a9e7984..831e26d0 100644 --- a/charts/gha-runner-scale-set/values.yaml +++ b/charts/gha-runner-scale-set/values.yaml @@ -122,31 +122,33 @@ githubConfigSecret: listenerMetrics: counters: gha_started_jobs_total: - labels: [] + labels: ["repository", "organization", "enterprise", "job_name", "event_name"] gha_completed_jobs_total: - labels: [] + labels: ["repository", "organization", "enterprise", "job_name", "event_name", "job_result"] gauges: gha_assigned_jobs: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] gha_running_jobs: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] gha_registered_runners: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] gha_busy_runners: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] gha_min_runners: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] gha_max_runners: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] gha_desired_runners: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] gha_idle_runners: - labels: [] + labels: ["name", "namespace", "repository", "organization", "enterprise"] histograms: gha_job_startup_duration_seconds: - labels: [] + labels: ["repository", "organization", "enterprise", "job_name", "event_name"] + buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 18, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 150, 180, 210, 240, 300, 360, 420, 480, 540, 600, 900, 1200, 1800, 2400, 3000, 3600] gha_job_execution_duration_seconds: - labels: [] + labels: ["repository", "organization", "enterprise", "job_name", "event_name", "job_result"] + buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 18, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 150, 180, 210, 240, 300, 360, 420, 480, 540, 600, 900, 1200, 1800, 2400, 3000, 3600] ## template is the PodSpec for each runner Pod ## For reference: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec diff --git a/cmd/ghalistener/metrics/metrics.go b/cmd/ghalistener/metrics/metrics.go index 6dda6237..f3ab58e8 100644 --- a/cmd/ghalistener/metrics/metrics.go +++ b/cmd/ghalistener/metrics/metrics.go @@ -57,190 +57,6 @@ var metricsHelp = map[string]string{ MetricJobExecutionDurationSeconds: "Time spent executing workflow jobs by the scale set (in seconds).", } -// labels -var ( - scaleSetLabels = []string{ - labelKeyRunnerScaleSetName, - labelKeyRepository, - labelKeyOrganization, - labelKeyEnterprise, - labelKeyRunnerScaleSetNamespace, - } - - jobLabels = []string{ - labelKeyRepository, - labelKeyOrganization, - labelKeyEnterprise, - labelKeyJobName, - labelKeyEventName, - } - - completedJobsTotalLabels = append(jobLabels, labelKeyJobResult) - jobExecutionDurationLabels = append(jobLabels, labelKeyJobResult) - startedJobsTotalLabels = jobLabels - jobStartupDurationLabels = jobLabels -) - -var ( - assignedJobs = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "assigned_jobs", - Help: "Number of jobs assigned to this scale set.", - }, - scaleSetLabels, - ) - - runningJobs = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "running_jobs", - Help: "Number of jobs running (or about to be run).", - }, - scaleSetLabels, - ) - - registeredRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "registered_runners", - Help: "Number of runners registered by the scale set.", - }, - scaleSetLabels, - ) - - busyRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "busy_runners", - Help: "Number of registered runners running a job.", - }, - scaleSetLabels, - ) - - minRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "min_runners", - Help: "Minimum number of runners.", - }, - scaleSetLabels, - ) - - maxRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "max_runners", - Help: "Maximum number of runners.", - }, - scaleSetLabels, - ) - - desiredRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "desired_runners", - Help: "Number of runners desired by the scale set.", - }, - scaleSetLabels, - ) - - idleRunners = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "idle_runners", - Help: "Number of registered runners not running a job.", - }, - scaleSetLabels, - ) - - startedJobsTotal = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "started_jobs_total", - Help: "Total number of jobs started.", - }, - startedJobsTotalLabels, - ) - - completedJobsTotal = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "completed_jobs_total", - Help: "Total number of jobs completed.", - Subsystem: githubScaleSetSubsystem, - }, - completedJobsTotalLabels, - ) - - jobStartupDurationSeconds = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "job_startup_duration_seconds", - Help: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).", - Buckets: runtimeBuckets, - }, - jobStartupDurationLabels, - ) - - jobExecutionDurationSeconds = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Subsystem: githubScaleSetSubsystem, - Name: "job_execution_duration_seconds", - Help: "Time spent executing workflow jobs by the scale set (in seconds).", - Buckets: runtimeBuckets, - }, - jobExecutionDurationLabels, - ) -) - -var runtimeBuckets []float64 = []float64{ - 0.01, - 0.05, - 0.1, - 0.5, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 12, - 15, - 18, - 20, - 25, - 30, - 40, - 50, - 60, - 70, - 80, - 90, - 100, - 110, - 120, - 150, - 180, - 210, - 240, - 300, - 360, - 420, - 480, - 540, - 600, - 900, - 1200, - 1800, - 2400, - 3000, - 3600, -} - func (e *exporter) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels { return prometheus.Labels{ labelKeyEnterprise: e.scaleSetLabels[labelKeyEnterprise], @@ -364,7 +180,7 @@ func NewExporter(config ExporterConfig) ServerExporter { } for name, cfg := range config.Metrics.Histograms { - buckets := runtimeBuckets + buckets := defaultRuntimeBuckets if len(cfg.Buckets) > 0 { b := make([]float64, 0, len(cfg.Buckets)) ok := true @@ -508,3 +324,51 @@ func (*discard) PublishStatistics(*actions.RunnerScaleSetStatistic) {} func (*discard) PublishJobStarted(*actions.JobStarted) {} func (*discard) PublishJobCompleted(*actions.JobCompleted) {} func (*discard) PublishDesiredRunners(int) {} + +var defaultRuntimeBuckets []float64 = []float64{ + 0.01, + 0.05, + 0.1, + 0.5, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 12, + 15, + 18, + 20, + 25, + 30, + 40, + 50, + 60, + 70, + 80, + 90, + 100, + 110, + 120, + 150, + 180, + 210, + 240, + 300, + 360, + 420, + 480, + 540, + 600, + 900, + 1200, + 1800, + 2400, + 3000, + 3600, +}