Dynamically register metrics in ghalistener

This commit is contained in:
Nikola Jokic 2025-03-14 17:17:53 +01:00
parent dbac795ab0
commit a634b2cd41
No known key found for this signature in database
GPG Key ID: 09265A4BEA4F568A
3 changed files with 214 additions and 86 deletions

View File

@ -23,7 +23,7 @@ type App struct {
// initialized fields
listener Listener
worker Worker
metrics metrics.ServerPublisher
metrics metrics.ServerExporter
}
//go:generate mockery --name Listener --output ./mocks --outpkg mocks --case underscore

View File

@ -17,23 +17,23 @@ import (
)
type Config struct {
ConfigureUrl string `json:"configure_url"`
AppID int64 `json:"app_id"`
AppInstallationID int64 `json:"app_installation_id"`
AppPrivateKey string `json:"app_private_key"`
Token string `json:"token"`
EphemeralRunnerSetNamespace string `json:"ephemeral_runner_set_namespace"`
EphemeralRunnerSetName string `json:"ephemeral_runner_set_name"`
MaxRunners int `json:"max_runners"`
MinRunners int `json:"min_runners"`
RunnerScaleSetId int `json:"runner_scale_set_id"`
RunnerScaleSetName string `json:"runner_scale_set_name"`
ServerRootCA string `json:"server_root_ca"`
LogLevel string `json:"log_level"`
LogFormat string `json:"log_format"`
MetricsAddr string `json:"metrics_addr"`
MetricsEndpoint string `json:"metrics_endpoint"`
Metrics map[string]*v1alpha1.MetricConfig `json:"metrics"`
ConfigureUrl string `json:"configure_url"`
AppID int64 `json:"app_id"`
AppInstallationID int64 `json:"app_installation_id"`
AppPrivateKey string `json:"app_private_key"`
Token string `json:"token"`
EphemeralRunnerSetNamespace string `json:"ephemeral_runner_set_namespace"`
EphemeralRunnerSetName string `json:"ephemeral_runner_set_name"`
MaxRunners int `json:"max_runners"`
MinRunners int `json:"min_runners"`
RunnerScaleSetId int `json:"runner_scale_set_id"`
RunnerScaleSetName string `json:"runner_scale_set_name"`
ServerRootCA string `json:"server_root_ca"`
LogLevel string `json:"log_level"`
LogFormat string `json:"log_format"`
MetricsAddr string `json:"metrics_addr"`
MetricsEndpoint string `json:"metrics_endpoint"`
Metrics *v1alpha1.MetricsConfig `json:"metrics"`
}
func Read(path string) (Config, error) {

View File

@ -3,8 +3,10 @@ package metrics
import (
"context"
"net/http"
"strings"
"time"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/github/actions"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
@ -24,6 +26,37 @@ const (
const githubScaleSetSubsystem = "gha"
// Names of all metrics available on the listener
const (
MetricAssignedJobs = "gha_assigned_jobs"
MetricRunningJobs = "gha_running_jobs"
MetricRegisteredRunners = "gha_registered_runners"
MetricBusyRunners = "gha_busy_runners"
MetricMinRunners = "gha_min_runners"
MetricMaxRunners = "gha_max_runners"
MetricDesiredRunners = "gha_desired_runners"
MetricIdleRunners = "gha_idle_runners"
MetricStartedJobsTotal = "gha_started_jobs_total"
MetricCompletedJobsTotal = "gha_completed_jobs_total"
MetricJobStartupDurationSeconds = "gha_job_startup_duration_seconds"
MetricJobExecutionDurationSeconds = "gha_job_execution_duration_seconds"
)
var metricsHelp = map[string]string{
MetricAssignedJobs: "Number of jobs assigned to this scale set.",
MetricRunningJobs: "Number of jobs running (or about to be run).",
MetricRegisteredRunners: "Number of runners registered by the scale set.",
MetricBusyRunners: "Number of registered runners running a job.",
MetricMinRunners: "Minimum number of runners.",
MetricMaxRunners: "Maximum number of runners.",
MetricDesiredRunners: "Number of runners desired by the scale set.",
MetricIdleRunners: "Number of registered runners not running a job.",
MetricStartedJobsTotal: "Total number of jobs started.",
MetricCompletedJobsTotal: "Total number of jobs completed.",
MetricJobStartupDurationSeconds: "Time spent waiting for workflow job to get started on the runner owned by the scale set (in seconds).",
MetricJobExecutionDurationSeconds: "Time spent executing workflow jobs by the scale set (in seconds).",
}
// labels
var (
scaleSetLabels = []string{
@ -208,17 +241,9 @@ var runtimeBuckets []float64 = []float64{
3600,
}
type baseLabels struct {
scaleSetName string
scaleSetNamespace string
enterprise string
organization string
repository string
}
func (b *baseLabels) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels {
func (e *exporter) jobLabels(jobBase *actions.JobMessageBase) prometheus.Labels {
return prometheus.Labels{
labelKeyEnterprise: b.enterprise,
labelKeyEnterprise: e.scaleSetLabels[labelKeyEnterprise],
labelKeyOrganization: jobBase.OwnerName,
labelKeyRepository: jobBase.RepositoryName,
labelKeyJobName: jobBase.JobDisplayName,
@ -226,25 +251,14 @@ func (b *baseLabels) jobLabels(jobBase *actions.JobMessageBase) prometheus.Label
}
}
func (b *baseLabels) scaleSetLabels() prometheus.Labels {
return prometheus.Labels{
labelKeyRunnerScaleSetName: b.scaleSetName,
labelKeyRunnerScaleSetNamespace: b.scaleSetNamespace,
labelKeyEnterprise: b.enterprise,
labelKeyOrganization: b.organization,
labelKeyRepository: b.repository,
}
}
func (b *baseLabels) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels {
l := b.jobLabels(&msg.JobMessageBase)
func (e *exporter) completedJobLabels(msg *actions.JobCompleted) prometheus.Labels {
l := e.jobLabels(&msg.JobMessageBase)
l[labelKeyJobResult] = msg.Result
return l
}
func (b *baseLabels) startedJobLabels(msg *actions.JobStarted) prometheus.Labels {
l := b.jobLabels(&msg.JobMessageBase)
return l
func (e *exporter) startedJobLabels(msg *actions.JobStarted) prometheus.Labels {
return e.jobLabels(&msg.JobMessageBase)
}
//go:generate mockery --name Publisher --output ./mocks --outpkg mocks --case underscore
@ -257,24 +271,46 @@ type Publisher interface {
}
//go:generate mockery --name ServerPublisher --output ./mocks --outpkg mocks --case underscore
type ServerPublisher interface {
type ServerExporter interface {
Publisher
ListenAndServe(ctx context.Context) error
}
var (
_ Publisher = &discard{}
_ ServerPublisher = &exporter{}
_ Publisher = &discard{}
_ ServerExporter = &exporter{}
)
var Discard Publisher = &discard{}
type exporter struct {
logger logr.Logger
baseLabels
logger logr.Logger
scaleSetLabels prometheus.Labels
*metrics
srv *http.Server
}
type metrics struct {
counters map[string]*counterMetric
gauges map[string]*gaugeMetric
histograms map[string]*histogramMetric
}
type counterMetric struct {
counter *prometheus.CounterVec
config *v1alpha1.CounterMetric
}
type gaugeMetric struct {
gauge *prometheus.GaugeVec
config *v1alpha1.GaugeMetric
}
type histogramMetric struct {
histogram *prometheus.HistogramVec
config *v1alpha1.HistogramMetric
}
type ExporterConfig struct {
ScaleSetName string
ScaleSetNamespace string
@ -284,24 +320,82 @@ type ExporterConfig struct {
ServerAddr string
ServerEndpoint string
Logger logr.Logger
Metrics v1alpha1.MetricsConfig
}
func NewExporter(config ExporterConfig) ServerPublisher {
func NewExporter(config ExporterConfig) ServerExporter {
reg := prometheus.NewRegistry()
reg.MustRegister(
assignedJobs,
runningJobs,
registeredRunners,
busyRunners,
minRunners,
maxRunners,
desiredRunners,
idleRunners,
startedJobsTotal,
completedJobsTotal,
jobStartupDurationSeconds,
jobExecutionDurationSeconds,
)
metrics := &metrics{
counters: make(map[string]*counterMetric, len(config.Metrics.Gauges)),
gauges: make(map[string]*gaugeMetric, len(config.Metrics.Counters)),
histograms: make(map[string]*histogramMetric, len(config.Metrics.Histograms)),
}
for name, cfg := range config.Metrics.Gauges {
g := prometheus.V2.NewGaugeVec(prometheus.GaugeVecOpts{
GaugeOpts: prometheus.GaugeOpts{
Subsystem: githubScaleSetSubsystem,
Name: strings.TrimPrefix(name, githubScaleSetSubsystem),
Help: metricsHelp[name],
},
VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
})
reg.MustRegister(g)
metrics.gauges[name] = &gaugeMetric{
gauge: g,
config: cfg,
}
}
for name, cfg := range config.Metrics.Counters {
c := prometheus.V2.NewCounterVec(prometheus.CounterVecOpts{
CounterOpts: prometheus.CounterOpts{
Subsystem: githubScaleSetSubsystem,
Name: strings.TrimPrefix(name, githubScaleSetSubsystem),
Help: metricsHelp[name],
},
VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
})
reg.MustRegister(c)
metrics.counters[name] = &counterMetric{
counter: c,
config: cfg,
}
}
for name, cfg := range config.Metrics.Histograms {
buckets := runtimeBuckets
if len(cfg.Buckets) > 0 {
b := make([]float64, 0, len(cfg.Buckets))
ok := true
for _, v := range cfg.Buckets {
f, err := v.Float64()
if err != nil {
ok = false
config.Logger.Error(err, "Failed to parse number in %q bucket: %w; continuing with the default buckets", name, err)
break
}
b = append(b, f)
}
if ok {
buckets = b
}
}
h := prometheus.V2.NewHistogramVec(prometheus.HistogramVecOpts{
HistogramOpts: prometheus.HistogramOpts{
Subsystem: githubScaleSetSubsystem,
Name: strings.TrimPrefix(name, githubScaleSetSubsystem),
Help: metricsHelp[name],
Buckets: buckets,
},
VariableLabels: prometheus.UnconstrainedLabels(cfg.Labels),
})
reg.MustRegister(h)
metrics.histograms[name] = &histogramMetric{
histogram: h,
config: cfg,
}
}
mux := http.NewServeMux()
mux.Handle(
@ -311,13 +405,14 @@ func NewExporter(config ExporterConfig) ServerPublisher {
return &exporter{
logger: config.Logger.WithName("metrics"),
baseLabels: baseLabels{
scaleSetName: config.ScaleSetName,
scaleSetNamespace: config.ScaleSetNamespace,
enterprise: config.Enterprise,
organization: config.Organization,
repository: config.Repository,
scaleSetLabels: prometheus.Labels{
labelKeyRunnerScaleSetName: config.ScaleSetName,
labelKeyRunnerScaleSetNamespace: config.ScaleSetNamespace,
labelKeyEnterprise: config.Enterprise,
labelKeyOrganization: config.Organization,
labelKeyRepository: config.Repository,
},
metrics: metrics,
srv: &http.Server{
Addr: config.ServerAddr,
Handler: mux,
@ -337,40 +432,73 @@ func (e *exporter) ListenAndServe(ctx context.Context) error {
return e.srv.ListenAndServe()
}
func (m *exporter) PublishStatic(min, max int) {
l := m.scaleSetLabels()
maxRunners.With(l).Set(float64(max))
minRunners.With(l).Set(float64(min))
func (e *exporter) setGauge(name string, allLabels prometheus.Labels, val float64) {
m, ok := e.metrics.gauges[name]
if !ok {
return
}
labels := make(prometheus.Labels, len(m.config.Labels))
for _, label := range m.config.Labels {
labels[label] = allLabels[label]
}
m.gauge.With(labels).Set(val)
}
func (e *exporter) incCounter(name string, allLabels prometheus.Labels) {
m, ok := e.metrics.counters[name]
if !ok {
return
}
labels := make(prometheus.Labels, len(m.config.Labels))
for _, label := range m.config.Labels {
labels[label] = allLabels[label]
}
m.counter.With(labels).Inc()
}
func (e *exporter) observeHistogram(name string, allLabels prometheus.Labels, val float64) {
m, ok := e.metrics.histograms[name]
if !ok {
return
}
labels := make(prometheus.Labels, len(m.config.Labels))
for _, label := range m.config.Labels {
labels[label] = allLabels[label]
}
m.histogram.With(labels).Observe(val)
}
func (e *exporter) PublishStatic(min, max int) {
e.setGauge(MetricMaxRunners, e.scaleSetLabels, float64(max))
e.setGauge(MetricMinRunners, e.scaleSetLabels, float64(min))
}
func (e *exporter) PublishStatistics(stats *actions.RunnerScaleSetStatistic) {
l := e.scaleSetLabels()
assignedJobs.With(l).Set(float64(stats.TotalAssignedJobs))
runningJobs.With(l).Set(float64(stats.TotalRunningJobs))
registeredRunners.With(l).Set(float64(stats.TotalRegisteredRunners))
busyRunners.With(l).Set(float64(stats.TotalBusyRunners))
idleRunners.With(l).Set(float64(stats.TotalIdleRunners))
e.setGauge(MetricAssignedJobs, e.scaleSetLabels, float64(stats.TotalAssignedJobs))
e.setGauge(MetricRunningJobs, e.scaleSetLabels, float64(stats.TotalRunningJobs))
e.setGauge(MetricRegisteredRunners, e.scaleSetLabels, float64(stats.TotalRegisteredRunners))
e.setGauge(MetricBusyRunners, e.scaleSetLabels, float64(float64(stats.TotalRegisteredRunners)))
e.setGauge(MetricIdleRunners, e.scaleSetLabels, float64(stats.TotalIdleRunners))
}
func (e *exporter) PublishJobStarted(msg *actions.JobStarted) {
l := e.startedJobLabels(msg)
startedJobsTotal.With(l).Inc()
e.incCounter(MetricStartedJobsTotal, l)
startupDuration := msg.JobMessageBase.RunnerAssignTime.Unix() - msg.JobMessageBase.ScaleSetAssignTime.Unix()
jobStartupDurationSeconds.With(l).Observe(float64(startupDuration))
e.observeHistogram(MetricJobStartupDurationSeconds, l, float64(startupDuration))
}
func (e *exporter) PublishJobCompleted(msg *actions.JobCompleted) {
l := e.completedJobLabels(msg)
completedJobsTotal.With(l).Inc()
e.incCounter(MetricCompletedJobsTotal, l)
executionDuration := msg.JobMessageBase.FinishTime.Unix() - msg.JobMessageBase.RunnerAssignTime.Unix()
jobExecutionDurationSeconds.With(l).Observe(float64(executionDuration))
e.observeHistogram(MetricJobExecutionDurationSeconds, l, float64(executionDuration))
}
func (m *exporter) PublishDesiredRunners(count int) {
desiredRunners.With(m.scaleSetLabels()).Set(float64(count))
func (e *exporter) PublishDesiredRunners(count int) {
e.setGauge(MetricDesiredRunners, e.scaleSetLabels, float64(count))
}
type discard struct{}