Expose workflow job metrics via new actions-metrics-server (#2057)

* Add workflow job metrics to Github webhook server

* Fix handling of workflow_job.Conclusion

* Make the prometheus metrics exporter for the workflow jobs a dedicated application

* chart: Add support for deploying actions-metrics-server

* A few improvements to make it easy to cover in E2E

* chart: Add missing actionsmetrics.service.yaml

* chart: Do not modify actionsMetricsServer.replicaCount

* chart: Add documentation for actionsMetrics and actionsMetricsServer

Co-authored-by: Colin Heathman <cheathman@benchsci.com>
This commit is contained in:
Yusuke Kuoka 2022-12-10 08:24:28 +09:00 committed by GitHub
parent 0285da1a32
commit 300e93c59d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1019 additions and 2 deletions

View File

@ -37,7 +37,8 @@ RUN --mount=target=. \
--mount=type=cache,mode=0777,target=${GOCACHE} \
export GOOS=${TARGETOS} GOARCH=${TARGETARCH} GOARM=${TARGETVARIANT#v} && \
go build -trimpath -ldflags="-s -w -X 'github.com/actions-runner-controller/actions-runner-controller/build.Version=${VERSION}'" -o /out/manager main.go && \
go build -trimpath -ldflags="-s -w" -o /out/github-webhook-server ./cmd/githubwebhookserver
go build -trimpath -ldflags="-s -w" -o /out/github-webhook-server ./cmd/githubwebhookserver && \
go build -trimpath -ldflags="-s -w" -o /out/actions-metrics-server ./cmd/actionsmetricsserver
# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
@ -47,6 +48,7 @@ WORKDIR /
COPY --from=builder /out/manager .
COPY --from=builder /out/github-webhook-server .
COPY --from=builder /out/actions-metrics-server .
USER 65532:65532

View File

@ -88,6 +88,9 @@ data:
no-autoupdate: true
ingress:
# The first rule proxies traffic to the httpbin sample Service defined in app.yaml
- hostname: ${TUNNEL_HOSTNAME}
service: http://actions-runner-controller-actions-metrics-server.actions-runner-system:80
path: /metrics$
- hostname: ${TUNNEL_HOSTNAME}
service: http://actions-runner-controller-github-webhook-server.actions-runner-system:80
# This rule matches any traffic which didn't match a previous rule, and responds with HTTP 404.

View File

@ -35,6 +35,16 @@ else
echo 'Skipped deploying secret "github-webhook-server". Set WEBHOOK_GITHUB_TOKEN to deploy.' 1>&2
fi
if [ -n "${WEBHOOK_GITHUB_TOKEN}" ]; then
kubectl -n actions-runner-system delete secret \
actions-metrics-server || :
kubectl -n actions-runner-system create secret generic \
actions-metrics-server \
--from-literal=github_token=${WEBHOOK_GITHUB_TOKEN:?WEBHOOK_GITHUB_TOKEN must not be empty}
else
echo 'Skipped deploying secret "actions-metrics-server". Set WEBHOOK_GITHUB_TOKEN to deploy.' 1>&2
fi
tool=${ACCEPTANCE_TEST_DEPLOYMENT_TOOL}
TEST_ID=${TEST_ID:-default}
@ -49,6 +59,7 @@ if [ "${tool}" == "helm" ]; then
flags+=( --set imagePullSecrets[0].name=${IMAGE_PULL_SECRET})
flags+=( --set image.actionsRunnerImagePullSecrets[0].name=${IMAGE_PULL_SECRET})
flags+=( --set githubWebhookServer.imagePullSecrets[0].name=${IMAGE_PULL_SECRET})
flags+=( --set actionsMetricsServer.imagePullSecrets[0].name=${IMAGE_PULL_SECRET})
fi
if [ "${CHART_VERSION}" != "" ]; then
flags+=( --version ${CHART_VERSION})
@ -56,6 +67,7 @@ if [ "${tool}" == "helm" ]; then
if [ "${LOG_FORMAT}" != "" ]; then
flags+=( --set logFormat=${LOG_FORMAT})
flags+=( --set githubWebhookServer.logFormat=${LOG_FORMAT})
flags+=( --set actionsMetricsServer.logFormat=${LOG_FORMAT})
fi
set -vx
@ -70,6 +82,7 @@ if [ "${tool}" == "helm" ]; then
--set image.tag=${VERSION} \
--set podAnnotations.test-id=${TEST_ID} \
--set githubWebhookServer.podAnnotations.test-id=${TEST_ID} \
--set actionsMetricsServer.podAnnotations.test-id=${TEST_ID} \
${flags[@]} --set image.imagePullPolicy=${IMAGE_PULL_POLICY} \
-f ${VALUES_FILE}
set +v

View File

@ -33,3 +33,23 @@ githubWebhookServer:
protocol: TCP
name: http
nodePort: 31000
actionsMetricsServer:
imagePullSecrets: []
logLevel: "-4"
enabled: true
labels: {}
replicaCount: 1
secret:
enabled: true
# create: true
name: "actions-metrics-server"
### GitHub Webhook Configuration
#github_webhook_secret_token: ""
service:
type: NodePort
ports:
- port: 80
targetPort: http
protocol: TCP
name: http
nodePort: 31001

View File

@ -112,3 +112,40 @@ All additional docs are kept in the `docs/` folder, this README is solely for do
| `githubWebhookServer.podDisruptionBudget.enabled` | Enables a PDB to ensure HA of githubwebhook pods | false |
| `githubWebhookServer.podDisruptionBudget.minAvailable` | Minimum number of pods that must be available after eviction | |
| `githubWebhookServer.podDisruptionBudget.maxUnavailable` | Maximum number of pods that can be unavailable after eviction. Kubernetes 1.7+ required. | |
| `actionsMetricsServer.logLevel` | Set the log level of the actionsMetricsServer container | |
| `actionsMetricsServer.logFormat` | Set the log format of the actionsMetricsServer controller. Valid options are "text" and "json" | text |
| `actionsMetricsServer.enabled` | Deploy the actions metrics server pod | false |
| `actionsMetricsServer.secret.enabled` | Passes the webhook hook secret to the github-webhook-server | false |
| `actionsMetricsServer.secret.create` | Deploy the webhook hook secret | false |
| `actionsMetricsServer.secret.name` | Set the name of the webhook hook secret | github-webhook-server |
| `actionsMetricsServer.secret.github_webhook_secret_token` | Set the webhook secret token value | |
| `actionsMetricsServer.imagePullSecrets` | Specifies the secret to be used when pulling the actionsMetricsServer pod containers | |
| `actionsMetricsServer.nameOverride` | Override the resource name prefix | |
| `actionsMetricsServer.fullnameOverride` | Override the full resource names | |
| `actionsMetricsServer.serviceAccount.create` | Deploy the actionsMetricsServer under a service account | true |
| `actionsMetricsServer.serviceAccount.annotations` | Set annotations for the service account | |
| `actionsMetricsServer.serviceAccount.name` | Set the service account name | |
| `actionsMetricsServer.podAnnotations` | Set annotations for the actionsMetricsServer pod | |
| `actionsMetricsServer.podLabels` | Set labels for the actionsMetricsServer pod | |
| `actionsMetricsServer.podSecurityContext` | Set the security context to actionsMetricsServer pod | |
| `actionsMetricsServer.securityContext` | Set the security context for each container in the actionsMetricsServer pod | |
| `actionsMetricsServer.resources` | Set the actionsMetricsServer pod resources | |
| `actionsMetricsServer.topologySpreadConstraints` | Set the actionsMetricsServer pod topologySpreadConstraints | |
| `actionsMetricsServer.nodeSelector` | Set the actionsMetricsServer pod nodeSelector | |
| `actionsMetricsServer.tolerations` | Set the actionsMetricsServer pod tolerations | |
| `actionsMetricsServer.affinity` | Set the actionsMetricsServer pod affinity rules | |
| `actionsMetricsServer.priorityClassName` | Set the actionsMetricsServer pod priorityClassName | |
| `actionsMetricsServer.service.type` | Set actionsMetricsServer service type | |
| `actionsMetricsServer.service.ports` | Set actionsMetricsServer service ports | `[{"port":80, "targetPort:"http", "protocol":"TCP", "name":"http"}]` |
| `actionsMetricsServer.ingress.enabled` | Deploy an ingress kind for the actionsMetricsServer | false |
| `actionsMetricsServer.ingress.annotations` | Set annotations for the ingress kind | |
| `actionsMetricsServer.ingress.hosts` | Set hosts configuration for ingress | `[{"host": "chart-example.local", "paths": []}]` |
| `actionsMetricsServer.ingress.tls` | Set tls configuration for ingress | |
| `actionsMetricsServer.ingress.ingressClassName` | Set ingress class name | |
| `actionsMetrics.serviceMonitor` | Deploy serviceMonitor kind for for use with prometheus-operator CRDs | false |
| `actionsMetrics.serviceAnnotations` | Set annotations for the provisioned actions metrics service resource | |
| `actionsMetrics.port` | Set port of actions metrics service | 8443 |
| `actionsMetrics.proxy.enabled` | Deploy kube-rbac-proxy container in controller pod | true |
| `actionsMetrics.proxy.image.repository` | The "repository/image" of the kube-proxy container | quay.io/brancz/kube-rbac-proxy |
| `actionsMetrics.proxy.image.tag` | The tag of the kube-proxy image to use when pulling the container | v0.10.0 |
| `actionsMetrics.serviceMonitorLabels` | Set labels to apply to ServiceMonitor resources | |

View File

@ -0,0 +1,60 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "actions-runner-controller-actions-metrics-server.name" -}}
{{- default .Chart.Name .Values.actionsMetricsServer.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- define "actions-runner-controller-actions-metrics-server.instance" -}}
{{- printf "%s-%s" .Release.Name "actions-metrics-server" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "actions-runner-controller-actions-metrics-server.fullname" -}}
{{- if .Values.actionsMetricsServer.fullnameOverride }}
{{- .Values.actionsMetricsServer.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.actionsMetricsServer.nameOverride }}
{{- $instance := include "actions-runner-controller-actions-metrics-server.instance" . }}
{{- if contains $name $instance }}
{{- $instance | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s-%s" .Release.Name $name "actions-metrics-server" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "actions-runner-controller-actions-metrics-server.selectorLabels" -}}
app.kubernetes.io/name: {{ include "actions-runner-controller-actions-metrics-server.name" . }}
app.kubernetes.io/instance: {{ include "actions-runner-controller-actions-metrics-server.instance" . }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "actions-runner-controller-actions-metrics-server.serviceAccountName" -}}
{{- if .Values.actionsMetricsServer.serviceAccount.create }}
{{- default (include "actions-runner-controller-actions-metrics-server.fullname" .) .Values.actionsMetricsServer.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.actionsMetricsServer.serviceAccount.name }}
{{- end }}
{{- end }}
{{- define "actions-runner-controller-actions-metrics-server.secretName" -}}
{{- default (include "actions-runner-controller-actions-metrics-server.fullname" .) .Values.actionsMetricsServer.secret.name }}
{{- end }}
{{- define "actions-runner-controller-actions-metrics-server.roleName" -}}
{{- include "actions-runner-controller-actions-metrics-server.fullname" . }}
{{- end }}
{{- define "actions-runner-controller-actions-metrics-server.serviceMonitorName" -}}
{{- include "actions-runner-controller-actions-metrics-server.fullname" . | trunc 47 }}-service-monitor
{{- end }}

View File

@ -0,0 +1,162 @@
{{- if .Values.actionsMetricsServer.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "actions-runner-controller-actions-metrics-server.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "actions-runner-controller.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.actionsMetricsServer.replicaCount }}
selector:
matchLabels:
{{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.actionsMetricsServer.podAnnotations }}
annotations:
kubectl.kubernetes.io/default-logs-container: "github-webhook-server"
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . | nindent 8 }}
{{- with .Values.actionsMetricsServer.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.actionsMetricsServer.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "actions-runner-controller-actions-metrics-server.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.actionsMetricsServer.podSecurityContext | nindent 8 }}
{{- with .Values.actionsMetricsServer.priorityClassName }}
priorityClassName: "{{ . }}"
{{- end }}
containers:
- args:
{{- $metricsHost := .Values.metrics.proxy.enabled | ternary "127.0.0.1" "0.0.0.0" }}
{{- $metricsPort := .Values.metrics.proxy.enabled | ternary "8080" .Values.metrics.port }}
- "--metrics-addr={{ $metricsHost }}:{{ $metricsPort }}"
{{- if .Values.actionsMetricsServer.logLevel }}
- "--log-level={{ .Values.actionsMetricsServer.logLevel }}"
{{- end }}
{{- if .Values.runnerGithubURL }}
- "--runner-github-url={{ .Values.runnerGithubURL }}"
{{- end }}
{{- if .Values.actionsMetricsServer.logFormat }}
- "--log-format={{ .Values.actionsMetricsServer.logFormat }}"
{{- end }}
command:
- "/actions-metrics-server"
env:
- name: GITHUB_WEBHOOK_SECRET_TOKEN
valueFrom:
secretKeyRef:
key: github_webhook_secret_token
name: {{ include "actions-runner-controller-actions-metrics-server.secretName" . }}
optional: true
{{- if .Values.githubEnterpriseServerURL }}
- name: GITHUB_ENTERPRISE_URL
value: {{ .Values.githubEnterpriseServerURL }}
{{- end }}
{{- if .Values.githubURL }}
- name: GITHUB_URL
value: {{ .Values.githubURL }}
{{- end }}
{{- if .Values.githubUploadURL }}
- name: GITHUB_UPLOAD_URL
value: {{ .Values.githubUploadURL }}
{{- end }}
{{- if .Values.actionsMetricsServer.secret.enabled }}
- name: GITHUB_TOKEN
valueFrom:
secretKeyRef:
key: github_token
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
optional: true
- name: GITHUB_APP_ID
valueFrom:
secretKeyRef:
key: github_app_id
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
optional: true
- name: GITHUB_APP_INSTALLATION_ID
valueFrom:
secretKeyRef:
key: github_app_installation_id
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
optional: true
- name: GITHUB_APP_PRIVATE_KEY
valueFrom:
secretKeyRef:
key: github_app_private_key
name: {{ include "actions-runner-controller.githubWebhookServerSecretName" . }}
optional: true
{{- if .Values.authSecret.github_basicauth_username }}
- name: GITHUB_BASICAUTH_USERNAME
value: {{ .Values.authSecret.github_basicauth_username }}
{{- end }}
- name: GITHUB_BASICAUTH_PASSWORD
valueFrom:
secretKeyRef:
key: github_basicauth_password
name: {{ include "actions-runner-controller.secretName" . }}
optional: true
{{- end }}
{{- range $key, $val := .Values.actionsMetricsServer.env }}
- name: {{ $key }}
value: {{ $val | quote }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (cat "v" .Chart.AppVersion | replace " " "") }}"
name: actions-metrics-server
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- containerPort: 8000
name: http
protocol: TCP
{{- if not .Values.metrics.proxy.enabled }}
- containerPort: {{ .Values.metrics.port }}
name: metrics-port
protocol: TCP
{{- end }}
resources:
{{- toYaml .Values.actionsMetricsServer.resources | nindent 12 }}
securityContext:
{{- toYaml .Values.actionsMetricsServer.securityContext | nindent 12 }}
{{- if .Values.metrics.proxy.enabled }}
- args:
- "--secure-listen-address=0.0.0.0:{{ .Values.metrics.port }}"
- "--upstream=http://127.0.0.1:8080/"
- "--logtostderr=true"
- "--v=10"
image: "{{ .Values.metrics.proxy.image.repository }}:{{ .Values.metrics.proxy.image.tag }}"
name: kube-rbac-proxy
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- containerPort: {{ .Values.metrics.port }}
name: metrics-port
resources:
{{- toYaml .Values.resources | nindent 12 }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
{{- end }}
terminationGracePeriodSeconds: 10
{{- with .Values.actionsMetricsServer.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.actionsMetricsServer.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.actionsMetricsServer.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.actionsMetricsServer.topologySpreadConstraints }}
topologySpreadConstraints:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,47 @@
{{- if .Values.actionsMetricsServer.ingress.enabled -}}
{{- $fullName := include "actions-runner-controller-actions-metrics-server.fullname" . -}}
{{- $svcPort := (index .Values.actionsMetricsServer.service.ports 0).port -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ $fullName }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "actions-runner-controller.labels" . | nindent 4 }}
{{- with .Values.actionsMetricsServer.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.actionsMetricsServer.ingress.tls }}
tls:
{{- range .Values.actionsMetricsServer.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
{{- with .Values.actionsMetricsServer.ingress.ingressClassName }}
ingressClassName: {{ . }}
{{- end }}
rules:
{{- range .Values.actionsMetricsServer.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- if .extraPaths }}
{{- toYaml .extraPaths | nindent 10 }}
{{- end }}
{{- range .paths }}
- path: {{ .path }}
pathType: {{ .pathType }}
backend:
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,26 @@
{{- if .Values.actionsMetricsServer.enabled }}
apiVersion: v1
kind: Service
metadata:
name: {{ include "actions-runner-controller-actions-metrics-server.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "actions-runner-controller.labels" . | nindent 4 }}
{{- if .Values.actionsMetricsServer.service.annotations }}
annotations:
{{ toYaml .Values.actionsMetricsServer.service.annotations | nindent 4 }}
{{- end }}
spec:
type: {{ .Values.actionsMetricsServer.service.type }}
ports:
{{ range $_, $port := .Values.actionsMetricsServer.service.ports -}}
- {{ $port | toYaml | nindent 6 }}
{{- end }}
{{- if .Values.metrics.serviceMonitor }}
- name: metrics-port
port: {{ .Values.metrics.port }}
targetPort: metrics-port
{{- end }}
selector:
{{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . | nindent 4 }}
{{- end }}

View File

@ -0,0 +1,15 @@
{{- if .Values.actionsMetricsServer.enabled -}}
{{- if .Values.actionsMetricsServer.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "actions-runner-controller-actions-metrics-server.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "actions-runner-controller.labels" . | nindent 4 }}
{{- with .Values.actionsMetricsServer.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,25 @@
{{- if and .Values.actionsMetricsServer.enabled .Values.actionsMetrics.serviceMonitor }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
{{- include "actions-runner-controller.labels" . | nindent 4 }}
{{- with .Values.actionsMetricsServer.serviceMonitorLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
name: {{ include "actions-runner-controller-actions-metrics-server.serviceMonitorName" . }}
namespace: {{ .Release.Namespace }}
spec:
endpoints:
- path: /metrics
port: metrics-port
{{- if .Values.actionsMetrics.proxy.enabled }}
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
scheme: https
tlsConfig:
insecureSkipVerify: true
{{- end }}
selector:
matchLabels:
{{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . | nindent 6 }}
{{- end }}

View File

@ -191,7 +191,6 @@ admissionWebHooks:
## specify log format for actions runner controller. Valid options are "text" and "json"
logFormat: text
githubWebhookServer:
enabled: false
replicaCount: 1
@ -277,3 +276,100 @@ githubWebhookServer:
# minAvailable: 1
# maxUnavailable: 3
# queueLimit: 100
actionsMetrics:
serviceAnnotations: {}
# Set serviceMonitor=true to create a service monitor
# as a part of the helm release.
# Do note that you also need actionsMetricsServer.enabled=true
# to deploy the actions-metrics-server whose k8s service is referenced by the service monitor.
serviceMonitor: false
serviceMonitorLabels: {}
port: 8443
proxy:
enabled: true
image:
repository: quay.io/brancz/kube-rbac-proxy
tag: v0.13.1
actionsMetricsServer:
enabled: false
# DO NOT CHANGE THIS!
# See the thread below for more context.
# https://github.com/actions-runner-controller/actions-runner-controller/pull/1814#discussion_r974758924
replicaCount: 1
## specify log format for github webhook controller. Valid options are "text" and "json"
logFormat: text
secret:
enabled: false
create: false
name: "actions-metrics-server"
### GitHub Webhook Configuration
github_webhook_secret_token: ""
### GitHub Apps Configuration
## NOTE: IDs MUST be strings, use quotes
#github_app_id: ""
#github_app_installation_id: ""
#github_app_private_key: |
### GitHub PAT Configuration
#github_token: ""
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
serviceAccount:
# Specifies whether a service account should be created
create: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
podAnnotations: {}
podLabels: {}
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
resources: {}
nodeSelector: {}
tolerations: []
affinity: {}
priorityClassName: ""
service:
type: ClusterIP
annotations: {}
ports:
- port: 80
targetPort: http
protocol: TCP
name: http
#nodePort: someFixedPortForUseWithTerraformCdkCfnEtc
ingress:
enabled: false
ingressClassName: ""
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths: []
# - path: /*
# pathType: ImplementationSpecific
# Extra paths that are not automatically connected to the server. This is useful when working with annotation based services.
extraPaths: []
# - path: /*
# backend:
# serviceName: ssl-redirect
# servicePort: use-annotation
## for Kubernetes >=1.19 (when "networking.k8s.io/v1" is used)
# - path: /*
# pathType: Prefix
# backend:
# service:
# name: ssl-redirect
# port:
# name: use-annotation
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local

View File

@ -30,6 +30,7 @@ import (
"github.com/actions-runner-controller/actions-runner-controller/controllers"
"github.com/actions-runner-controller/actions-runner-controller/github"
"github.com/actions-runner-controller/actions-runner-controller/logging"
"github.com/kelseyhightower/envconfig"
"k8s.io/apimachinery/pkg/runtime"

View File

@ -0,0 +1,227 @@
package actionsmetrics
import (
"bufio"
"context"
"fmt"
"net/http"
"regexp"
"strings"
"time"
"github.com/go-logr/logr"
gogithub "github.com/google/go-github/v47/github"
"github.com/prometheus/client_golang/prometheus"
"github.com/actions-runner-controller/actions-runner-controller/github"
)
type EventReader struct {
Log logr.Logger
// GitHub Client to fetch information about job failures
GitHubClient *github.Client
// Event queue
Events chan interface{}
}
// HandleWorkflowJobEvent send event to reader channel for processing
//
// forcing the events through a channel ensures they are processed in sequentially,
// and prevents any race conditions with githubWorkflowJobStatus
func (reader *EventReader) HandleWorkflowJobEvent(event interface{}) {
reader.Events <- event
}
// ProcessWorkflowJobEvents pop events in a loop for processing
//
// Should be called asynchronously with `go`
func (reader *EventReader) ProcessWorkflowJobEvents(ctx context.Context) {
for {
select {
case event := <-reader.Events:
reader.ProcessWorkflowJobEvent(ctx, event)
case <-ctx.Done():
return
}
}
}
// ProcessWorkflowJobEvent processes a single event
//
// Events should be processed in the same order that Github emits them
func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event interface{}) {
e, ok := event.(*gogithub.WorkflowJobEvent)
if !ok {
return
}
// collect labels
labels := make(prometheus.Labels)
runsOn := strings.Join(e.WorkflowJob.Labels, `,`)
labels["runs_on"] = runsOn
labels["job_name"] = *e.WorkflowJob.Name
// switch on job status
switch action := e.GetAction(); action {
case "queued":
githubWorkflowJobsQueuedTotal.With(labels).Inc()
case "in_progress":
githubWorkflowJobsStartedTotal.With(labels).Inc()
if reader.GitHubClient == nil {
return
}
parseResult, err := reader.fetchAndParseWorkflowJobLogs(ctx, e)
if err != nil {
reader.Log.Error(err, "reading workflow job log")
return
} else {
reader.Log.Info("reading workflow_job logs",
"job_name", *e.WorkflowJob.Name,
"job_id", fmt.Sprint(*e.WorkflowJob.ID),
)
}
githubWorkflowJobQueueDurationSeconds.With(labels).Observe(parseResult.QueueTime.Seconds())
case "completed":
githubWorkflowJobsCompletedTotal.With(labels).Inc()
// job_conclusion -> (neutral, success, skipped, cancelled, timed_out, action_required, failure)
githubWorkflowJobConclusionsTotal.With(extraLabel("job_conclusion", *e.WorkflowJob.Conclusion, labels)).Inc()
parseResult, err := reader.fetchAndParseWorkflowJobLogs(ctx, e)
if err != nil {
reader.Log.Error(err, "reading workflow job log")
return
} else {
reader.Log.Info("reading workflow_job logs",
"job_name", *e.WorkflowJob.Name,
"job_id", fmt.Sprint(*e.WorkflowJob.ID),
)
}
if *e.WorkflowJob.Conclusion == "failure" {
failedStep := "null"
for i, step := range e.WorkflowJob.Steps {
// *step.Conclusion ~
// "success",
// "failure",
// "neutral",
// "cancelled",
// "skipped",
// "timed_out",
// "action_required",
// null
if *step.Conclusion == "failure" {
failedStep = fmt.Sprint(i)
break
}
if *step.Conclusion == "timed_out" {
failedStep = fmt.Sprint(i)
parseResult.ExitCode = "timed_out"
break
}
}
githubWorkflowJobFailuresTotal.With(
extraLabel("failed_step", failedStep,
extraLabel("exit_code", parseResult.ExitCode, labels),
),
).Inc()
}
githubWorkflowJobRunDurationSeconds.With(extraLabel("job_conclusion", *e.WorkflowJob.Conclusion, labels)).Observe(parseResult.RunTime.Seconds())
}
}
func extraLabel(key string, value string, labels prometheus.Labels) prometheus.Labels {
fixedLabels := make(prometheus.Labels)
for k, v := range labels {
fixedLabels[k] = v
}
fixedLabels[key] = value
return fixedLabels
}
type ParseResult struct {
ExitCode string
QueueTime time.Duration
RunTime time.Duration
}
var logLine = regexp.MustCompile(`^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{7}Z)\s(.+)$`)
var exitCodeLine = regexp.MustCompile(`##\[error\]Process completed with exit code (\d)\.`)
func (reader *EventReader) fetchAndParseWorkflowJobLogs(ctx context.Context, e *gogithub.WorkflowJobEvent) (*ParseResult, error) {
owner := *e.Repo.Owner.Login
repo := *e.Repo.Name
id := *e.WorkflowJob.ID
url, _, err := reader.GitHubClient.Actions.GetWorkflowJobLogs(ctx, owner, repo, id, true)
if err != nil {
return nil, err
}
jobLogs, err := http.DefaultClient.Get(url.String())
if err != nil {
return nil, err
}
exitCode := "null"
var (
queuedTime time.Time
startedTime time.Time
completedTime time.Time
)
func() {
// Read jobLogs.Body line by line
defer jobLogs.Body.Close()
lines := bufio.NewScanner(jobLogs.Body)
for lines.Scan() {
matches := logLine.FindStringSubmatch(lines.Text())
if matches == nil {
continue
}
timestamp := matches[1]
line := matches[2]
if strings.HasPrefix(line, "##[error]") {
// Get exit code
exitCodeMatch := exitCodeLine.FindStringSubmatch(line)
if exitCodeMatch != nil {
exitCode = exitCodeMatch[1]
}
continue
}
if strings.HasPrefix(line, "Waiting for a runner to pick up this job...") {
queuedTime, _ = time.Parse(time.RFC3339, timestamp)
continue
}
if strings.HasPrefix(line, "Job is about to start running on the runner:") {
startedTime, _ = time.Parse(time.RFC3339, timestamp)
continue
}
// Last line in the log will count as the completed time
completedTime, _ = time.Parse(time.RFC3339, timestamp)
}
}()
return &ParseResult{
ExitCode: exitCode,
QueueTime: startedTime.Sub(queuedTime),
RunTime: completedTime.Sub(startedTime),
}, nil
}

View File

@ -0,0 +1,126 @@
// Package metrics provides monitoring of the GitHub related metrics.
//
// This depends on the metrics exporter of kubebuilder.
// See https://book.kubebuilder.io/reference/metrics.html for details.
package actionsmetrics
import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)
func init() {
metrics.Registry.MustRegister(
githubWorkflowJobQueueDurationSeconds,
githubWorkflowJobRunDurationSeconds,
githubWorkflowJobConclusionsTotal,
githubWorkflowJobsQueuedTotal,
githubWorkflowJobsStartedTotal,
githubWorkflowJobsCompletedTotal,
githubWorkflowJobFailuresTotal,
)
}
var (
runtimeBuckets []float64 = []float64{
0.01,
0.05,
0.1,
0.5,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
12,
15,
18,
20,
25,
30,
40,
50,
60,
70,
80,
90,
100,
110,
120,
150,
180,
210,
240,
300,
360,
420,
480,
540,
600,
900,
1200,
1800,
2400,
3000,
3600,
}
)
var (
githubWorkflowJobQueueDurationSeconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "github_workflow_job_queue_duration_seconds",
Help: "Queue times for workflow jobs in seconds",
Buckets: runtimeBuckets,
},
[]string{"runs_on", "job_name"},
)
githubWorkflowJobRunDurationSeconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "github_workflow_job_run_duration_seconds",
Help: "Run times for workflow jobs in seconds",
Buckets: runtimeBuckets,
},
[]string{"runs_on", "job_name", "job_conclusion"},
)
githubWorkflowJobConclusionsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "github_workflow_job_conclusions_total",
Help: "Conclusions for tracked workflow jobs",
},
[]string{"runs_on", "job_name", "job_conclusion"},
)
githubWorkflowJobsQueuedTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "github_workflow_jobs_queued_total",
Help: "Total count of workflow jobs queued (events where job_status=queued)",
},
[]string{"runs_on", "job_name"},
)
githubWorkflowJobsStartedTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "github_workflow_jobs_started_total",
Help: "Total count of workflow jobs started (events where job_status=in_progress)",
},
[]string{"runs_on", "job_name"},
)
githubWorkflowJobsCompletedTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "github_workflow_jobs_completed_total",
Help: "Total count of workflow jobs completed (events where job_status=completed)",
},
[]string{"runs_on", "job_name"},
)
githubWorkflowJobFailuresTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "github_workflow_job_failures_total",
Help: "Conclusions for tracked workflow runs",
},
[]string{"runs_on", "job_name", "failed_step", "exit_code"},
)
)

View File

@ -0,0 +1,157 @@
package actionsmetrics
/*
Copyright 2022 The actions-runner-controller authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import (
"context"
"fmt"
"io"
"net/http"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"github.com/go-logr/logr"
gogithub "github.com/google/go-github/v47/github"
ctrl "sigs.k8s.io/controller-runtime"
"github.com/actions-runner-controller/actions-runner-controller/github"
)
type EventHook func(interface{})
// WebhookServer is a HTTP server that handles workflow_job events sent from GitHub Actions
type WebhookServer struct {
Log logr.Logger
// SecretKeyBytes is the byte representation of the Webhook secret token
// the administrator is generated and specified in GitHub Web UI.
SecretKeyBytes []byte
// GitHub Client to discover runner groups assigned to a repository
GitHubClient *github.Client
// When HorizontalRunnerAutoscalerGitHubWebhook handles a request, each EventHook is sent the webhook event
EventHooks []EventHook
}
func (autoscaler *WebhookServer) Reconcile(_ context.Context, request reconcile.Request) (reconcile.Result, error) {
return ctrl.Result{}, nil
}
func (autoscaler *WebhookServer) Handle(w http.ResponseWriter, r *http.Request) {
var (
ok bool
err error
)
defer func() {
if !ok {
w.WriteHeader(http.StatusInternalServerError)
if err != nil {
msg := err.Error()
if written, err := w.Write([]byte(msg)); err != nil {
autoscaler.Log.V(1).Error(err, "failed writing http error response", "msg", msg, "written", written)
}
}
}
}()
defer func() {
if r.Body != nil {
r.Body.Close()
}
}()
// respond ok to GET / e.g. for health check
if r.Method == http.MethodGet {
ok = true
fmt.Fprintln(w, "actions-metrics-server is running")
return
}
var payload []byte
if len(autoscaler.SecretKeyBytes) > 0 {
payload, err = gogithub.ValidatePayload(r, autoscaler.SecretKeyBytes)
if err != nil {
autoscaler.Log.Error(err, "error validating request body")
return
}
} else {
payload, err = io.ReadAll(r.Body)
if err != nil {
autoscaler.Log.Error(err, "error reading request body")
return
}
}
webhookType := gogithub.WebHookType(r)
event, err := gogithub.ParseWebHook(webhookType, payload)
if err != nil {
var s string
if payload != nil {
s = string(payload)
}
autoscaler.Log.Error(err, "could not parse webhook", "webhookType", webhookType, "payload", s)
return
}
log := autoscaler.Log.WithValues(
"event", webhookType,
"hookID", r.Header.Get("X-GitHub-Hook-ID"),
"delivery", r.Header.Get("X-GitHub-Delivery"),
)
switch event.(type) {
case *gogithub.PingEvent:
ok = true
w.WriteHeader(http.StatusOK)
msg := "pong"
if written, err := w.Write([]byte(msg)); err != nil {
log.Error(err, "failed writing http response", "msg", msg, "written", written)
}
log.Info("handled ping event")
return
}
for _, eventHook := range autoscaler.EventHooks {
eventHook(event)
}
ok = true
w.WriteHeader(http.StatusOK)
msg := "ok"
log.Info(msg)
if written, err := w.Write([]byte(msg)); err != nil {
log.Error(err, "failed writing http response", "msg", msg, "written", written)
}
}