From 053b8f9ae516e5d650d11d270aa178417c322506 Mon Sep 17 00:00:00 2001 From: Junya Okabe <86868255+Okabe-Junya@users.noreply.github.com> Date: Thu, 30 Apr 2026 01:08:46 +0900 Subject: [PATCH] Add health and readiness probes to controller manager (#4459) --- .../templates/deployment.yaml | 17 ++++++++++++++ charts/actions-runner-controller/values.yaml | 4 ++++ .../templates/_controller_template.tpl | 17 ++++++++++++++ .../values.yaml | 4 ++++ .../templates/deployment.yaml | 17 ++++++++++++++ .../values.yaml | 4 ++++ config/manager/manager.yaml | 14 +++++++++++ main.go | 23 +++++++++++++++---- 8 files changed, 96 insertions(+), 4 deletions(-) diff --git a/charts/actions-runner-controller/templates/deployment.yaml b/charts/actions-runner-controller/templates/deployment.yaml index 4e17a854..4021b82e 100644 --- a/charts/actions-runner-controller/templates/deployment.yaml +++ b/charts/actions-runner-controller/templates/deployment.yaml @@ -73,6 +73,9 @@ spec: {{- if .Values.dockerGID }} - "--docker-gid={{ .Values.dockerGID }}" {{- end }} + {{- with .Values.healthProbeBindAddress }} + - "--health-probe-bind-address={{ . }}" + {{- end }} command: - "/manager" env: @@ -144,6 +147,20 @@ spec: name: metrics-port protocol: TCP {{- end }} + {{- with .Values.healthProbeBindAddress }} + livenessProbe: + httpGet: + path: /healthz + port: {{ regexFind "[0-9]+$" . }} + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: {{ regexFind "[0-9]+$" . 
}} + initialDelaySeconds: 5 + periodSeconds: 10 + {{- end }} resources: {{- toYaml .Values.resources | nindent 12 }} securityContext: diff --git a/charts/actions-runner-controller/values.yaml b/charts/actions-runner-controller/values.yaml index fff18cc0..bc7217c5 100644 --- a/charts/actions-runner-controller/values.yaml +++ b/charts/actions-runner-controller/values.yaml @@ -190,6 +190,10 @@ admissionWebHooks: # There may be alternatives to setting `hostNetwork: true`, see # https://github.com/actions/actions-runner-controller/issues/1005#issuecomment-993097155 +# The address the health probe endpoint binds to. Disabled if empty. +# When set, liveness and readiness probes are added to the controller pod; the value must end in a numeric port (e.g. ":8081") because the probe port is taken from its trailing digits. +#healthProbeBindAddress: ":8081" + #hostNetwork: true # If you use `hostNetwork: true`, then you need dnsPolicy: ClusterFirstWithHostNet diff --git a/charts/gha-runner-scale-set-controller-experimental/templates/_controller_template.tpl b/charts/gha-runner-scale-set-controller-experimental/templates/_controller_template.tpl index 2c6461b7..44a17b3c 100644 --- a/charts/gha-runner-scale-set-controller-experimental/templates/_controller_template.tpl +++ b/charts/gha-runner-scale-set-controller-experimental/templates/_controller_template.tpl @@ -73,6 +73,9 @@ args: {{- with .Values.controller.manager.config.k8sClientRateLimiterBurst }} - "--k8s-client-rate-limiter-burst={{ . }}" {{- end }} +{{- with .Values.controller.manager.config.healthProbeBindAddress }} + - "--health-probe-bind-address={{ . }}" +{{- end }} {{- with .Values.controller.manager.container.extraArgs }} {{- range . }} - "{{ . }}" @@ -92,6 +95,20 @@ args: ports: {{- toYaml $ports | nindent 2 }} {{- end }} +{{- with .Values.controller.manager.config.healthProbeBindAddress }} +livenessProbe: + httpGet: + path: /healthz + port: {{ regexFind "[0-9]+$" . }} + initialDelaySeconds: 15 + periodSeconds: 20 +readinessProbe: + httpGet: + path: /readyz + port: {{ regexFind "[0-9]+$" . 
}} + initialDelaySeconds: 5 + periodSeconds: 10 +{{- end }} env: - name: CONTROLLER_MANAGER_CONTAINER_IMAGE value: "{{ .Values.controller.manager.container.image }}" diff --git a/charts/gha-runner-scale-set-controller-experimental/values.yaml b/charts/gha-runner-scale-set-controller-experimental/values.yaml index 9023b0be..75bcc314 100644 --- a/charts/gha-runner-scale-set-controller-experimental/values.yaml +++ b/charts/gha-runner-scale-set-controller-experimental/values.yaml @@ -44,6 +44,10 @@ controller: k8sClientRateLimiterQPS: null k8sClientRateLimiterBurst: null + # The address the health probe endpoint binds to. Disabled if empty/null. + # When set, liveness and readiness probes are added to the controller pod; the value must end in a numeric port (e.g. ":8081") because the probe port is taken from its trailing digits. + # healthProbeBindAddress: ":8081" + container: image: "ghcr.io/actions/gha-runner-scale-set-controller:latest" pullPolicy: IfNotPresent diff --git a/charts/gha-runner-scale-set-controller/templates/deployment.yaml b/charts/gha-runner-scale-set-controller/templates/deployment.yaml index c077b9bc..628336f4 100644 --- a/charts/gha-runner-scale-set-controller/templates/deployment.yaml +++ b/charts/gha-runner-scale-set-controller/templates/deployment.yaml @@ -101,6 +101,9 @@ spec: - "--workqueue-rate-limiter={{ . }}" {{- end }} {{- end }} + {{- with .Values.flags.healthProbeBindAddress }} + - "--health-probe-bind-address={{ . }}" + {{- end }} command: - "/manager" {{- if or .Values.metrics .Values.pprof.addr }} @@ -116,6 +119,20 @@ spec: protocol: TCP name: pprof {{- end }} + {{- with .Values.flags.healthProbeBindAddress }} + livenessProbe: + httpGet: + path: /healthz + port: {{ regexFind "[0-9]+$" . }} + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: {{ regexFind "[0-9]+$" . 
}} + initialDelaySeconds: 5 + periodSeconds: 10 + {{- end }} env: - name: CONTROLLER_MANAGER_CONTAINER_IMAGE value: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" diff --git a/charts/gha-runner-scale-set-controller/values.yaml b/charts/gha-runner-scale-set-controller/values.yaml index bc84324f..5e76406e 100644 --- a/charts/gha-runner-scale-set-controller/values.yaml +++ b/charts/gha-runner-scale-set-controller/values.yaml @@ -150,6 +150,10 @@ flags: # Overrides the default `.Release.Namespace` for all resources in this chart. namespaceOverride: "" + ## The address the health probe endpoint binds to. Disabled if empty. + ## When set, liveness and readiness probes are added to the controller pod; the value must end in a numeric port (e.g. ":8081") because the probe port is taken from its trailing digits. + # healthProbeBindAddress: ":8081" + ## Defines the K8s client rate limiter parameters. # k8sClientRateLimiterQPS: 20 # k8sClientRateLimiterBurst: 30 diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index f90df347..a0a8b85e 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -56,6 +56,20 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + # To enable health probes, uncomment the following and add + # "--health-probe-bind-address=:8081" to the container args. 
+ # livenessProbe: + # httpGet: + # path: /healthz + # port: 8081 + # initialDelaySeconds: 15 + # periodSeconds: 20 + # readinessProbe: + # httpGet: + # path: /readyz + # port: 8081 + # initialDelaySeconds: 5 + # periodSeconds: 10 volumeMounts: - name: controller-manager mountPath: "/etc/actions-runner-controller" diff --git a/main.go b/main.go index f436d194..1b4c023c 100644 --- a/main.go +++ b/main.go @@ -43,6 +43,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/healthz" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/webhook" @@ -86,6 +87,7 @@ func main() { metricsAddr string pprofAddr string + probeAddr string autoScalingRunnerSetOnly bool enableLeaderElection bool disableAdmissionWebhook bool @@ -127,6 +129,7 @@ func main() { flag.StringVar(&listenerMetricsEndpoint, "listener-metrics-endpoint", "/metrics", "The AutoscalingListener metrics server endpoint from which the metrics are collected") flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&pprofAddr, "pprof-addr", "", "The address the pprof endpoint binds to.") + flag.StringVar(&probeAddr, "health-probe-bind-address", "", "The address the health probe endpoint binds to. Disabled if empty.") flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for controller manager. 
Enabling this will ensure there is only one active controller manager.") flag.StringVar(&leaderElectionID, "leader-election-id", "actions-runner-controller", "Controller id for leader election.") @@ -246,10 +249,11 @@ func main() { SyncPeriod: &syncPeriod, DefaultNamespaces: defaultNamespaces, }, - PprofBindAddress: pprofAddr, - WebhookServer: webhookServer, - LeaderElection: enableLeaderElection, - LeaderElectionID: leaderElectionID, + PprofBindAddress: pprofAddr, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: leaderElectionID, Client: client.Options{ Cache: &client.CacheOptions{ DisableFor: []client.Object{ @@ -264,6 +268,17 @@ func main() { os.Exit(1) } + if probeAddr != "" { + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + log.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + log.Error(err, "unable to set up ready check") + os.Exit(1) + } + } + if autoScalingRunnerSetOnly { if err := actionsgithubcom.SetupIndexers(mgr); err != nil { log.Error(err, "unable to setup indexers")