From 9b18826253aba67a4cce2c4b61bfdc9fc9809b5e Mon Sep 17 00:00:00 2001 From: Kuangyu Jing Date: Thu, 17 Jul 2025 15:47:34 +0900 Subject: [PATCH] Add ArgoCD health checks for ARC runner resources --- .../health-check-autoscalingrunnerset.yaml | 39 +++++++++ .../argocd/health-check-ephemeralrunner.yaml | 42 +++++++++ config/argocd/health-check-pod.yaml | 87 +++++++++++++++++++ config/argocd/health-check-runner.yaml | 40 +++++++++ 4 files changed, 208 insertions(+) create mode 100644 config/argocd/health-check-autoscalingrunnerset.yaml create mode 100644 config/argocd/health-check-ephemeralrunner.yaml create mode 100644 config/argocd/health-check-pod.yaml create mode 100644 config/argocd/health-check-runner.yaml diff --git a/config/argocd/health-check-autoscalingrunnerset.yaml b/config/argocd/health-check-autoscalingrunnerset.yaml new file mode 100644 index 00000000..82878230 --- /dev/null +++ b/config/argocd/health-check-autoscalingrunnerset.yaml @@ -0,0 +1,39 @@ +# Health check patch for actions.github.com/v1alpha1 AutoScalingRunnerSet +# This patch adds health check for AutoScalingRunnerSet resources to ArgoCD +apiVersion: v1 +kind: ConfigMap +metadata: + name: argocd-cm + namespace: argocd +data: + resource.customizations.health.actions.github.com_AutoScalingRunnerSet: | + hs = {} + if obj.status then + local desired = obj.status.desiredReplicas or obj.status.replicas or 0 + local ready = obj.status.readyReplicas or 0 + local current = obj.status.currentReplicas or 0 + local pending = obj.status.pendingEphemeralRunners or 0 + local terminating = obj.status.terminatingEphemeralRunners or 0 + local running = obj.status.runningEphemeralRunners or 0 + + if desired == 0 and current == 0 then + hs.status = "Healthy" + hs.message = "AutoScaler scaled down to zero" + elseif desired > 0 and ready == desired and pending == 0 and terminating == 0 then + hs.status = "Healthy" + hs.message = string.format("All runners ready: %d/%d (Running: %d)", ready, desired, running) + elseif terminating > 0 then + hs.status = "Progressing" + hs.message = string.format("Scaling down: %d terminating, %d/%d ready", terminating, ready, desired) + elseif pending > 0 or ready < desired then + hs.status = "Progressing" + hs.message = string.format("Scaling up: %d/%d ready, %d pending", ready, desired, pending) + else + hs.status = "Progressing" + hs.message = string.format("Runners: %d current, %d desired", current, desired) + end + else + hs.status = "Progressing" + hs.message = "Waiting for AutoScalingRunnerSet status" + end + return hs \ No newline at end of file diff --git a/config/argocd/health-check-ephemeralrunner.yaml b/config/argocd/health-check-ephemeralrunner.yaml new file mode 100644 index 00000000..1c428a7a --- /dev/null +++ b/config/argocd/health-check-ephemeralrunner.yaml @@ -0,0 +1,42 @@ +# Health check patch for actions.github.com/v1alpha1 EphemeralRunner (New API) +# This patch adds health check for EphemeralRunner resources to ArgoCD +apiVersion: v1 +kind: ConfigMap +metadata: + name: argocd-cm + namespace: argocd +data: + resource.customizations.health.actions.github.com_EphemeralRunner: | + hs = {} + if obj.status then + local phase = obj.status.phase + if phase == "Running" or phase == "Succeeded" or phase == "Finished" then + if obj.status.jobRequestId then + hs.status = "Healthy" + hs.message = "EphemeralRunner is running job: " .. tostring(obj.status.jobRequestId) + else + hs.status = "Healthy" + hs.message = "EphemeralRunner is running and ready for jobs" + end + elseif phase == "Pending" or phase == "PodCreated" then + hs.status = "Progressing" + hs.message = "EphemeralRunner is starting up" + elseif phase == "Failed" then + hs.status = "Degraded" + hs.message = obj.status.message or "EphemeralRunner has failed" + elseif phase == "Deleting" or phase == "Terminating" then + hs.status = "Progressing" + hs.message = "EphemeralRunner is cleaning up" + else + hs.status = "Progressing" + hs.message = "EphemeralRunner status: " .. (phase or "(nil)") + end + + if obj.status.runnerId then + hs.message = hs.message .. " (ID: " .. tostring(obj.status.runnerId) .. ")" + end + else + hs.status = "Progressing" + hs.message = "Waiting for EphemeralRunner status" + end + return hs \ No newline at end of file diff --git a/config/argocd/health-check-pod.yaml b/config/argocd/health-check-pod.yaml new file mode 100644 index 00000000..03b814e9 --- /dev/null +++ b/config/argocd/health-check-pod.yaml @@ -0,0 +1,87 @@ +# Health check patch for core/v1 Pod - specifically for Runner pods +# This patch adds health check for ARC runner pods to ArgoCD +apiVersion: v1 +kind: ConfigMap +metadata: + name: argocd-cm + namespace: argocd +data: + resource.customizations.health.core_Pod: | + hs = {} + + -- Detect if this is an ARC runner pod by label + local labels = (obj.metadata or {}).labels or {} + local isRunnerPod = + labels["actions.github.com/runner-pod"] or + labels["runner-deployment-name"] or + labels["actions-runner-controller/inject-registration-token"] or + labels["app.kubernetes.io/component"] == "runner" or + labels["app.kubernetes.io/part-of"] == "actions-runner-controller" + + if not isRunnerPod then + return nil -- let ArgoCD fallback to default Pod checker + end + + if obj.status then + local phase = obj.status.phase + local containerStatuses = obj.status.containerStatuses or {} + local allReady, anyRunning, failedCnt = true, false, 0 + local msgs = {} + + for _, cs in ipairs(containerStatuses) do + if not cs.ready then allReady = false end + if cs.state and cs.state.running then anyRunning = true end + + if cs.state and cs.state.terminated and cs.state.terminated.exitCode ~= 0 then + failedCnt = failedCnt + 1 + table.insert(msgs, + string.format("Container %s exited %d (%s)", + cs.name, + cs.state.terminated.exitCode, + cs.state.terminated.reason or "")) + end + + if cs.state and cs.state.waiting then + local reason = cs.state.waiting.reason or "" + if reason == "CrashLoopBackOff" or reason == "ErrImagePull" or reason == "ImagePullBackOff" then + failedCnt = failedCnt + 1 + table.insert(msgs, + string.format("Container %s waiting: %s", cs.name, reason)) + end + end + end + + if phase == "Running" and allReady and anyRunning then + hs.status = "Healthy" + hs.message = "Runner pod is running and all containers are ready" + elseif phase == "Succeeded" then + hs.status = "Healthy" + hs.message = "Runner pod completed successfully" + elseif phase == "Failed" or failedCnt > 0 then + hs.status = "Degraded" + local concatenatedMsgs = (#msgs > 0) and table.concat(msgs, "; ") or nil + local fallbackMsg = obj.status.message or "Pod has failed" + hs.message = concatenatedMsgs or fallbackMsg + elseif phase == "Pending" then + local msg = "Pod is pending" + for _, cond in ipairs(obj.status.conditions or {}) do + if cond.type == "PodScheduled" and cond.status == "False" then + msg = "Pod unschedulable: " .. (cond.reason or "") + break + end + end + hs.status = "Progressing" + hs.message = msg + elseif phase == "Running" and not allReady then + hs.status = "Progressing" + hs.message = "Pod is running but containers not ready" + else + hs.status = "Progressing" + hs.message = "Pod status: " .. (phase or "(nil)") + end + else + hs.status = "Progressing" + hs.message = "Waiting for pod status" + end + + return hs \ No newline at end of file diff --git a/config/argocd/health-check-runner.yaml b/config/argocd/health-check-runner.yaml new file mode 100644 index 00000000..c3f1ddd1 --- /dev/null +++ b/config/argocd/health-check-runner.yaml @@ -0,0 +1,40 @@ +# Health check patch for actions.summerwind.dev/v1alpha1 Runner (Legacy API) +# This patch adds health check for legacy Runner resources to ArgoCD +apiVersion: v1 +kind: ConfigMap +metadata: + name: argocd-cm + namespace: argocd +data: + resource.customizations.health.actions.summerwind.dev_Runner: | + hs = {} + if obj.status then + local phase = obj.status.phase + if obj.status.ready and phase == "Running" then + hs.status = "Healthy" + hs.message = "Runner is ready and running" + elseif phase == "Running" then + hs.status = "Progressing" + hs.message = "Runner is running but not ready" + elseif phase == "Pending" or phase == "Created" or phase == "Initializing" then + hs.status = "Progressing" + hs.message = "Runner is starting up: " .. phase + elseif phase == "Failed" or phase == "Error" then + hs.status = "Degraded" + hs.message = obj.status.message or "Runner has failed" + elseif phase == "Terminating" then + hs.status = "Progressing" + hs.message = "Runner is terminating" + else + hs.status = "Progressing" + hs.message = "Runner status: " .. (phase or "(nil)") + end + + if obj.status.podName then + hs.message = hs.message .. " (Pod: " .. obj.status.podName .. ")" + end + else + hs.status = "Progressing" + hs.message = "Waiting for runner status" + end + return hs \ No newline at end of file