From 0f2a659878884cd9af613193e6c89defdb55f21a Mon Sep 17 00:00:00 2001 From: Junya Okabe <86868255+Okabe-Junya@users.noreply.github.com> Date: Thu, 7 May 2026 20:26:46 +0900 Subject: [PATCH 1/5] Fix: Detect init container failure in EphemeralRunner controller (#4457) --- .../ephemeralrunner_controller.go | 16 +++ .../ephemeralrunner_controller_test.go | 109 ++++++++++++++++++ 2 files changed, 125 insertions(+) diff --git a/controllers/actions.github.com/ephemeralrunner_controller.go b/controllers/actions.github.com/ephemeralrunner_controller.go index 616c0e04..ee5e2c87 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller.go +++ b/controllers/actions.github.com/ephemeralrunner_controller.go @@ -351,6 +351,12 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ ) return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log) + case initContainerFailed(pod): + log.Info("Pod has a failed init container, deleting pod as failed so it can be restarted", + "initContainerStatuses", pod.Status.InitContainerStatuses, + ) + return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log) + case cs == nil: // starting, no container state yet log.Info("Waiting for runner container status to be available") @@ -862,3 +868,13 @@ func runnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus { } return nil } + +func initContainerFailed(pod *corev1.Pod) bool { + for i := range pod.Status.InitContainerStatuses { + cs := &pod.Status.InitContainerStatuses[i] + if cs.State.Terminated != nil && cs.State.Terminated.ExitCode != 0 { + return true + } + } + return false +} diff --git a/controllers/actions.github.com/ephemeralrunner_controller_test.go b/controllers/actions.github.com/ephemeralrunner_controller_test.go index 559d9469..6b92a0ee 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller_test.go +++ b/controllers/actions.github.com/ephemeralrunner_controller_test.go @@ -355,6 +355,115 @@ var _ = Describe("EphemeralRunner", func() { ).Should(BeTrue(), "Pod should be re-created") }) + It("It should re-create pod when init container fails before pod phase transitions to Failed", func() { + pod := new(corev1.Pod) + Eventually(func() (bool, error) { + if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil { + return false, err + } + return true, nil + }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true)) + + oldPodUID := pod.UID + + // Simulate init container failure without PodFailed phase. + // This can happen when the kubelet has not yet transitioned the pod phase. + pod.Status.Phase = corev1.PodPending + pod.Status.InitContainerStatuses = []corev1.ContainerStatus{ + { + Name: "setup", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "StartError", + Message: "failed to create containerd task: context canceled", + }, + }, + }, + } + err := k8sClient.Status().Update(ctx, pod) + Expect(err).To(BeNil(), "Failed to update pod status") + + Eventually( + func() (int, error) { + updated := new(v1alpha1.EphemeralRunner) + err := k8sClient.Get( + ctx, + client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, + updated, + ) + if err != nil { + return 0, err + } + return len(updated.Status.Failures), nil + }, + ephemeralRunnerTimeout, + ephemeralRunnerInterval, + ).Should(BeEquivalentTo(1)) + + Eventually( + func() (bool, error) { + newPod := new(corev1.Pod) + err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, newPod) + if err != nil { + return false, err + } + return newPod.UID != oldPodUID, nil + }, + ephemeralRunnerTimeout, + ephemeralRunnerInterval, + ).Should(BeTrue(), "Pod should be re-created after init container failure") + }) + + It("It should delete ephemeral runner when init container fails and job is assigned", func() { + er := new(v1alpha1.EphemeralRunner) + Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, er) + }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(Succeed(), "failed to get ephemeral runner") + + er.Status.JobID = "1" + err := k8sClient.Status().Update(ctx, er) + Expect(err).To(BeNil(), "failed to update ephemeral runner status") + + Eventually(func() (string, error) { + current := new(v1alpha1.EphemeralRunner) + if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, current); err != nil { + return "", err + } + return current.Status.JobID, nil + }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo("1")) + + pod := new(corev1.Pod) + Eventually(func() (bool, error) { + if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil { + return false, err + } + return true, nil + }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true)) + + // Simulate init container failure with job assigned + pod.Status.Phase = corev1.PodPending + pod.Status.InitContainerStatuses = []corev1.ContainerStatus{ + { + Name: "setup", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "StartError", + }, + }, + }, + } + err = k8sClient.Status().Update(ctx, pod) + Expect(err).To(BeNil(), "Failed to update pod status") + + Eventually(func() bool { + check := new(v1alpha1.EphemeralRunner) + err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, check) + return kerrors.IsNotFound(err) + }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeTrue(), "Ephemeral runner should eventually be deleted when init container fails with job assigned") + }) + It("It should treat pod failed with runner container exit 0 as success with job id", func() { er := new(v1alpha1.EphemeralRunner) Eventually(func() error { From 9d3ed7cb5835639f2d66fcecd629a6a38618b72e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 13:52:41 +0200 Subject: [PATCH 2/5] Bump the actions group with 3 updates (#4483) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Nikola Jokic --- .github/workflows/arc-publish-chart.yaml | 2 +- .github/workflows/arc-validate-chart.yaml | 2 +- .github/workflows/gha-publish-chart.yaml | 12 ++++++------ .github/workflows/gha-validate-chart.yaml | 4 ++-- .github/workflows/global-publish-canary.yaml | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/arc-publish-chart.yaml b/.github/workflows/arc-publish-chart.yaml index 4bb579f6..2f76ccfd 100644 --- a/.github/workflows/arc-publish-chart.yaml +++ b/.github/workflows/arc-publish-chart.yaml @@ -45,7 +45,7 @@ jobs: fetch-depth: 0 - name: Set up Helm - uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 with: version: ${{ env.HELM_VERSION }} diff --git a/.github/workflows/arc-validate-chart.yaml b/.github/workflows/arc-validate-chart.yaml index f40f1215..41be263a 100644 --- a/.github/workflows/arc-validate-chart.yaml +++ b/.github/workflows/arc-validate-chart.yaml @@ -45,7 +45,7 @@ jobs: fetch-depth: 0 - name: Set up Helm - uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 with: version: ${{ env.HELM_VERSION }} diff --git a/.github/workflows/gha-publish-chart.yaml b/.github/workflows/gha-publish-chart.yaml index d3df1d08..2b92f0a3 100644 --- a/.github/workflows/gha-publish-chart.yaml +++ b/.github/workflows/gha-publish-chart.yaml @@ -94,14 +94,14 @@ jobs: driver-opts: image=moby/buildkit:v0.10.6 - name: Login to GitHub Container Registry - uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build & push controller image - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f with: file: Dockerfile platforms: linux/amd64,linux/arm64 @@ -148,7 +148,7 @@ jobs: echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT - name: Set up Helm - uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 with: version: ${{ env.HELM_VERSION }} @@ -196,7 +196,7 @@ jobs: echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT - name: Set up Helm - uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 with: version: ${{ env.HELM_VERSION }} @@ -243,7 +243,7 @@ jobs: echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT - name: Set up Helm - uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 with: version: ${{ env.HELM_VERSION }} @@ -292,7 +292,7 @@ jobs: echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT - name: Set up Helm - uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 with: version: ${{ env.HELM_VERSION }} diff --git a/.github/workflows/gha-validate-chart.yaml b/.github/workflows/gha-validate-chart.yaml index 46d7b065..cfcad8fc 100644 --- a/.github/workflows/gha-validate-chart.yaml +++ b/.github/workflows/gha-validate-chart.yaml @@ -41,7 +41,7 @@ jobs: fetch-depth: 0 - name: Set up Helm - uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 + uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 with: version: ${{ env.HELM_VERSION }} @@ -93,7 +93,7 @@ jobs: version: latest - name: Build controller image - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f if: steps.list-changed.outputs.changed == 'true' with: file: Dockerfile diff --git a/.github/workflows/global-publish-canary.yaml b/.github/workflows/global-publish-canary.yaml index 2da48be3..1abb3314 100644 --- a/.github/workflows/global-publish-canary.yaml +++ b/.github/workflows/global-publish-canary.yaml @@ -93,7 +93,7 @@ jobs: uses: actions/checkout@v6 - name: Login to GitHub Container Registry - uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 with: registry: ghcr.io username: ${{ github.actor }} @@ -119,7 +119,7 @@ jobs: # Unstable builds - run at your own risk - name: Build and Push - uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f with: context: . file: ./Dockerfile From ef48f429b34f51218d9ccf1552968f30caf951e9 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Mon, 11 May 2026 13:56:01 +0200 Subject: [PATCH 3/5] Render empty arrays for kubernetes-novolume volumes fields (#4461) --- .../templates/_helpers.tpl | 4 ++ .../templates/autoscalingrunnerset.yaml | 4 +- .../tests/template_test.go | 69 +++++++++++++++++++ ...ues_k8s_novolume_custom_volume_mounts.yaml | 18 +++++ .../values_k8s_novolume_custom_volumes.yaml | 13 ++++ 5 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volume_mounts.yaml create mode 100644 charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volumes.yaml diff --git a/charts/gha-runner-scale-set/templates/_helpers.tpl b/charts/gha-runner-scale-set/templates/_helpers.tpl index 53b1c2b3..4ad4bfef 100644 --- a/charts/gha-runner-scale-set/templates/_helpers.tpl +++ b/charts/gha-runner-scale-set/templates/_helpers.tpl @@ -468,6 +468,7 @@ env: {{- if $tlsConfig.runnerMountPath }} {{- $mountGitHubServerTLS = 1 }} {{- end }} + {{- if or $container.volumeMounts $mountGitHubServerTLS }} volumeMounts: {{- with $container.volumeMounts }} {{- range $i, $volMount := . }} @@ -482,6 +483,9 @@ volumeMounts: mountPath: {{ clean (print $tlsConfig.runnerMountPath "/" $tlsConfig.certificateFrom.configMapKeyRef.key) }} subPath: {{ $tlsConfig.certificateFrom.configMapKeyRef.key }} {{- end }} + {{- else }} +volumeMounts: [] + {{- end }} {{- end }} {{- end }} {{- end }} diff --git a/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml b/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml index 1a466d5b..45817de2 100644 --- a/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml +++ b/charts/gha-runner-scale-set/templates/autoscalingrunnerset.yaml @@ -269,7 +269,7 @@ spec: {{- include "gha-runner-scale-set.default-mode-runner-containers" . | nindent 6 }} {{- end }} {{- $tlsConfig := (default (dict) .Values.githubServerTLS) }} - {{- if or .Values.template.spec.volumes (eq $containerMode.type "dind") (eq $containerMode.type "kubernetes") (eq $containerMode.type "kubernetes-novolume") $tlsConfig.runnerMountPath }} + {{- if or .Values.template.spec.volumes (eq $containerMode.type "dind") (eq $containerMode.type "kubernetes") $tlsConfig.runnerMountPath }} volumes: {{- if $tlsConfig.runnerMountPath }} {{- include "gha-runner-scale-set.tls-volume" $tlsConfig | nindent 6 }} @@ -286,4 +286,6 @@ spec: {{- toYaml . | nindent 6 }} {{- end }} {{- end }} + {{- else if eq $containerMode.type "kubernetes-novolume" }} + volumes: [] {{- end }} diff --git a/charts/gha-runner-scale-set/tests/template_test.go b/charts/gha-runner-scale-set/tests/template_test.go index 00e16a7f..ba67ea90 100644 --- a/charts/gha-runner-scale-set/tests/template_test.go +++ b/charts/gha-runner-scale-set/tests/template_test.go @@ -1154,6 +1154,75 @@ func TestTemplateRenderedAutoScalingRunnerSet_EnableKubernetesModeNoVolume(t *te assert.Equal(t, ars.Spec.Template.Spec.Containers[0].Image, ars.Spec.Template.Spec.Containers[0].Env[3].Value) assert.Len(t, ars.Spec.Template.Spec.Volumes, 0, "Template.Spec should have 0 volumes") + assert.NotNil(t, ars.Spec.Template.Spec.Volumes, "Template.Spec.Volumes should be non-nil empty slice, not null") + + // Regression check: ensure volumeMounts is also non-nil empty slice for kubernetes-novolume + runnerContainer := ars.Spec.Template.Spec.Containers[0] + assert.NotNil(t, runnerContainer.VolumeMounts, "runner container VolumeMounts should be non-nil empty slice, not null") + assert.Len(t, runnerContainer.VolumeMounts, 0, "runner container should have 0 volumeMounts in kubernetes-novolume mode") +} + +func TestTemplateRenderedAutoScalingRunnerSet_EnableKubernetesModeNoVolume_WithCustomVolumes(t *testing.T) { + t.Parallel() + + // Path to the helm chart we will test + helmChartPath, err := filepath.Abs("../../gha-runner-scale-set") + require.NoError(t, err) + + testValuesPath, err := filepath.Abs("../tests/values_k8s_novolume_custom_volumes.yaml") + require.NoError(t, err) + + releaseName := "test-runners" + namespaceName := "test-" + strings.ToLower(random.UniqueId()) + + // Test that user-provided volumes are preserved even in kubernetes-novolume mode + options := &helm.Options{ + Logger: logger.Discard, + ValuesFiles: []string{testValuesPath}, + KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName), + } + + output := helm.RenderTemplate(t, options, helmChartPath, releaseName, []string{"templates/autoscalingrunnerset.yaml"}) + + var ars v1alpha1.AutoscalingRunnerSet + helm.UnmarshalK8SYaml(t, output, &ars) + + // Override preservation: user-provided volume should be present, not replaced by empty array + assert.Len(t, ars.Spec.Template.Spec.Volumes, 1, "Template.Spec should have 1 volume (user-provided override)") + assert.Equal(t, "custom-volume", ars.Spec.Template.Spec.Volumes[0].Name, "Volume name should be preserved as custom-volume") + assert.NotNil(t, ars.Spec.Template.Spec.Volumes[0].EmptyDir, "Volume should have EmptyDir configured") +} + +func TestTemplateRenderedAutoScalingRunnerSet_EnableKubernetesModeNoVolume_WithCustomVolumeMounts(t *testing.T) { + t.Parallel() + + // Path to the helm chart we will test + helmChartPath, err := filepath.Abs("../../gha-runner-scale-set") + require.NoError(t, err) + + testValuesPath, err := filepath.Abs("../tests/values_k8s_novolume_custom_volume_mounts.yaml") + require.NoError(t, err) + + releaseName := "test-runners" + namespaceName := "test-" + strings.ToLower(random.UniqueId()) + + // Test that user-provided volumeMounts are preserved in kubernetes-novolume runner container + options := &helm.Options{ + Logger: logger.Discard, + ValuesFiles: []string{testValuesPath}, + KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName), + } + + output := helm.RenderTemplate(t, options, helmChartPath, releaseName, []string{"templates/autoscalingrunnerset.yaml"}) + + var ars v1alpha1.AutoscalingRunnerSet + helm.UnmarshalK8SYaml(t, output, &ars) + + runnerContainer := ars.Spec.Template.Spec.Containers[0] + // Override preservation: user-provided volumeMounts should be present, not replaced by empty array + assert.Len(t, runnerContainer.VolumeMounts, 1, "runner container should have 1 volumeMount (user-provided override)") + assert.Equal(t, "custom-volume", runnerContainer.VolumeMounts[0].Name, "VolumeMount name should be preserved as custom-volume") + assert.Equal(t, "/mnt/custom", runnerContainer.VolumeMounts[0].MountPath, "VolumeMount path should be preserved as /mnt/custom") } func TestTemplateRenderedAutoscalingRunnerSet_ListenerPodTemplate(t *testing.T) { diff --git a/charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volume_mounts.yaml b/charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volume_mounts.yaml new file mode 100644 index 00000000..fda9bd39 --- /dev/null +++ b/charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volume_mounts.yaml @@ -0,0 +1,18 @@ +githubConfigUrl: https://github.com/actions +githubConfigSecret: + github_token: gh_token12345 +containerMode: + type: kubernetes-novolume +controllerServiceAccount: + name: arc + namespace: arc-system +template: + spec: + volumes: + - name: custom-volume + emptyDir: {} + containers: + - name: runner + volumeMounts: + - name: custom-volume + mountPath: /mnt/custom diff --git a/charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volumes.yaml b/charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volumes.yaml new file mode 100644 index 00000000..820f1298 --- /dev/null +++ b/charts/gha-runner-scale-set/tests/values_k8s_novolume_custom_volumes.yaml @@ -0,0 +1,13 @@ +githubConfigUrl: https://github.com/actions +githubConfigSecret: + github_token: gh_token12345 +containerMode: + type: kubernetes-novolume +controllerServiceAccount: + name: arc + namespace: arc-system +template: + spec: + volumes: + - name: custom-volume + emptyDir: {} From 081b9ce1ee65a6dc9ca8d2492188f27c06d47e76 Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Mon, 11 May 2026 15:04:07 +0200 Subject: [PATCH 4/5] Fix secret reconciliation updates for the listener pod (#4492) --- .../autoscalinglistener_controller.go | 107 +++++++++++++++--- .../autoscalinglistener_controller_test.go | 6 +- 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/controllers/actions.github.com/autoscalinglistener_controller.go b/controllers/actions.github.com/autoscalinglistener_controller.go index f83c760e..b45792d1 100644 --- a/controllers/actions.github.com/autoscalinglistener_controller.go +++ b/controllers/actions.github.com/autoscalinglistener_controller.go @@ -17,8 +17,10 @@ limitations under the License. package actionsgithubcom import ( + "bytes" "context" "fmt" + "maps" "time" "github.com/go-logr/logr" @@ -210,6 +212,30 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl. // TODO: make sure the role binding has the up-to-date role and service account + // Reconcile listener config secret and detect drift + cert := "" + if autoscalingListener.Spec.GitHubServerTLS != nil { + var err error + cert, err = r.certificate(ctx, &autoscalingRunnerSet, autoscalingListener) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to build GitHub server TLS certificate value for listener config: %w", err) + } + } + + var metricsConfig *listenerMetricsServerConfig + if r.ListenerMetricsAddr != "0" { + metricsConfig = &listenerMetricsServerConfig{ + addr: r.ListenerMetricsAddr, + endpoint: r.ListenerMetricsEndpoint, + } + } + + secretChanged, err := r.reconcileListenerConfigSecret(ctx, autoscalingListener, appConfig, metricsConfig, cert, log) + if err != nil { + log.Error(err, "Failed to reconcile listener config secret") + return ctrl.Result{}, err + } + listenerPod := new(corev1.Pod) if err := r.Get( ctx, @@ -234,6 +260,12 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl. return r.createListenerPod(ctx, &autoscalingRunnerSet, autoscalingListener, serviceAccount, appConfig, log) } + // If listener config secret changed and pod exists, restart the pod + if secretChanged { + log.Info("Listener config secret changed, restarting listener pod") + return ctrl.Result{}, r.deleteListenerPod(ctx, autoscalingListener, listenerPod, log) + } + cs := listenerContainerStatus(listenerPod) switch { case listenerPod.Status.Reason == "Evicted": @@ -285,19 +317,6 @@ func (r *AutoscalingListenerReconciler) deleteListenerPod(ctx context.Context, a log.Error(err, "Unable to delete the listener pod", "namespace", listenerPod.Namespace, "name", listenerPod.Name) return err } - - // delete the listener config secret as well, so it gets recreated when the listener pod is recreated, with any new data if it exists - var configSecret corev1.Secret - err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: scaleSetListenerConfigName(autoscalingListener)}, &configSecret) - switch { - case err == nil && configSecret.DeletionTimestamp.IsZero(): - log.Info("Deleting the listener config secret") - if err := r.Delete(ctx, &configSecret); err != nil { - return fmt.Errorf("failed to delete listener config secret: %w", err) - } - case !kerrors.IsNotFound(err): - return fmt.Errorf("failed to get the listener config secret: %w", err) - } } return nil } @@ -425,6 +444,47 @@ func (r *AutoscalingListenerReconciler) createServiceAccountForListener(ctx cont return ctrl.Result{}, nil } +func (r *AutoscalingListenerReconciler) reconcileListenerConfigSecret(ctx context.Context, autoscalingListener *v1alpha1.AutoscalingListener, appConfig *appconfig.AppConfig, metricsConfig *listenerMetricsServerConfig, cert string, logger logr.Logger) (changed bool, err error) { + desiredSecret, err := r.newScaleSetListenerConfig(autoscalingListener, appConfig, metricsConfig, cert) + if err != nil { + return false, fmt.Errorf("failed to build listener config secret: %w", err) + } + + existingSecret := &corev1.Secret{} + err = r.Get(ctx, types.NamespacedName{Name: desiredSecret.Name, Namespace: desiredSecret.Namespace}, existingSecret) + if err != nil { + if kerrors.IsNotFound(err) { + if err := ctrl.SetControllerReference(autoscalingListener, desiredSecret, r.Scheme); err != nil { + return false, fmt.Errorf("failed to set controller reference: %w", err) + } + + logger.Info("Creating listener config secret", "namespace", desiredSecret.Namespace, "name", desiredSecret.Name) + if err := r.Create(ctx, desiredSecret); err != nil { + return false, fmt.Errorf("failed to create listener config secret: %w", err) + } + + return true, nil + } + return false, fmt.Errorf("failed to get listener config secret: %w", err) + } + + if listenerConfigSecretDrifted(existingSecret, desiredSecret) { + updatedSecret := existingSecret.DeepCopy() + updatedSecret.Data = desiredSecret.Data + updatedSecret.Labels = desiredSecret.Labels + updatedSecret.Annotations = desiredSecret.Annotations + + logger.Info("Updating listener config secret", "namespace", updatedSecret.Namespace, "name", updatedSecret.Name) + if err := r.Update(ctx, updatedSecret); err != nil { + return false, fmt.Errorf("failed to update listener config secret: %w", err) + } + + return true, nil + } + + return false, nil +} + func (r *AutoscalingListenerReconciler) createListenerPod(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, autoscalingListener *v1alpha1.AutoscalingListener, serviceAccount *corev1.ServiceAccount, appConfig *appconfig.AppConfig, logger logr.Logger) (ctrl.Result, error) { var envs []corev1.EnvVar if autoscalingListener.Spec.Proxy != nil { @@ -691,6 +751,27 @@ func (r *AutoscalingListenerReconciler) publishRunningListener(autoscalingListen return nil } +// listenerConfigSecretDrifted detects if the listener config secret has drifted from the desired state. +// It compares the config.json data, labels, and annotations deterministically. +func listenerConfigSecretDrifted(existing *corev1.Secret, desired *corev1.Secret) bool { + // Compare config.json data + if !bytes.Equal(existing.Data["config.json"], desired.Data["config.json"]) { + return true + } + + // Compare labels + if !maps.Equal(existing.Labels, desired.Labels) { + return true + } + + // Compare annotations + if !maps.Equal(existing.Annotations, desired.Annotations) { + return true + } + + return false +} + // SetupWithManager sets up the controller with the Manager. func (r *AutoscalingListenerReconciler) SetupWithManager(mgr ctrl.Manager, opts ...Option) error { labelBasedWatchFunc := func(_ context.Context, obj client.Object) []reconcile.Request { diff --git a/controllers/actions.github.com/autoscalinglistener_controller_test.go b/controllers/actions.github.com/autoscalinglistener_controller_test.go index 974cf74a..b9a6c2b0 100644 --- a/controllers/actions.github.com/autoscalinglistener_controller_test.go +++ b/controllers/actions.github.com/autoscalinglistener_controller_test.go @@ -350,7 +350,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { autoscalingListenerTestInterval).Should(BeEquivalentTo(rulesForListenerRole([]string{updated.Spec.EphemeralRunnerSetName})), "Role should be updated") }) - It("It should re-create pod and config secret whenever listener container is terminated", func() { + It("It should re-create pod but persist config secret whenever listener container is terminated", func() { // Waiting for the pod is created pod := new(corev1.Pod) Eventually( @@ -407,7 +407,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { autoscalingListenerTestInterval, ).ShouldNot(BeEquivalentTo(oldPodUID), "Pod should be re-created") - // Check if config secret is re-created + // Check if config secret persists (not re-created) to avoid race condition Eventually( func() (string, error) { secret := new(corev1.Secret) @@ -420,7 +420,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { }, autoscalingListenerTestTimeout, autoscalingListenerTestInterval, - ).ShouldNot(BeEquivalentTo(oldSecretUID), "Config secret should be re-created") + ).Should(BeEquivalentTo(oldSecretUID), "Config secret should persist (not be re-created)") }) }) }) From e7b64827610afb780c6a39a45e649892cce2678c Mon Sep 17 00:00:00 2001 From: Nikola Jokic Date: Mon, 11 May 2026 15:57:43 +0200 Subject: [PATCH 5/5] Fix job execution duration when runner assign time is not set (#4472) --- cmd/ghalistener/metrics/metrics.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cmd/ghalistener/metrics/metrics.go b/cmd/ghalistener/metrics/metrics.go index e35df92f..a1bbd472 100644 --- a/cmd/ghalistener/metrics/metrics.go +++ b/cmd/ghalistener/metrics/metrics.go @@ -493,11 +493,13 @@ func (e *exporter) RecordJobStarted(msg *scaleset.JobStarted) { } func (e *exporter) RecordJobCompleted(msg *scaleset.JobCompleted) { + if msg.RunnerAssignTime.IsZero() { + return + } + l := e.completedJobLabels(msg) e.incCounter(MetricCompletedJobsTotal, l) - - executionDuration := msg.FinishTime.Unix() - msg.RunnerAssignTime.Unix() - e.observeHistogram(MetricJobExecutionDurationSeconds, l, float64(executionDuration)) + e.observeHistogram(MetricJobExecutionDurationSeconds, l, float64(msg.FinishTime.Unix()-msg.RunnerAssignTime.Unix())) } func (e *exporter) RecordDesiredRunners(count int) {