Merge remote-tracking branch 'origin/master' into nikola-jokic/workqueue-rl
# Conflicts: # .github/workflows/gha-validate-chart.yaml Co-authored-by: nikola-jokic <97525037+nikola-jokic@users.noreply.github.com>
This commit is contained in:
commit
238b852442
|
|
@ -45,7 +45,7 @@ jobs:
|
|||
fetch-depth: 0
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4
|
||||
uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2
|
||||
with:
|
||||
version: ${{ env.HELM_VERSION }}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ jobs:
|
|||
fetch-depth: 0
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4
|
||||
uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2
|
||||
with:
|
||||
version: ${{ env.HELM_VERSION }}
|
||||
|
||||
|
|
|
|||
|
|
@ -94,14 +94,14 @@ jobs:
|
|||
driver-opts: image=moby/buildkit:v0.10.6
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build & push controller image
|
||||
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f
|
||||
with:
|
||||
file: Dockerfile
|
||||
platforms: linux/amd64,linux/arm64
|
||||
|
|
@ -148,7 +148,7 @@ jobs:
|
|||
echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4
|
||||
uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2
|
||||
with:
|
||||
version: ${{ env.HELM_VERSION }}
|
||||
|
||||
|
|
@ -196,7 +196,7 @@ jobs:
|
|||
echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4
|
||||
uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2
|
||||
with:
|
||||
version: ${{ env.HELM_VERSION }}
|
||||
|
||||
|
|
@ -243,7 +243,7 @@ jobs:
|
|||
echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4
|
||||
uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2
|
||||
with:
|
||||
version: ${{ env.HELM_VERSION }}
|
||||
|
||||
|
|
@ -292,7 +292,7 @@ jobs:
|
|||
echo "repository_owner=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4
|
||||
uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2
|
||||
with:
|
||||
version: ${{ env.HELM_VERSION }}
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ jobs:
|
|||
fetch-depth: 0
|
||||
|
||||
- name: Set up Helm
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4
|
||||
uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2
|
||||
with:
|
||||
version: ${{ env.HELM_VERSION }}
|
||||
|
||||
|
|
@ -69,6 +69,46 @@ jobs:
|
|||
run: |
|
||||
ct lint --config charts/.ci/ct-config-gha.yaml
|
||||
|
||||
- name: Set up docker buildx
|
||||
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
with:
|
||||
version: latest
|
||||
|
||||
- name: Build controller image
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
with:
|
||||
file: Dockerfile
|
||||
platforms: linux/amd64
|
||||
load: true
|
||||
build-args: |
|
||||
DOCKER_IMAGE_NAME=test-arc
|
||||
VERSION=dev
|
||||
tags: |
|
||||
test-arc:dev
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Create kind cluster
|
||||
uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
with:
|
||||
cluster_name: chart-testing
|
||||
|
||||
- name: Load image into cluster
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
export DOCKER_IMAGE_NAME=test-arc
|
||||
export VERSION=dev
|
||||
export IMG_RESULT=load
|
||||
make docker-buildx
|
||||
kind load docker-image test-arc:dev --name chart-testing
|
||||
|
||||
- name: Run chart-testing (install)
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
ct install --config charts/.ci/ct-config-gha.yaml
|
||||
test-chart:
|
||||
name: Test Chart
|
||||
runs-on: ubuntu-latest
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ jobs:
|
|||
uses: actions/checkout@v6
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2
|
||||
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
|
|
@ -119,7 +119,7 @@ jobs:
|
|||
|
||||
# Unstable builds - run at your own risk
|
||||
- name: Build and Push
|
||||
uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294
|
||||
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
|
|
|
|||
|
|
@ -468,6 +468,7 @@ env:
|
|||
{{- if $tlsConfig.runnerMountPath }}
|
||||
{{- $mountGitHubServerTLS = 1 }}
|
||||
{{- end }}
|
||||
{{- if or $container.volumeMounts $mountGitHubServerTLS }}
|
||||
volumeMounts:
|
||||
{{- with $container.volumeMounts }}
|
||||
{{- range $i, $volMount := . }}
|
||||
|
|
@ -482,6 +483,9 @@ volumeMounts:
|
|||
mountPath: {{ clean (print $tlsConfig.runnerMountPath "/" $tlsConfig.certificateFrom.configMapKeyRef.key) }}
|
||||
subPath: {{ $tlsConfig.certificateFrom.configMapKeyRef.key }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
volumeMounts: []
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
|
|
|||
|
|
@ -269,7 +269,7 @@ spec:
|
|||
{{- include "gha-runner-scale-set.default-mode-runner-containers" . | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- $tlsConfig := (default (dict) .Values.githubServerTLS) }}
|
||||
{{- if or .Values.template.spec.volumes (eq $containerMode.type "dind") (eq $containerMode.type "kubernetes") (eq $containerMode.type "kubernetes-novolume") $tlsConfig.runnerMountPath }}
|
||||
{{- if or .Values.template.spec.volumes (eq $containerMode.type "dind") (eq $containerMode.type "kubernetes") $tlsConfig.runnerMountPath }}
|
||||
volumes:
|
||||
{{- if $tlsConfig.runnerMountPath }}
|
||||
{{- include "gha-runner-scale-set.tls-volume" $tlsConfig | nindent 6 }}
|
||||
|
|
@ -286,4 +286,6 @@ spec:
|
|||
{{- toYaml . | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if eq $containerMode.type "kubernetes-novolume" }}
|
||||
volumes: []
|
||||
{{- end }}
|
||||
|
|
|
|||
|
|
@ -1154,6 +1154,75 @@ func TestTemplateRenderedAutoScalingRunnerSet_EnableKubernetesModeNoVolume(t *te
|
|||
assert.Equal(t, ars.Spec.Template.Spec.Containers[0].Image, ars.Spec.Template.Spec.Containers[0].Env[3].Value)
|
||||
|
||||
assert.Len(t, ars.Spec.Template.Spec.Volumes, 0, "Template.Spec should have 0 volumes")
|
||||
assert.NotNil(t, ars.Spec.Template.Spec.Volumes, "Template.Spec.Volumes should be non-nil empty slice, not null")
|
||||
|
||||
// Regression check: ensure volumeMounts is also non-nil empty slice for kubernetes-novolume
|
||||
runnerContainer := ars.Spec.Template.Spec.Containers[0]
|
||||
assert.NotNil(t, runnerContainer.VolumeMounts, "runner container VolumeMounts should be non-nil empty slice, not null")
|
||||
assert.Len(t, runnerContainer.VolumeMounts, 0, "runner container should have 0 volumeMounts in kubernetes-novolume mode")
|
||||
}
|
||||
|
||||
func TestTemplateRenderedAutoScalingRunnerSet_EnableKubernetesModeNoVolume_WithCustomVolumes(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Path to the helm chart we will test
|
||||
helmChartPath, err := filepath.Abs("../../gha-runner-scale-set")
|
||||
require.NoError(t, err)
|
||||
|
||||
testValuesPath, err := filepath.Abs("../tests/values_k8s_novolume_custom_volumes.yaml")
|
||||
require.NoError(t, err)
|
||||
|
||||
releaseName := "test-runners"
|
||||
namespaceName := "test-" + strings.ToLower(random.UniqueId())
|
||||
|
||||
// Test that user-provided volumes are preserved even in kubernetes-novolume mode
|
||||
options := &helm.Options{
|
||||
Logger: logger.Discard,
|
||||
ValuesFiles: []string{testValuesPath},
|
||||
KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
|
||||
}
|
||||
|
||||
output := helm.RenderTemplate(t, options, helmChartPath, releaseName, []string{"templates/autoscalingrunnerset.yaml"})
|
||||
|
||||
var ars v1alpha1.AutoscalingRunnerSet
|
||||
helm.UnmarshalK8SYaml(t, output, &ars)
|
||||
|
||||
// Override preservation: user-provided volume should be present, not replaced by empty array
|
||||
assert.Len(t, ars.Spec.Template.Spec.Volumes, 1, "Template.Spec should have 1 volume (user-provided override)")
|
||||
assert.Equal(t, "custom-volume", ars.Spec.Template.Spec.Volumes[0].Name, "Volume name should be preserved as custom-volume")
|
||||
assert.NotNil(t, ars.Spec.Template.Spec.Volumes[0].EmptyDir, "Volume should have EmptyDir configured")
|
||||
}
|
||||
|
||||
func TestTemplateRenderedAutoScalingRunnerSet_EnableKubernetesModeNoVolume_WithCustomVolumeMounts(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
// Path to the helm chart we will test
|
||||
helmChartPath, err := filepath.Abs("../../gha-runner-scale-set")
|
||||
require.NoError(t, err)
|
||||
|
||||
testValuesPath, err := filepath.Abs("../tests/values_k8s_novolume_custom_volume_mounts.yaml")
|
||||
require.NoError(t, err)
|
||||
|
||||
releaseName := "test-runners"
|
||||
namespaceName := "test-" + strings.ToLower(random.UniqueId())
|
||||
|
||||
// Test that user-provided volumeMounts are preserved in kubernetes-novolume runner container
|
||||
options := &helm.Options{
|
||||
Logger: logger.Discard,
|
||||
ValuesFiles: []string{testValuesPath},
|
||||
KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName),
|
||||
}
|
||||
|
||||
output := helm.RenderTemplate(t, options, helmChartPath, releaseName, []string{"templates/autoscalingrunnerset.yaml"})
|
||||
|
||||
var ars v1alpha1.AutoscalingRunnerSet
|
||||
helm.UnmarshalK8SYaml(t, output, &ars)
|
||||
|
||||
runnerContainer := ars.Spec.Template.Spec.Containers[0]
|
||||
// Override preservation: user-provided volumeMounts should be present, not replaced by empty array
|
||||
assert.Len(t, runnerContainer.VolumeMounts, 1, "runner container should have 1 volumeMount (user-provided override)")
|
||||
assert.Equal(t, "custom-volume", runnerContainer.VolumeMounts[0].Name, "VolumeMount name should be preserved as custom-volume")
|
||||
assert.Equal(t, "/mnt/custom", runnerContainer.VolumeMounts[0].MountPath, "VolumeMount path should be preserved as /mnt/custom")
|
||||
}
|
||||
|
||||
func TestTemplateRenderedAutoscalingRunnerSet_ListenerPodTemplate(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
githubConfigUrl: https://github.com/actions
|
||||
githubConfigSecret:
|
||||
github_token: gh_token12345
|
||||
containerMode:
|
||||
type: kubernetes-novolume
|
||||
controllerServiceAccount:
|
||||
name: arc
|
||||
namespace: arc-system
|
||||
template:
|
||||
spec:
|
||||
volumes:
|
||||
- name: custom-volume
|
||||
emptyDir: {}
|
||||
containers:
|
||||
- name: runner
|
||||
volumeMounts:
|
||||
- name: custom-volume
|
||||
mountPath: /mnt/custom
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
githubConfigUrl: https://github.com/actions
|
||||
githubConfigSecret:
|
||||
github_token: gh_token12345
|
||||
containerMode:
|
||||
type: kubernetes-novolume
|
||||
controllerServiceAccount:
|
||||
name: arc
|
||||
namespace: arc-system
|
||||
template:
|
||||
spec:
|
||||
volumes:
|
||||
- name: custom-volume
|
||||
emptyDir: {}
|
||||
|
|
@ -493,11 +493,13 @@ func (e *exporter) RecordJobStarted(msg *scaleset.JobStarted) {
|
|||
}
|
||||
|
||||
func (e *exporter) RecordJobCompleted(msg *scaleset.JobCompleted) {
|
||||
if msg.RunnerAssignTime.IsZero() {
|
||||
return
|
||||
}
|
||||
|
||||
l := e.completedJobLabels(msg)
|
||||
e.incCounter(MetricCompletedJobsTotal, l)
|
||||
|
||||
executionDuration := msg.FinishTime.Unix() - msg.RunnerAssignTime.Unix()
|
||||
e.observeHistogram(MetricJobExecutionDurationSeconds, l, float64(executionDuration))
|
||||
e.observeHistogram(MetricJobExecutionDurationSeconds, l, float64(msg.FinishTime.Unix()-msg.RunnerAssignTime.Unix()))
|
||||
}
|
||||
|
||||
func (e *exporter) RecordDesiredRunners(count int) {
|
||||
|
|
|
|||
|
|
@ -17,8 +17,10 @@ limitations under the License.
|
|||
package actionsgithubcom
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"maps"
|
||||
"time"
|
||||
|
||||
"github.com/go-logr/logr"
|
||||
|
|
@ -210,6 +212,30 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
|
|||
|
||||
// TODO: make sure the role binding has the up-to-date role and service account
|
||||
|
||||
// Reconcile listener config secret and detect drift
|
||||
cert := ""
|
||||
if autoscalingListener.Spec.GitHubServerTLS != nil {
|
||||
var err error
|
||||
cert, err = r.certificate(ctx, &autoscalingRunnerSet, autoscalingListener)
|
||||
if err != nil {
|
||||
return ctrl.Result{}, fmt.Errorf("failed to build GitHub server TLS certificate value for listener config: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
var metricsConfig *listenerMetricsServerConfig
|
||||
if r.ListenerMetricsAddr != "0" {
|
||||
metricsConfig = &listenerMetricsServerConfig{
|
||||
addr: r.ListenerMetricsAddr,
|
||||
endpoint: r.ListenerMetricsEndpoint,
|
||||
}
|
||||
}
|
||||
|
||||
secretChanged, err := r.reconcileListenerConfigSecret(ctx, autoscalingListener, appConfig, metricsConfig, cert, log)
|
||||
if err != nil {
|
||||
log.Error(err, "Failed to reconcile listener config secret")
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
listenerPod := new(corev1.Pod)
|
||||
if err := r.Get(
|
||||
ctx,
|
||||
|
|
@ -234,6 +260,12 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
|
|||
return r.createListenerPod(ctx, &autoscalingRunnerSet, autoscalingListener, serviceAccount, appConfig, log)
|
||||
}
|
||||
|
||||
// If listener config secret changed and pod exists, restart the pod
|
||||
if secretChanged {
|
||||
log.Info("Listener config secret changed, restarting listener pod")
|
||||
return ctrl.Result{}, r.deleteListenerPod(ctx, autoscalingListener, listenerPod, log)
|
||||
}
|
||||
|
||||
cs := listenerContainerStatus(listenerPod)
|
||||
switch {
|
||||
case listenerPod.Status.Reason == "Evicted":
|
||||
|
|
@ -285,19 +317,6 @@ func (r *AutoscalingListenerReconciler) deleteListenerPod(ctx context.Context, a
|
|||
log.Error(err, "Unable to delete the listener pod", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
|
||||
return err
|
||||
}
|
||||
|
||||
// delete the listener config secret as well, so it gets recreated when the listener pod is recreated, with any new data if it exists
|
||||
var configSecret corev1.Secret
|
||||
err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: scaleSetListenerConfigName(autoscalingListener)}, &configSecret)
|
||||
switch {
|
||||
case err == nil && configSecret.DeletionTimestamp.IsZero():
|
||||
log.Info("Deleting the listener config secret")
|
||||
if err := r.Delete(ctx, &configSecret); err != nil {
|
||||
return fmt.Errorf("failed to delete listener config secret: %w", err)
|
||||
}
|
||||
case !kerrors.IsNotFound(err):
|
||||
return fmt.Errorf("failed to get the listener config secret: %w", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
@ -425,6 +444,47 @@ func (r *AutoscalingListenerReconciler) createServiceAccountForListener(ctx cont
|
|||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
func (r *AutoscalingListenerReconciler) reconcileListenerConfigSecret(ctx context.Context, autoscalingListener *v1alpha1.AutoscalingListener, appConfig *appconfig.AppConfig, metricsConfig *listenerMetricsServerConfig, cert string, logger logr.Logger) (changed bool, err error) {
|
||||
desiredSecret, err := r.newScaleSetListenerConfig(autoscalingListener, appConfig, metricsConfig, cert)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to build listener config secret: %w", err)
|
||||
}
|
||||
|
||||
existingSecret := &corev1.Secret{}
|
||||
err = r.Get(ctx, types.NamespacedName{Name: desiredSecret.Name, Namespace: desiredSecret.Namespace}, existingSecret)
|
||||
if err != nil {
|
||||
if kerrors.IsNotFound(err) {
|
||||
if err := ctrl.SetControllerReference(autoscalingListener, desiredSecret, r.Scheme); err != nil {
|
||||
return false, fmt.Errorf("failed to set controller reference: %w", err)
|
||||
}
|
||||
|
||||
logger.Info("Creating listener config secret", "namespace", desiredSecret.Namespace, "name", desiredSecret.Name)
|
||||
if err := r.Create(ctx, desiredSecret); err != nil {
|
||||
return false, fmt.Errorf("failed to create listener config secret: %w", err)
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
return false, fmt.Errorf("failed to get listener config secret: %w", err)
|
||||
}
|
||||
|
||||
if listenerConfigSecretDrifted(existingSecret, desiredSecret) {
|
||||
updatedSecret := existingSecret.DeepCopy()
|
||||
updatedSecret.Data = desiredSecret.Data
|
||||
updatedSecret.Labels = desiredSecret.Labels
|
||||
updatedSecret.Annotations = desiredSecret.Annotations
|
||||
|
||||
logger.Info("Updating listener config secret", "namespace", updatedSecret.Namespace, "name", updatedSecret.Name)
|
||||
if err := r.Update(ctx, updatedSecret); err != nil {
|
||||
return false, fmt.Errorf("failed to update listener config secret: %w", err)
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (r *AutoscalingListenerReconciler) createListenerPod(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, autoscalingListener *v1alpha1.AutoscalingListener, serviceAccount *corev1.ServiceAccount, appConfig *appconfig.AppConfig, logger logr.Logger) (ctrl.Result, error) {
|
||||
var envs []corev1.EnvVar
|
||||
if autoscalingListener.Spec.Proxy != nil {
|
||||
|
|
@ -691,6 +751,27 @@ func (r *AutoscalingListenerReconciler) publishRunningListener(autoscalingListen
|
|||
return nil
|
||||
}
|
||||
|
||||
// listenerConfigSecretDrifted detects if the listener config secret has drifted from the desired state.
|
||||
// It compares the config.json data, labels, and annotations deterministically.
|
||||
func listenerConfigSecretDrifted(existing *corev1.Secret, desired *corev1.Secret) bool {
|
||||
// Compare config.json data
|
||||
if !bytes.Equal(existing.Data["config.json"], desired.Data["config.json"]) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Compare labels
|
||||
if !maps.Equal(existing.Labels, desired.Labels) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Compare annotations
|
||||
if !maps.Equal(existing.Annotations, desired.Annotations) {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager.
|
||||
func (r *AutoscalingListenerReconciler) SetupWithManager(mgr ctrl.Manager, opts ...Option) error {
|
||||
labelBasedWatchFunc := func(_ context.Context, obj client.Object) []reconcile.Request {
|
||||
|
|
|
|||
|
|
@ -350,7 +350,7 @@ var _ = Describe("Test AutoScalingListener controller", func() {
|
|||
autoscalingListenerTestInterval).Should(BeEquivalentTo(rulesForListenerRole([]string{updated.Spec.EphemeralRunnerSetName})), "Role should be updated")
|
||||
})
|
||||
|
||||
It("It should re-create pod and config secret whenever listener container is terminated", func() {
|
||||
It("It should re-create pod but persist config secret whenever listener container is terminated", func() {
|
||||
// Waiting for the pod is created
|
||||
pod := new(corev1.Pod)
|
||||
Eventually(
|
||||
|
|
@ -407,7 +407,7 @@ var _ = Describe("Test AutoScalingListener controller", func() {
|
|||
autoscalingListenerTestInterval,
|
||||
).ShouldNot(BeEquivalentTo(oldPodUID), "Pod should be re-created")
|
||||
|
||||
// Check if config secret is re-created
|
||||
// Check if config secret persists (not re-created) to avoid race condition
|
||||
Eventually(
|
||||
func() (string, error) {
|
||||
secret := new(corev1.Secret)
|
||||
|
|
@ -420,7 +420,7 @@ var _ = Describe("Test AutoScalingListener controller", func() {
|
|||
},
|
||||
autoscalingListenerTestTimeout,
|
||||
autoscalingListenerTestInterval,
|
||||
).ShouldNot(BeEquivalentTo(oldSecretUID), "Config secret should be re-created")
|
||||
).Should(BeEquivalentTo(oldSecretUID), "Config secret should persist (not be re-created)")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -351,6 +351,12 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
|
|||
)
|
||||
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log)
|
||||
|
||||
case initContainerFailed(pod):
|
||||
log.Info("Pod has a failed init container, deleting pod as failed so it can be restarted",
|
||||
"initContainerStatuses", pod.Status.InitContainerStatuses,
|
||||
)
|
||||
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log)
|
||||
|
||||
case cs == nil:
|
||||
// starting, no container state yet
|
||||
log.Info("Waiting for runner container status to be available")
|
||||
|
|
@ -862,3 +868,13 @@ func runnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func initContainerFailed(pod *corev1.Pod) bool {
|
||||
for i := range pod.Status.InitContainerStatuses {
|
||||
cs := &pod.Status.InitContainerStatuses[i]
|
||||
if cs.State.Terminated != nil && cs.State.Terminated.ExitCode != 0 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -355,6 +355,115 @@ var _ = Describe("EphemeralRunner", func() {
|
|||
).Should(BeTrue(), "Pod should be re-created")
|
||||
})
|
||||
|
||||
It("It should re-create pod when init container fails before pod phase transitions to Failed", func() {
|
||||
pod := new(corev1.Pod)
|
||||
Eventually(func() (bool, error) {
|
||||
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true))
|
||||
|
||||
oldPodUID := pod.UID
|
||||
|
||||
// Simulate init container failure without PodFailed phase.
|
||||
// This can happen when the kubelet has not yet transitioned the pod phase.
|
||||
pod.Status.Phase = corev1.PodPending
|
||||
pod.Status.InitContainerStatuses = []corev1.ContainerStatus{
|
||||
{
|
||||
Name: "setup",
|
||||
State: corev1.ContainerState{
|
||||
Terminated: &corev1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
Reason: "StartError",
|
||||
Message: "failed to create containerd task: context canceled",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
err := k8sClient.Status().Update(ctx, pod)
|
||||
Expect(err).To(BeNil(), "Failed to update pod status")
|
||||
|
||||
Eventually(
|
||||
func() (int, error) {
|
||||
updated := new(v1alpha1.EphemeralRunner)
|
||||
err := k8sClient.Get(
|
||||
ctx,
|
||||
client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace},
|
||||
updated,
|
||||
)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return len(updated.Status.Failures), nil
|
||||
},
|
||||
ephemeralRunnerTimeout,
|
||||
ephemeralRunnerInterval,
|
||||
).Should(BeEquivalentTo(1))
|
||||
|
||||
Eventually(
|
||||
func() (bool, error) {
|
||||
newPod := new(corev1.Pod)
|
||||
err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, newPod)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return newPod.UID != oldPodUID, nil
|
||||
},
|
||||
ephemeralRunnerTimeout,
|
||||
ephemeralRunnerInterval,
|
||||
).Should(BeTrue(), "Pod should be re-created after init container failure")
|
||||
})
|
||||
|
||||
It("It should delete ephemeral runner when init container fails and job is assigned", func() {
|
||||
er := new(v1alpha1.EphemeralRunner)
|
||||
Eventually(func() error {
|
||||
return k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, er)
|
||||
}, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(Succeed(), "failed to get ephemeral runner")
|
||||
|
||||
er.Status.JobID = "1"
|
||||
err := k8sClient.Status().Update(ctx, er)
|
||||
Expect(err).To(BeNil(), "failed to update ephemeral runner status")
|
||||
|
||||
Eventually(func() (string, error) {
|
||||
current := new(v1alpha1.EphemeralRunner)
|
||||
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, current); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return current.Status.JobID, nil
|
||||
}, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo("1"))
|
||||
|
||||
pod := new(corev1.Pod)
|
||||
Eventually(func() (bool, error) {
|
||||
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true))
|
||||
|
||||
// Simulate init container failure with job assigned
|
||||
pod.Status.Phase = corev1.PodPending
|
||||
pod.Status.InitContainerStatuses = []corev1.ContainerStatus{
|
||||
{
|
||||
Name: "setup",
|
||||
State: corev1.ContainerState{
|
||||
Terminated: &corev1.ContainerStateTerminated{
|
||||
ExitCode: 1,
|
||||
Reason: "StartError",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
err = k8sClient.Status().Update(ctx, pod)
|
||||
Expect(err).To(BeNil(), "Failed to update pod status")
|
||||
|
||||
Eventually(func() bool {
|
||||
check := new(v1alpha1.EphemeralRunner)
|
||||
err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, check)
|
||||
return kerrors.IsNotFound(err)
|
||||
}, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeTrue(), "Ephemeral runner should eventually be deleted when init container fails with job assigned")
|
||||
})
|
||||
|
||||
It("It should treat pod failed with runner container exit 0 as success with job id", func() {
|
||||
er := new(v1alpha1.EphemeralRunner)
|
||||
Eventually(func() error {
|
||||
|
|
|
|||
Loading…
Reference in New Issue