diff --git a/Dockerfile b/Dockerfile index 3e64d273..329653d7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM --platform=$BUILDPLATFORM golang:1.24.0 as builder +FROM --platform=$BUILDPLATFORM golang:1.24.3 AS builder WORKDIR /workspace @@ -30,7 +30,7 @@ ARG TARGETPLATFORM TARGETOS TARGETARCH TARGETVARIANT VERSION=dev COMMIT_SHA=dev # to avoid https://github.com/moby/buildkit/issues/2334 # We can use docker layer cache so the build is fast enogh anyway # We also use per-platform GOCACHE for the same reason. -ENV GOCACHE /build/${TARGETPLATFORM}/root/.cache/go-build +ENV GOCACHE="/build/${TARGETPLATFORM}/root/.cache/go-build" # Build RUN --mount=target=. \ diff --git a/Makefile b/Makefile index 0c9e5700..382ade7e 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ endif DOCKER_USER ?= $(shell echo ${DOCKER_IMAGE_NAME} | cut -d / -f1) VERSION ?= dev COMMIT_SHA = $(shell git rev-parse HEAD) -RUNNER_VERSION ?= 2.323.0 +RUNNER_VERSION ?= 2.324.0 TARGETPLATFORM ?= $(shell arch) RUNNER_NAME ?= ${DOCKER_USER}/actions-runner RUNNER_TAG ?= ${VERSION} @@ -310,7 +310,7 @@ github-release: release # Otherwise we get errors like the below: # Error: failed to install CRD crds/actions.summerwind.dev_runnersets.yaml: CustomResourceDefinition.apiextensions.k8s.io "runnersets.actions.summerwind.dev" is invalid: [spec.validation.openAPIV3Schema.properties[spec].properties[template].properties[spec].properties[containers].items.properties[ports].items.properties[protocol].default: Required value: this property is in x-kubernetes-list-map-keys, so it must have a default or be a required property, spec.validation.openAPIV3Schema.properties[spec].properties[template].properties[spec].properties[initContainers].items.properties[ports].items.properties[protocol].default: Required value: this property is in x-kubernetes-list-map-keys, so it must have a default or be a required property] # -# Note that controller-gen newer than 0.6.2 is needed due to https://github.com/kubernetes-sigs/controller-tools/issues/448 +# Note that controller-gen newer than 0.7.0 is needed due to https://github.com/kubernetes-sigs/controller-tools/issues/448 # Otherwise ObjectMeta embedded in Spec results in empty on the storage. controller-gen: ifeq (, $(shell which controller-gen)) diff --git a/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go b/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go index e34b255e..0c13bf7c 100644 --- a/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go +++ b/apis/actions.github.com/v1alpha1/ephemeralrunner_types.go @@ -119,7 +119,7 @@ type EphemeralRunnerStatus struct { RunnerJITConfig string `json:"runnerJITConfig,omitempty"` // +optional - Failures map[string]bool `json:"failures,omitempty"` + Failures map[string]metav1.Time `json:"failures,omitempty"` // +optional JobRequestId int64 `json:"jobRequestId,omitempty"` @@ -137,6 +137,20 @@ type EphemeralRunnerStatus struct { JobDisplayName string `json:"jobDisplayName,omitempty"` } +func (s *EphemeralRunnerStatus) LastFailure() metav1.Time { + var maxTime metav1.Time + if len(s.Failures) == 0 { + return maxTime + } + + for _, ts := range s.Failures { + if ts.After(maxTime.Time) { + maxTime = ts + } + } + return maxTime +} + // +kubebuilder:object:root=true // EphemeralRunnerList contains a list of EphemeralRunner diff --git a/apis/actions.github.com/v1alpha1/version.go b/apis/actions.github.com/v1alpha1/version.go new file mode 100644 index 00000000..731c6011 --- /dev/null +++ b/apis/actions.github.com/v1alpha1/version.go @@ -0,0 +1,72 @@ +package v1alpha1 + +import "strings" + +func IsVersionAllowed(resourceVersion, buildVersion string) bool { + if buildVersion == "dev" || resourceVersion == buildVersion || strings.HasPrefix(buildVersion, "canary-") { + return true + } + + rv, ok := parseSemver(resourceVersion) + if !ok { + return false + } + bv, ok := parseSemver(buildVersion) + if !ok { + return false + } + return rv.major == bv.major && rv.minor == bv.minor +} + +type semver struct { + major string + minor string +} + +func parseSemver(v string) (p semver, ok bool) { + if v == "" { + return + } + p.major, v, ok = parseInt(v) + if !ok { + return p, false + } + if v == "" { + p.minor = "0" + return p, true + } + if v[0] != '.' { + return p, false + } + p.minor, v, ok = parseInt(v[1:]) + if !ok { + return p, false + } + if v == "" { + return p, true + } + if v[0] != '.' { + return p, false + } + if _, _, ok = parseInt(v[1:]); !ok { + return p, false + } + return p, true +} + +func parseInt(v string) (t, rest string, ok bool) { + if v == "" { + return + } + if v[0] < '0' || '9' < v[0] { + return + } + i := 1 + for i < len(v) && '0' <= v[i] && v[i] <= '9' { + i++ + } + if v[0] == '0' && i != 1 { + return + } + return v[:i], v[i:], true +} diff --git a/apis/actions.github.com/v1alpha1/version_test.go b/apis/actions.github.com/v1alpha1/version_test.go new file mode 100644 index 00000000..8b4e8025 --- /dev/null +++ b/apis/actions.github.com/v1alpha1/version_test.go @@ -0,0 +1,60 @@ +package v1alpha1_test + +import ( + "testing" + + "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" + "github.com/stretchr/testify/assert" +) + +func TestIsVersionAllowed(t *testing.T) { + t.Parallel() + tt := map[string]struct { + resourceVersion string + buildVersion string + want bool + }{ + "dev should always be allowed": { + resourceVersion: "0.11.0", + buildVersion: "dev", + want: true, + }, + "resourceVersion is not semver": { + resourceVersion: "dev", + buildVersion: "0.11.0", + want: false, + }, + "buildVersion is not semver": { + resourceVersion: "0.11.0", + buildVersion: "NA", + want: false, + }, + "major version mismatch": { + resourceVersion: "0.11.0", + buildVersion: "1.11.0", + want: false, + }, + "minor version mismatch": { + resourceVersion: "0.11.0", + buildVersion: "0.10.0", + want: false, + }, + "patch version mismatch": { + resourceVersion: "0.11.1", + buildVersion: "0.11.0", + want: true, + }, + "arbitrary version match": { + resourceVersion: "abc", + buildVersion: "abc", + want: true, + }, + } + + for name, tc := range tt { + t.Run(name, func(t *testing.T) { + got := v1alpha1.IsVersionAllowed(tc.resourceVersion, tc.buildVersion) + assert.Equal(t, tc.want, got) + }) + } +} diff --git a/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go b/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go index dd7553f0..b0947659 100644 --- a/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go +++ b/apis/actions.github.com/v1alpha1/zz_generated.deepcopy.go @@ -22,6 +22,7 @@ package v1alpha1 import ( "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -459,9 +460,9 @@ func (in *EphemeralRunnerStatus) DeepCopyInto(out *EphemeralRunnerStatus) { *out = *in if in.Failures != nil { in, out := &in.Failures, &out.Failures - *out = make(map[string]bool, len(*in)) + *out = make(map[string]metav1.Time, len(*in)) for key, val := range *in { - (*out)[key] = val + (*out)[key] = *val.DeepCopy() } } } diff --git a/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunners.yaml b/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunners.yaml index e1505280..f7cf1139 100644 --- a/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunners.yaml +++ b/charts/gha-runner-scale-set-controller/crds/actions.github.com_ephemeralrunners.yaml @@ -7794,7 +7794,8 @@ spec: properties: failures: additionalProperties: - type: boolean + format: date-time + type: string type: object jobDisplayName: type: string diff --git a/config/crd/bases/actions.github.com_ephemeralrunners.yaml b/config/crd/bases/actions.github.com_ephemeralrunners.yaml index e1505280..f7cf1139 100644 --- a/config/crd/bases/actions.github.com_ephemeralrunners.yaml +++ b/config/crd/bases/actions.github.com_ephemeralrunners.yaml @@ -7794,7 +7794,8 @@ spec: properties: failures: additionalProperties: - type: boolean + format: date-time + type: string type: object jobDisplayName: type: string diff --git a/controllers/actions.github.com/autoscalinglistener_controller.go b/controllers/actions.github.com/autoscalinglistener_controller.go index 70b215e8..cecfdfbf 100644 --- a/controllers/actions.github.com/autoscalinglistener_controller.go +++ b/controllers/actions.github.com/autoscalinglistener_controller.go @@ -139,9 +139,9 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl. // Create a mirror secret in the same namespace as the AutoscalingListener mirrorSecret := new(corev1.Secret) - if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: scaleSetListenerSecretMirrorName(autoscalingListener)}, mirrorSecret); err != nil { + if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: autoscalingListener.Name}, mirrorSecret); err != nil { if !kerrors.IsNotFound(err) { - log.Error(err, "Unable to get listener secret mirror", "namespace", autoscalingListener.Namespace, "name", scaleSetListenerSecretMirrorName(autoscalingListener)) + log.Error(err, "Unable to get listener secret mirror", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name) return ctrl.Result{}, err } @@ -160,9 +160,9 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl. // Make sure the runner scale set listener service account is created for the listener pod in the controller namespace serviceAccount := new(corev1.ServiceAccount) - if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: scaleSetListenerServiceAccountName(autoscalingListener)}, serviceAccount); err != nil { + if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: autoscalingListener.Name}, serviceAccount); err != nil { if !kerrors.IsNotFound(err) { - log.Error(err, "Unable to get listener service accounts", "namespace", autoscalingListener.Namespace, "name", scaleSetListenerServiceAccountName(autoscalingListener)) + log.Error(err, "Unable to get listener service accounts", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name) return ctrl.Result{}, err } @@ -175,9 +175,9 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl. // Make sure the runner scale set listener role is created in the AutoscalingRunnerSet namespace listenerRole := new(rbacv1.Role) - if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: scaleSetListenerRoleName(autoscalingListener)}, listenerRole); err != nil { + if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Name}, listenerRole); err != nil { if !kerrors.IsNotFound(err) { - log.Error(err, "Unable to get listener role", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", scaleSetListenerRoleName(autoscalingListener)) + log.Error(err, "Unable to get listener role", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", autoscalingListener.Name) return ctrl.Result{}, err } @@ -197,9 +197,9 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl. // Make sure the runner scale set listener role binding is created listenerRoleBinding := new(rbacv1.RoleBinding) - if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: scaleSetListenerRoleName(autoscalingListener)}, listenerRoleBinding); err != nil { + if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Name}, listenerRoleBinding); err != nil { if !kerrors.IsNotFound(err) { - log.Error(err, "Unable to get listener role binding", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", scaleSetListenerRoleName(autoscalingListener)) + log.Error(err, "Unable to get listener role binding", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", autoscalingListener.Name) return ctrl.Result{}, err } @@ -330,7 +330,7 @@ func (r *AutoscalingListenerReconciler) cleanupResources(ctx context.Context, au } listenerRoleBinding := new(rbacv1.RoleBinding) - err = r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: scaleSetListenerRoleName(autoscalingListener)}, listenerRoleBinding) + err = r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Name}, listenerRoleBinding) switch { case err == nil: if listenerRoleBinding.DeletionTimestamp.IsZero() { @@ -346,7 +346,7 @@ func (r *AutoscalingListenerReconciler) cleanupResources(ctx context.Context, au logger.Info("Listener role binding is deleted") listenerRole := new(rbacv1.Role) - err = r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: scaleSetListenerRoleName(autoscalingListener)}, listenerRole) + err = r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Name}, listenerRole) switch { case err == nil: if listenerRole.DeletionTimestamp.IsZero() { @@ -363,7 +363,7 @@ func (r *AutoscalingListenerReconciler) cleanupResources(ctx context.Context, au logger.Info("Cleaning up the listener service account") listenerSa := new(corev1.ServiceAccount) - err = r.Get(ctx, types.NamespacedName{Name: scaleSetListenerServiceAccountName(autoscalingListener), Namespace: autoscalingListener.Namespace}, listenerSa) + err = r.Get(ctx, types.NamespacedName{Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace}, listenerSa) switch { case err == nil: if listenerSa.DeletionTimestamp.IsZero() { diff --git a/controllers/actions.github.com/autoscalinglistener_controller_test.go b/controllers/actions.github.com/autoscalinglistener_controller_test.go index 69b7978c..5ce5ee48 100644 --- a/controllers/actions.github.com/autoscalinglistener_controller_test.go +++ b/controllers/actions.github.com/autoscalinglistener_controller_test.go @@ -134,37 +134,25 @@ var _ = Describe("Test AutoScalingListener controller", func() { autoscalingListenerTestTimeout, autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListenerFinalizerName), "AutoScalingListener should have a finalizer") - // Check if secret is created - mirrorSecret := new(corev1.Secret) - Eventually( - func() (string, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerSecretMirrorName(autoscalingListener), Namespace: autoscalingListener.Namespace}, mirrorSecret) - if err != nil { - return "", err - } - return string(mirrorSecret.Data["github_token"]), nil - }, - autoscalingListenerTestTimeout, - autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListenerTestGitHubToken), "Mirror secret should be created") - // Check if service account is created serviceAccount := new(corev1.ServiceAccount) Eventually( func() (string, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerServiceAccountName(autoscalingListener), Namespace: autoscalingListener.Namespace}, serviceAccount) + err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace}, serviceAccount) if err != nil { return "", err } return serviceAccount.Name, nil }, autoscalingListenerTestTimeout, - autoscalingListenerTestInterval).Should(BeEquivalentTo(scaleSetListenerServiceAccountName(autoscalingListener)), "Service account should be created") + autoscalingListenerTestInterval, + ).Should(BeEquivalentTo(autoscalingListener.Name), "Service account should be created") // Check if role is created role := new(rbacv1.Role) Eventually( func() ([]rbacv1.PolicyRule, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerRoleName(autoscalingListener), Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, role) + err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, role) if err != nil { return nil, err } @@ -178,7 +166,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { roleBinding := new(rbacv1.RoleBinding) Eventually( func() (string, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerRoleName(autoscalingListener), Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, roleBinding) + err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, roleBinding) if err != nil { return "", err } @@ -186,7 +174,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { return roleBinding.RoleRef.Name, nil }, autoscalingListenerTestTimeout, - autoscalingListenerTestInterval).Should(BeEquivalentTo(scaleSetListenerRoleName(autoscalingListener)), "Rolebinding should be created") + autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListener.Name), "Rolebinding should be created") // Check if pod is created pod := new(corev1.Pod) @@ -248,7 +236,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { Eventually( func() bool { roleBinding := new(rbacv1.RoleBinding) - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerRoleName(autoscalingListener), Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, roleBinding) + err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, roleBinding) return kerrors.IsNotFound(err) }, autoscalingListenerTestTimeout, @@ -259,7 +247,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { Eventually( func() bool { role := new(rbacv1.Role) - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerRoleName(autoscalingListener), Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, role) + err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, role) return kerrors.IsNotFound(err) }, autoscalingListenerTestTimeout, @@ -340,7 +328,7 @@ var _ = Describe("Test AutoScalingListener controller", func() { role := new(rbacv1.Role) Eventually( func() ([]rbacv1.PolicyRule, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerRoleName(autoscalingListener), Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, role) + err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, role) if err != nil { return nil, err } @@ -397,75 +385,6 @@ var _ = Describe("Test AutoScalingListener controller", func() { autoscalingListenerTestInterval, ).ShouldNot(BeEquivalentTo(oldPodUID), "Pod should be re-created") }) - - It("It should update mirror secrets to match secret used by AutoScalingRunnerSet", func() { - // Waiting for the pod is created - pod := new(corev1.Pod) - Eventually( - func() (string, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace}, pod) - if err != nil { - return "", err - } - - return pod.Name, nil - }, - autoscalingListenerTestTimeout, - autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListener.Name), "Pod should be created") - - // Update the secret - updatedSecret := configSecret.DeepCopy() - updatedSecret.Data["github_token"] = []byte(autoscalingListenerTestGitHubToken + "_updated") - err := k8sClient.Update(ctx, updatedSecret) - Expect(err).NotTo(HaveOccurred(), "failed to update test secret") - - updatedPod := pod.DeepCopy() - // Ignore status running and consult the container state - updatedPod.Status.Phase = corev1.PodRunning - updatedPod.Status.ContainerStatuses = []corev1.ContainerStatus{ - { - Name: autoscalingListenerContainerName, - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - ExitCode: 1, - }, - }, - }, - } - err = k8sClient.Status().Update(ctx, updatedPod) - Expect(err).NotTo(HaveOccurred(), "failed to update test pod to failed") - - // Check if mirror secret is updated with right data - mirrorSecret := new(corev1.Secret) - Eventually( - func() (map[string][]byte, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerSecretMirrorName(autoscalingListener), Namespace: autoscalingListener.Namespace}, mirrorSecret) - if err != nil { - return nil, err - } - - return mirrorSecret.Data, nil - }, - autoscalingListenerTestTimeout, - autoscalingListenerTestInterval).Should(BeEquivalentTo(updatedSecret.Data), "Mirror secret should be updated") - - // Check if we re-created a new pod - Eventually( - func() error { - latestPod := new(corev1.Pod) - err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace}, latestPod) - if err != nil { - return err - } - if latestPod.UID == pod.UID { - return fmt.Errorf("Pod should be recreated") - } - - return nil - }, - autoscalingListenerTestTimeout, - autoscalingListenerTestInterval).Should(Succeed(), "Pod should be recreated") - }) }) }) diff --git a/controllers/actions.github.com/autoscalingrunnerset_controller.go b/controllers/actions.github.com/autoscalingrunnerset_controller.go index ad4a0514..21740ff6 100644 --- a/controllers/actions.github.com/autoscalingrunnerset_controller.go +++ b/controllers/actions.github.com/autoscalingrunnerset_controller.go @@ -151,7 +151,7 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl return ctrl.Result{}, nil } - if autoscalingRunnerSet.Labels[LabelKeyKubernetesVersion] != build.Version { + if !v1alpha1.IsVersionAllowed(autoscalingRunnerSet.Labels[LabelKeyKubernetesVersion], build.Version) { if err := r.Delete(ctx, autoscalingRunnerSet); err != nil { log.Error(err, "Failed to delete autoscaling runner set on version mismatch", "buildVersion", build.Version, diff --git a/controllers/actions.github.com/ephemeralrunner_controller.go b/controllers/actions.github.com/ephemeralrunner_controller.go index 750ea1b6..5ac80df7 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller.go +++ b/controllers/actions.github.com/ephemeralrunner_controller.go @@ -28,6 +28,7 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -50,6 +51,19 @@ type EphemeralRunnerReconciler struct { ResourceBuilder } +// precompute backoff durations for failed ephemeral runners +// the len(failedRunnerBackoff) must be equal to maxFailures + 1 +var failedRunnerBackoff = []time.Duration{ + 0, + 5 * time.Second, + 10 * time.Second, + 20 * time.Second, + 40 * time.Second, + 80 * time.Second, +} + +const maxFailures = 5 + // +kubebuilder:rbac:groups=actions.github.com,resources=ephemeralrunners,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=actions.github.com,resources=ephemeralrunners/status,verbs=get;update;patch // +kubebuilder:rbac:groups=actions.github.com,resources=ephemeralrunners/finalizers,verbs=get;list;watch;create;update;patch;delete @@ -173,6 +187,29 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ } } + if len(ephemeralRunner.Status.Failures) > maxFailures { + log.Info(fmt.Sprintf("EphemeralRunner has failed more than %d times. Deleting ephemeral runner so it can be re-created", maxFailures)) + if err := r.Delete(ctx, ephemeralRunner); err != nil { + log.Error(fmt.Errorf("failed to delete ephemeral runner after %d failures: %w", maxFailures, err), "Failed to delete ephemeral runner") + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil + } + + now := metav1.Now() + lastFailure := ephemeralRunner.Status.LastFailure() + backoffDuration := failedRunnerBackoff[len(ephemeralRunner.Status.Failures)] + nextReconciliation := lastFailure.Add(backoffDuration) + if !lastFailure.IsZero() && now.Before(&metav1.Time{Time: nextReconciliation}) { + log.Info("Backing off the next reconciliation due to failure", + "lastFailure", lastFailure, + "nextReconciliation", nextReconciliation, + "requeueAfter", nextReconciliation.Sub(now.Time), + ) + return ctrl.Result{RequeueAfter: now.Sub(nextReconciliation)}, nil + } + secret := new(corev1.Secret) if err := r.Get(ctx, req.NamespacedName, secret); err != nil { if !kerrors.IsNotFound(err) { @@ -196,39 +233,28 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ pod := new(corev1.Pod) if err := r.Get(ctx, req.NamespacedName, pod); err != nil { - switch { - case !kerrors.IsNotFound(err): + if !kerrors.IsNotFound(err) { log.Error(err, "Failed to fetch the pod") return ctrl.Result{}, err + } - case len(ephemeralRunner.Status.Failures) > 5: - log.Info("EphemeralRunner has failed more than 5 times. Marking it as failed") - errMessage := fmt.Sprintf("Pod has failed to start more than 5 times: %s", pod.Status.Message) - if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonTooManyPodFailures, log); err != nil { + // Pod was not found. Create if the pod has never been created + log.Info("Creating new EphemeralRunner pod.") + result, err := r.createPod(ctx, ephemeralRunner, secret, log) + switch { + case err == nil: + return result, nil + case kerrors.IsInvalid(err) || kerrors.IsForbidden(err): + log.Error(err, "Failed to create a pod due to unrecoverable failure") + errMessage := fmt.Sprintf("Failed to create the pod: %v", err) + if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil { log.Error(err, "Failed to set ephemeral runner to phase Failed") return ctrl.Result{}, err } return ctrl.Result{}, nil - default: - // Pod was not found. Create if the pod has never been created - log.Info("Creating new EphemeralRunner pod.") - result, err := r.createPod(ctx, ephemeralRunner, secret, log) - switch { - case err == nil: - return result, nil - case kerrors.IsInvalid(err) || kerrors.IsForbidden(err): - log.Error(err, "Failed to create a pod due to unrecoverable failure") - errMessage := fmt.Sprintf("Failed to create the pod: %v", err) - if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil { - log.Error(err, "Failed to set ephemeral runner to phase Failed") - return ctrl.Result{}, err - } - return ctrl.Result{}, nil - default: - log.Error(err, "Failed to create the pod") - return ctrl.Result{}, err - } + log.Error(err, "Failed to create the pod") + return ctrl.Result{}, err } } @@ -484,9 +510,9 @@ func (r *EphemeralRunnerReconciler) deletePodAsFailed(ctx context.Context, ephem log.Info("Updating ephemeral runner status to track the failure count") if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) { if obj.Status.Failures == nil { - obj.Status.Failures = make(map[string]bool) + obj.Status.Failures = make(map[string]metav1.Time) } - obj.Status.Failures[string(pod.UID)] = true + obj.Status.Failures[string(pod.UID)] = metav1.Now() obj.Status.Ready = false obj.Status.Reason = pod.Status.Reason obj.Status.Message = pod.Status.Message diff --git a/controllers/actions.github.com/ephemeralrunner_controller_test.go b/controllers/actions.github.com/ephemeralrunner_controller_test.go index 1305bfca..0b842d5c 100644 --- a/controllers/actions.github.com/ephemeralrunner_controller_test.go +++ b/controllers/actions.github.com/ephemeralrunner_controller_test.go @@ -30,7 +30,7 @@ import ( const ( ephemeralRunnerTimeout = time.Second * 20 - ephemeralRunnerInterval = time.Millisecond * 250 + ephemeralRunnerInterval = time.Millisecond * 10 runnerImage = "ghcr.io/actions/actions-runner:latest" ) @@ -528,44 +528,26 @@ var _ = Describe("EphemeralRunner", func() { ).Should(BeEquivalentTo("")) }) - It("It should not re-create pod indefinitely", func() { + It("It should eventually delete ephemeral runner after consecutive failures", func() { updated := new(v1alpha1.EphemeralRunner) - pod := new(corev1.Pod) Eventually( - func() (bool, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated) - if err != nil { - return false, err - } - - err = k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod) - if err != nil { - if kerrors.IsNotFound(err) && len(updated.Status.Failures) > 5 { - return true, nil - } - - return false, err - } - - pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{ - Name: v1alpha1.EphemeralRunnerContainerName, - State: corev1.ContainerState{ - Terminated: &corev1.ContainerStateTerminated{ - ExitCode: 1, - }, - }, - }) - err = k8sClient.Status().Update(ctx, pod) - Expect(err).To(BeNil(), "Failed to update pod status") - return false, fmt.Errorf("pod haven't failed for 5 times.") + func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated) }, ephemeralRunnerTimeout, ephemeralRunnerInterval, - ).Should(BeEquivalentTo(true), "we should stop creating pod after 5 failures") + ).Should(Succeed(), "failed to get ephemeral runner") + + failEphemeralRunnerPod := func() *corev1.Pod { + pod := new(corev1.Pod) + Eventually( + func() error { + return k8sClient.Get(ctx, client.ObjectKey{Name: updated.Name, Namespace: updated.Namespace}, pod) + }, + ephemeralRunnerTimeout, + ephemeralRunnerInterval, + ).Should(Succeed(), "failed to get ephemeral runner pod") - // In case we still have pod created due to controller-runtime cache delay, mark the container as exited - err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod) - if err == nil { pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, corev1.ContainerStatus{ Name: v1alpha1.EphemeralRunnerContainerName, State: corev1.ContainerState{ @@ -576,25 +558,70 @@ var _ = Describe("EphemeralRunner", func() { }) err := k8sClient.Status().Update(ctx, pod) Expect(err).To(BeNil(), "Failed to update pod status") + + return pod } - // EphemeralRunner should failed with reason TooManyPodFailures - Eventually(func() (string, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated) - if err != nil { - return "", err - } - return updated.Status.Reason, nil - }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo("TooManyPodFailures"), "Reason should be TooManyPodFailures") + for i := range 5 { + pod := failEphemeralRunnerPod() - // EphemeralRunner should not have any pod - Eventually(func() (bool, error) { - err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod) - if err == nil { - return false, nil - } - return kerrors.IsNotFound(err), nil - }, ephemeralRunnerTimeout, ephemeralRunnerInterval).Should(BeEquivalentTo(true)) + Eventually( + func() (int, error) { + err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated) + if err != nil { + return 0, err + } + return len(updated.Status.Failures), nil + }, + ephemeralRunnerTimeout, + ephemeralRunnerInterval, + ).Should(BeEquivalentTo(i + 1)) + + Eventually( + func() error { + nextPod := new(corev1.Pod) + err := k8sClient.Get(ctx, client.ObjectKey{Name: pod.Name, Namespace: pod.Namespace}, nextPod) + if err != nil { + return err + } + if nextPod.UID != pod.UID { + return nil + } + return fmt.Errorf("pod not recreated") + }, + ).WithTimeout(20*time.Second).WithPolling(10*time.Millisecond).Should(Succeed(), "pod should be recreated") + + Eventually( + func() (bool, error) { + pod := new(corev1.Pod) + err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod) + if err != nil { + return false, err + } + for _, cs := range pod.Status.ContainerStatuses { + if cs.Name == v1alpha1.EphemeralRunnerContainerName { + return cs.State.Terminated == nil, nil + } + } + + return true, nil + }, + ).WithTimeout(20*time.Second).WithPolling(10*time.Millisecond).Should(BeEquivalentTo(true), "pod should be terminated") + } + + failEphemeralRunnerPod() + + Eventually( + func() (bool, error) { + err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, updated) + if kerrors.IsNotFound(err) { + return true, nil + } + return false, err + }, + ephemeralRunnerTimeout, + ephemeralRunnerInterval, + ).Should(BeTrue(), "Ephemeral runner should eventually be deleted") }) It("It should re-create pod on eviction", func() { diff --git a/controllers/actions.github.com/ephemeralrunnerset_controller_test.go b/controllers/actions.github.com/ephemeralrunnerset_controller_test.go index 0ea2027d..665279e8 100644 --- a/controllers/actions.github.com/ephemeralrunnerset_controller_test.go +++ b/controllers/actions.github.com/ephemeralrunnerset_controller_test.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "strings" + "testing" "time" corev1 "k8s.io/api/core/v1" @@ -21,6 +22,7 @@ import ( "github.com/go-logr/logr" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1" @@ -35,6 +37,10 @@ const ( ephemeralRunnerSetTestGitHubToken = "gh_token" ) +func TestPrecomputedConstants(t *testing.T) { + require.Equal(t, len(failedRunnerBackoff), maxFailures+1) +} + var _ = Describe("Test EphemeralRunnerSet controller", func() { var ctx context.Context var mgr ctrl.Manager diff --git a/controllers/actions.github.com/resourcebuilder.go b/controllers/actions.github.com/resourcebuilder.go index cd08b3a8..0726b82d 100644 --- a/controllers/actions.github.com/resourcebuilder.go +++ b/controllers/actions.github.com/resourcebuilder.go @@ -429,7 +429,7 @@ func mergeListenerContainer(base, from *corev1.Container) { func (b *ResourceBuilder) newScaleSetListenerServiceAccount(autoscalingListener *v1alpha1.AutoscalingListener) *corev1.ServiceAccount { return &corev1.ServiceAccount{ ObjectMeta: metav1.ObjectMeta{ - Name: scaleSetListenerServiceAccountName(autoscalingListener), + Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace, Labels: b.mergeLabels(autoscalingListener.Labels, map[string]string{ LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, @@ -444,7 +444,7 @@ func (b *ResourceBuilder) newScaleSetListenerRole(autoscalingListener *v1alpha1. rulesHash := hash.ComputeTemplateHash(&rules) newRole := &rbacv1.Role{ ObjectMeta: metav1.ObjectMeta{ - Name: scaleSetListenerRoleName(autoscalingListener), + Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Labels: b.mergeLabels(autoscalingListener.Labels, map[string]string{ LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, @@ -478,7 +478,7 @@ func (b *ResourceBuilder) newScaleSetListenerRoleBinding(autoscalingListener *v1 newRoleBinding := &rbacv1.RoleBinding{ ObjectMeta: metav1.ObjectMeta{ - Name: scaleSetListenerRoleName(autoscalingListener), + Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Labels: b.mergeLabels(autoscalingListener.Labels, map[string]string{ LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, @@ -501,7 +501,7 @@ func (b *ResourceBuilder) newScaleSetListenerSecretMirror(autoscalingListener *v newListenerSecret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: scaleSetListenerSecretMirrorName(autoscalingListener), + Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace, Labels: b.mergeLabels(autoscalingListener.Labels, map[string]string{ LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, @@ -713,30 +713,6 @@ func scaleSetListenerName(autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet) s return fmt.Sprintf("%v-%v-listener", autoscalingRunnerSet.Name, namespaceHash) } -func scaleSetListenerServiceAccountName(autoscalingListener *v1alpha1.AutoscalingListener) string { - namespaceHash := hash.FNVHashString(autoscalingListener.Spec.AutoscalingRunnerSetNamespace) - if len(namespaceHash) > 8 { - namespaceHash = namespaceHash[:8] - } - return fmt.Sprintf("%v-%v-listener", autoscalingListener.Spec.AutoscalingRunnerSetName, namespaceHash) -} - -func scaleSetListenerRoleName(autoscalingListener *v1alpha1.AutoscalingListener) string { - namespaceHash := hash.FNVHashString(autoscalingListener.Spec.AutoscalingRunnerSetNamespace) - if len(namespaceHash) > 8 { - namespaceHash = namespaceHash[:8] - } - return fmt.Sprintf("%v-%v-listener", autoscalingListener.Spec.AutoscalingRunnerSetName, namespaceHash) -} - -func scaleSetListenerSecretMirrorName(autoscalingListener *v1alpha1.AutoscalingListener) string { - namespaceHash := hash.FNVHashString(autoscalingListener.Spec.AutoscalingRunnerSetNamespace) - if len(namespaceHash) > 8 { - namespaceHash = namespaceHash[:8] - } - return fmt.Sprintf("%v-%v-listener", autoscalingListener.Spec.AutoscalingRunnerSetName, namespaceHash) -} - func proxyListenerSecretName(autoscalingListener *v1alpha1.AutoscalingListener) string { namespaceHash := hash.FNVHashString(autoscalingListener.Spec.AutoscalingRunnerSetNamespace) if len(namespaceHash) > 8 { diff --git a/controllers/actions.github.com/suite_test.go b/controllers/actions.github.com/suite_test.go index 80fb4196..46b97eb7 100644 --- a/controllers/actions.github.com/suite_test.go +++ b/controllers/actions.github.com/suite_test.go @@ -20,6 +20,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/onsi/ginkgo/config" @@ -79,6 +80,15 @@ var _ = BeforeSuite(func() { k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) Expect(err).ToNot(HaveOccurred()) Expect(k8sClient).ToNot(BeNil()) + + failedRunnerBackoff = []time.Duration{ + 20 * time.Millisecond, + 20 * time.Millisecond, + 20 * time.Millisecond, + 20 * time.Millisecond, + 20 * time.Millisecond, + 20 * time.Millisecond, + } }) var _ = AfterSuite(func() { diff --git a/go.mod b/go.mod index 7ca02e5f..12f60832 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,7 @@ module github.com/actions/actions-runner-controller -go 1.24.0 +go 1.24.3 + require ( github.com/bradleyfalzon/ghinstallation/v2 v2.14.0 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc diff --git a/runner/Makefile b/runner/Makefile index f9388f89..7fdc7827 100644 --- a/runner/Makefile +++ b/runner/Makefile @@ -6,8 +6,8 @@ DIND_ROOTLESS_RUNNER_NAME ?= ${DOCKER_USER}/actions-runner-dind-rootless OS_IMAGE ?= ubuntu-22.04 TARGETPLATFORM ?= $(shell arch) -RUNNER_VERSION ?= 2.323.0 -RUNNER_CONTAINER_HOOKS_VERSION ?= 0.6.2 +RUNNER_VERSION ?= 2.324.0 +RUNNER_CONTAINER_HOOKS_VERSION ?= 0.7.0 DOCKER_VERSION ?= 24.0.7 # default list of platforms for which multiarch image is built diff --git a/runner/VERSION b/runner/VERSION index 9b74807c..bef5082a 100644 --- a/runner/VERSION +++ b/runner/VERSION @@ -1,2 +1,2 @@ -RUNNER_VERSION=2.323.0 -RUNNER_CONTAINER_HOOKS_VERSION=0.6.2 \ No newline at end of file +RUNNER_VERSION=2.324.0 +RUNNER_CONTAINER_HOOKS_VERSION=0.7.0 \ No newline at end of file diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 44d0c5b7..82b31979 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -36,8 +36,8 @@ var ( testResultCMNamePrefix = "test-result-" - RunnerVersion = "2.323.0" - RunnerContainerHooksVersion = "0.6.2" + RunnerVersion = "2.324.0" + RunnerContainerHooksVersion = "0.7.0" ) // If you're willing to run this test via VS Code "run test" or "debug test", @@ -1106,7 +1106,7 @@ func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNam testing.Step{ Uses: "actions/setup-go@v3", With: &testing.With{ - GoVersion: "1.24.0", + GoVersion: "1.24.3", }, }, )