refactor: Make RunnerReplicaSet and Runner backed by the same logic that backs RunnerSet

Yusuke Kuoka 2022-03-05 12:13:22 +00:00
parent c95e84a528
commit 14a878bfae
12 changed files with 278 additions and 903 deletions


@@ -84,8 +84,7 @@ if [ -n "${TEST_REPO}" ]; then
cat acceptance/testdata/runnerset.envsubst.yaml | TEST_ENTERPRISE= TEST_ORG= RUNNER_MIN_REPLICAS=${REPO_RUNNER_MIN_REPLICAS} NAME=repo-runnerset envsubst | kubectl apply -f -
else
echo 'Deploying runnerdeployment and hra. Set USE_RUNNERSET if you want to deploy runnerset instead.'
cat acceptance/testdata/repo.runnerdeploy.yaml | envsubst | kubectl apply -f -
cat acceptance/testdata/repo.hra.yaml | envsubst | kubectl apply -f -
cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_ORG= RUNNER_MIN_REPLICAS=${REPO_RUNNER_MIN_REPLICAS} NAME=repo-runnerdeploy envsubst | kubectl apply -f -
fi
else
echo 'Skipped deploying runnerdeployment and hra. Set TEST_REPO to "yourorg/yourrepo" to deploy.'


@@ -1,36 +0,0 @@
apiVersion: actions.summerwind.dev/v1alpha1
kind: HorizontalRunnerAutoscaler
metadata:
name: org
spec:
scaleTargetRef:
name: org-runnerdeploy
scaleUpTriggers:
- githubEvent:
checkRun:
types: ["created"]
status: "queued"
amount: 1
duration: "1m"
scheduledOverrides:
- startTime: "2021-05-11T16:05:00+09:00"
endTime: "2021-05-11T16:40:00+09:00"
minReplicas: 2
- startTime: "2021-05-01T00:00:00+09:00"
endTime: "2021-05-03T00:00:00+09:00"
recurrenceRule:
frequency: Weekly
untilTime: "2022-05-01T00:00:00+09:00"
minReplicas: 0
minReplicas: 0
maxReplicas: 5
# Used to test that HRA is working for org runners
metrics:
- type: PercentageRunnersBusy
scaleUpThreshold: '0.75'
scaleDownThreshold: '0.3'
scaleUpFactor: '2'
scaleDownFactor: '0.5'
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
repositoryNames:
- ${TEST_ORG_REPO}


@@ -1,44 +0,0 @@
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerDeployment
metadata:
name: org-runnerdeploy
spec:
# replicas: 1
template:
spec:
organization: ${TEST_ORG}
#
# Custom runner image
#
image: ${RUNNER_NAME}:${RUNNER_TAG}
imagePullPolicy: IfNotPresent
# Whether to pass --ephemeral (true) or --once (false, deprecated)
env:
- name: RUNNER_FEATURE_FLAG_EPHEMERAL
value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
#
# dockerd within runner container
#
## Replace `mumoshu/actions-runner-dind:dev` with your dind image
#dockerdWithinRunnerContainer: true
#image: mumoshu/actions-runner-dind:dev
#
# Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
#
#dockerMTU: 1450
#Runner group
# labels:
# - "mylabel 1"
# - "mylabel 2"
labels:
- "${RUNNER_LABEL}"
#
# Non-standard working directory
#
# workDir: "/"


@@ -1,25 +0,0 @@
apiVersion: actions.summerwind.dev/v1alpha1
kind: HorizontalRunnerAutoscaler
metadata:
name: actions-runner-aos-autoscaler
spec:
scaleTargetRef:
name: example-runnerdeploy
scaleUpTriggers:
- githubEvent:
checkRun:
types: ["created"]
status: "queued"
amount: 1
duration: "1m"
minReplicas: 0
maxReplicas: 5
metrics:
- type: PercentageRunnersBusy
scaleUpThreshold: '0.75'
scaleDownThreshold: '0.3'
scaleUpFactor: '2'
scaleDownFactor: '0.5'
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
repositoryNames:
- ${TEST_REPO}


@@ -1,44 +0,0 @@
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerDeployment
metadata:
name: example-runnerdeploy
spec:
# replicas: 1
template:
spec:
repository: ${TEST_REPO}
#
# Custom runner image
#
image: ${RUNNER_NAME}:${RUNNER_TAG}
imagePullPolicy: IfNotPresent
# Whether to pass --ephemeral (true) or --once (false, deprecated)
env:
- name: RUNNER_FEATURE_FLAG_EPHEMERAL
value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
#
# dockerd within runner container
#
## Replace `mumoshu/actions-runner-dind:dev` with your dind image
#dockerdWithinRunnerContainer: true
#image: mumoshu/actions-runner-dind:dev
#
# Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
#
#dockerMTU: 1450
#Runner group
# labels:
# - "mylabel 1"
# - "mylabel 2"
labels:
- "${RUNNER_LABEL}"
#
# Non-standard working directory
#
# workDir: "/"


@@ -1,29 +0,0 @@
apiVersion: actions.summerwind.dev/v1alpha1
kind: HorizontalRunnerAutoscaler
metadata:
name: example-runnerset
spec:
scaleTargetRef:
kind: RunnerSet
name: example-runnerset
scaleUpTriggers:
- githubEvent:
checkRun:
types: ["created"]
status: "queued"
amount: 1
duration: "1m"
# RunnerSet doesn't support scale from/to zero yet
minReplicas: 1
maxReplicas: 5
# This should be less than 600 (seconds, the default) for faster testing
scaleDownDelaySecondsAfterScaleOut: 60
metrics:
- type: PercentageRunnersBusy
scaleUpThreshold: '0.75'
scaleDownThreshold: '0.3'
scaleUpFactor: '2'
scaleDownFactor: '0.5'
- type: TotalNumberOfQueuedAndInProgressWorkflowRuns
repositoryNames:
- ${TEST_REPO}


@@ -18,15 +18,12 @@ package controllers
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/actions-runner-controller/actions-runner-controller/hash"
"github.com/go-logr/logr"
gogithub "github.com/google/go-github/v39/github"
"k8s.io/apimachinery/pkg/util/wait"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
@@ -92,12 +89,6 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return ctrl.Result{}, client.IgnoreNotFound(err)
}
err := runner.Validate()
if err != nil {
log.Info("Failed to validate runner spec", "error", err.Error())
return ctrl.Result{}, nil
}
if runner.ObjectMeta.DeletionTimestamp.IsZero() {
finalizers, added := addFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
@@ -125,34 +116,6 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return r.processRunnerDeletion(runner, ctx, log, &pod)
}
registrationOnly := metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly)
if registrationOnly && runner.Status.Phase != "" {
// At this point we are sure that the registration-only runner has been successfully configured and
// is of `offline` status, because we set runner.Status.Phase to that of the runner pod only after
// successful registration.
var pod corev1.Pod
if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
if !kerrors.IsNotFound(err) {
log.Info(fmt.Sprintf("Retrying soon as we failed to get registration-only runner pod: %v", err))
return ctrl.Result{Requeue: true}, nil
}
} else if err := r.Delete(ctx, &pod); err != nil {
if !kerrors.IsNotFound(err) {
log.Info(fmt.Sprintf("Retrying soon as we failed to delete registration-only runner pod: %v", err))
return ctrl.Result{Requeue: true}, nil
}
}
log.Info("Successfully deleted registration-only runner pod to free node and cluster resource")
// Return here to not recreate the deleted pod, because recreating it is a waste of cluster and node resources,
// and also defeats the original purpose of scale-from/to-zero we're trying to implement by using the registration-only runner.
return ctrl.Result{}, nil
}
var pod corev1.Pod
if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
if !kerrors.IsNotFound(err) {
@@ -162,281 +125,31 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return r.processRunnerCreation(ctx, runner, log)
}
// Pod already exists
if !pod.ObjectMeta.DeletionTimestamp.IsZero() {
return r.processRunnerPodDeletion(ctx, runner, log, pod)
phase := string(pod.Status.Phase)
if phase == "" {
phase = "Created"
}
// If the pod ended up succeeded, we need to restart it.
// This happens e.g. when dind is in the runner and the run completes.
stopped := runnerPodOrContainerIsStopped(&pod)
ephemeral := runner.Spec.Ephemeral == nil || *runner.Spec.Ephemeral
if stopped && ephemeral {
log.V(1).Info("Ephemeral runner has been stopped successfully. Marking this runner for deletion.")
// This is the key to make ephemeral runners to work reliably with webhook-based autoscale.
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/911#issuecomment-1046161384 for more context.
//
// In the next reconciliation loop, this triggers a runner unregistration.
// (Note that the unregistration can fail safely because an ephemeral runner usually unregisters itself from GitHub but we do it just for confirmation)
//
// See the code path above that is executed when `runner.ObjectMeta.DeletionTimestamp.IsZero()` isn't true,
// which handles the unregistration, the removal of the completed pod, and so on.
if err := r.Delete(ctx, &runner); err != nil {
log.V(1).Error(err, "Retrying to mark this runner for deletion in 10 seconds.")
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
if runner.Status.Phase != phase {
if pod.Status.Phase == corev1.PodRunning {
// Seeing this message, you can expect the runner to become `Running` soon.
log.V(1).Info(
"Runner appears to have been registered and running.",
"podCreationTimestamp", pod.CreationTimestamp,
)
}
return ctrl.Result{Requeue: true}, nil
}
updated := runner.DeepCopy()
updated.Status.Phase = phase
updated.Status.Reason = pod.Status.Reason
updated.Status.Message = pod.Status.Message
restart := stopped
if registrationOnly && stopped {
restart = false
log.Info(
"Observed that registration-only runner for scaling-from-zero has successfully stopped. " +
"Unlike other pods, this one will be recreated only when runner spec changes.",
)
}
if updated, err := r.updateRegistrationToken(ctx, runner); err != nil {
return ctrl.Result{}, err
} else if updated {
return ctrl.Result{Requeue: true}, nil
}
newPod, err := r.newPod(runner)
if err != nil {
log.Error(err, "Could not create pod")
return ctrl.Result{}, err
}
if registrationOnly {
newPod.Spec.Containers[0].Env = append(
newPod.Spec.Containers[0].Env,
corev1.EnvVar{
Name: "RUNNER_REGISTRATION_ONLY",
Value: "true",
},
)
}
var registrationRecheckDelay time.Duration
// all checks done below only decide whether a restart is needed
// if a restart was already decided before, there is no need for the checks
// saving API calls and scary log messages
if !restart {
registrationCheckInterval := time.Minute
if r.RegistrationRecheckInterval > 0 {
registrationCheckInterval = r.RegistrationRecheckInterval
}
// We want to call ListRunners GitHub Actions API only once per runner per minute.
// This if block, in conjunction with:
// return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
// achieves that.
if lastCheckTime := runner.Status.LastRegistrationCheckTime; lastCheckTime != nil {
nextCheckTime := lastCheckTime.Add(registrationCheckInterval)
now := time.Now()
// Requeue scheduled by RequeueAfter can happen a bit earlier (like dozens of milliseconds)
// so to avoid excessive, in-effective retry, we heuristically ignore the remaining delay in case it is
// shorter than 1s
requeueAfter := nextCheckTime.Sub(now) - time.Second
if requeueAfter > 0 {
log.Info(
fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter),
"lastRegistrationCheckTime", lastCheckTime,
"registrationCheckInterval", registrationCheckInterval,
)
// Without RequeueAfter, the controller may not retry as scheduled. Instead, it would wait until the
// next sync period passes, which can be much later than nextCheckTime.
//
// We need to requeue on this reconciliation even though we have already scheduled the initial
// requeue previously with `return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil`.
// Apparently, the workqueue used by controller-runtime deduplicates requeues and resets the delay on
// subsequent requeues, so the initially scheduled requeue may have been reset by a requeue on
// spec/status change.
return ctrl.Result{RequeueAfter: requeueAfter}, nil
}
}
notFound := false
offline := false
runnerBusy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
currentTime := time.Now()
if err != nil {
var notFoundException *github.RunnerNotFound
var offlineException *github.RunnerOffline
if errors.As(err, &notFoundException) {
notFound = true
} else if errors.As(err, &offlineException) {
offline = true
} else {
var e *gogithub.RateLimitError
if errors.As(err, &e) {
// We log the underlying error when we fail calling the GitHub API to list or unregister runners,
// or when the runner is still busy.
log.Error(
err,
fmt.Sprintf(
"Failed to check if runner is busy due to Github API rate limit. Retrying in %s to avoid excessive GitHub API calls",
retryDelayOnGitHubAPIRateLimitError,
),
)
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
}
return ctrl.Result{}, err
}
}
// See the `newPod` function called above for more information
// about when this hash changes.
curHash := pod.Labels[LabelKeyPodTemplateHash]
newHash := newPod.Labels[LabelKeyPodTemplateHash]
if !runnerBusy && curHash != newHash {
restart = true
}
registrationTimeout := 10 * time.Minute
durationAfterRegistrationTimeout := currentTime.Sub(pod.CreationTimestamp.Add(registrationTimeout))
registrationDidTimeout := durationAfterRegistrationTimeout > 0
if notFound {
if registrationDidTimeout {
log.Info(
"Runner failed to register itself to GitHub in timely manner. "+
"Recreating the pod to see if it resolves the issue. "+
"CAUTION: If you see this a lot, you should investigate the root cause. "+
"See https://github.com/actions-runner-controller/actions-runner-controller/issues/288",
"podCreationTimestamp", pod.CreationTimestamp,
"currentTime", currentTime,
"configuredRegistrationTimeout", registrationTimeout,
)
restart = true
} else {
log.V(1).Info(
"Runner pod exists but we failed to check if runner is busy. Apparently it still needs more time.",
"runnerName", runner.Name,
)
}
} else if offline {
if registrationOnly {
log.Info(
"Observed that registration-only runner for scaling-from-zero has successfully been registered.",
"podCreationTimestamp", pod.CreationTimestamp,
"currentTime", currentTime,
"configuredRegistrationTimeout", registrationTimeout,
)
} else if registrationDidTimeout {
if runnerBusy {
log.Info(
"Timeout out while waiting for the runner to be online, but observed that it's busy at the same time."+
"This is a known (unintuitive) behaviour of a runner that is already running a job. Please see https://github.com/actions-runner-controller/actions-runner-controller/issues/911",
"podCreationTimestamp", pod.CreationTimestamp,
"currentTime", currentTime,
"configuredRegistrationTimeout", registrationTimeout,
)
} else {
log.Info(
"Already existing GitHub runner still appears offline . "+
"Recreating the pod to see if it resolves the issue. "+
"CAUTION: If you see this a lot, you should investigate the root cause. ",
"podCreationTimestamp", pod.CreationTimestamp,
"currentTime", currentTime,
"configuredRegistrationTimeout", registrationTimeout,
)
restart = true
}
} else {
log.V(1).Info(
"Runner pod exists but the GitHub runner appears to be still offline. Waiting for runner to get online ...",
"runnerName", runner.Name,
)
}
}
if (notFound || (offline && !registrationOnly)) && !registrationDidTimeout {
registrationRecheckJitter := 10 * time.Second
if r.RegistrationRecheckJitter > 0 {
registrationRecheckJitter = r.RegistrationRecheckJitter
}
registrationRecheckDelay = registrationCheckInterval + wait.Jitter(registrationRecheckJitter, 0.1)
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
log.Error(err, "Failed to update runner status for Phase/Reason/Message")
return ctrl.Result{}, err
}
}
// Don't do anything if there's no need to restart the runner
if !restart {
// This guard enables us to update runner.Status.Phase to `Running` only after
// the runner is registered to GitHub.
if registrationRecheckDelay > 0 {
log.V(1).Info(fmt.Sprintf("Rechecking the runner registration in %s", registrationRecheckDelay))
updated := runner.DeepCopy()
updated.Status.LastRegistrationCheckTime = &metav1.Time{Time: time.Now()}
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
log.Error(err, "Failed to update runner status for LastRegistrationCheckTime")
return ctrl.Result{}, err
}
return ctrl.Result{RequeueAfter: registrationRecheckDelay}, nil
}
if runner.Status.Phase != string(pod.Status.Phase) {
if pod.Status.Phase == corev1.PodRunning {
// Seeing this message, you can expect the runner to become `Running` soon.
log.Info(
"Runner appears to have registered and running.",
"podCreationTimestamp", pod.CreationTimestamp,
)
}
updated := runner.DeepCopy()
updated.Status.Phase = string(pod.Status.Phase)
updated.Status.Reason = pod.Status.Reason
updated.Status.Message = pod.Status.Message
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&runner)); err != nil {
log.Error(err, "Failed to update runner status for Phase/Reason/Message")
return ctrl.Result{}, err
}
}
return ctrl.Result{}, nil
}
updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name, &pod)
if res != nil {
return *res, err
}
// Only delete the pod if we successfully unregistered the runner or the runner is already deleted from the service.
// This should help us avoid a race condition where the runner picks up a job after we have decided it is not busy.
if err := r.Delete(ctx, updatedPod); err != nil {
log.Error(err, "Failed to delete pod resource")
return ctrl.Result{}, err
}
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Deleted pod '%s'", newPod.Name))
log.Info("Deleted runner pod", "repository", runner.Spec.Repository)
return ctrl.Result{}, nil
}
@@ -480,11 +193,6 @@ func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx con
finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
if removed {
_, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name, pod)
if res != nil {
return *res, err
}
newRunner := runner.DeepCopy()
newRunner.ObjectMeta.Finalizers = finalizers
@@ -499,60 +207,6 @@ func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx con
return ctrl.Result{}, nil
}
func (r *RunnerReconciler) unregistrationTimeout() time.Duration {
unregistrationTimeout := DefaultUnregistrationTimeout
if r.UnregistrationTimeout > 0 {
unregistrationTimeout = r.UnregistrationTimeout
}
return unregistrationTimeout
}
func (r *RunnerReconciler) unregistrationRetryDelay() time.Duration {
retryDelay := DefaultUnregistrationRetryDelay
if r.UnregistrationRetryDelay > 0 {
retryDelay = r.UnregistrationRetryDelay
}
return retryDelay
}
func (r *RunnerReconciler) processRunnerPodDeletion(ctx context.Context, runner v1alpha1.Runner, log logr.Logger, pod corev1.Pod) (reconcile.Result, error) {
deletionTimeout := 1 * time.Minute
currentTime := time.Now()
deletionDidTimeout := currentTime.Sub(pod.DeletionTimestamp.Add(deletionTimeout)) > 0
if deletionDidTimeout {
log.Info(
fmt.Sprintf("Failed to delete pod within %s. ", deletionTimeout)+
"This is typically the case when a Kubernetes node became unreachable "+
"and the kube controller started evicting nodes. Forcefully deleting the pod to not get stuck.",
"podDeletionTimestamp", pod.DeletionTimestamp,
"currentTime", currentTime,
"configuredDeletionTimeout", deletionTimeout,
)
var force int64 = 0
// forcefully delete runner as we would otherwise get stuck if the node stays unreachable
if err := r.Delete(ctx, &pod, &client.DeleteOptions{GracePeriodSeconds: &force}); err != nil {
// The pod has probably been deleted already; ignore NotFound.
if !kerrors.IsNotFound(err) {
log.Error(err, "Failed to forcefully delete pod resource ...")
return ctrl.Result{}, err
}
// forceful deletion finally succeeded
return ctrl.Result{Requeue: true}, nil
}
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Forcefully deleted pod '%s'", pod.Name))
log.Info("Forcefully deleted runner pod", "repository", runner.Spec.Repository)
// give kube manager a little time to forcefully delete the stuck pod
return ctrl.Result{RequeueAfter: 3 * time.Second}, nil
} else {
return ctrl.Result{}, nil
}
}
func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1alpha1.Runner, log logr.Logger) (reconcile.Result, error) {
if updated, err := r.updateRegistrationToken(ctx, runner); err != nil {
return ctrl.Result{}, err
@@ -584,6 +238,7 @@ func (r *RunnerReconciler) processRunnerCreation(ctx context.Context, runner v1a
r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodCreated", fmt.Sprintf("Created pod '%s'", newPod.Name))
log.Info("Created runner pod", "repository", runner.Spec.Repository)
return ctrl.Result{}, nil
}
@@ -696,7 +351,7 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
registrationOnly := metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly)
pod, err := newRunnerPod(template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL, registrationOnly)
pod, err := newRunnerPod(runner.Name, template, runner.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubClient.GithubBaseURL, registrationOnly)
if err != nil {
return pod, err
}
@@ -813,7 +468,7 @@ func mutatePod(pod *corev1.Pod, token string) *corev1.Pod {
return updated
}
func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string, registrationOnly bool) (corev1.Pod, error) {
func newRunnerPod(runnerName string, template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, defaultRunnerImage string, defaultRunnerImagePullSecrets []string, defaultDockerImage, defaultDockerRegistryMirror string, githubBaseURL string, registrationOnly bool) (corev1.Pod, error) {
var (
privileged bool = true
dockerdInRunner bool = runnerSpec.DockerdWithinRunnerContainer != nil && *runnerSpec.DockerdWithinRunnerContainer
@@ -822,6 +477,12 @@ func newRunnerPod(template corev1.Pod, runnerSpec v1alpha1.RunnerConfig, default
dockerdInRunnerPrivileged bool = dockerdInRunner
)
template = *template.DeepCopy()
// This label selector is used by default when rd.Spec.Selector is empty.
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerName)
template.ObjectMeta.Labels = CloneAndAddLabel(template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
workDir := runnerSpec.WorkDir
if workDir == "" {
workDir = "/runner/_work"


@@ -10,6 +10,7 @@ import (
"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -26,6 +27,7 @@ type podsForOwner struct {
runner *v1alpha1.Runner
statefulSet *appsv1.StatefulSet
owner owner
object client.Object
synced bool
pods []corev1.Pod
}
@@ -52,6 +54,9 @@ func (r *ownerRunner) pods(ctx context.Context, c client.Client) ([]corev1.Pod,
var pod corev1.Pod
if err := c.Get(ctx, types.NamespacedName{Namespace: r.Runner.Namespace, Name: r.Runner.Name}, &pod); err != nil {
if errors.IsNotFound(err) {
return nil, nil
}
r.Log.Error(err, "Failed to get pod managed by runner")
return nil, err
}
@@ -70,7 +75,7 @@ func (r *ownerRunner) withAnnotation(k, v string) client.Object {
}
func (r *ownerRunner) synced() bool {
return true
return r.Runner.Status.Phase != ""
}
type ownerStatefulSet struct {
@@ -104,7 +109,7 @@ func (s *ownerStatefulSet) pods(ctx context.Context, c client.Client) ([]corev1.
}
func (s *ownerStatefulSet) templateHash() (string, bool) {
return getStatefulSetTemplateHash(s.StatefulSet)
return getRunnerTemplateHash(s.StatefulSet)
}
func (s *ownerStatefulSet) withAnnotation(k, v string) client.Object {
@@ -132,23 +137,26 @@ func getPodsForOwner(ctx context.Context, c client.Client, log logr.Logger, o cl
owner owner
runner *v1alpha1.Runner
statefulSet *appsv1.StatefulSet
object client.Object
)
switch v := o.(type) {
case *v1alpha1.Runner:
owner = &ownerRunner{
Object: v,
Log: log,
Runner: v,
Object: v,
}
runner = v
object = v
case *appsv1.StatefulSet:
owner = &ownerStatefulSet{
Object: v,
Log: log,
StatefulSet: v,
Object: v,
}
statefulSet = v
object = v
default:
return nil, fmt.Errorf("BUG: Unsupported runner pods owner %v(%T)", v, v)
}
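For orientation, the `owner` value that both branches above construct can be reconstructed from how it is used in this file. This is a hedged sketch, not the actual definition from the repository; the embedded client.Object is an assumption based on call sites such as ss.owner.GetDeletionTimestamp():

```go
package sketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// owner abstracts over the two kinds of runner-pod owners handled here:
// a Runner (one pod) and a StatefulSet (N pods). Both ownerRunner and
// ownerStatefulSet in the surrounding code implement these methods.
type owner interface {
	client.Object // assumed: call sites use GetDeletionTimestamp etc.

	pods(ctx context.Context, c client.Client) ([]corev1.Pod, error)
	templateHash() (string, bool)
	withAnnotation(k, v string) client.Object
	synced() bool
}
```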
@@ -209,19 +217,14 @@ func getPodsForOwner(ctx context.Context, c client.Client, log logr.Logger, o cl
runner: runner,
statefulSet: statefulSet,
owner: owner,
object: object,
synced: synced,
pods: pods,
}, nil
}
func getRunnerTemplateHash(r *v1alpha1.Runner) (string, bool) {
hash, ok := r.Labels[LabelKeyRunnerTemplateHash]
return hash, ok
}
func getStatefulSetTemplateHash(rs *appsv1.StatefulSet) (string, bool) {
hash, ok := rs.Labels[LabelKeyRunnerTemplateHash]
func getRunnerTemplateHash(r client.Object) (string, bool) {
hash, ok := r.GetLabels()[LabelKeyRunnerTemplateHash]
return hash, ok
}
@@ -235,7 +238,18 @@ type result struct {
currentObjects []*podsForOwner
}
func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger, effectiveTime *metav1.Time, newDesiredReplicas int, desiredTemplateHash string, create client.Object, ephemeral bool, owners []client.Object) (*result, error) {
// Why must `create` be a function rather than a client.Object? Because we use it to create one or more objects on scale up.
//
// We use client.Create to create a necessary number of client.Object. client.Create mutates the passed object on a successful creation.
// It seems to set .Revision at least, and the existence of .Revision makes client.Create fail due to the K8s restriction that an object being just created
// can't have .Revision.
// Now, imagine that you are to add 2 runner replicas on scale up.
// We create one resource object per replica, which ends up in 2 client.Create calls.
// If we were reusing one client.Object across the client.Create calls, only the first call would succeed.
// The second call would fail because the first call mutated the client.Object to have .Revision.
// Passing a factory function of client.Object and creating a brand-new client.Object per client.Create call resolves this issue,
// allowing us to create two or more replicas in one reconciliation loop without being rejected by K8s.
func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger, effectiveTime *metav1.Time, newDesiredReplicas int, create func() client.Object, ephemeral bool, owners []client.Object) (*result, error) {
state, err := collectPodsForOwners(ctx, c, log, owners)
if err != nil || state == nil {
return nil, err
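To make the factory-function requirement concrete, here is a self-contained sketch of the pattern. It is illustrative only; `scaleUp` and `podFactory` are hypothetical names, not functions from this repository:

```go
package sketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// scaleUp demonstrates why a factory is passed instead of a single object:
// client.Create mutates its argument on a successful creation (it fills in
// server-set metadata), and the API server rejects a Create whose object
// already carries such fields. Calling the factory once per replica
// sidesteps that.
func scaleUp(ctx context.Context, c client.Client, create func() client.Object, num int) error {
	for i := 0; i < num; i++ {
		// A brand-new object per call; never reuse a mutated one.
		if err := c.Create(ctx, create()); err != nil {
			return err
		}
	}
	return nil
}

// podFactory is an example factory in the same spirit as the ones this
// commit passes in: deep-copy one desired object per call.
func podFactory(desired *corev1.Pod) func() client.Object {
	return func() client.Object { return desired.DeepCopy() }
}
```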
@@ -265,6 +279,13 @@ func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger,
// Even though the error message includes "Forbidden", this error's reason is "Invalid".
// So we used to match these errors by using errors.IsInvalid. But that's another story...
desiredTemplateHash, ok := getRunnerTemplateHash(create())
if !ok {
log.Info("Failed to get template hash of desired owner resource. It must be in an invalid state. Please manually delete the owner so that it is recreated")
return nil, nil
}
currentObjects := podsForOwnersPerTemplateHash[desiredTemplateHash]
sort.SliceStable(currentObjects, func(i, j int) bool {
@@ -289,13 +310,20 @@ func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger,
regTimeout += ss.regTimeout
}
numOwners := len(owners)
var hashes []string
for h := range state.podsForOwners {
hashes = append(hashes, h)
}
log.V(2).Info(
"Found some pods across owner(s)",
"pending", pending,
"running", running,
"regTimeout", regTimeout,
"desired", newDesiredReplicas,
"owners", len(owners),
"owners", numOwners,
)
maybeRunning := pending + running
@@ -307,15 +335,39 @@ func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger,
for i := 0; i < num; i++ {
// Add more replicas
if err := c.Create(ctx, create); err != nil {
if err := c.Create(ctx, create()); err != nil {
return nil, err
}
}
log.V(2).Info("Created object(s) to add more replicas", "num", num)
log.V(1).Info("Created replica(s)",
"created", num,
"templateHashDesired", desiredTemplateHash,
"replicasDesired", newDesiredReplicas,
"replicasMaybeRunning", maybeRunning,
"templateHashObserved", hashes,
)
return nil, nil
} else if newDesiredReplicas <= running {
// If you use ephemeral runners with webhook-based autoscaler and the runner controller is working normally,
// you're unlikely to fall into this branch.
//
// That's because all the stakeholders work like this:
//
// 1. A runner pod completes with the runner container exiting with code 0
// 2. ARC runner controller detects the pod completion, marks the owner (runner or statefulset) resource on k8s for deletion (=Runner.DeletionTimestamp becomes non-zero)
// 3. GitHub triggers a corresponding workflow_job "complete" webhook event
// 4. ARC github-webhook-server (webhook-based autoscaler) receives the webhook event and updates the HRA, removing the oldest capacity reservation
// 5. ARC horizontalrunnerautoscaler updates RunnerDeployment's desired replicas based on capacity reservations
// 6. ARC runnerdeployment controller updates RunnerReplicaSet's desired replicas
// 7. (We're here) ARC runnerset or runnerreplicaset controller starts reconciling the owner resource (statefulset or runner)
//
// In a normally working ARC installation, the runner that was used to run the workflow job should already have been
// marked for deletion by the runner controller.
// This controller doesn't count marked runners into the `running` value, hence you're unlikely to
// fall into this branch when you're using ephemeral runners with webhook-based-autoscaler.
var retained int
var delete []*podsForOwner
@@ -354,7 +406,7 @@ func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger,
if _, ok := getAnnotation(ss.owner, AnnotationKeyUnregistrationRequestTimestamp); !ok {
updated := ss.owner.withAnnotation(AnnotationKeyUnregistrationRequestTimestamp, time.Now().Format(time.RFC3339))
if err := c.Patch(ctx, updated, client.MergeFrom(ss.owner)); err != nil {
if err := c.Patch(ctx, updated, client.MergeFrom(ss.object)); err != nil {
log.Error(err, fmt.Sprintf("Failed to patch object to have %s annotation", AnnotationKeyUnregistrationRequestTimestamp))
return nil, err
}
@@ -379,7 +431,7 @@ func syncRunnerPodsOwners(ctx context.Context, c client.Client, log logr.Logger,
for _, ss := range sss {
if ss.templateHash != desiredTemplateHash {
if ss.owner.GetDeletionTimestamp().IsZero() {
if err := c.Delete(ctx, ss.owner); err != nil {
if err := c.Delete(ctx, ss.object); err != nil {
log.Error(err, "Unable to delete object")
return nil, err
}
@@ -417,6 +469,12 @@ func collectPodsForOwners(ctx context.Context, c client.Client, log logr.Logger,
return nil, err
}
if res.templateHash == "" {
log.Info("validation error: runner pod owner must have template hash", "object", res.object)
return nil, nil
}
// Statefulset termination process 4/4: Let Kubernetes cascade-delete the statefulset and the pods.
//
// If the runner is already marked for deletion (=has a non-zero deletion timestamp) by the runner controller (can be caused by an ephemeral runner completion)
@@ -429,7 +487,7 @@ func collectPodsForOwners(ctx context.Context, c client.Client, log logr.Logger,
// Statefulset termination process 3/4: Set the deletionTimestamp to let Kubernetes start a cascade deletion of the statefulset and the pods.
if _, ok := getAnnotation(res.owner, AnnotationKeyUnregistrationCompleteTimestamp); ok {
if err := c.Delete(ctx, res.owner); err != nil {
if err := c.Delete(ctx, res.object); err != nil {
log.Error(err, "Failed to delete owner")
return nil, err
}
@@ -454,7 +512,7 @@ func collectPodsForOwners(ctx context.Context, c client.Client, log logr.Logger,
if _, ok := getAnnotation(res.owner, AnnotationKeyUnregistrationCompleteTimestamp); !ok {
updated := res.owner.withAnnotation(AnnotationKeyUnregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
if err := c.Patch(ctx, updated, client.MergeFrom(res.owner)); err != nil {
if err := c.Patch(ctx, updated, client.MergeFrom(res.object)); err != nil {
log.Error(err, fmt.Sprintf("Failed to patch owner to have %s annotation", AnnotationKeyUnregistrationCompleteTimestamp))
return nil, err
}
@@ -494,6 +552,8 @@ func collectPodsForOwners(ctx context.Context, c client.Client, log logr.Logger,
}
if !res.synced {
log.V(1).Info("Skipped reconcilation because owner is not synced yet", "pods", res.pods)
return nil, nil
}
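The unregistration handshake above is driven by two annotations: the controller first patches the owner with AnnotationKeyUnregistrationRequestTimestamp, and deletes it only once AnnotationKeyUnregistrationCompleteTimestamp is present. A plausible shape for the getAnnotation helper used throughout, inferred from its call sites (an assumption; the real helper is not shown in this diff):

```go
package sketch

import "sigs.k8s.io/controller-runtime/pkg/client"

// getAnnotation reads one annotation off any client.Object and reports
// whether it was present, e.g.
// getAnnotation(owner, AnnotationKeyUnregistrationCompleteTimestamp).
func getAnnotation(o client.Object, key string) (string, bool) {
	annotations := o.GetAnnotations()
	if annotations == nil {
		return "", false
	}
	v, ok := annotations[key]
	return v, ok
}
```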


@@ -18,13 +18,10 @@ package controllers
import (
"context"
"errors"
"fmt"
"reflect"
"time"
"github.com/go-logr/logr"
gogithub "github.com/google/go-github/v39/github"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
@@ -32,7 +29,6 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
@@ -72,15 +68,35 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, nil
}
if rs.ObjectMeta.Labels == nil {
rs.ObjectMeta.Labels = map[string]string{}
}
// The template hash is usually set by the upstream controller (the RunnerDeployment controller) when authoring the
// RunnerReplicaSet resource, but it may be missing when the user created the RunnerReplicaSet directly.
// As a template hash is required by the runner replica management, we dynamically add it here without ever persisting it.
if rs.ObjectMeta.Labels[LabelKeyRunnerTemplateHash] == "" {
template := rs.Spec.DeepCopy()
template.Replicas = nil
template.EffectiveTime = nil
templateHash := ComputeHash(template)
log.Info("Using auto-generated template hash", "value", templateHash)
rs.ObjectMeta.Labels = CloneAndAddLabel(rs.ObjectMeta.Labels, LabelKeyRunnerTemplateHash, templateHash)
rs.Spec.Template.ObjectMeta.Labels = CloneAndAddLabel(rs.Spec.Template.ObjectMeta.Labels, LabelKeyRunnerTemplateHash, templateHash)
}
selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
if err != nil {
return ctrl.Result{}, err
}
// Get the Runners managed by the target RunnerReplicaSet
var allRunners v1alpha1.RunnerList
var runnerList v1alpha1.RunnerList
if err := r.List(
ctx,
&allRunners,
&runnerList,
client.InNamespace(req.Namespace),
client.MatchingLabelsSelector{Selector: selector},
); err != nil {
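The auto-generated template hash above comes from ComputeHash over the spec with Replicas and EffectiveTime stripped. A hedged sketch of how such a hash is typically computed, assuming it mirrors the Kubernetes Deployment controller's pod-template-hash recipe (the repository's actual ComputeHash may differ):

```go
package sketch

import (
	"fmt"
	"hash/fnv"

	"github.com/davecgh/go-spew/spew"
	"k8s.io/apimachinery/pkg/util/rand"
)

// computeTemplateHash hashes a deterministic dump of the spec with FNV-1a
// and encodes the sum into a short, DNS-label-safe string. SortKeys and
// SpewKeys keep the dump stable across runs; DisableMethods avoids
// Stringer output that could hide spec changes.
func computeTemplateHash(spec interface{}) string {
	hasher := fnv.New32a()
	printer := spew.ConfigState{Indent: " ", SortKeys: true, DisableMethods: true, SpewKeys: true}
	printer.Fprintf(hasher, "%#v", spec)
	return rand.SafeEncodeString(fmt.Sprint(hasher.Sum32()))
}
```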
@@ -89,218 +105,43 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
}
}
var (
current int
ready int
available int
lastSyncTime *time.Time
)
for _, r := range allRunners.Items {
// This guard is required so that a RunnerReplicaSet created by controller v0.17.0 or earlier
// does not treat all the runners in the namespace as its children.
if metav1.IsControlledBy(&r, &rs) && !metav1.HasAnnotation(r.ObjectMeta, annotationKeyRegistrationOnly) {
// If the runner is already marked for deletion (=has a non-zero deletion timestamp) by the runner controller (can be caused by an ephemeral runner completion)
// or by the runnerreplicaset controller (in case it was deleted in the previous reconciliation loop),
// we don't need to bother calling GitHub API to re-mark the runner for deletion.
// Just hold on, and runners will disappear as long as the runner controller is up and running.
if !r.DeletionTimestamp.IsZero() {
continue
}
if r.Annotations != nil {
if a, ok := r.Annotations[SyncTimeAnnotationKey]; ok {
t, err := time.Parse(time.RFC3339, a)
if err == nil {
if lastSyncTime == nil || lastSyncTime.Before(t) {
lastSyncTime = &t
}
}
}
}
current += 1
if r.Status.Phase == string(corev1.PodRunning) {
ready += 1
// available is currently the same as ready, as we don't yet have minReadySeconds for runners
available += 1
}
}
}
var desired int
replicas := 1
if rs.Spec.Replicas != nil {
desired = *rs.Spec.Replicas
} else {
desired = 1
}
// TODO: remove this registration runner cleanup later (v0.23.0 or v0.24.0)
//
// We had to have a registration-only runner to support scale-from-zero before.
// But since the Sep 2021 Actions update on GitHub Cloud and GHES 3.3, it is unnecessary.
// See the issues below for more context:
// https://github.com/actions-runner-controller/actions-runner-controller/issues/516
// https://github.com/actions-runner-controller/actions-runner-controller/issues/859
//
// The block below removes existing registration-only runners as unnecessary.
// This logic was introduced in actions-runner-controller 0.21.0 and will probably last one or two minor releases,
// so that the actions-runner-controller instance in everyone's cluster won't leave dangling registration-only runners.
registrationOnlyRunnerNsName := req.NamespacedName
registrationOnlyRunnerNsName.Name = registrationOnlyRunnerNameFor(rs.Name)
registrationOnlyRunner := v1alpha1.Runner{}
registrationOnlyRunnerExists := false
if err := r.Get(
ctx,
registrationOnlyRunnerNsName,
&registrationOnlyRunner,
); err != nil {
if !kerrors.IsNotFound(err) {
return ctrl.Result{}, err
}
} else {
registrationOnlyRunnerExists = true
}
if registrationOnlyRunnerExists {
if err := r.Client.Delete(ctx, &registrationOnlyRunner); err != nil {
log.Error(err, "Retrying soon because we failed to delete registration-only runner")
return ctrl.Result{Requeue: true}, nil
}
replicas = *rs.Spec.Replicas
}
effectiveTime := rs.Spec.EffectiveTime
ephemeral := rs.Spec.Template.Spec.Ephemeral == nil || *rs.Spec.Template.Spec.Ephemeral
if current < desired && ephemeral && lastSyncTime != nil && effectiveTime != nil && lastSyncTime.After(effectiveTime.Time) {
log.V(1).Info("Detected that some ephemeral runners have disappeared. Usually this is due to that ephemeral runner completions so ARC does not create new runners until EffectiveTime is updated.", "lastSyncTime", metav1.Time{Time: *lastSyncTime}, "effectiveTime", *effectiveTime, "desired", desired, "available", current, "ready", ready)
} else if current > desired {
// If you use ephemeral runners with webhook-based autoscaler and the runner controller is working normally,
// you're unlikely to fall into this branch.
//
// That's because all the stakeholders work like this:
//
// 1. A runner pod completes with the runner container exiting with code 0
// 2. ARC runner controller detects the pod completion, marks the runner resource on k8s for deletion (=Runner.DeletionTimestamp becomes non-zero)
// 3. GitHub triggers a corresponding workflow_job "complete" webhook event
// 4. ARC github-webhook-server (webhook-based autoscaler) receives the webhook event and updates the HRA, removing the oldest capacity reservation
// 5. ARC horizontalrunnerautoscaler updates RunnerDeployment's desired replicas based on capacity reservations
// 6. ARC runnerdeployment controller updates RunnerReplicaSet's desired replicas
// 7. (We're here) ARC runnerreplicaset controller (this controller) starts reconciling the RunnerReplicaSet
//
// In a normally working ARC installation, the runner that was used to run the workflow job should already have been
// marked for deletion by the runner controller.
// This runnerreplicaset controller doesn't count marked runners into the `current` value, hence you're unlikely to
// fall into this branch when you're using ephemeral runners with webhook-based-autoscaler.
desired, err := r.newRunner(rs)
if err != nil {
log.Error(err, "Could not create runner")
n := current - desired
log.V(0).Info(fmt.Sprintf("Deleting %d runners from RunnerReplicaSet %s", n, req.NamespacedName), "desired", desired, "current", current, "ready", ready)
// get runners that are currently offline/not busy/timed-out to register
var deletionCandidates []v1alpha1.Runner
for _, runner := range allRunners.Items {
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
if err != nil {
notRegistered := false
offline := false
var notFoundException *github.RunnerNotFound
var offlineException *github.RunnerOffline
if errors.As(err, &notFoundException) {
log.V(1).Info("Failed to check if runner is busy. Either this runner has never been successfully registered to GitHub or it still needs more time.", "runnerName", runner.Name)
notRegistered = true
} else if errors.As(err, &offlineException) {
offline = true
} else {
var e *gogithub.RateLimitError
if errors.As(err, &e) {
// We log the underlying error when we fail calling the GitHub API to list or unregister runners,
// or when the runner is still busy.
log.Error(
err,
fmt.Sprintf(
"Failed to check if runner is busy due to GitHub API rate limit. Retrying in %s to avoid excessive GitHub API calls",
retryDelayOnGitHubAPIRateLimitError,
),
)
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
}
return ctrl.Result{}, err
}
registrationTimeout := 15 * time.Minute
currentTime := time.Now()
registrationDidTimeout := currentTime.Sub(runner.CreationTimestamp.Add(registrationTimeout)) > 0
if notRegistered && registrationDidTimeout {
log.Info(
"Runner failed to register itself to GitHub in timely manner. "+
"Marking the runner for scale down. "+
"CAUTION: If you see this a lot, you should investigate the root cause. "+
"See https://github.com/actions-runner-controller/actions-runner-controller/issues/288",
"runnerCreationTimestamp", runner.CreationTimestamp,
"currentTime", currentTime,
"configuredRegistrationTimeout", registrationTimeout,
)
deletionCandidates = append(deletionCandidates, runner)
}
// offline runners should always be a great target for scale down
if offline {
deletionCandidates = append(deletionCandidates, runner)
}
} else if !busy {
deletionCandidates = append(deletionCandidates, runner)
}
}
if len(deletionCandidates) < n {
n = len(deletionCandidates)
}
log.V(0).Info(fmt.Sprintf("Deleting %d runner(s)", n), "desired", desired, "current", current, "ready", ready)
for i := 0; i < n; i++ {
if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
log.Error(err, "Failed to delete runner resource")
return ctrl.Result{}, err
}
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
log.Info(fmt.Sprintf("Deleted runner %s", deletionCandidates[i].Name))
}
} else if desired > current {
n := desired - current
log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", current, "ready", ready)
for i := 0; i < n; i++ {
newRunner, err := r.newRunner(rs)
if err != nil {
log.Error(err, "Could not create runner")
return ctrl.Result{}, err
}
if err := r.Client.Create(ctx, &newRunner); err != nil {
log.Error(err, "Failed to create runner resource")
return ctrl.Result{}, err
}
}
return ctrl.Result{}, err
}
var status v1alpha1.RunnerReplicaSetStatus
var live []client.Object
for _, r := range runnerList.Items {
r := r
live = append(live, &r)
}
res, err := syncRunnerPodsOwners(ctx, r.Client, log, effectiveTime, replicas, func() client.Object { return desired.DeepCopy() }, ephemeral, live)
if err != nil || res == nil {
return ctrl.Result{}, err
}
var (
status v1alpha1.RunnerReplicaSetStatus
current, available, ready int
)
for _, o := range res.currentObjects {
current += o.total
available += o.running
ready += o.running
}
status.Replicas = &current
status.AvailableReplicas = &available
@@ -322,6 +163,8 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
}
func (r *RunnerReplicaSetReconciler) newRunner(rs v1alpha1.RunnerReplicaSet) (v1alpha1.Runner, error) {
// Note that the upstream controller (runnerdeployment) is expected to add
// the "runner template hash" label to the template.meta which is necessary to make this controller work correctly
objectMeta := rs.Spec.Template.ObjectMeta.DeepCopy()
objectMeta.GenerateName = rs.ObjectMeta.Name + "-"
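GenerateName above delegates unique naming to the API server, which appends a random suffix at creation time so sibling Runners never collide. A minimal illustration of the pattern (the names here are hypothetical):

```go
package sketch

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newChildPod shows the GenerateName pattern: metadata.name is left empty,
// and the API server fills it in at creation time, producing something
// like "example-runnerreplicaset-abc12".
func newChildPod(parentName, namespace string) corev1.Pod {
	return corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: parentName + "-",
			Namespace:    namespace,
		},
	}
}
```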


@@ -7,7 +7,6 @@ import (
"time"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
logf "sigs.k8s.io/controller-runtime/pkg/log"
@@ -102,12 +101,40 @@ func intPtr(v int) *int {
var _ = Context("Inside of a new namespace", func() {
ctx := context.TODO()
ns := SetupTest(ctx)
name := "example-runnerreplicaset"
Describe("when no existing resources exist", func() {
getRunnerCount := func() int {
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
It("should create a new Runner resource from the specified template, add a another Runner on replicas increased, and removes all the replicas when set to 0", func() {
name := "example-runnerreplicaset"
selector, err := metav1.LabelSelectorAsSelector(
&metav1.LabelSelector{
MatchLabels: map[string]string{
"foo": "bar",
},
},
)
if err != nil {
logf.Log.Error(err, "failed to create labelselector")
return -1
}
err = k8sClient.List(
ctx,
&runners,
client.InNamespace(ns.Name),
client.MatchingLabelsSelector{Selector: selector},
)
if err != nil {
logf.Log.Error(err, "list runners")
}
runnersList.Sync(runners.Items)
return len(runners.Items)
}
Describe("RunnerReplicaSet", func() {
It("should create a new Runner resource from the specified template", func() {
{
rs := &actionsv1alpha1.RunnerReplicaSet{
ObjectMeta: metav1.ObjectMeta{
@@ -146,126 +173,99 @@ var _ = Context("Inside of a new namespace", func() {
Expect(err).NotTo(HaveOccurred(), "failed to create test RunnerReplicaSet resource")
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
Eventually(
func() int {
selector, err := metav1.LabelSelectorAsSelector(
&metav1.LabelSelector{
MatchLabels: map[string]string{
"foo": "bar",
},
},
)
if err != nil {
logf.Log.Error(err, "failed to create labelselector")
return -1
}
err = k8sClient.List(
ctx,
&runners,
client.InNamespace(ns.Name),
client.MatchingLabelsSelector{Selector: selector},
)
if err != nil {
logf.Log.Error(err, "list runners")
return -1
}
runnersList.Sync(runners.Items)
return len(runners.Items)
},
time.Second*5, time.Millisecond*500).Should(BeEquivalentTo(1))
getRunnerCount,
time.Second*5, time.Second).Should(BeEquivalentTo(1))
}
})
It("should create 2 runners when specified 2 replicas", func() {
{
// We wrap the update in the Eventually block to avoid the below error that occurs due to concurrent modification
// made by the controller to update .Status.AvailableReplicas and .Status.ReadyReplicas
// Operation cannot be fulfilled on runnerreplicasets.actions.summerwind.dev "example-runnerreplicaset": the object has been modified; please apply your changes to the latest version and try again
Eventually(func() error {
var rs actionsv1alpha1.RunnerReplicaSet
err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: name}, &rs)
Expect(err).NotTo(HaveOccurred(), "failed to get test RunnerReplicaSet resource")
rs.Spec.Replicas = intPtr(2)
return k8sClient.Update(ctx, &rs)
},
time.Second*1, time.Millisecond*500).Should(BeNil())
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
Eventually(
func() int {
selector, err := metav1.LabelSelectorAsSelector(
&metav1.LabelSelector{
MatchLabels: map[string]string{
"foo": "bar",
},
},
)
if err != nil {
logf.Log.Error(err, "failed to create labelselector")
return -1
}
err = k8sClient.List(
ctx,
&runners,
client.InNamespace(ns.Name),
client.MatchingLabelsSelector{Selector: selector},
)
if err != nil {
logf.Log.Error(err, "list runners")
}
runnersList.Sync(runners.Items)
return len(runners.Items)
rs := &actionsv1alpha1.RunnerReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns.Name,
},
time.Second*5, time.Millisecond*500).Should(BeEquivalentTo(2))
}
{
// We wrap the update in the Eventually block to avoid the below error that occurs due to concurrent modification
// made by the controller to update .Status.AvailableReplicas and .Status.ReadyReplicas
// Operation cannot be fulfilled on runnersets.actions.summerwind.dev "example-runnerset": the object has been modified; please apply your changes to the latest version and try again
Eventually(func() error {
var rs actionsv1alpha1.RunnerReplicaSet
err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: name}, &rs)
Expect(err).NotTo(HaveOccurred(), "failed to get test RunnerReplicaSet resource")
rs.Spec.Replicas = intPtr(0)
return k8sClient.Update(ctx, &rs)
},
time.Second*1, time.Millisecond*500).Should(BeNil())
runners := actionsv1alpha1.RunnerList{Items: []actionsv1alpha1.Runner{}}
Eventually(
func() int {
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
Spec: actionsv1alpha1.RunnerReplicaSetSpec{
Replicas: intPtr(2),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"foo": "bar",
},
})
Expect(err).ToNot(HaveOccurred())
if err := k8sClient.List(ctx, &runners, client.InNamespace(ns.Name), client.MatchingLabelsSelector{Selector: selector}); err != nil {
logf.Log.Error(err, "list runners")
return -1
}
runnersList.Sync(runners.Items)
return len(runners.Items)
},
Template: actionsv1alpha1.RunnerTemplate{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"foo": "bar",
},
},
Spec: actionsv1alpha1.RunnerSpec{
RunnerConfig: actionsv1alpha1.RunnerConfig{
Repository: "test/valid",
Image: "bar",
},
RunnerPodSpec: actionsv1alpha1.RunnerPodSpec{
Env: []corev1.EnvVar{
{Name: "FOO", Value: "FOOVALUE"},
},
},
},
},
},
time.Second*5, time.Millisecond*500).Should(BeEquivalentTo(0))
}
err := k8sClient.Create(ctx, rs)
Expect(err).NotTo(HaveOccurred(), "failed to create test RunnerReplicaSet resource")
Eventually(
getRunnerCount,
time.Second*5, time.Second).Should(BeEquivalentTo(2))
}
})
It("should not create any runners when specified 0 replicas", func() {
{
rs := &actionsv1alpha1.RunnerReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns.Name,
},
Spec: actionsv1alpha1.RunnerReplicaSetSpec{
Replicas: intPtr(0),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"foo": "bar",
},
},
Template: actionsv1alpha1.RunnerTemplate{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"foo": "bar",
},
},
Spec: actionsv1alpha1.RunnerSpec{
RunnerConfig: actionsv1alpha1.RunnerConfig{
Repository: "test/valid",
Image: "bar",
},
RunnerPodSpec: actionsv1alpha1.RunnerPodSpec{
Env: []corev1.EnvVar{
{Name: "FOO", Value: "FOOVALUE"},
},
},
},
},
},
}
err := k8sClient.Create(ctx, rs)
Expect(err).NotTo(HaveOccurred(), "failed to create test RunnerReplicaSet resource")
Consistently(
getRunnerCount,
time.Second*5, time.Second).Should(BeEquivalentTo(0))
}
})
})


@@ -105,13 +105,6 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, err
}
desiredTemplateHash, ok := getStatefulSetTemplateHash(desiredStatefulSet)
if !ok {
log.Info("Failed to get template hash of desired statefulset. It must be in an invalid state. Please manually delete the statefulset so that it is recreated")
return ctrl.Result{}, nil
}
addedReplicas := int32(1)
create := desiredStatefulSet.DeepCopy()
create.Spec.Replicas = &addedReplicas
@@ -136,7 +129,7 @@ func (r *RunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
owners = append(owners, &ss)
}
res, err := syncRunnerPodsOwners(ctx, r.Client, log, effectiveTime, newDesiredReplicas, desiredTemplateHash, create, ephemeral, owners)
res, err := syncRunnerPodsOwners(ctx, r.Client, log, effectiveTime, newDesiredReplicas, func() client.Object { return create.DeepCopy() }, ephemeral, owners)
if err != nil || res == nil {
return ctrl.Result{}, err
}
@@ -192,17 +185,12 @@ func (r *RunnerSetReconciler) newStatefulSet(runnerSet *v1alpha1.RunnerSet) (*ap
runnerSetWithOverrides.Labels = append(runnerSetWithOverrides.Labels, l)
}
// This label selector is used by default when rd.Spec.Selector is empty.
runnerSetWithOverrides.Template.ObjectMeta.Labels = CloneAndAddLabel(runnerSetWithOverrides.Template.ObjectMeta.Labels, LabelKeyRunnerSetName, runnerSet.Name)
runnerSetWithOverrides.Template.ObjectMeta.Labels = CloneAndAddLabel(runnerSetWithOverrides.Template.ObjectMeta.Labels, LabelKeyPodMutation, LabelValuePodMutation)
template := corev1.Pod{
ObjectMeta: runnerSetWithOverrides.StatefulSetSpec.Template.ObjectMeta,
Spec: runnerSetWithOverrides.StatefulSetSpec.Template.Spec,
}
pod, err := newRunnerPod(template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL, false)
pod, err := newRunnerPod(runnerSet.Name, template, runnerSet.Spec.RunnerConfig, r.RunnerImage, r.RunnerImagePullSecrets, r.DockerImage, r.DockerRegistryMirror, r.GitHubBaseURL, false)
if err != nil {
return nil, err
}


@@ -265,6 +265,8 @@ func (e *env) installActionsRunnerController(t *testing.T) {
if e.useRunnerSet {
scriptEnv = append(scriptEnv, "USE_RUNNERSET=1")
} else {
scriptEnv = append(scriptEnv, "USE_RUNNERSET=false")
}
varEnv := []string{