Merge pull request #1127 from actions-runner-controller/github-api-cache

Enhances ARC(both the controller-manager and github-webhook-server) to cache any GitHub API responses with HTTP GET and an appropriate Cache-Control header. Ref #920 ## Cache Implementation `gregjones/httpcache` has been chosen as a library to implement this feature, as it is as recommended in `go-github`'s documentation: https://github.com/google/go-github#conditional-requests `gregjones/httpcache` supports a number of cache backends like `diskcache`, `s3cache`, and so on: https://github.com/gregjones/httpcache#cache-backends We stick to the built-in in-memory cache as a starter. Probably this will never becomes an issue as long as various HTTP responses for all the GitHub API calls that ARC makes, list-runners, list-workflow-jobs, list-runner-groups, etc., doesn't overflow the in-memory cache. `httpcache` has an known unfixed issue that it doesn't update cache on chunked responses. But we assume that the APIs that we call doesn't use chunked responses. See #1503 for more information on that. ## Ephemeral runner pods are no longer recreated The addition of the cache layer resulted in a slow down of a scale-down process and a trade-off between making the runner pod termination process fragile to various race conditions(shorter grace period before runner deletion) or delaying runner pod deletion depending on how long the grace period is(longer grace period). A grace period needs to be at least longer than 60s (which is the same as cache duration of ListRunners API) to not prematurely delete a runner pod that was just created. But once I disabled automatic recreation of ephemeral runner pod, it turned out to be no more of an issue when it's being scaled via workflow_job webhook. Ephemeral runner resources are still automatically added on demand by RunnerDeployment via RunnerReplicaSet(I've added `EffectiveTime` fields to our CRDs but that's an implementation detail so let's omit). A good side-effect of disabling ephemeral runner pod recreations is that ARC will no longer create redundant ephemeral runners when used with webhook-based autoscaler. Basically, autoscaling still works as everyone might expect. It's just better than before overall.
2022-02-28 08:37:26 +09:00 · 2022-02-28 08:37:26 +09:00 · 686d40c20d
parent f0fa99fc53 1463d4927f
commit 686d40c20d
32 changed files with 707 additions and 209 deletions
--- a/acceptance/deploy.sh
+++ b/acceptance/deploy.sh
@ -37,7 +37,10 @@ fi

 tool=${ACCEPTANCE_TEST_DEPLOYMENT_TOOL}

+TEST_ID=${TEST_ID:-default}
+
 if [ "${tool}" == "helm" ]; then
+  set -v
  helm upgrade --install actions-runner-controller \
    charts/actions-runner-controller \
    -n actions-runner-system \
@ -46,7 +49,10 @@ if [ "${tool}" == "helm" ]; then
    --set authSecret.create=false \
    --set image.repository=${NAME} \
    --set image.tag=${VERSION} \
+    --set podAnnotations.test-id=${TEST_ID} \
+    --set githubWebhookServer.podAnnotations.test-id=${TEST_ID} \
    -f ${VALUES_FILE}
+  set +v
  # To prevent `CustomResourceDefinition.apiextensions.k8s.io "runners.actions.summerwind.dev" is invalid: metadata.annotations: Too long: must have at most 262144 bytes`
  # errors
  kubectl create -f charts/actions-runner-controller/crds || kubectl replace -f charts/actions-runner-controller/crds
@ -77,7 +83,7 @@ else
 fi

 if [ -n "${TEST_ORG}" ]; then
-  cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= NAME=org-runnerdeploy envsubst | kubectl apply -f -
+  cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= RUNNER_MIN_REPLICAS=${ORG_RUNNER_MIN_REPLICAS} NAME=org-runnerdeploy envsubst | kubectl apply -f -

  if [ -n "${TEST_ORG_GROUP}" ]; then
    cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= TEST_GROUP=${TEST_ORG_GROUP} NAME=orggroup-runnerdeploy envsubst | kubectl apply -f -
--- a/acceptance/testdata/runnerdeploy.envsubst.yaml
+++ b/acceptance/testdata/runnerdeploy.envsubst.yaml
@ -28,6 +28,7 @@ spec:
      ## Replace `mumoshu/actions-runner-dind:dev` with your dind image
      #dockerdWithinRunnerContainer: true
      #image: mumoshu/actions-runner-dind:dev
+      dockerdWithinRunnerContainer: ${RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER}

      #
      # Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
@ -56,6 +57,7 @@ spec:
  scaleUpTriggers:
  - githubEvent: {}
    amount: 1
-    duration: "1m"
-  minReplicas: 0
+    duration: "10m"
+  minReplicas: ${RUNNER_MIN_REPLICAS}
  maxReplicas: 10
+  scaleDownDelaySecondsAfterScaleOut: ${RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT}
--- a/acceptance/values.yaml
+++ b/acceptance/values.yaml
@ -1,7 +1,8 @@
 # Set actions-runner-controller settings for testing
 githubAPICacheDuration: 10s
+logLevel: "-3"
 githubWebhookServer:
-  logLevel: debug
+  logLevel: "-3"
  enabled: true
  labels: {}
  replicaCount: 1
--- a/api/v1alpha1/horizontalrunnerautoscaler_types.go
+++ b/api/v1alpha1/horizontalrunnerautoscaler_types.go
@ -107,6 +107,9 @@ type CapacityReservation struct {
 	Name           string      `json:"name,omitempty"`
 	ExpirationTime metav1.Time `json:"expirationTime,omitempty"`
 	Replicas       int         `json:"replicas,omitempty"`
+
+	// +optional
+	EffectiveTime metav1.Time `json:"effectiveTime,omitempty"`
 }

 type ScaleTargetRef struct {
--- a/api/v1alpha1/runnerdeployment_types.go
+++ b/api/v1alpha1/runnerdeployment_types.go
@ -31,6 +31,14 @@ type RunnerDeploymentSpec struct {
 	// +nullable
 	Replicas *int `json:"replicas,omitempty"`

+	// EffectiveTime is the time the upstream controller requested to sync Replicas.
+	// It is usually populated by the webhook-based autoscaler via HRA.
+	// The value is inherited to RunnerRepicaSet(s) and used to prevent ephemeral runners from unnecessarily recreated.
+	//
+	// +optional
+	// +nullable
+	EffectiveTime *metav1.Time `json:"effectiveTime"`
+
 	// +optional
 	// +nullable
 	Selector *metav1.LabelSelector `json:"selector"`
--- a/api/v1alpha1/runnerreplicaset_types.go
+++ b/api/v1alpha1/runnerreplicaset_types.go
@ -26,6 +26,15 @@ type RunnerReplicaSetSpec struct {
 	// +nullable
 	Replicas *int `json:"replicas,omitempty"`

+	// EffectiveTime is the time the upstream controller requested to sync Replicas.
+	// It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment.
+	// The value is used to prevent runnerreplicaset controller from unnecessarily recreating ephemeral runners
+	// based on potentially outdated Replicas value.
+	//
+	// +optional
+	// +nullable
+	EffectiveTime *metav1.Time `json:"effectiveTime"`
+
 	// +optional
 	// +nullable
 	Selector *metav1.LabelSelector `json:"selector"`
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@ -47,6 +47,7 @@ func (in *CacheEntry) DeepCopy() *CacheEntry {
 func (in *CapacityReservation) DeepCopyInto(out *CapacityReservation) {
 	*out = *in
 	in.ExpirationTime.DeepCopyInto(&out.ExpirationTime)
+	in.EffectiveTime.DeepCopyInto(&out.EffectiveTime)
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservation.
@ -498,6 +499,10 @@ func (in *RunnerDeploymentSpec) DeepCopyInto(out *RunnerDeploymentSpec) {
 		*out = new(int)
 		**out = **in
 	}
+	if in.EffectiveTime != nil {
+		in, out := &in.EffectiveTime, &out.EffectiveTime
+		*out = (*in).DeepCopy()
+	}
 	if in.Selector != nil {
 		in, out := &in.Selector, &out.Selector
 		*out = new(metav1.LabelSelector)
@ -812,6 +817,10 @@ func (in *RunnerReplicaSetSpec) DeepCopyInto(out *RunnerReplicaSetSpec) {
 		*out = new(int)
 		**out = **in
 	}
+	if in.EffectiveTime != nil {
+		in, out := &in.EffectiveTime, &out.EffectiveTime
+		*out = (*in).DeepCopy()
+	}
 	if in.Selector != nil {
 		in, out := &in.Selector, &out.Selector
 		*out = new(metav1.LabelSelector)
--- a/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
+++ b/charts/actions-runner-controller/crds/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
@ -49,6 +49,9 @@ spec:
                  items:
                    description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
                    properties:
+                      effectiveTime:
+                        format: date-time
+                        type: string
                      expirationTime:
                        format: date-time
                        type: string
--- a/charts/actions-runner-controller/crds/actions.summerwind.dev_runnerdeployments.yaml
+++ b/charts/actions-runner-controller/crds/actions.summerwind.dev_runnerdeployments.yaml
@ -48,6 +48,11 @@ spec:
            spec:
              description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
              properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. The value is inherited to RunnerRepicaSet(s) and used to prevent ephemeral runners from unnecessarily recreated.
+                  format: date-time
+                  nullable: true
+                  type: string
                replicas:
                  nullable: true
                  type: integer
--- a/charts/actions-runner-controller/crds/actions.summerwind.dev_runnerreplicasets.yaml
+++ b/charts/actions-runner-controller/crds/actions.summerwind.dev_runnerreplicasets.yaml
@ -45,6 +45,11 @@ spec:
            spec:
              description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
              properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment. The value is used to prevent runnerreplicaset controller from unnecessarily recreating ephemeral runners based on potentially outdated Replicas value.
+                  format: date-time
+                  nullable: true
+                  type: string
                replicas:
                  nullable: true
                  type: integer
--- a/charts/actions-runner-controller/templates/deployment.yaml
+++ b/charts/actions-runner-controller/templates/deployment.yaml
@ -14,6 +14,7 @@ spec:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
+        kubectl.kubernetes.io/default-logs-container: "manager"
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
--- a/charts/actions-runner-controller/templates/githubwebhook.deployment.yaml
+++ b/charts/actions-runner-controller/templates/githubwebhook.deployment.yaml
@ -15,6 +15,7 @@ spec:
    metadata:
      {{- with .Values.githubWebhookServer.podAnnotations }}
      annotations:
+        kubectl.kubernetes.io/default-logs-container: "github-webhook-server"
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
--- a/cmd/githubwebhookserver/main.go
+++ b/cmd/githubwebhookserver/main.go
@ -29,15 +29,14 @@ import (
 	actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
 	"github.com/actions-runner-controller/actions-runner-controller/controllers"
 	"github.com/actions-runner-controller/actions-runner-controller/github"
+	"github.com/actions-runner-controller/actions-runner-controller/logging"
 	"github.com/kelseyhightower/envconfig"
-	zaplib "go.uber.org/zap"
 	"k8s.io/apimachinery/pkg/runtime"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/exec"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/oidc"
 	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 	// +kubebuilder:scaffold:imports
 )

@ -47,11 +46,6 @@ var (
 )

 const (
-	logLevelDebug = "debug"
-	logLevelInfo  = "info"
-	logLevelWarn  = "warn"
-	logLevelError = "error"
-
 	webhookSecretTokenEnvName = "GITHUB_WEBHOOK_SECRET_TOKEN"
 )

@ -97,7 +91,7 @@ func main() {
 	flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
 		"Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
 	flag.DurationVar(&syncPeriod, "sync-period", 10*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled. When you use autoscaling, set to a lower value like 10 minute, because this corresponds to the minimum time to react on demand change")
-	flag.StringVar(&logLevel, "log-level", logLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
+	flag.StringVar(&logLevel, "log-level", logging.LogLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
 	flag.StringVar(&webhookSecretToken, "github-webhook-secret-token", "", "The personal access token of GitHub.")
 	flag.StringVar(&c.Token, "github-token", c.Token, "The personal access token of GitHub.")
 	flag.Int64Var(&c.AppID, "github-app-id", c.AppID, "The application ID of GitHub App.")
@ -126,23 +120,7 @@ func main() {
 		setupLog.Info("-watch-namespace is %q. Only HorizontalRunnerAutoscalers in %q are watched, cached, and considered as scale targets.")
 	}

-	logger := zap.New(func(o *zap.Options) {
-		switch logLevel {
-		case logLevelDebug:
-			o.Development = true
-			lvl := zaplib.NewAtomicLevelAt(-2) // maps to logr's V(2)
-			o.Level = &lvl
-		case logLevelInfo:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel)
-			o.Level = &lvl
-		case logLevelWarn:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel)
-			o.Level = &lvl
-		case logLevelError:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel)
-			o.Level = &lvl
-		}
-	})
+	logger := logging.NewLogger(logLevel)

 	ctrl.SetLogger(logger)

@ -152,6 +130,8 @@ func main() {
 	// That is, all runner groups managed by ARC are assumed to be visible to any repositories,
 	// which is wrong when you have one or more non-default runner groups in your organization or enterprise.
 	if len(c.Token) > 0 || (c.AppID > 0 && c.AppInstallationID > 0 && c.AppPrivateKey != "") || (len(c.BasicauthUsername) > 0 && len(c.BasicauthPassword) > 0) {
+		c.Log = &logger
+
 		ghClient, err = c.NewClient()
 		if err != nil {
 			fmt.Fprintln(os.Stderr, "Error: Client creation failed.", err)
--- a/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
+++ b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml
@ -49,6 +49,9 @@ spec:
                  items:
                    description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
                    properties:
+                      effectiveTime:
+                        format: date-time
+                        type: string
                      expirationTime:
                        format: date-time
                        type: string
--- a/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml
+++ b/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml
@ -48,6 +48,11 @@ spec:
            spec:
              description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
              properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. The value is inherited to RunnerRepicaSet(s) and used to prevent ephemeral runners from unnecessarily recreated.
+                  format: date-time
+                  nullable: true
+                  type: string
                replicas:
                  nullable: true
                  type: integer
--- a/config/crd/bases/actions.summerwind.dev_runnerreplicasets.yaml
+++ b/config/crd/bases/actions.summerwind.dev_runnerreplicasets.yaml
@ -45,6 +45,11 @@ spec:
            spec:
              description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
              properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment. The value is used to prevent runnerreplicaset controller from unnecessarily recreating ephemeral runners based on potentially outdated Replicas value.
+                  format: date-time
+                  nullable: true
+                  type: string
                replicas:
                  nullable: true
                  type: integer
--- a/controllers/horizontal_runner_autoscaler_webhook.go
+++ b/controllers/horizontal_runner_autoscaler_webhook.go
@ -770,8 +770,10 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScale(ctx context.
 	capacityReservations := getValidCapacityReservations(copy)

 	if amount > 0 {
+		now := time.Now()
 		copy.Spec.CapacityReservations = append(capacityReservations, v1alpha1.CapacityReservation{
-			ExpirationTime: metav1.Time{Time: time.Now().Add(target.ScaleUpTrigger.Duration.Duration)},
+			EffectiveTime:  metav1.Time{Time: now},
+			ExpirationTime: metav1.Time{Time: now.Add(target.ScaleUpTrigger.Duration.Duration)},
 			Replicas:       amount,
 		})
 	} else if amount < 0 {
@ -790,10 +792,16 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScale(ctx context.
 		copy.Spec.CapacityReservations = reservations
 	}

-	autoscaler.Log.Info(
+	before := len(target.HorizontalRunnerAutoscaler.Spec.CapacityReservations)
+	expired := before - len(capacityReservations)
+	after := len(copy.Spec.CapacityReservations)
+
+	autoscaler.Log.V(1).Info(
 		"Patching hra for capacityReservations update",
-		"before", target.HorizontalRunnerAutoscaler.Spec.CapacityReservations,
-		"after", copy.Spec.CapacityReservations,
+		"before", before,
+		"expired", expired,
+		"amount", amount,
+		"after", after,
 	)

 	if err := autoscaler.Client.Patch(ctx, copy, client.MergeFrom(&target.HorizontalRunnerAutoscaler)); err != nil {
--- a/controllers/horizontalrunnerautoscaler_controller.go
+++ b/controllers/horizontalrunnerautoscaler_controller.go
@ -99,11 +99,33 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(ctx context.Context, re
 		return r.reconcile(ctx, req, log, hra, st, func(newDesiredReplicas int) error {
 			currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)

+			ephemeral := rd.Spec.Template.Spec.Ephemeral == nil || *rd.Spec.Template.Spec.Ephemeral
+
+			var effectiveTime *time.Time
+
+			for _, r := range hra.Spec.CapacityReservations {
+				t := r.EffectiveTime
+				if effectiveTime == nil || effectiveTime.Before(t.Time) {
+					effectiveTime = &t.Time
+				}
+			}
+
 			// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
 			if currentDesiredReplicas != newDesiredReplicas {
 				copy := rd.DeepCopy()
 				copy.Spec.Replicas = &newDesiredReplicas

+				if ephemeral && effectiveTime != nil {
+					copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
+				}
+
+				if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
+					return fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
+				}
+			} else if ephemeral && effectiveTime != nil {
+				copy := rd.DeepCopy()
+				copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
+
 				if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
 					return fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
 				}
--- a/controllers/integration_test.go
+++ b/controllers/integration_test.go
@ -110,6 +110,8 @@ func SetupIntegrationTest(ctx2 context.Context) *testEnvironment {
 			Name:                        controllerName("runner"),
 			RegistrationRecheckInterval: time.Millisecond,
 			RegistrationRecheckJitter:   time.Millisecond,
+			UnregistrationTimeout:       1 * time.Second,
+			UnregistrationRetryDelay:    1 * time.Second,
 		}
 		err = runnerController.SetupWithManager(mgr)
 		Expect(err).NotTo(HaveOccurred(), "failed to setup runner controller")
--- a/controllers/runner_controller.go
+++ b/controllers/runner_controller.go
@ -72,6 +72,9 @@ type RunnerReconciler struct {
 	Name                        string
 	RegistrationRecheckInterval time.Duration
 	RegistrationRecheckJitter   time.Duration
+
+	UnregistrationTimeout    time.Duration
+	UnregistrationRetryDelay time.Duration
 }

 // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
@ -110,8 +113,23 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
 			return ctrl.Result{}, nil
 		}
 	} else {
+		var p *corev1.Pod
+
+		{
+			var pod corev1.Pod
+			if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
+				if !kerrors.IsNotFound(err) {
+					log.Info(fmt.Sprintf("Retrying soon as we failed to get registration-only runner pod: %v", err))
+
+					return ctrl.Result{Requeue: true}, nil
+				}
+			} else {
+				p = &pod
+			}
+		}
+
 		// Request to remove a runner. DeletionTimestamp was set in the runner - we need to unregister runner
-		return r.processRunnerDeletion(runner, ctx, log)
+		return r.processRunnerDeletion(runner, ctx, log, p)
 	}

 	registrationOnly := metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly)
@ -159,20 +177,27 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr

 	// If pod has ended up succeeded we need to restart it
 	// Happens e.g. when dind is in runner and run completes
-	stopped := pod.Status.Phase == corev1.PodSucceeded
+	stopped := runnerPodOrContainerIsStopped(&pod)

-	if !stopped {
-		if pod.Status.Phase == corev1.PodRunning {
-			for _, status := range pod.Status.ContainerStatuses {
-				if status.Name != containerName {
-					continue
-				}
+	ephemeral := runner.Spec.Ephemeral == nil || *runner.Spec.Ephemeral

-				if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
-					stopped = true
-				}
-			}
+	if stopped && ephemeral {
+		log.V(1).Info("Ephemeral runner has been stopped successfully. Marking this runner for deletion.")
+
+		// This is the key to make ephemeral runners to work reliably with webhook-based autoscale.
+		// See https://github.com/actions-runner-controller/actions-runner-controller/issues/911#issuecomment-1046161384 for more context.
+		//
+		// In the next reconcilation loop, this triggers a runner unregistration.
+		// (Note that the unregistration can fail safely because an ephemeral runner usually unregisters itself from GitHub but we do it just for confirmation)
+		//
+		// See the code path above that is executed when `runner.ObjectMeta.DeletionTimestamp.IsZero()` isn't true,
+		// which handles the unregistrationa the removal of the completed pod, and so on.
+		if err := r.Delete(ctx, &runner); err != nil {
+			log.V(1).Error(err, "Retrying to mark this runner for deletion in 10 seconds.")
+			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
 		}
+
+		return ctrl.Result{Requeue: true}, nil
 	}

 	restart := stopped
@ -404,64 +429,53 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
 		return ctrl.Result{}, nil
 	}

-	// Try to delete current pod if recreation is needed
-	safeToDeletePod := false
-	ok, err := r.unregisterRunner(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
-	if err != nil {
-		log.Error(err, "Failed to unregister runner before deleting the pod.", "runner", runner.Name)
-	} else {
-		// `r.unregisterRunner()` will returns `false, nil` if the runner is not found on GitHub.
-		if !ok {
-			log.Info("Runner no longer exists on GitHub", "runner", runner.Name)
-		}
-
-		safeToDeletePod = true
+	updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name, &pod)
+	if res != nil {
+		return *res, err
 	}

-	if safeToDeletePod {
-		// Only delete the pod if we successfully unregistered the runner or the runner is already deleted from the service.
-		// This should help us avoid race condition between runner pickup job after we think the runner is not busy.
-		if err := r.Delete(ctx, &pod); err != nil {
-			log.Error(err, "Failed to delete pod resource")
-			return ctrl.Result{}, err
-		}
-
-		r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Deleted pod '%s'", newPod.Name))
-		log.Info("Deleted runner pod", "repository", runner.Spec.Repository)
+	// Only delete the pod if we successfully unregistered the runner or the runner is already deleted from the service.
+	// This should help us avoid race condition between runner pickup job after we think the runner is not busy.
+	if err := r.Delete(ctx, updatedPod); err != nil {
+		log.Error(err, "Failed to delete pod resource")
+		return ctrl.Result{}, err
 	}

+	r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Deleted pod '%s'", newPod.Name))
+	log.Info("Deleted runner pod", "repository", runner.Spec.Repository)
+
 	return ctrl.Result{}, nil
 }

-func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger) (reconcile.Result, error) {
+func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool {
+	// If pod has ended up succeeded we need to restart it
+	// Happens e.g. when dind is in runner and run completes
+	stopped := pod.Status.Phase == corev1.PodSucceeded
+
+	if !stopped {
+		if pod.Status.Phase == corev1.PodRunning {
+			for _, status := range pod.Status.ContainerStatuses {
+				if status.Name != containerName {
+					continue
+				}
+
+				if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
+					stopped = true
+				}
+			}
+		}
+	}
+
+	return stopped
+}
+
+func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger, pod *corev1.Pod) (reconcile.Result, error) {
 	finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)

 	if removed {
-		if len(runner.Status.Registration.Token) > 0 {
-			ok, err := r.unregisterRunner(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
-			if err != nil {
-				if errors.Is(err, &gogithub.RateLimitError{}) {
-					// We log the underlying error when we failed calling GitHub API to list or unregisters,
-					// or the runner is still busy.
-					log.Error(
-						err,
-						fmt.Sprintf(
-							"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls",
-							retryDelayOnGitHubAPIRateLimitError,
-						),
-					)
-
-					return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
-				}
-
-				return ctrl.Result{}, err
-			}
-
-			if !ok {
-				log.V(1).Info("Runner no longer exists on GitHub")
-			}
-		} else {
-			log.V(1).Info("Runner was never registered on GitHub")
+		_, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name, pod)
+		if res != nil {
+			return *res, err
 		}

 		newRunner := runner.DeepCopy()
@ -478,6 +492,24 @@ func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx con
 	return ctrl.Result{}, nil
 }

+func (r *RunnerReconciler) unregistrationTimeout() time.Duration {
+	unregistrationTimeout := DefaultUnregistrationTimeout
+
+	if r.UnregistrationTimeout > 0 {
+		unregistrationTimeout = r.UnregistrationTimeout
+	}
+	return unregistrationTimeout
+}
+
+func (r *RunnerReconciler) unregistrationRetryDelay() time.Duration {
+	retryDelay := DefaultUnregistrationRetryDelay
+
+	if r.UnregistrationRetryDelay > 0 {
+		retryDelay = r.UnregistrationRetryDelay
+	}
+	return retryDelay
+}
+
 func (r *RunnerReconciler) processRunnerPodDeletion(ctx context.Context, runner v1alpha1.Runner, log logr.Logger, pod corev1.Pod) (reconcile.Result, error) {
 	deletionTimeout := 1 * time.Minute
 	currentTime := time.Now()
--- a/controllers/runner_graceful_stop.go
+++ b/controllers/runner_graceful_stop.go
@ -0,0 +1,249 @@
+package controllers
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+
+	"github.com/actions-runner-controller/actions-runner-controller/github"
+	"github.com/go-logr/logr"
+	gogithub "github.com/google/go-github/v39/github"
+	corev1 "k8s.io/api/core/v1"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+const (
+	unregistrationCompleteTimestamp = "unregistration-complete-timestamp"
+	unregistrationStartTimestamp    = "unregistration-start-timestamp"
+
+	// DefaultUnregistrationTimeout is the duration until ARC gives up retrying the combo of ListRunners API (to detect the runner ID by name)
+	// and RemoveRunner API (to actually unregister the runner) calls.
+	// This needs to be longer than 60 seconds because a part of the combo, the ListRunners API, seems to use the Cache-Control header of max-age=60s
+	// and that instructs our cache library httpcache to cache responses for 60 seconds, which results in ARC unable to see the runner in the ListRunners response
+	// up to 60 seconds (or even more depending on the situation).
+	DefaultUnregistrationTimeout = 60 * time.Second
+
+	// This can be any value but a larger value can make an unregistration timeout longer than configured in practice.
+	DefaultUnregistrationRetryDelay = 30 * time.Second
+)
+
+// tickRunnerGracefulStop reconciles the runner and the runner pod in a way so that
+// we can delete the runner pod without disrupting a workflow job.
+//
+// This function returns a non-nil pointer to corev1.Pod as the first return value
+// if the runner is considered to have gracefully stopped, hence it's pod is safe for deletion.
+//
+// It's a "tick" operation so a graceful stop can take multiple calls to complete.
+// This function is designed to complete a length graceful stop process in a unblocking way.
+// When it wants to be retried later, the function returns a non-nil *ctrl.Result as the second return value, may or may not populating the error in the second return value.
+// The caller is expected to return the returned ctrl.Result and error to postpone the current reconcilation loop and trigger a scheduled retry.
+func tickRunnerGracefulStop(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
+	if pod != nil {
+		if _, ok := getAnnotation(pod, unregistrationStartTimestamp); !ok {
+			updated := pod.DeepCopy()
+			setAnnotation(updated, unregistrationStartTimestamp, time.Now().Format(time.RFC3339))
+			if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil {
+				log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", unregistrationStartTimestamp))
+				return nil, &ctrl.Result{}, err
+			}
+			pod = updated
+
+			log.Info("Runner has started unregistration")
+		} else {
+			log.Info("Runner has already started unregistration")
+		}
+	}
+
+	if res, err := ensureRunnerUnregistration(ctx, unregistrationTimeout, retryDelay, log, ghClient, enterprise, organization, repository, runner, pod); res != nil {
+		return nil, res, err
+	}
+
+	if pod != nil {
+		if _, ok := getAnnotation(pod, unregistrationCompleteTimestamp); !ok {
+			updated := pod.DeepCopy()
+			setAnnotation(updated, unregistrationCompleteTimestamp, time.Now().Format(time.RFC3339))
+			if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil {
+				log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", unregistrationCompleteTimestamp))
+				return nil, &ctrl.Result{}, err
+			}
+			pod = updated
+
+			log.Info("Runner has completed unregistration")
+		} else {
+			log.Info("Runner has already completed unregistration")
+		}
+	}
+
+	return pod, nil, nil
+}
+
+// If the first return value is nil, it's safe to delete the runner pod.
+func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) {
+	ok, err := unregisterRunner(ctx, ghClient, enterprise, organization, repository, runner)
+	if err != nil {
+		if errors.Is(err, &gogithub.RateLimitError{}) {
+			// We log the underlying error when we failed calling GitHub API to list or unregisters,
+			// or the runner is still busy.
+			log.Error(
+				err,
+				fmt.Sprintf(
+					"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls",
+					retryDelayOnGitHubAPIRateLimitError,
+				),
+			)
+
+			return &ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
+		}
+
+		log.Error(err, "Failed to unregister runner before deleting the pod.")
+
+		return &ctrl.Result{}, err
+	} else if ok {
+		log.Info("Runner has just been unregistered. Removing the runner pod.")
+	} else if pod == nil {
+		// `r.unregisterRunner()` will returns `false, nil` if the runner is not found on GitHub.
+		// However, that doesn't always mean the pod can be safely removed.
+		//
+		// If the pod does not exist for the runner,
+		// it may be due to that the runner pod has never been created.
+		// In that case we can safely assume that the runner will never be registered.
+
+		log.Info("Runner was not found on GitHub and the runner pod was not found on Kuberntes.")
+	} else if pod.Annotations[unregistrationCompleteTimestamp] != "" {
+		// If it's already unregistered in the previous reconcilation loop,
+		// you can safely assume that it won't get registered again so it's safe to delete the runner pod.
+		log.Info("Runner pod is marked as already unregistered.")
+	} else if runnerPodOrContainerIsStopped(pod) {
+		// If it's an ephemeral runner with the actions/runner container exited with 0,
+		// we can safely assume that it has unregistered itself from GitHub Actions
+		// so it's natural that RemoveRunner fails due to 404.
+
+		// If pod has ended up succeeded we need to restart it
+		// Happens e.g. when dind is in runner and run completes
+		log.Info("Runner pod has been stopped with a successful status.")
+	} else if ts := pod.Annotations[unregistrationStartTimestamp]; ts != "" {
+		t, err := time.Parse(time.RFC3339, ts)
+		if err != nil {
+			return &ctrl.Result{RequeueAfter: retryDelay}, err
+		}
+
+		if r := time.Until(t.Add(unregistrationTimeout)); r > 0 {
+			log.Info("Runner unregistration is in-progress.", "timeout", unregistrationTimeout, "remaining", r)
+			return &ctrl.Result{RequeueAfter: retryDelay}, err
+		}
+
+		log.Info("Runner unregistration has been timed out. The runner pod will be deleted soon.", "timeout", unregistrationTimeout)
+	} else {
+		// A runner and a runner pod that is created by this version of ARC should match
+		// any of the above branches.
+		//
+		// But we leave this match all branch for potential backward-compatibility.
+		// The caller is expected to take appropriate actions, like annotating the pod as started the unregistration process,
+		// and retry later.
+		log.V(1).Info("Runner unregistration is being retried later.")
+
+		return &ctrl.Result{RequeueAfter: retryDelay}, nil
+	}
+
+	return nil, nil
+}
+
+func getAnnotation(pod *corev1.Pod, key string) (string, bool) {
+	if pod.Annotations == nil {
+		return "", false
+	}
+
+	v, ok := pod.Annotations[key]
+
+	return v, ok
+}
+
+func setAnnotation(pod *corev1.Pod, key, value string) {
+	if pod.Annotations == nil {
+		pod.Annotations = map[string]string{}
+	}
+
+	pod.Annotations[key] = value
+}
+
+// unregisterRunner unregisters the runner from GitHub Actions by name.
+//
+// This function returns:
+//
+// Case 1. (true, nil) when it has successfully unregistered the runner.
+// Case 2. (false, nil) when (2-1.) the runner has been already unregistered OR (2-2.) the runner will never be created OR (2-3.) the runner is not created yet and it is about to be registered(hence we couldn't see it's existence from GitHub Actions API yet)
+// Case 3. (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
+//   an error returned by GitHub API.
+//
+// When the returned values is "Case 2. (false, nil)", the caller must handle the three possible sub-cases appropriately.
+// In other words, all those three sub-cases cannot be distinguished by this function alone.
+//
+// - Case "2-1." can happen when e.g. ARC has successfully unregistered in a previous reconcilation loop or it was an ephemeral runner that finished it's job run(an ephemeral runner is designed to stop after a job run).
+//   You'd need to maintain the runner state(i.e. if it's already unregistered or not) somewhere,
+//   so that you can either not call this function at all if the runner state says it's already unregistered, or determine that it's case "2-1." when you got (false, nil).
+//
+// - Case "2-2." can happen when e.g. the runner registration token was somehow broken so that `config.sh` within the runner container was never meant to succeed.
+//   Waiting and retrying forever on this case is not a solution, because `config.sh` won't succeed with a wrong token hence the runner gets stuck in this state forever.
+//   There isn't a perfect solution to this, but a practical workaround would be implement a "grace period" in the caller side.
+//
+// - Case "2-3." can happen when e.g. ARC recreated an ephemral runner pod in a previous reconcilation loop and then it was requested to delete the runner before the runner comes up.
+//   If handled inappropriately, this can cause a race condition betweeen a deletion of the runner pod and GitHub scheduling a workflow job onto the runner.
+//
+// Once successfully detected case "2-1." or "2-2.", you can safely delete the runner pod because you know that the runner won't come back
+// as long as you recreate the runner pod.
+//
+// If it was "2-3.", you need a workaround to avoid the race condition.
+//
+// You shall introduce a "grace period" mechanism, similar or equal to that is required for "Case 2-2.", so that you ever
+// start the runner pod deletion only after it's more and more likely that the runner pod is not coming up.
+//
+// Beware though, you need extra care to set an appropriate grace period depending on your environment.
+// There isn't a single right grace period that works for everyone.
+// The longer the grace period is, the earlier a cluster resource shortage can occur due to throttoled runner pod deletions,
+// while the shorter the grace period is, the more likely you may encounter the race issue.
+func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (bool, error) {
+	runners, err := client.ListRunners(ctx, enterprise, org, repo)
+	if err != nil {
+		return false, err
+	}
+
+	id := int64(0)
+	for _, runner := range runners {
+		if runner.GetName() == name {
+			id = runner.GetID()
+			break
+		}
+	}
+
+	if id == int64(0) {
+		return false, nil
+	}
+
+	// For the record, historically ARC did not try to call RemoveRunner on a busy runner, but it's no longer true.
+	// The reason ARC did so was to let a runner running a job to not stop prematurely.
+	//
+	// However, we learned that RemoveRunner already has an ability to prevent stopping a busy runner,
+	// so ARC doesn't need to do anything special for a graceful runner stop.
+	// It can just call RemoveRunner, and if it returned 200 you're guaranteed that the runner will not automatically come back and
+	// the runner pod is safe for deletion.
+	//
+	// Trying to remove a busy runner can result in errors like the following:
+	//    failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
+	//
+	// # NOTES
+	//
+	// - It can be "status=offline" at the same time but that's another story.
+	// - After https://github.com/actions-runner-controller/actions-runner-controller/pull/1127, ListRunners responses that are used to
+	//   determine if the runner is busy can be more outdated than before, as those responeses are now cached for 60 seconds.
+	// - Note that 60 seconds is controlled by the Cache-Control response header provided by GitHub so we don't have a strict control on it but we assume it won't
+	//   change from 60 seconds.
+	//
+	// TODO: Probably we can just remove the runner by ID without seeing if the runner is busy, by treating it as busy when a remove-runner call failed with 422?
+	if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
+		return false, err
+	}
+
+	return true, nil
+}
--- a/controllers/runner_pod_controller.go
+++ b/controllers/runner_pod_controller.go
@ -47,6 +47,9 @@ type RunnerPodReconciler struct {
 	Name                        string
 	RegistrationRecheckInterval time.Duration
 	RegistrationRecheckJitter   time.Duration
+
+	UnregistrationTimeout    time.Duration
+	UnregistrationRetryDelay time.Duration
 }

 const (
@ -105,33 +108,16 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
 		finalizers, removed := removeFinalizer(runnerPod.ObjectMeta.Finalizers, runnerPodFinalizerName)

 		if removed {
-			ok, err := r.unregisterRunner(ctx, enterprise, org, repo, runnerPod.Name)
-			if err != nil {
-				if errors.Is(err, &gogithub.RateLimitError{}) {
-					// We log the underlying error when we failed calling GitHub API to list or unregisters,
-					// or the runner is still busy.
-					log.Error(
-						err,
-						fmt.Sprintf(
-							"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls",
-							retryDelayOnGitHubAPIRateLimitError,
-						),
-					)
-
-					return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
-				}
-
-				return ctrl.Result{}, err
+			updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
+			if res != nil {
+				return *res, err
 			}

-			if !ok {
-				log.V(1).Info("Runner no longer exists on GitHub")
-			}
+			patchedPod := updatedPod.DeepCopy()
+			patchedPod.ObjectMeta.Finalizers = finalizers

-			newRunner := runnerPod.DeepCopy()
-			newRunner.ObjectMeta.Finalizers = finalizers
-
-			if err := r.Patch(ctx, newRunner, client.MergeFrom(&runnerPod)); err != nil {
+			// We commit the removal of the finalizer so that Kuberenetes notices it and delete the pod resource from the cluster.
+			if err := r.Patch(ctx, patchedPod, client.MergeFrom(&runnerPod)); err != nil {
 				log.Error(err, "Failed to update runner for finalizer removal")
 				return ctrl.Result{}, err
 			}
@ -365,8 +351,13 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
 		return ctrl.Result{}, nil
 	}

+	updated, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod)
+	if res != nil {
+		return *res, err
+	}
+
 	// Delete current pod if recreation is needed
-	if err := r.Delete(ctx, &runnerPod); err != nil {
+	if err := r.Delete(ctx, updated); err != nil {
 		log.Error(err, "Failed to delete pod resource")
 		return ctrl.Result{}, err
 	}
@ -377,8 +368,22 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
 	return ctrl.Result{}, nil
 }

-func (r *RunnerPodReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) {
-	return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name)
+func (r *RunnerPodReconciler) unregistrationTimeout() time.Duration {
+	unregistrationTimeout := DefaultUnregistrationTimeout
+
+	if r.UnregistrationTimeout > 0 {
+		unregistrationTimeout = r.UnregistrationTimeout
+	}
+	return unregistrationTimeout
+}
+
+func (r *RunnerPodReconciler) unregistrationRetryDelay() time.Duration {
+	retryDelay := DefaultUnregistrationRetryDelay
+
+	if r.UnregistrationRetryDelay > 0 {
+		retryDelay = r.UnregistrationRetryDelay
+	}
+	return retryDelay
 }

 func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error {
--- a/controllers/runnerdeployment_controller.go
+++ b/controllers/runnerdeployment_controller.go
@ -177,6 +177,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
 	// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
 	if currentDesiredReplicas != newDesiredReplicas {
 		newestSet.Spec.Replicas = &newDesiredReplicas
+		newestSet.Spec.EffectiveTime = rd.Spec.EffectiveTime

 		if err := r.Client.Update(ctx, newestSet); err != nil {
 			log.Error(err, "Failed to update runnerreplicaset resource")
@ -417,9 +418,10 @@ func newRunnerReplicaSet(rd *v1alpha1.RunnerDeployment, commonRunnerLabels []str
 			Labels:       newRSTemplate.ObjectMeta.Labels,
 		},
 		Spec: v1alpha1.RunnerReplicaSetSpec{
-			Replicas: rd.Spec.Replicas,
-			Selector: newRSSelector,
-			Template: newRSTemplate,
+			Replicas:      rd.Spec.Replicas,
+			Selector:      newRSSelector,
+			Template:      newRSTemplate,
+			EffectiveTime: rd.Spec.EffectiveTime,
 		},
 	}

--- a/controllers/runnerreplicaset_controller.go
+++ b/controllers/runnerreplicaset_controller.go
@ -49,6 +49,10 @@ type RunnerReplicaSetReconciler struct {
 	Name         string
 }

+const (
+	SyncTimeAnnotationKey = "sync-time"
+)
+
 // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
@ -85,19 +89,36 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
 		}
 	}

-	var myRunners []v1alpha1.Runner
-
 	var (
 		current   int
 		ready     int
 		available int
+
+		lastSyncTime *time.Time
 	)

 	for _, r := range allRunners.Items {
 		// This guard is required to avoid the RunnerReplicaSet created by the controller v0.17.0 or before
 		// to not treat all the runners in the namespace as its children.
 		if metav1.IsControlledBy(&r, &rs) && !metav1.HasAnnotation(r.ObjectMeta, annotationKeyRegistrationOnly) {
-			myRunners = append(myRunners, r)
+			// If the runner is already marked for deletion(=has a non-zero deletion timestamp) by the runner controller (can be caused by an ephemeral runner completion)
+			// or by runnerreplicaset controller (in case it was deleted in the previous reconcilation loop),
+			// we don't need to bother calling GitHub API to re-mark the runner for deletion.
+			// Just hold on, and runners will disappear as long as the runner controller is up and running.
+			if !r.DeletionTimestamp.IsZero() {
+				continue
+			}
+
+			if r.Annotations != nil {
+				if a, ok := r.Annotations[SyncTimeAnnotationKey]; ok {
+					t, err := time.Parse(time.RFC3339, a)
+					if err == nil {
+						if lastSyncTime == nil || lastSyncTime.Before(t) {
+							lastSyncTime = &t
+						}
+					}
+				}
+			}

 			current += 1

@ -152,7 +173,30 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req
 		}
 	}

-	if current > desired {
+	effectiveTime := rs.Spec.EffectiveTime
+	ephemeral := rs.Spec.Template.Spec.Ephemeral == nil || *rs.Spec.Template.Spec.Ephemeral
+
+	if current < desired && ephemeral && lastSyncTime != nil && effectiveTime != nil && lastSyncTime.After(effectiveTime.Time) {
+		log.V(1).Info("Detected that some ephemeral runners have disappeared. Usually this is due to that ephemeral runner completions so ARC does not create new runners until EffectiveTime is updated.", "lastSyncTime", metav1.Time{Time: *lastSyncTime}, "effectiveTime", *effectiveTime, "desired", desired, "available", current, "ready", ready)
+	} else if current > desired {
+		// If you use ephemeral runners with webhook-based autoscaler and the runner controller is working normally,
+		// you're unlikely to fall into this branch.
+		//
+		// That's becaseu all the stakeholders work like this:
+		//
+		// 1. A runner pod completes with the runner container exiting with code 0
+		// 2. ARC runner controller detects the pod completion, marks the runner resource on k8s for deletion (=Runner.DeletionTimestamp becomes non-zero)
+		// 3. GitHub triggers a corresponding workflow_job "complete" webhook event
+		// 4. ARC github-webhook-server (webhook-based autoscaler) receives the webhook event updates HRA with removing the oldest capacity reservation
+		// 5. ARC horizontalrunnerautoscaler updates RunnerDeployment's desired replicas based on capacity reservations
+		// 6. ARC runnerdeployment controller updates RunnerReplicaSet's desired replicas
+		// 7. (We're here) ARC runnerreplicaset controller (this controller) starts reconciling the RunnerReplicaSet
+		//
+		// In a normally working ARC installation, the runner that was used to run the workflow job should already have been
+		// marked for deletion by the runner controller.
+		// This runnerreplicaset controller doesn't count marked runners into the `current` value, hence you're unlikely to
+		// fall into this branch when you're using ephemeral runners with webhook-based-autoscaler.
+
 		n := current - desired

 		log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "current", current, "ready", ready)
@ -282,6 +326,10 @@ func (r *RunnerReplicaSetReconciler) newRunner(rs v1alpha1.RunnerReplicaSet) (v1

 	objectMeta.GenerateName = rs.ObjectMeta.Name + "-"
 	objectMeta.Namespace = rs.ObjectMeta.Namespace
+	if objectMeta.Annotations == nil {
+		objectMeta.Annotations = map[string]string{}
+	}
+	objectMeta.Annotations[SyncTimeAnnotationKey] = time.Now().Format(time.RFC3339)

 	runner := v1alpha1.Runner{
 		TypeMeta:   metav1.TypeMeta{},
--- a/controllers/unregister.go
+++ b/controllers/unregister.go
@ -1,49 +0,0 @@
-package controllers
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/actions-runner-controller/actions-runner-controller/github"
-)
-
-// unregisterRunner unregisters the runner from GitHub Actions by name.
-//
-// This function returns:
-// - (true, nil) when it has successfully unregistered the runner.
-// - (false, nil) when the runner has been already unregistered.
-// - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
-//   an error returned by GitHub API.
-func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (bool, error) {
-	runners, err := client.ListRunners(ctx, enterprise, org, repo)
-	if err != nil {
-		return false, err
-	}
-
-	id := int64(0)
-	for _, runner := range runners {
-		if runner.GetName() == name {
-			// Note that sometimes a runner can stuck "busy" even though it is already "offline".
-			// But we assume that it's not actually offline and still running a job.
-			if runner.GetBusy() {
-				return false, fmt.Errorf("runner is busy")
-			}
-			id = runner.GetID()
-			break
-		}
-	}
-
-	if id == int64(0) {
-		return false, nil
-	}
-
-	// Trying to remove a busy runner can result in errors like the following:
-	//    failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
-	//
-	// TODO: Probably we can just remove the runner by ID without seeing if the runner is busy, by treating it as busy when a remove-runner call failed with 422?
-	if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil {
-		return false, err
-	}
-
-	return true, nil
-}
--- a/github/github.go
+++ b/github/github.go
@ -11,8 +11,11 @@ import (
 	"time"

 	"github.com/actions-runner-controller/actions-runner-controller/github/metrics"
+	"github.com/actions-runner-controller/actions-runner-controller/logging"
 	"github.com/bradleyfalzon/ghinstallation"
+	"github.com/go-logr/logr"
 	"github.com/google/go-github/v39/github"
+	"github.com/gregjones/httpcache"
 	"golang.org/x/oauth2"
 )

@ -28,6 +31,8 @@ type Config struct {
 	BasicauthUsername string `split_words:"true"`
 	BasicauthPassword string `split_words:"true"`
 	RunnerGitHubURL   string `split_words:"true"`
+
+	Log *logr.Logger
 }

 // Client wraps GitHub client with some additional
@ -82,8 +87,11 @@ func (c *Config) NewClient() (*Client, error) {
 		transport = tr
 	}

-	transport = metrics.Transport{Transport: transport}
-	httpClient := &http.Client{Transport: transport}
+	cached := httpcache.NewTransport(httpcache.NewMemoryCache())
+	cached.Transport = transport
+	loggingTransport := logging.Transport{Transport: cached, Log: c.Log}
+	metricsTransport := metrics.Transport{Transport: loggingTransport}
+	httpClient := &http.Client{Transport: metricsTransport}

 	var client *github.Client
 	var githubBaseURL string
--- a/go.mod
+++ b/go.mod
@ -41,6 +41,7 @@ require (
 	github.com/google/gofuzz v1.1.0 // indirect
 	github.com/google/uuid v1.1.2 // indirect
 	github.com/googleapis/gnostic v0.5.5 // indirect
+	github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
 	github.com/imdario/mergo v0.3.12 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
--- a/go.sum
+++ b/go.sum
@ -259,6 +259,8 @@ github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
 github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
 github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
+github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA=
+github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
 github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
--- a/logging/logger.go
+++ b/logging/logger.go
@ -0,0 +1,54 @@
+package logging
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+
+	"github.com/go-logr/logr"
+	zaplib "go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+)
+
+const (
+	LogLevelDebug = "debug"
+	LogLevelInfo  = "info"
+	LogLevelWarn  = "warn"
+	LogLevelError = "error"
+)
+
+func NewLogger(logLevel string) logr.Logger {
+	log := zap.New(func(o *zap.Options) {
+		switch logLevel {
+		case LogLevelDebug:
+			o.Development = true
+			lvl := zaplib.NewAtomicLevelAt(zaplib.DebugLevel) // maps to logr's V(1)
+			o.Level = &lvl
+		case LogLevelInfo:
+			lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel)
+			o.Level = &lvl
+		case LogLevelWarn:
+			lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel)
+			o.Level = &lvl
+		case LogLevelError:
+			lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel)
+			o.Level = &lvl
+		default:
+			// We use bitsize of 8 as zapcore.Level is a type alias to int8
+			levelInt, err := strconv.ParseInt(logLevel, 10, 8)
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Failed to parse --log-level=%s: %v", logLevel, err)
+				os.Exit(1)
+			}
+
+			// For example, --log-level=debug a.k.a --log-level=-1 maps to zaplib.DebugLevel, which is associated to logr's V(1)
+			// --log-level=-2 maps the specific custom log level that is associated to logr's V(2).
+			level := zapcore.Level(levelInt)
+			atomicLevel := zaplib.NewAtomicLevelAt(level)
+			o.Level = &atomicLevel
+		}
+	})
+
+	return log
+}
--- a/logging/transport.go
+++ b/logging/transport.go
@ -0,0 +1,51 @@
+// Package logging provides various logging helpers for ARC
+package logging
+
+import (
+	"net/http"
+
+	"github.com/go-logr/logr"
+	"github.com/gregjones/httpcache"
+)
+
+const (
+	// https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting
+	headerRateLimitRemaining = "X-RateLimit-Remaining"
+)
+
+// Transport wraps a transport with metrics monitoring
+type Transport struct {
+	Transport http.RoundTripper
+
+	Log *logr.Logger
+}
+
+func (t Transport) RoundTrip(req *http.Request) (*http.Response, error) {
+	resp, err := t.Transport.RoundTrip(req)
+	if resp != nil {
+		t.log(req, resp)
+	}
+	return resp, err
+}
+
+func (t Transport) log(req *http.Request, resp *http.Response) {
+	if t.Log == nil {
+		return
+	}
+
+	var args []interface{}
+
+	marked := resp.Header.Get(httpcache.XFromCache) == "1"
+
+	args = append(args, "from_cache", marked, "method", req.Method, "url", req.URL.String())
+
+	if !marked {
+		// Do not log outdated rate limit remaining value
+
+		remaining := resp.Header.Get(headerRateLimitRemaining)
+
+		args = append(args, "ratelimit_remaining", remaining)
+	}
+
+	t.Log.V(3).Info("Seen HTTP response", args...)
+}
--- a/main.go
+++ b/main.go
@ -26,24 +26,18 @@ import (
 	actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
 	"github.com/actions-runner-controller/actions-runner-controller/controllers"
 	"github.com/actions-runner-controller/actions-runner-controller/github"
+	"github.com/actions-runner-controller/actions-runner-controller/logging"
 	"github.com/kelseyhightower/envconfig"
-	zaplib "go.uber.org/zap"
 	"k8s.io/apimachinery/pkg/runtime"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
 	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 	// +kubebuilder:scaffold:imports
 )

 const (
 	defaultRunnerImage = "summerwind/actions-runner:latest"
 	defaultDockerImage = "docker:dind"
-
-	logLevelDebug = "debug"
-	logLevelInfo  = "info"
-	logLevelWarn  = "warn"
-	logLevelError = "error"
 )

 var (
@ -120,24 +114,12 @@ func main() {
 	flag.DurationVar(&syncPeriod, "sync-period", 10*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled. When you use autoscaling, set to a lower value like 10 minute, because this corresponds to the minimum time to react on demand change. . If you're tweaking this in order to make autoscaling more responsive, you'll probably want to tweak github-api-cache-duration, too")
 	flag.Var(&commonRunnerLabels, "common-runner-labels", "Runner labels in the K1=V1,K2=V2,... format that are inherited all the runners created by the controller. See https://github.com/actions-runner-controller/actions-runner-controller/issues/321 for more information")
 	flag.StringVar(&namespace, "watch-namespace", "", "The namespace to watch for custom resources. Set to empty for letting it watch for all namespaces.")
-	flag.StringVar(&logLevel, "log-level", logLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
+	flag.StringVar(&logLevel, "log-level", logging.LogLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
 	flag.Parse()

-	logger := zap.New(func(o *zap.Options) {
-		switch logLevel {
-		case logLevelDebug:
-			o.Development = true
-		case logLevelInfo:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel)
-			o.Level = &lvl
-		case logLevelWarn:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel)
-			o.Level = &lvl
-		case logLevelError:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel)
-			o.Level = &lvl
-		}
-	})
+	logger := logging.NewLogger(logLevel)
+
+	c.Log = &logger

 	ghClient, err = c.NewClient()
 	if err != nil {
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@ -17,12 +17,15 @@ var (
 	controllerImageTag  = "e2e"
 	controllerImage     = testing.Img(controllerImageRepo, controllerImageTag)
 	runnerImageRepo     = "actionsrunnercontrollere2e/actions-runner"
+	runnerDindImageRepo = "actionsrunnercontrollere2e/actions-runner-dind"
 	runnerImageTag      = "e2e"
 	runnerImage         = testing.Img(runnerImageRepo, runnerImageTag)
+	runnerDindImage     = testing.Img(runnerDindImageRepo, runnerImageTag)

 	prebuildImages = []testing.ContainerImage{
 		controllerImage,
 		runnerImage,
+		runnerDindImage,
 	}

 	builds = []testing.DockerBuild{
@ -36,6 +39,11 @@ var (
 			Args:       []testing.BuildArg{},
 			Image:      runnerImage,
 		},
+		{
+			Dockerfile: "../../runner/Dockerfile.dindrunner",
+			Args:       []testing.BuildArg{},
+			Image:      runnerDindImage,
+		},
 	}

 	certManagerVersion = "v1.1.1"
@ -52,7 +60,6 @@ var (
 		"SYNC_PERIOD=" + "10s",
 		"NAME=" + controllerImageRepo,
 		"VERSION=" + controllerImageTag,
-		"RUNNER_NAME=" + runnerImageRepo,
 		"RUNNER_TAG=" + runnerImageTag,
 	}

@ -167,11 +174,15 @@ type env struct {
 	useRunnerSet bool

 	testID                                                   string
+	testName                                                 string
 	repoToCommit                                             string
 	runnerLabel, githubToken, testRepo, testOrg, testOrgRepo string
 	githubTokenWebhook                                       string
 	testEnterprise                                           string
 	featureFlagEphemeral                                     bool
+	scaleDownDelaySecondsAfterScaleOut                       int64
+	minReplicas                                              int64
+	dockerdWithinRunnerContainer                             bool
 	testJobs                                                 []job
 }

@ -184,11 +195,12 @@ func initTestEnv(t *testing.T) *env {

 	id := e.ID()

-	testID := t.Name() + " " + id
+	testName := t.Name() + " " + id

-	t.Logf("Using test id %s", testID)
+	t.Logf("Initializing test with name %s", testName)

-	e.testID = testID
+	e.testID = id
+	e.testName = testName
 	e.runnerLabel = "test-" + id
 	e.githubToken = testing.Getenv(t, "GITHUB_TOKEN")
 	e.githubTokenWebhook = testing.Getenv(t, "WEBHOOK_GITHUB_TOKEN")
@ -197,9 +209,17 @@ func initTestEnv(t *testing.T) *env {
 	e.testOrg = testing.Getenv(t, "TEST_ORG", "")
 	e.testOrgRepo = testing.Getenv(t, "TEST_ORG_REPO", "")
 	e.testEnterprise = testing.Getenv(t, "TEST_ENTERPRISE")
-	e.testJobs = createTestJobs(id, testResultCMNamePrefix, 10)
+	e.testJobs = createTestJobs(id, testResultCMNamePrefix, 100)
 	ephemeral, _ := strconv.ParseBool(testing.Getenv(t, "TEST_FEATURE_FLAG_EPHEMERAL"))
 	e.featureFlagEphemeral = ephemeral
+	e.scaleDownDelaySecondsAfterScaleOut, _ = strconv.ParseInt(testing.Getenv(t, "TEST_RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT", "10"), 10, 32)
+	e.minReplicas, _ = strconv.ParseInt(testing.Getenv(t, "TEST_RUNNER_MIN_REPLICAS", "1"), 10, 32)
+
+	var err error
+	e.dockerdWithinRunnerContainer, err = strconv.ParseBool(testing.Getenv(t, "TEST_RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER", "false"))
+	if err != nil {
+		panic(fmt.Sprintf("unable to parse bool from TEST_RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER: %v", err))
+	}

 	return e
 }
@ -254,7 +274,22 @@ func (e *env) installActionsRunnerController(t *testing.T) {
 		"GITHUB_TOKEN=" + e.githubToken,
 		"WEBHOOK_GITHUB_TOKEN=" + e.githubTokenWebhook,
 		"RUNNER_LABEL=" + e.runnerLabel,
+		"TEST_ID=" + e.testID,
 		fmt.Sprintf("RUNNER_FEATURE_FLAG_EPHEMERAL=%v", e.featureFlagEphemeral),
+		fmt.Sprintf("RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT=%d", e.scaleDownDelaySecondsAfterScaleOut),
+		fmt.Sprintf("ORG_RUNNER_MIN_REPLICAS=%d", e.minReplicas),
+	}
+
+	if e.dockerdWithinRunnerContainer {
+		varEnv = append(varEnv,
+			"RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER=true",
+			"RUNNER_NAME="+runnerDindImageRepo,
+		)
+	} else {
+		varEnv = append(varEnv,
+			"RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER=false",
+			"RUNNER_NAME="+runnerImageRepo,
+		)
 	}

 	scriptEnv = append(scriptEnv, varEnv...)
@ -273,7 +308,7 @@ func (e *env) createControllerNamespaceAndServiceAccount(t *testing.T) {
 func (e *env) installActionsWorkflow(t *testing.T) {
 	t.Helper()

-	installActionsWorkflow(t, e.testID, e.runnerLabel, testResultCMNamePrefix, e.repoToCommit, e.testJobs)
+	installActionsWorkflow(t, e.testName, e.runnerLabel, testResultCMNamePrefix, e.repoToCommit, e.testJobs)
 }

 func (e *env) verifyActionsWorkflowRun(t *testing.T) {
@ -302,13 +337,13 @@ func createTestJobs(id, testResultCMNamePrefix string, numJobs int) []job {

 const Branch = "main"

-func installActionsWorkflow(t *testing.T, testID, runnerLabel, testResultCMNamePrefix, testRepo string, testJobs []job) {
+func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNamePrefix, testRepo string, testJobs []job) {
 	t.Helper()

 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()

-	wfName := "E2E " + testID
+	wfName := "E2E " + testName
 	wf := testing.Workflow{
 		Name: wfName,
 		On: testing.On{