Fix runners to do their best to gracefully stop on pod eviction (#1759)
Ref #1535 Ref #1581
Signed-off-by: Yusuke Kuoka <ykuoka@gmail.com>
This commit is contained in: parent 332548093a · commit c74ad6195f
@@ -6,7 +6,7 @@ on:
      - '**'
    paths:
      - 'runner/**'
      - 'test/entrypoint/**'
      - 'test/startup/**'
      - '!**.md'

permissions:
@@ -42,4 +42,4 @@ jobs:
      - name: Run tests
        run: |
          make acceptance/runner/entrypoint
          make acceptance/runner/startup
Makefile (6 changed lines)
@@ -117,7 +117,7 @@ generate: controller-gen

# Run shellcheck on runner scripts
shellcheck: shellcheck-install
	$(TOOLS_PATH)/shellcheck --shell bash --source-path runner runner/*.bash runner/*.sh
	$(TOOLS_PATH)/shellcheck --shell bash --source-path runner runner/*.sh

docker-buildx:
	export DOCKER_CLI_EXPERIMENTAL=enabled ;\
@@ -203,8 +203,8 @@ acceptance/deploy:
acceptance/tests:
	acceptance/checks.sh

acceptance/runner/entrypoint:
	cd test/entrypoint/ && bash test.sh
acceptance/runner/startup:
	cd test/startup/ && bash test.sh

# We use -count=1 instead of `go clean -testcache`
# See https://terratest.gruntwork.io/docs/testing-best-practices/avoid-test-caching/
@@ -8,6 +8,16 @@ provisioner: rancher.io/local-path
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ${NAME}-rootless-dind-work-dir
  labels:
    content: ${NAME}-rootless-dind-work-dir
provisioner: rancher.io/local-path
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerDeployment
metadata:
@@ -49,7 +59,12 @@ spec:
      labels:
        - "${RUNNER_LABEL}"

      serviceAccountName: ${RUNNER_SERVICE_ACCOUNT_NAME}
      terminationGracePeriodSeconds: ${RUNNER_TERMINATION_GRACE_PERIOD_SECONDS}

      env:
        - name: RUNNER_GRACEFUL_STOP_TIMEOUT
          value: "${RUNNER_GRACEFUL_STOP_TIMEOUT}"
        - name: ROLLING_UPDATE_PHASE
          value: "${ROLLING_UPDATE_PHASE}"
        - name: ARC_DOCKER_MTU_PROPAGATION
@@ -57,6 +72,30 @@ spec:

      dockerMTU: 1400

      # Fix the following no space left errors with rootless-dind runners that can happen while running buildx build:
      # ------
      #  > [4/5] RUN go mod download:
      # ------
      # ERROR: failed to solve: failed to prepare yxsw8lv9hqnuafzlfta244l0z: mkdir /home/runner/.local/share/docker/vfs/dir/yxsw8lv9hqnuafzlfta244l0z/usr/local/go/src/cmd/compile/internal/types2/testdata: no space left on device
      # Error: Process completed with exit code 1.
      #
      volumeMounts:
        - name: rootless-dind-work-dir
          # Omit the /share/docker part of the /home/runner/.local/share/docker as
          # that part is created by dockerd.
          mountPath: /home/runner/.local
          readOnly: false
      volumes:
        - name: rootless-dind-work-dir
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes: [ "ReadWriteOnce" ]
                storageClassName: "${NAME}-rootless-dind-work-dir"
                resources:
                  requests:
                    storage: 3Gi

      #
      # Non-standard working directory
      #
@@ -72,7 +111,6 @@ spec:
            resources:
              requests:
                storage: 10Gi
      serviceAccountName: ${RUNNER_SERVICE_ACCOUNT_NAME}
---
apiVersion: actions.summerwind.dev/v1alpha1
kind: HorizontalRunnerAutoscaler
@@ -54,6 +54,16 @@ provisioner: rancher.io/local-path
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ${NAME}-rootless-dind-work-dir
  labels:
    content: ${NAME}-rootless-dind-work-dir
provisioner: rancher.io/local-path
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
---
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerSet
metadata:
@@ -113,10 +123,20 @@ spec:
        app: ${NAME}
    spec:
      serviceAccountName: ${RUNNER_SERVICE_ACCOUNT_NAME}
      terminationGracePeriodSeconds: ${RUNNER_TERMINATION_GRACE_PERIOD_SECONDS}
      containers:
      # # Uncomment only when non-dind-runner / you're using docker sidecar
      # - name: docker
      #   # Image is required for the dind sidecar definition within RunnerSet spec
      #   image: "docker:dind"
      #   env:
      #     - name: RUNNER_GRACEFUL_STOP_TIMEOUT
      #       value: "${RUNNER_GRACEFUL_STOP_TIMEOUT}"
      - name: runner
        imagePullPolicy: IfNotPresent
        env:
          - name: RUNNER_GRACEFUL_STOP_TIMEOUT
            value: "${RUNNER_GRACEFUL_STOP_TIMEOUT}"
          - name: RUNNER_FEATURE_FLAG_EPHEMERAL
            value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
          - name: GOMODCACHE
@@ -168,7 +188,15 @@ spec:
        # # For buildx cache
        # - name: cache
        #   mountPath: "/home/runner/.cache"
        # Comment out the ephemeral work volume if you're going to test the kubernetes container mode

        # For fixing no space left error on rootless dind runner
        - name: rootless-dind-work-dir
          # Omit the /share/docker part of the /home/runner/.local/share/docker as
          # that part is created by dockerd.
          mountPath: /home/runner/.local
          readOnly: false

      # Comment out the ephemeral work volume if you're going to test the kubernetes container mode
      # volumes:
      # - name: work
      #   ephemeral:
@@ -180,6 +208,24 @@ spec:
      #       resources:
      #         requests:
      #           storage: 10Gi

      # Fix the following no space left errors with rootless-dind runners that can happen while running buildx build:
      # ------
      #  > [4/5] RUN go mod download:
      # ------
      # ERROR: failed to solve: failed to prepare yxsw8lv9hqnuafzlfta244l0z: mkdir /home/runner/.local/share/docker/vfs/dir/yxsw8lv9hqnuafzlfta244l0z/usr/local/go/src/cmd/compile/internal/types2/testdata: no space left on device
      # Error: Process completed with exit code 1.
      #
      volumes:
        - name: rootless-dind-work-dir
          ephemeral:
            volumeClaimTemplate:
              spec:
                accessModes: [ "ReadWriteOnce" ]
                storageClassName: "${NAME}-rootless-dind-work-dir"
                resources:
                  requests:
                    storage: 3Gi
  volumeClaimTemplates:
    - metadata:
        name: vol1
@@ -190,6 +190,17 @@ func TestNewRunnerPod(t *testing.T) {
					SecurityContext: &corev1.SecurityContext{
						Privileged: func(b bool) *bool { return &b }(true),
					},
					Lifecycle: &corev1.Lifecycle{
						PreStop: &corev1.LifecycleHandler{
							Exec: &corev1.ExecAction{
								Command: []string{
									"/bin/sh",
									"-c",
									"timeout \"${RUNNER_GRACEFUL_STOP_TIMEOUT:-15}\" /bin/sh -c \"echo 'Prestop hook started'; while [ -f /runner/.runner ]; do sleep 1; done; echo 'Waiting for dockerd to start'; while ! pgrep -x dockerd; do sleep 1; done; echo 'Prestop hook stopped'\" >/proc/1/fd/1 2>&1",
								},
							},
						},
					},
				},
			},
			RestartPolicy: corev1.RestartPolicyNever,
@@ -709,6 +720,17 @@ func TestNewRunnerPodFromRunnerController(t *testing.T) {
					SecurityContext: &corev1.SecurityContext{
						Privileged: func(b bool) *bool { return &b }(true),
					},
					Lifecycle: &corev1.Lifecycle{
						PreStop: &corev1.LifecycleHandler{
							Exec: &corev1.ExecAction{
								Command: []string{
									"/bin/sh",
									"-c",
									"timeout \"${RUNNER_GRACEFUL_STOP_TIMEOUT:-15}\" /bin/sh -c \"echo 'Prestop hook started'; while [ -f /runner/.runner ]; do sleep 1; done; echo 'Waiting for dockerd to start'; while ! pgrep -x dockerd; do sleep 1; done; echo 'Prestop hook stopped'\" >/proc/1/fd/1 2>&1",
								},
							},
						},
					},
				},
			},
			RestartPolicy: corev1.RestartPolicyNever,
@@ -1151,6 +1151,27 @@ func newRunnerPodWithContainerMode(containerMode string, template corev1.Pod, ru
			fmt.Sprintf("--registry-mirror=%s", dockerRegistryMirror),
		)
	}

	dockerdContainer.Lifecycle = &corev1.Lifecycle{
		PreStop: &corev1.LifecycleHandler{
			Exec: &corev1.ExecAction{
				Command: []string{
					"/bin/sh", "-c",
					// A prestop hook can start before dockerd starts up, for example, when the docker init is still provisioning
					// the TLS key and the cert to be used by dockerd.
					//
					// The author of this prestop script encountered issues where the prestop hung for ten or more minutes on his cluster.
					// He realized that the hang happened when a prestop hook is executed while the docker init is provisioning the key and cert.
					// Assuming that's because the SIGTERM sent by K8s after the prestop hook was ignored by the docker init at that time,
					// and that it needed to wait until terminationGracePeriodSeconds elapsed before finally killing the container,
					// he wrote this script so that it tries to delay SIGTERM until dockerd starts and becomes ready to process the signal.
					//
					// Also note that we don't need to run `pkill dockerd` at the end of the prestop script, as SIGTERM is sent by K8s after the prestop has completed.
					`timeout "${RUNNER_GRACEFUL_STOP_TIMEOUT:-15}" /bin/sh -c "echo 'Prestop hook started'; while [ -f /runner/.runner ]; do sleep 1; done; echo 'Waiting for dockerd to start'; while ! pgrep -x dockerd; do sleep 1; done; echo 'Prestop hook stopped'" >/proc/1/fd/1 2>&1`,
				},
			},
		},
	}

	if runnerContainerIndex == -1 {
@@ -36,6 +36,7 @@ ToC:
- [Runner with rootless DinD](#runner-with-rootless-dind)
- [Runner with k8s jobs](#runner-with-k8s-jobs)
- [Additional Tweaks](#additional-tweaks)
- [Runner Graceful Termination](#runner-graceful-termination)
- [Custom Volume mounts](#custom-volume-mounts)
- [Runner Labels](#runner-labels)
- [Runner Groups](#runner-groups)
@@ -1220,6 +1221,66 @@ spec:
#       privileged: true
```

### Runner Graceful Termination

As of ARC 0.27.0 (unreleased as of 2022/09/30), runners can only wait for 15 seconds by default on pod termination.

This can be problematic in two scenarios:

- Scenario 1 - RunnerSet-only: You're triggering updates other than replica changes to `RunnerSet` very often. With the current implementation, every update to a RunnerSet other than a `replicas` change may terminate in-progress workflow jobs and cause them to fail.
- Scenario 2 - RunnerDeployment and RunnerSet: You have another Kubernetes controller that evicts runner pods directly, without consulting ARC.

> RunnerDeployment is not affected by Scenario 1, as RunnerDeployment-managed runners already tolerate arbitrarily long in-progress jobs while being replaced; their graceful termination is handled outside of the entrypoint and the Kubernetes pod termination process.

To make it more reliable, please set the `spec.template.spec.terminationGracePeriodSeconds` field and the `RUNNER_GRACEFUL_STOP_TIMEOUT` environment variable appropriately.

If you want the pod to terminate in approximately 110 seconds at the latest after a termination request, try a `terminationGracePeriodSeconds` of `110` and a `RUNNER_GRACEFUL_STOP_TIMEOUT` of around `90`.

The difference between `terminationGracePeriodSeconds` and `RUNNER_GRACEFUL_STOP_TIMEOUT` can vary depending on your environment and cluster.

The idea is twofold (see the sketch after this list):

- `RUNNER_GRACEFUL_STOP_TIMEOUT` gives the runner the longest possible time to wait for the in-progress job to complete. Keep it smaller than `terminationGracePeriodSeconds` so that you don't unnecessarily cancel running jobs.
- `terminationGracePeriodSeconds` gives the runner the longest possible time to stop before it disappears. If the pod is forcefully terminated before a graceful stop, the job running within the runner pod can hang for about 10 minutes in the GitHub Actions Workflow Run/Job UI. A correct value here avoids the hang, even if the runner had to cancel the running job due to the approaching deadline.
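For concreteness, here is a minimal sketch of a `RunnerDeployment` combining the two settings. The resource name, repository, and concrete values are illustrative assumptions, not taken from this commit:

```yaml
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerDeployment
metadata:
  name: example-runnerdeploy   # hypothetical name
spec:
  template:
    spec:
      repository: example/myrepo   # hypothetical repository
      # Upper bound for the whole pod shutdown; Kubernetes force-kills the pod after this.
      terminationGracePeriodSeconds: 110
      env:
        # How long the runner waits for an in-progress job before giving up.
        # Keep this smaller than terminationGracePeriodSeconds.
        - name: RUNNER_GRACEFUL_STOP_TIMEOUT
          value: "90"
```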
> We know the default 15-second timeout is too short to be useful at all.
> In the near future, we might raise the default to, for example, 100 seconds, so that runners that tend to run jobs of up to 100 seconds can
> terminate gracefully without failing running jobs. It would also allow a job that was running on a node requested for termination
> to correctly report its status as "cancelled", rather than hanging for approximately 10 minutes in the Actions Web UI until it finally fails (without any specific error message).
> 100 seconds is just an example. It might be a good default in case you're using AWS EC2 Spot Instances, because they tend to send
> the termination notice two minutes before the termination.
> If you have any other suggestions for the default value, please share your thoughts in Discussions.
#### Status and Future of this feature

Note that this feature is currently intended for runner pods being terminated by other Kubernetes controllers or human operators, or those being replaced by the ARC RunnerSet controller due to spec changes other than `replicas`. RunnerDeployment has no issue in those scenarios. Non-dind runners are affected too, but this feature does not support them yet.

For example, a runner pod can be terminated prematurely by cluster-autoscaler when it's about to terminate the node on cluster scale-down.
All the variants of RunnerDeployment- and RunnerSet-managed runner pods, including runners with dockerd sidecars and rootless and rootful dind runners, are affected by it. For dind runner pods only, you can use this feature to fix or alleviate the issue.

To be clear, an increase/decrease in the desired replicas of a RunnerDeployment or RunnerSet will never result in workflow jobs being terminated prematurely.
That's because it's handled BEFORE the runner pod is terminated, by the respective ARC controller.

For anyone interested in improving this: adding a dedicated pod finalizer for this issue will never work.
That's because a pod finalizer can't prevent SIGTERM from being sent once deletionTimestamp is set to non-zero,
which triggers the Kubernetes pod termination process anyway.
What we want here is to delay the SIGTERM sent to the `actions/runner` process running within the runner container of the runner pod,
not to block the removal of the pod resource from the Kubernetes cluster.
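To illustrate the distinction, here is a simplified, hypothetical pod snippet (not part of this commit). A `preStop` hook runs before Kubernetes sends SIGTERM, so it can delay the signal for up to `terminationGracePeriodSeconds`, whereas a finalizer would only delay the deletion of the pod object, not the signal:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: delay-sigterm-demo   # hypothetical name
spec:
  terminationGracePeriodSeconds: 110
  containers:
    - name: runner
      image: example/runner:latest   # hypothetical image
      lifecycle:
        preStop:
          exec:
            # Kubernetes runs this hook first and sends SIGTERM only after it returns
            # (or the grace period expires), so waiting here postpones the signal.
            command: ["/bin/sh", "-c", "sleep 90"]
```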
Also, handling all the graceful termination scenarios with a single method may or may not work.

The most viable option would be to do the graceful termination handling entirely in the SIGTERM handler within the runner entrypoint.
But that may or may not work long-term, as it's subject to terminationGracePeriodSeconds anyway, and the author of this note thinks there is still
no formally defined limit for terminationGracePeriodSeconds, hence we aren't sure how long terminationGracePeriodSeconds can be set in practice.
Also, the max workflow job duration is approximately 24h, so Kubernetes would have to formally support a terminationGracePeriodSeconds of 24h if
we were to move entirely to the entrypoint-based solution.
If you have any insights about the matter, chime in to the development of ARC!

That's why we still rely on ARC's own graceful termination logic in the Runner controller for spec changes and replica increases/decreases of RunnerDeployment, and
replica increases/decreases of RunnerSet, even though we now have the entrypoint-based graceful stop handler.

Our plan is to improve the RunnerSet to have the same logic as the Runner controller, so that you don't need this SIGTERM-handler-based feature for RunnerSet spec changes.

### Custom Volume mounts

You can configure your own custom volume mounts. For example to have the work/docker data in memory or on NVME SSD, for
@@ -102,9 +102,9 @@ RUN export ARCH=$(echo ${TARGETPLATFORM} | cut -d / -f2) \
    && curl -f -L -o /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v${DUMB_INIT_VERSION}/dumb-init_${DUMB_INIT_VERSION}_${ARCH} \
    && chmod +x /usr/local/bin/dumb-init

COPY entrypoint.sh logger.bash rootless-startup.sh update-status /usr/bin/
COPY entrypoint-dind-rootless.sh startup.sh logger.sh graceful-stop.sh update-status /usr/bin/

RUN chmod +x /usr/bin/rootless-startup.sh /usr/bin/entrypoint.sh
RUN chmod +x /usr/bin/entrypoint-dind-rootless.sh /usr/bin/startup.sh

# Copy the docker shim which propagates the docker MTU to underlying networks
# to replace the docker binary in the PATH.
@@ -140,5 +140,5 @@ RUN curl -fsSL https://get.docker.com/rootless | sh
RUN curl -L "https://github.com/docker/compose/releases/download/${COMPOSE_VERSION}/docker-compose-Linux-x86_64" -o /home/runner/bin/docker-compose ; \
    chmod +x /home/runner/bin/docker-compose

ENTRYPOINT ["/usr/local/bin/dumb-init", "--"]
CMD ["rootless-startup.sh"]
ENTRYPOINT ["/bin/bash", "-c"]
CMD ["entrypoint-dind-rootless.sh"]
@@ -99,9 +99,9 @@ RUN mkdir /opt/hostedtoolcache \

# We place the scripts in `/usr/bin` so that users who extend this image can
# override them with scripts of the same name placed in `/usr/local/bin`.
COPY entrypoint.sh logger.bash startup.sh update-status /usr/bin/
COPY entrypoint-dind.sh startup.sh logger.sh wait.sh graceful-stop.sh update-status /usr/bin/
COPY supervisor/ /etc/supervisor/conf.d/
RUN chmod +x /usr/bin/startup.sh /usr/bin/entrypoint.sh
RUN chmod +x /usr/bin/entrypoint-dind.sh /usr/bin/startup.sh

# Copy the docker shim which propagates the docker MTU to underlying networks
# to replace the docker binary in the PATH.
@@ -130,5 +130,5 @@ RUN echo "PATH=${PATH}" > /etc/environment \
# No group definition, as that makes it harder to run docker.
USER runner

ENTRYPOINT ["/usr/local/bin/dumb-init", "--"]
CMD ["startup.sh"]
ENTRYPOINT ["/bin/bash", "-c"]
CMD ["entrypoint-dind.sh"]
@@ -117,7 +117,7 @@ RUN mkdir /opt/hostedtoolcache \

# We place the scripts in `/usr/bin` so that users who extend this image can
# override them with scripts of the same name placed in `/usr/local/bin`.
COPY entrypoint.sh logger.bash update-status /usr/bin/
COPY entrypoint.sh startup.sh logger.sh graceful-stop.sh update-status /usr/bin/

# Copy the docker shim which propagates the docker MTU to underlying networks
# to replace the docker binary in the PATH.
@@ -136,5 +136,5 @@ RUN echo "PATH=${PATH}" > /etc/environment \

USER runner

ENTRYPOINT ["/usr/local/bin/dumb-init", "--"]
ENTRYPOINT ["/bin/bash", "-c"]
CMD ["entrypoint.sh"]
@@ -1,5 +1,7 @@
#!/bin/bash
source logger.bash
source logger.sh
source graceful-stop.sh
trap graceful_stop TERM

log.notice "Writing out Docker config file"
/bin/bash <<SCRIPT
@@ -21,7 +23,20 @@ fi
SCRIPT

log.notice "Starting Docker (rootless)"

dumb-init bash <<'SCRIPT' &
# Note that we don't want dockerd to be terminated before the runner agent,
# because that would defeat the goal of the runner agent graceful-stop logic implemented above.
# We can't rely on e.g. `dumb-init --single-child` for that, because with `--single-child` we can't even trap SIGTERM
# for the runner agent, let alone dockerd.
/home/runner/bin/dockerd-rootless.sh --config-file /home/runner/.config/docker/daemon.json >> /dev/null 2>&1 &

# Wait for processes to be running
entrypoint.sh
startup.sh
SCRIPT

RUNNER_INIT_PID=$!
log.notice "Runner init started with pid $RUNNER_INIT_PID"
wait $RUNNER_INIT_PID
log.notice "Runner init exited. Exiting this process with code 0 so that the container and the pod are GC'ed by Kubernetes soon."

trap - TERM
@@ -0,0 +1,69 @@
#!/bin/bash
source logger.sh
source graceful-stop.sh
trap graceful_stop TERM

sudo /bin/bash <<SCRIPT
mkdir -p /etc/docker

if [ ! -f /etc/docker/daemon.json ]; then
  echo "{}" > /etc/docker/daemon.json
fi

if [ -n "${MTU}" ]; then
  jq ".\"mtu\" = ${MTU}" /etc/docker/daemon.json > /tmp/.daemon.json && mv /tmp/.daemon.json /etc/docker/daemon.json
  # See https://docs.docker.com/engine/security/rootless/
  echo "environment=DOCKERD_ROOTLESS_ROOTLESSKIT_MTU=${MTU}" >> /etc/supervisor/conf.d/dockerd.conf
fi

if [ -n "${DOCKER_REGISTRY_MIRROR}" ]; then
  jq ".\"registry-mirrors\"[0] = \"${DOCKER_REGISTRY_MIRROR}\"" /etc/docker/daemon.json > /tmp/.daemon.json && mv /tmp/.daemon.json /etc/docker/daemon.json
fi
SCRIPT

dumb-init bash <<'SCRIPT' &
source logger.sh
source wait.sh

dump() {
  local path=${1:?missing required <path> argument}
  shift
  printf -- "%s\n---\n" "${*//\{path\}/"$path"}" 1>&2
  cat "$path" 1>&2
  printf -- '---\n' 1>&2
}

for config in /etc/docker/daemon.json /etc/supervisor/conf.d/dockerd.conf; do
  dump "$config" 'Using {path} with the following content:'
done

log.debug 'Starting supervisor daemon'
sudo /usr/bin/supervisord -n >> /dev/null 2>&1 &

log.debug 'Waiting for processes to be running...'
processes=(dockerd)

for process in "${processes[@]}"; do
  if ! wait_for_process "$process"; then
    log.error "$process is not running after max time"
    dump /var/log/dockerd.err.log 'Dumping {path} to aid investigation'
    dump /var/log/supervisor/supervisord.log 'Dumping {path} to aid investigation'
    exit 1
  else
    log.debug "$process is running"
  fi
done

if [ -n "${MTU}" ]; then
  sudo ifconfig docker0 mtu "${MTU}" up
fi

startup.sh
SCRIPT

RUNNER_INIT_PID=$!
log.notice "Runner init started with pid $RUNNER_INIT_PID"
wait $RUNNER_INIT_PID
log.notice "Runner init exited. Exiting this process with code 0 so that the container and the pod are GC'ed by Kubernetes soon."

trap - TERM
@@ -1,172 +1,30 @@
#!/bin/bash
source logger.bash
source logger.sh
source graceful-stop.sh
trap graceful_stop TERM

RUNNER_ASSETS_DIR=${RUNNER_ASSETS_DIR:-/runnertmp}
RUNNER_HOME=${RUNNER_HOME:-/runner}
dumb-init bash <<'SCRIPT' &
source logger.sh

# Let GitHub runner execute these hooks. These environment variables are used by GitHub's Runner as described here
# https://github.com/actions/runner/blob/main/docs/adrs/1751-runner-job-hooks.md
# Scripts referenced in the ACTIONS_RUNNER_HOOK_ environment variables must end in .sh or .ps1
# for it to become a valid hook script, otherwise GitHub will fail to run the hook
export ACTIONS_RUNNER_HOOK_JOB_STARTED=/etc/arc/hooks/job-started.sh
export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/etc/arc/hooks/job-completed.sh
startup.sh
SCRIPT

if [ -n "${STARTUP_DELAY_IN_SECONDS}" ]; then
  log.notice "Delaying startup by ${STARTUP_DELAY_IN_SECONDS} seconds"
  sleep "${STARTUP_DELAY_IN_SECONDS}"
RUNNER_INIT_PID=$!
log.notice "Runner init started with pid $RUNNER_INIT_PID"
wait $RUNNER_INIT_PID
log.notice "Runner init exited. Exiting this process with code 0 so that the container and the pod are GC'ed by Kubernetes soon."

if [ -f /runner/.runner ]; then
  # If the runner failed with the following error:
  #   √ Connected to GitHub
  #   Failed to create a session. The runner registration has been deleted from the server, please re-configure.
  #   Runner listener exit with terminated error, stop the service, no retry needed.
  #   Exiting runner...
  # it might have failed to delete the .runner file.
  # We use the existence of the .runner file as the indicator that the runner agent has not stopped yet.
  # Remove it ourselves now, so that the dockerd sidecar prestop won't hang waiting for the .runner file to disappear.
  echo "Removing the .runner file"
  rm -f /runner/.runner
fi

if [ -z "${GITHUB_URL}" ]; then
  log.debug 'Working with public GitHub'
  GITHUB_URL="https://github.com/"
else
  length=${#GITHUB_URL}
  last_char=${GITHUB_URL:length-1:1}

  [[ $last_char != "/" ]] && GITHUB_URL="$GITHUB_URL/"; :
  log.debug "Github endpoint URL ${GITHUB_URL}"
fi

if [ -z "${RUNNER_NAME}" ]; then
  log.error 'RUNNER_NAME must be set'
  exit 1
fi

if [ -n "${RUNNER_ORG}" ] && [ -n "${RUNNER_REPO}" ] && [ -n "${RUNNER_ENTERPRISE}" ]; then
  ATTACH="${RUNNER_ORG}/${RUNNER_REPO}"
elif [ -n "${RUNNER_ORG}" ]; then
  ATTACH="${RUNNER_ORG}"
elif [ -n "${RUNNER_REPO}" ]; then
  ATTACH="${RUNNER_REPO}"
elif [ -n "${RUNNER_ENTERPRISE}" ]; then
  ATTACH="enterprises/${RUNNER_ENTERPRISE}"
else
  log.error 'At least one of RUNNER_ORG, RUNNER_REPO, or RUNNER_ENTERPRISE must be set'
  exit 1
fi

if [ -z "${RUNNER_TOKEN}" ]; then
  log.error 'RUNNER_TOKEN must be set'
  exit 1
fi

if [ -z "${RUNNER_REPO}" ] && [ -n "${RUNNER_GROUP}" ];then
  RUNNER_GROUPS=${RUNNER_GROUP}
fi

# Hack due to https://github.com/actions-runner-controller/actions-runner-controller/issues/252#issuecomment-758338483
if [ ! -d "${RUNNER_HOME}" ]; then
  log.error "$RUNNER_HOME should be an emptyDir mount. Please fix the pod spec."
  exit 1
fi

# if this is not a testing environment
if [[ "${UNITTEST:-}" == '' ]]; then
  sudo chown -R runner:docker "$RUNNER_HOME"
  # enable dotglob so we can copy a ".env" file to load in env vars as part of the service startup if one is provided
  # loading a .env from the root of the service is part of the actions/runner logic
  shopt -s dotglob
  # use cp instead of mv to avoid issues when src and dst are on different devices
  cp -r "$RUNNER_ASSETS_DIR"/* "$RUNNER_HOME"/
  shopt -u dotglob
fi

if ! cd "${RUNNER_HOME}"; then
  log.error "Failed to cd into ${RUNNER_HOME}"
  exit 1
fi

# past that point, it's all relative paths from /runner

config_args=()
if [ "${RUNNER_FEATURE_FLAG_ONCE:-}" != "true" ] && [ "${RUNNER_EPHEMERAL}" == "true" ]; then
  config_args+=(--ephemeral)
  log.debug 'Passing --ephemeral to config.sh to enable the ephemeral runner.'
fi
if [ "${DISABLE_RUNNER_UPDATE:-}" == "true" ]; then
  config_args+=(--disableupdate)
  log.debug 'Passing --disableupdate to config.sh to disable automatic runner updates.'
fi

update-status "Registering"

retries_left=10
while [[ ${retries_left} -gt 0 ]]; do
  log.debug 'Configuring the runner.'
  ./config.sh --unattended --replace \
    --name "${RUNNER_NAME}" \
    --url "${GITHUB_URL}${ATTACH}" \
    --token "${RUNNER_TOKEN}" \
    --runnergroup "${RUNNER_GROUPS}" \
    --labels "${RUNNER_LABELS}" \
    --work "${RUNNER_WORKDIR}" "${config_args[@]}"

  if [ -f .runner ]; then
    log.debug 'Runner successfully configured.'
    break
  fi

  log.debug 'Configuration failed. Retrying'
  retries_left=$((retries_left - 1))
  sleep 1
done

if [ ! -f .runner ]; then
  # we couldn't configure and register the runner; no point continuing
  log.error 'Configuration failed!'
  exit 2
fi

cat .runner
# Note: the `.runner` file's content should be something like the below:
#
# $ cat /runner/.runner
# {
#   "agentId": 117, #=> corresponds to the ID of the runner
#   "agentName": "THE_RUNNER_POD_NAME",
#   "poolId": 1,
#   "poolName": "Default",
#   "serverUrl": "https://pipelines.actions.githubusercontent.com/SOME_RANDOM_ID",
#   "gitHubUrl": "https://github.com/USER/REPO",
#   "workFolder": "/some/work/dir" #=> corresponds to Runner.Spec.WorkDir
# }
#
# Especially `agentId` is important, as other than listing all the runners in the repo,
# this is the only chance we could get the exact runner ID, which can be useful for further
# GitHub API calls like the below. Note that 171 is the agentId seen above.
# curl \
#   -H "Accept: application/vnd.github.v3+json" \
#   -H "Authorization: bearer ${GITHUB_TOKEN}"
#   https://api.github.com/repos/USER/REPO/actions/runners/171

# Hack due to the DinD volumes
if [ -z "${UNITTEST:-}" ] && [ -e ./externalstmp ]; then
  mkdir -p ./externals
  mv ./externalstmp/* ./externals/
fi

if [[ "${DISABLE_WAIT_FOR_DOCKER}" != "true" ]] && [[ "${DOCKER_ENABLED}" == "true" ]]; then
  log.debug 'Docker enabled runner detected and Docker daemon wait is enabled'
  log.debug 'Waiting until Docker is available or the timeout is reached'
  timeout 120s bash -c 'until docker ps ;do sleep 1; done'
else
  log.notice 'Docker wait check skipped. Either Docker is disabled or the wait is disabled, continuing with entrypoint'
fi

# Unset entrypoint environment variables so they don't leak into the runner environment
unset RUNNER_NAME RUNNER_REPO RUNNER_TOKEN STARTUP_DELAY_IN_SECONDS DISABLE_WAIT_FOR_DOCKER

# Docker ignores PAM and thus never loads the system environment variables that
# are meant to be set in every environment of every user. We emulate the PAM
# behavior by reading the environment variables without interpreting them.
#
# https://github.com/actions-runner-controller/actions-runner-controller/issues/1135
# https://github.com/actions/runner/issues/1703

# /etc/environment may not exist when running unit tests depending on the platform being used
# (e.g. Mac OS) so we just skip the mapping entirely
if [ -z "${UNITTEST:-}" ]; then
  mapfile -t env </etc/environment
fi
update-status "Idle"
exec env -- "${env[@]}" ./run.sh
trap - TERM
@@ -0,0 +1,99 @@
#!/bin/bash

# This should be sufficiently shorter than terminationGracePeriodSeconds,
# so that the job is cancelled immediately, instead of hanging for 10 minutes or so and failing without any error message.
RUNNER_GRACEFUL_STOP_TIMEOUT=${RUNNER_GRACEFUL_STOP_TIMEOUT:-15}

graceful_stop() {
  log.notice "Executing actions-runner-controller's SIGTERM handler."
  log.notice "Note that if this takes more time than terminationGracePeriodSeconds, the runner will be forcefully terminated by Kubernetes, which may result in the in-progress workflow job, if any, failing."

  log.notice "Ensuring dockerd is still running."
  if ! docker ps -a; then
    log.warning "Detected configuration error: dockerd should be running but is already gone. This is wrong. Ensure that your init system does NOT pass SIGTERM directly to dockerd!"
  fi

  # The below procedure atomically removes the runner from the GitHub Actions service,
  # to ensure that the runner is not running any job.
  # This is required so that we don't terminate the actions runner agent while it's running a job.
  # If we didn't do this atomically, we might end up with a rare race where
  # the runner agent is terminated while it was about to start a job.

  # `pushd` is needed to run config.sh successfully.
  # Without it, the author of this script ended up with errors like the below:
  #   Cannot connect to server, because config files are missing. Skipping removing runner from the server.
  #   Does not exist. Skipping Removing .credentials
  #   Does not exist. Skipping Removing .runner
  if ! pushd /runner; then
    log.error "Failed to pushd ${RUNNER_HOME}"
    exit 1
  fi

  # We need to wait for the registration first.
  # Otherwise, a direct runner pod deletion triggered while entrypoint.sh is about to register the runner with
  # config.sh can result in this graceful stop process being skipped.
  # In that case, the pod is eventually and forcefully terminated by ARC and K8s, and
  # any workflow job that happened to be running might get cancelled prematurely.
  log.notice "Waiting for the runner to register first."
  while ! [ -f /runner/.runner ]; do
    sleep 1
  done
  log.notice "Observed that the runner has been registered."

  if ! /runner/config.sh remove --token "$RUNNER_TOKEN"; then
    i=0
    log.notice "Waiting up to RUNNER_GRACEFUL_STOP_TIMEOUT=$RUNNER_GRACEFUL_STOP_TIMEOUT seconds for the runner agent to stop by itself."
    while [[ $i -lt $RUNNER_GRACEFUL_STOP_TIMEOUT ]]; do
      sleep 1
      if ! pgrep Runner.Listener > /dev/null; then
        log.notice "The runner agent stopped before RUNNER_GRACEFUL_STOP_TIMEOUT=$RUNNER_GRACEFUL_STOP_TIMEOUT"
        break
      fi
      i=$((i+1))
    done
  fi

  if ! popd; then
    log.error "Failed to popd from ${RUNNER_HOME}"
    exit 1
  fi

  if pgrep Runner.Listener > /dev/null; then
    # The below procedure fixes the runner to correctly notify the Actions service of the cancellation of this runner.
    # It enables you to see `Error: The operation was canceled.` in the workflow job log, in case a job was still running on this runner when the
    # termination was requested.
    #
    # Note though, due to how Actions works, not all job steps get `Error: The operation was canceled.` in the job step logs.
    # Jobs that were still in the first `Set up job` step seem to get `Error: A task was canceled.` instead.
    #
    # Anyway, without this, a runner pod being "forcefully" killed by any other controller (like cluster-autoscaler) can result in the workflow job
    # hanging for 10 minutes or so.
    # After 10 minutes, the Actions UI just shows the failure icon for the step, without even showing `Error: The operation was canceled.`,
    # which is confusing.
    runner_listener_pid=$(pgrep Runner.Listener)
    log.notice "Sending SIGTERM to the actions runner agent ($runner_listener_pid)."
    kill -TERM "$runner_listener_pid"

    log.notice "SIGTERM sent. If the runner is still running a job, you'll probably see \"Error: The operation was canceled.\" in its log."
    log.notice "Waiting for the actions runner agent to stop."
    while pgrep Runner.Listener > /dev/null; do
      sleep 1
    done
  fi

  # This message is supposed to be output only after the runner agent outputs:
  #   2022-08-27 02:04:37Z: Job test3 completed with result: Canceled
  # because this graceful stopping logic is basically intended to give the runner agent the time
  # needed to "Cancel" the job.
  # Before we had this logic, the runner agent wasn't even able to output the Canceled message, hence it was
  # unable to gracefully stop, and the workflow job hung practically forever.
  log.notice "The actions runner process exited."

  if [ "$RUNNER_INIT_PID" != "" ]; then
    log.notice "Holding on until runner init (pid $RUNNER_INIT_PID) exits, so that there will hopefully be no zombie processes remaining."
    # We don't need to kill -TERM $RUNNER_INIT_PID as the init is supposed to exit by itself once the foreground process (= the runner agent) exits.
    wait "$RUNNER_INIT_PID" || :
  fi

  log.notice "Graceful stop completed."
}
@@ -1,8 +1,8 @@
#!/usr/bin/env bash
set -Eeuo pipefail

# shellcheck source=runner/logger.bash
source logger.bash
# shellcheck source=runner/logger.sh
source logger.sh

log.debug "Running ARC Job Completed Hooks"
@@ -1,8 +1,8 @@
#!/usr/bin/env bash
set -Eeuo pipefail

# shellcheck source=runner/logger.bash
source logger.bash
# shellcheck source=runner/logger.sh
source logger.sh

log.debug "Running ARC Job Started Hooks"
@@ -6,7 +6,7 @@
# are not using any variables that need to be set, and are not using any pipes.

# This logger implementation can be replaced with another logger implementation
# by placing a script called `logger.bash` in `/usr/local/bin` of the image. The
# by placing a script called `logger.sh` in `/usr/local/bin` of the image. The
# only requirement for the script is that it defines the following functions:
#
# - `log.debug`
@@ -1,72 +1,172 @@
#!/bin/bash
source logger.bash
source logger.sh

function wait_for_process () {
  local max_time_wait=30
  local process_name="$1"
  local waited_sec=0
  while ! pgrep "$process_name" >/dev/null && ((waited_sec < max_time_wait)); do
    log.debug "Process $process_name is not running yet. Retrying in 1 second"
    log.debug "Waited $waited_sec seconds of $max_time_wait seconds"
    sleep 1
    ((waited_sec=waited_sec+1))
    if ((waited_sec >= max_time_wait)); then
      return 1
    fi
  done
  return 0
}
RUNNER_ASSETS_DIR=${RUNNER_ASSETS_DIR:-/runnertmp}
RUNNER_HOME=${RUNNER_HOME:-/runner}

sudo /bin/bash <<SCRIPT
mkdir -p /etc/docker
# Let GitHub runner execute these hooks. These environment variables are used by GitHub's Runner as described here
# https://github.com/actions/runner/blob/main/docs/adrs/1751-runner-job-hooks.md
# Scripts referenced in the ACTIONS_RUNNER_HOOK_ environment variables must end in .sh or .ps1
# for it to become a valid hook script, otherwise GitHub will fail to run the hook
export ACTIONS_RUNNER_HOOK_JOB_STARTED=/etc/arc/hooks/job-started.sh
export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/etc/arc/hooks/job-completed.sh

if [ ! -f /etc/docker/daemon.json ]; then
  echo "{}" > /etc/docker/daemon.json
if [ -n "${STARTUP_DELAY_IN_SECONDS}" ]; then
  log.notice "Delaying startup by ${STARTUP_DELAY_IN_SECONDS} seconds"
  sleep "${STARTUP_DELAY_IN_SECONDS}"
fi

if [ -n "${MTU}" ]; then
  jq ".\"mtu\" = ${MTU}" /etc/docker/daemon.json > /tmp/.daemon.json && mv /tmp/.daemon.json /etc/docker/daemon.json
  # See https://docs.docker.com/engine/security/rootless/
  echo "environment=DOCKERD_ROOTLESS_ROOTLESSKIT_MTU=${MTU}" >> /etc/supervisor/conf.d/dockerd.conf
if [ -z "${GITHUB_URL}" ]; then
  log.debug 'Working with public GitHub'
  GITHUB_URL="https://github.com/"
else
  length=${#GITHUB_URL}
  last_char=${GITHUB_URL:length-1:1}

  [[ $last_char != "/" ]] && GITHUB_URL="$GITHUB_URL/"; :
  log.debug "Github endpoint URL ${GITHUB_URL}"
fi

if [ -n "${DOCKER_REGISTRY_MIRROR}" ]; then
  jq ".\"registry-mirrors\"[0] = \"${DOCKER_REGISTRY_MIRROR}\"" /etc/docker/daemon.json > /tmp/.daemon.json && mv /tmp/.daemon.json /etc/docker/daemon.json
if [ -z "${RUNNER_NAME}" ]; then
  log.error 'RUNNER_NAME must be set'
  exit 1
fi
SCRIPT

dump() {
  local path=${1:?missing required <path> argument}
  shift
  printf -- "%s\n---\n" "${*//\{path\}/"$path"}" 1>&2
  cat "$path" 1>&2
  printf -- '---\n' 1>&2
}
if [ -n "${RUNNER_ORG}" ] && [ -n "${RUNNER_REPO}" ] && [ -n "${RUNNER_ENTERPRISE}" ]; then
  ATTACH="${RUNNER_ORG}/${RUNNER_REPO}"
elif [ -n "${RUNNER_ORG}" ]; then
  ATTACH="${RUNNER_ORG}"
elif [ -n "${RUNNER_REPO}" ]; then
  ATTACH="${RUNNER_REPO}"
elif [ -n "${RUNNER_ENTERPRISE}" ]; then
  ATTACH="enterprises/${RUNNER_ENTERPRISE}"
else
  log.error 'At least one of RUNNER_ORG, RUNNER_REPO, or RUNNER_ENTERPRISE must be set'
  exit 1
fi

for config in /etc/docker/daemon.json /etc/supervisor/conf.d/dockerd.conf; do
  dump "$config" 'Using {path} with the following content:'
if [ -z "${RUNNER_TOKEN}" ]; then
  log.error 'RUNNER_TOKEN must be set'
  exit 1
fi

if [ -z "${RUNNER_REPO}" ] && [ -n "${RUNNER_GROUP}" ];then
  RUNNER_GROUPS=${RUNNER_GROUP}
fi

# Hack due to https://github.com/actions-runner-controller/actions-runner-controller/issues/252#issuecomment-758338483
if [ ! -d "${RUNNER_HOME}" ]; then
  log.error "$RUNNER_HOME should be an emptyDir mount. Please fix the pod spec."
  exit 1
fi

# if this is not a testing environment
if [[ "${UNITTEST:-}" == '' ]]; then
  sudo chown -R runner:docker "$RUNNER_HOME"
  # enable dotglob so we can copy a ".env" file to load in env vars as part of the service startup if one is provided
  # loading a .env from the root of the service is part of the actions/runner logic
  shopt -s dotglob
  # use cp instead of mv to avoid issues when src and dst are on different devices
  cp -r "$RUNNER_ASSETS_DIR"/* "$RUNNER_HOME"/
  shopt -u dotglob
fi

if ! cd "${RUNNER_HOME}"; then
  log.error "Failed to cd into ${RUNNER_HOME}"
  exit 1
fi

# past that point, it's all relative paths from /runner

config_args=()
if [ "${RUNNER_FEATURE_FLAG_ONCE:-}" != "true" ] && [ "${RUNNER_EPHEMERAL}" == "true" ]; then
  config_args+=(--ephemeral)
  log.debug 'Passing --ephemeral to config.sh to enable the ephemeral runner.'
fi
if [ "${DISABLE_RUNNER_UPDATE:-}" == "true" ]; then
  config_args+=(--disableupdate)
  log.debug 'Passing --disableupdate to config.sh to disable automatic runner updates.'
fi

update-status "Registering"

retries_left=10
while [[ ${retries_left} -gt 0 ]]; do
  log.debug 'Configuring the runner.'
  ./config.sh --unattended --replace \
    --name "${RUNNER_NAME}" \
    --url "${GITHUB_URL}${ATTACH}" \
    --token "${RUNNER_TOKEN}" \
    --runnergroup "${RUNNER_GROUPS}" \
    --labels "${RUNNER_LABELS}" \
    --work "${RUNNER_WORKDIR}" "${config_args[@]}"

  if [ -f .runner ]; then
    log.debug 'Runner successfully configured.'
    break
  fi

  log.debug 'Configuration failed. Retrying'
  retries_left=$((retries_left - 1))
  sleep 1
done

log.debug 'Starting supervisor daemon'
sudo /usr/bin/supervisord -n >> /dev/null 2>&1 &

log.debug 'Waiting for processes to be running...'
processes=(dockerd)

for process in "${processes[@]}"; do
  if ! wait_for_process "$process"; then
    log.error "$process is not running after max time"
    dump /var/log/dockerd.err.log 'Dumping {path} to aid investigation'
    dump /var/log/supervisor/supervisord.log 'Dumping {path} to aid investigation'
    exit 1
  else
    log.debug "$process is running"
  fi
done

if [ -n "${MTU}" ]; then
  sudo ifconfig docker0 mtu "${MTU}" up
if [ ! -f .runner ]; then
  # we couldn't configure and register the runner; no point continuing
  log.error 'Configuration failed!'
  exit 2
fi

# Wait for processes to be running
entrypoint.sh
cat .runner
# Note: the `.runner` file's content should be something like the below:
#
# $ cat /runner/.runner
# {
#   "agentId": 117, #=> corresponds to the ID of the runner
#   "agentName": "THE_RUNNER_POD_NAME",
#   "poolId": 1,
#   "poolName": "Default",
#   "serverUrl": "https://pipelines.actions.githubusercontent.com/SOME_RANDOM_ID",
#   "gitHubUrl": "https://github.com/USER/REPO",
#   "workFolder": "/some/work/dir" #=> corresponds to Runner.Spec.WorkDir
# }
#
# Especially `agentId` is important, as other than listing all the runners in the repo,
# this is the only chance we could get the exact runner ID, which can be useful for further
# GitHub API calls like the below. Note that 171 is the agentId seen above.
# curl \
#   -H "Accept: application/vnd.github.v3+json" \
#   -H "Authorization: bearer ${GITHUB_TOKEN}"
#   https://api.github.com/repos/USER/REPO/actions/runners/171

# Hack due to the DinD volumes
if [ -z "${UNITTEST:-}" ] && [ -e ./externalstmp ]; then
  mkdir -p ./externals
  mv ./externalstmp/* ./externals/
fi

if [[ "${DISABLE_WAIT_FOR_DOCKER}" != "true" ]] && [[ "${DOCKER_ENABLED}" == "true" ]]; then
  log.debug 'Docker enabled runner detected and Docker daemon wait is enabled'
  log.debug 'Waiting until Docker is available or the timeout is reached'
  timeout 120s bash -c 'until docker ps ;do sleep 1; done'
else
  log.notice 'Docker wait check skipped. Either Docker is disabled or the wait is disabled, continuing with entrypoint'
fi

# Unset entrypoint environment variables so they don't leak into the runner environment
unset RUNNER_NAME RUNNER_REPO RUNNER_TOKEN STARTUP_DELAY_IN_SECONDS DISABLE_WAIT_FOR_DOCKER

# Docker ignores PAM and thus never loads the system environment variables that
# are meant to be set in every environment of every user. We emulate the PAM
# behavior by reading the environment variables without interpreting them.
#
# https://github.com/actions-runner-controller/actions-runner-controller/issues/1135
# https://github.com/actions/runner/issues/1703

# /etc/environment may not exist when running unit tests depending on the platform being used
# (e.g. Mac OS) so we just skip the mapping entirely
if [ -z "${UNITTEST:-}" ]; then
  mapfile -t env </etc/environment
fi
update-status "Idle"
exec env -- "${env[@]}" ./run.sh
@@ -2,8 +2,8 @@
set -Eeuo pipefail

if [[ ${1:-} == '' ]]; then
  # shellcheck source=runner/logger.bash
  source logger.bash
  # shellcheck source=runner/logger.sh
  source logger.sh
  log.error "Missing required argument -- '<phase>'"
  exit 64
fi
@@ -26,6 +26,6 @@ if [[ ${RUNNER_STATUS_UPDATE_HOOK:-false} == true ]]; then
    --show-error \
    --silent \
    --request PATCH \
    "${apiserver}/apis/actions.summerwind.dev/v1alpha1/namespaces/${namespace}/runners/${HOSTNAME}/status"
  1>&-
    "${apiserver}/apis/actions.summerwind.dev/v1alpha1/namespaces/${namespace}/runners/${HOSTNAME}/status" \
    1>/dev/null
fi
@@ -0,0 +1,17 @@
#!/bin/bash

function wait_for_process () {
  local max_time_wait=30
  local process_name="$1"
  local waited_sec=0
  while ! pgrep "$process_name" >/dev/null && ((waited_sec < max_time_wait)); do
    log.debug "Process $process_name is not running yet. Retrying in 1 second"
    log.debug "Waited $waited_sec seconds of $max_time_wait seconds"
    sleep 1
    ((waited_sec=waited_sec+1))
    if ((waited_sec >= max_time_wait)); then
      return 1
    fi
  done
  return 0
}
@@ -14,6 +14,7 @@ import (
	"github.com/actions-runner-controller/actions-runner-controller/testing"
	"github.com/google/go-github/v47/github"
	"github.com/onsi/gomega"
	"github.com/stretchr/testify/require"
	"golang.org/x/oauth2"
	"sigs.k8s.io/yaml"
)
@@ -330,6 +331,10 @@ func TestE2E(t *testing.T) {
				t.Run(fmt.Sprintf("update runners - attempt %d", i), func(t *testing.T) {
					env.deploy(t, RunnerDeployments, testID, fmt.Sprintf("ROLLING_UPDATE_PHASE=%d", i))
				})

				t.Run(fmt.Sprintf("set deletiontimestamps on runner pods - attempt %d", i), func(t *testing.T) {
					env.setDeletionTimestampsOnRunningPods(t, RunnerDeployments)
				})
			}
		}
	}()
@@ -370,6 +375,8 @@ type env struct {
	doDockerBuild                       bool
	containerMode                       string
	runnerServiceAccuontName            string
	runnerGracefulStopTimeout           string
	runnerTerminationGracePeriodSeconds string
	runnerNamespace                     string
	remoteKubeconfig                    string
	imagePullSecretName                 string
@@ -500,6 +507,8 @@ func initTestEnv(t *testing.T, k8sMinorVer string, vars vars) *env {
	e.testEnterprise = testing.Getenv(t, "TEST_ENTERPRISE", "")
	e.testEphemeral = testing.Getenv(t, "TEST_EPHEMERAL", "")
	e.runnerServiceAccuontName = testing.Getenv(t, "TEST_RUNNER_SERVICE_ACCOUNT_NAME", "")
	e.runnerTerminationGracePeriodSeconds = testing.Getenv(t, "TEST_RUNNER_TERMINATION_GRACE_PERIOD_SECONDS", "30")
	e.runnerGracefulStopTimeout = testing.Getenv(t, "TEST_RUNNER_GRACEFUL_STOP_TIMEOUT", "15")
	e.runnerNamespace = testing.Getenv(t, "TEST_RUNNER_NAMESPACE", "default")
	e.remoteKubeconfig = testing.Getenv(t, "ARC_E2E_REMOTE_KUBECONFIG", "")
	e.imagePullSecretName = testing.Getenv(t, "ARC_E2E_IMAGE_PULL_SECRET_NAME", "")
@@ -712,6 +721,48 @@ func (e *env) undeploy(t *testing.T, kind DeployKind, testID string) {
	e.do(t, "delete", kind, testID)
}

func (e *env) setDeletionTimestampsOnRunningPods(t *testing.T, deployKind DeployKind) {
	t.Helper()

	var scope, kind, labelKind string
	if e.testOrg != "" {
		scope = "org"
	} else if e.testEnterprise != "" {
		scope = "enterprise"
	} else {
		scope = "repo"
	}

	if deployKind == RunnerDeployments {
		kind = "runnerdeploy"
		labelKind = "runner-deployment"
	} else {
		kind = "runnerset"
		labelKind = "runnerset"
	}

	label := fmt.Sprintf("%s-name=%s-%s", labelKind, scope, kind)

	ctx := context.Background()
	c := e.getKubectlConfig()

	t.Logf("Finding pods with label %s", label)

	pods, err := e.Kubectl.FindPods(ctx, label, c)
	require.NoError(t, err)

	if len(pods) == 0 {
		return
	}

	t.Logf("Setting deletionTimestamps on pods %s", strings.Join(pods, ", "))

	err = e.Kubectl.DeletePods(ctx, pods, c)
	require.NoError(t, err)

	t.Logf("Deleted pods %s", strings.Join(pods, ", "))
}

func (e *env) do(t *testing.T, op string, kind DeployKind, testID string, env ...string) {
	t.Helper()
@@ -722,6 +773,8 @@ func (e *env) do(t *testing.T, op string, kind DeployKind, testID string, env ...string) {
		"OP=" + op,
		"RUNNER_NAMESPACE=" + e.runnerNamespace,
		"RUNNER_SERVICE_ACCOUNT_NAME=" + e.runnerServiceAccuontName,
		"RUNNER_GRACEFUL_STOP_TIMEOUT=" + e.runnerGracefulStopTimeout,
		"RUNNER_TERMINATION_GRACE_PERIOD_SECONDS=" + e.runnerTerminationGracePeriodSeconds,
	}
	scriptEnv = append(scriptEnv, env...)
@@ -825,7 +878,7 @@ func (e *env) testJobs(testID string) []job {
func (e *env) verifyActionsWorkflowRun(t *testing.T, testID string) {
	t.Helper()

	verifyActionsWorkflowRun(t, e.Env, e.testJobs(testID), e.verifyTimeout())
	verifyActionsWorkflowRun(t, e.Env, e.testJobs(testID), e.verifyTimeout(), e.getKubectlConfig())
}

func (e *env) verifyTimeout() time.Duration {
@@ -836,6 +889,18 @@ func (e *env) verifyTimeout() time.Duration {
	return 8 * 60 * time.Second
}

func (e *env) getKubectlConfig() testing.KubectlConfig {
	kubectlEnv := []string{
		"KUBECONFIG=" + e.Kubeconfig,
	}

	cmCfg := testing.KubectlConfig{
		Env: kubectlEnv,
	}

	return cmCfg
}

type job struct {
	name, testArg, configMapName string
}
@@ -969,10 +1034,18 @@ func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNam
		// When rootless, we need to use the `docker` buildx driver, which doesn't support cache export,
		// so we end up with the below error on docker-build:
		//   error: cache export feature is currently not supported for docker driver. Please switch to a different driver (eg. "docker buildx create --use")
		// See https://docs.docker.com/engine/reference/commandline/buildx_create/#docker-container-driver
		// for the `docker-container` driver.
		dockerBuildCache = "--cache-from=type=local,src=/home/runner/.cache/buildx " +
			"--cache-to=type=local,dest=/home/runner/.cache/buildx-new,mode=max "
		dockerfile = "Dockerfile"
		// Note though, if the cache does not exist yet, the buildx build seems to write cache data to /home/runner/.cache/buildx,
		// not buildx-new.
		// I think the following message emitted by buildx in the end is relevant to this behaviour, but am not 100% sure:
		//   WARNING: local cache import at /home/runner/.cache/buildx not found due to err: could not read /home/runner/.cache/buildx/index.json: open /home/runner/.cache/buildx/index.json: no such file or directory
	} else {
		// See https://docs.docker.com/engine/reference/commandline/buildx_create/#docker-driver
		// for the `docker` driver.
		setupBuildXActionWith.Driver = "docker"
		dockerfile = "Dockerfile.nocache"
	}
@ -997,20 +1070,35 @@ func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNam
|
|||
fmt.Sprintf("-f %s .", dockerfile),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if useSudo {
|
||||
steps = append(steps,
|
||||
testing.Step{
|
||||
// https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#local-cache
|
||||
// See https://github.com/moby/buildkit/issues/1896 for why this is needed
|
||||
Run: "rm -rf /home/runner/.cache/buildx && mv /home/runner/.cache/buildx-new /home/runner/.cache/buildx",
|
||||
},
|
||||
testing.Step{
|
||||
Run: "ls -lah /home/runner/.cache/*",
|
||||
},
|
||||
)
|
||||
if useSudo {
|
||||
steps = append(steps,
|
||||
testing.Step{
|
||||
// https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#local-cache
|
||||
// See https://github.com/moby/buildkit/issues/1896 for why this is needed
|
||||
Run: "if -d /home/runner/.cache/buildx-new; then " + sudo + "rm -rf /home/runner/.cache/buildx && " + sudo + `mv /home/runner/.cache/buildx-new /home/runner/.cache/buildx; else echo "/home/runner/.cache/buildx-new is not found. Perhaps you're running this on a stateleess runner?"; fi`,
|
||||
},
|
||||
testing.Step{
|
||||
Run: "ls -lah /home/runner/.cache/*",
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if useSudo {
|
||||
if kind == RunnerDeployments {
|
||||
steps = append(steps,
|
||||
testing.Step{
|
||||
// https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#local-cache
|
||||
// See https://github.com/moby/buildkit/issues/1896 for why this is needed
|
||||
Run: sudo + "rm -rf /home/runner/.cache/buildx && mv /home/runner/.cache/buildx-new /home/runner/.cache/buildx",
|
||||
},
|
||||
testing.Step{
|
||||
Run: sudo + "ls -lah /home/runner/.cache/*",
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
}
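
// Editorial note (a sketch inferred from the linked buildkit issue, not part
// of this commit): the local cache exporter does not prune stale blobs, so
// the steps above rotate the cache directory after each build:
//
//	docker buildx build --cache-from=type=local,src=$HOME/.cache/buildx \
//	  --cache-to=type=local,dest=$HOME/.cache/buildx-new,mode=max .
//	rm -rf $HOME/.cache/buildx && mv $HOME/.cache/buildx-new $HOME/.cache/buildx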

steps = append(steps,
@@ -1062,7 +1150,7 @@ kubectl create cm %s$id --from-literal=status=ok
	}
}

func verifyActionsWorkflowRun(t *testing.T, env *testing.Env, testJobs []job, timeout time.Duration) {
func verifyActionsWorkflowRun(t *testing.T, env *testing.Env, testJobs []job, timeout time.Duration, cmCfg testing.KubectlConfig) {
	t.Helper()

	var expected []string
@@ -1079,14 +1167,6 @@ func verifyActionsWorkflowRun(t *testing.T, env *testing.Env, testJobs []job, ti
	for i := range testJobs {
		testResultCMName := testJobs[i].configMapName

		kubectlEnv := []string{
			"KUBECONFIG=" + env.Kubeconfig,
		}

		cmCfg := testing.KubectlConfig{
			Env: kubectlEnv,
		}

		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
@@ -3,14 +3,14 @@
# UNITTEST: retry config
# Will simulate a configuration failure and expects:
# - the configuration step to be run 10 times
# - the entrypoint script to exit with error code 2
# - the startup script to exit with error code 2
# - the run.sh script to never run.

source ../assets/logging.sh

entrypoint_log() {
startup_log() {
  while read I; do
    printf "\tentrypoint.sh: $I\n"
    printf "\tstartup.sh: $I\n"
  done
}
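
# Editorial note (illustration only, not part of this commit): `2> >(startup_log)`
# below uses bash process substitution, so only stderr is piped through the
# prefixing function above while stdout is left untouched. A self-contained sketch:
#
#   startup_log() { while read I; do printf "\tstartup.sh: %s\n" "$I"; done; }
#   { echo oops >&2; } 2> >(startup_log)   # prints "	startup.sh: oops"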

@@ -44,12 +44,12 @@ cleanup() {
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT

log "Running the entrypoint"
log "Running the startup script"
log ""

# Run the runner entrypoint script which as a final step runs this
# Run the runner startup script which as a final step runs this
# unit test's run.sh as it was symlinked
../../../runner/entrypoint.sh 2> >(entrypoint_log)
../../../runner/startup.sh 2> >(startup_log)

if [ "$?" != "2" ]; then
  error "========================================="
@@ -3,14 +3,14 @@
# UNITTEST: should work as non ephemeral
# Will simulate a scenario where ephemeral=false. Expects:
# - the configuration step to be run exactly once
# - the entrypoint script to exit with no error
# - the startup script to exit with no error
# - the run.sh script to run without the --once flag

source ../assets/logging.sh

entrypoint_log() {
startup_log() {
  while read I; do
    printf "\tentrypoint.sh: $I\n"
    printf "\tstartup.sh: $I\n"
  done
}

@@ -44,16 +44,16 @@ cleanup() {
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT

log "Running the entrypoint"
log "Running the startup script"
log ""

# Run the runner entrypoint script which as a final step runs this
# Run the runner startup script which as a final step runs this
# unit test's run.sh as it was symlinked
../../../runner/entrypoint.sh 2> >(entrypoint_log)
../../../runner/startup.sh 2> >(startup_log)

if [ "$?" != "0" ]; then
  error "==========================================="
  error "FAIL | Entrypoint script did not exit successfully"
  error "FAIL | Startup script did not exit successfully"
  exit 1
fi

@@ -3,14 +3,14 @@
# UNITTEST: should work normally
# Will simulate a normal execution scenario. Expects:
# - the configuration step to be run exactly once
# - the entrypoint script to exit with no error
# - the startup script to exit with no error
# - the run.sh script to run with the --once flag activated.

source ../assets/logging.sh

entrypoint_log() {
startup_log() {
  while read I; do
    printf "\tentrypoint.sh: $I\n"
    printf "\tstartup.sh: $I\n"
  done
}

@@ -42,12 +42,12 @@ cleanup() {
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT

log "Running the entrypoint"
log "Running the startup script"
log ""

# Run the runner entrypoint script which as a final step runs this
# Run the runner startup script which as a final step runs this
# unit test's run.sh as it was symlinked
../../../runner/entrypoint.sh 2> >(entrypoint_log)
../../../runner/startup.sh 2> >(startup_log)

if [ "$?" != "0" ]; then
  error "=========================="
@@ -3,14 +3,14 @@
# UNITTEST: should work disable update
# Will simulate a scenario where disableupdate=true. Expects:
# - the configuration step to be run exactly once
# - the entrypoint script to exit with no error
# - the startup script to exit with no error
# - the config.sh script to run with the --disableupdate flag set to 'true'.

source ../assets/logging.sh

entrypoint_log() {
startup_log() {
  while read I; do
    printf "\tentrypoint.sh: $I\n"
    printf "\tstartup.sh: $I\n"
  done
}

@@ -43,12 +43,12 @@ cleanup() {
# Always run cleanup when test ends regardless of how it ends
trap cleanup SIGINT SIGTERM SIGQUIT EXIT

log "Running the entrypoint"
log "Running the startup script"
log ""

# run.sh and config.sh get used by the runner's real entrypoint.sh and are part of actions/runner.
# We symlink dummy versions so the entrypoint.sh can run, allowing us to test the real entrypoint.sh
../../../runner/entrypoint.sh 2> >(entrypoint_log)
# run.sh and config.sh get used by the runner's real startup.sh and are part of actions/runner.
# We symlink dummy versions so the startup.sh can run, allowing us to test the real startup.sh
../../../runner/startup.sh 2> >(startup_log)

if [ "$?" != "0" ]; then
  error "=========================="
@@ -6,6 +6,7 @@ import (
	"fmt"
	"os"
	"os/exec"
	"strings"
	"time"

	"github.com/actions-runner-controller/actions-runner-controller/testing/runtime"
@@ -112,6 +113,35 @@ func (k *Kubectl) WaitUntilDeployAvailable(ctx context.Context, name string, cfg
	return nil
}

func (k *Kubectl) FindPods(ctx context.Context, label string, cfg KubectlConfig) ([]string, error) {
	args := []string{"po", "-l", label, "-o", `jsonpath={range .items[*]}{.metadata.name}{"\n"}`}

	out, err := k.CombinedOutput(k.kubectlCmd(ctx, "get", args, cfg))
	if err != nil {
		return nil, err
	}

	var pods []string
	for _, l := range strings.Split(out, "\n") {
		if l != "" {
			pods = append(pods, l)
		}
	}

	return pods, nil
}

func (k *Kubectl) DeletePods(ctx context.Context, names []string, cfg KubectlConfig) error {
	args := []string{"po"}
	args = append(args, names...)

	if _, err := k.CombinedOutput(k.kubectlCmd(ctx, "delete", args, cfg)); err != nil {
		return err
	}

	return nil
}
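
// A minimal usage sketch (editorial illustration; the label value and control
// flow are assumptions, not part of this commit): these helpers let the E2E
// test simulate a pod eviction and then verify the runner stops gracefully:
//
//	pods, err := k.FindPods(ctx, "runner-deployment-name=example-runnerdeploy", cfg)
//	if err == nil && len(pods) > 0 {
//		// Deleting the pod sends SIGTERM, which the runner's graceful-stop
//		// handling is expected to honor within RUNNER_GRACEFUL_STOP_TIMEOUT.
//		_ = k.DeletePods(ctx, pods, cfg)
//	}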

func (k *Kubectl) kubectlCmd(ctx context.Context, c string, args []string, cfg KubectlConfig) *exec.Cmd {
	args = append([]string{c}, args...)