feat: EKS IAM Roles for Service Accounts for Runner Pods (#226)

One of the pod recreation conditions has been modified to use hash of runner spec, so that the controller does not keep restarting pods mutated by admission webhooks. This naturally allows us, for example, to use IRSA for EKS that requires its admission webhook to mutate the runner pod to have additional, IRSA-related volumes, volume mounts and env. Resolves #200
2020-12-08 17:56:06 +09:00 · 2020-12-08 17:56:06 +09:00 · dfffd3fb62
parent f710a54110
commit dfffd3fb62
10 changed files with 197 additions and 23 deletions
--- a/16
+++ b/16
@ -126,20 +126,22 @@ release/clean:
 	rm -rf release

 .PHONY: acceptance
-acceptance: release/clean release
-	ACCEPTANCE_TEST_SECRET_TYPE=token make acceptance/setup acceptance/tests acceptance/teardown
-	ACCEPTANCE_TEST_SECRET_TYPE=app make acceptance/setup acceptance/tests acceptance/teardown
-	ACCEPTANCE_TEST_DEPLOYMENT_TOOL=helm ACCEPTANCE_TEST_SECRET_TYPE=token make acceptance/setup acceptance/tests acceptance/teardown
-	ACCEPTANCE_TEST_DEPLOYMENT_TOOL=helm ACCEPTANCE_TEST_SECRET_TYPE=app make acceptance/setup acceptance/tests acceptance/teardown
+acceptance: release/clean docker-build docker-push release
+	ACCEPTANCE_TEST_SECRET_TYPE=token make acceptance/kind acceptance/setup acceptance/tests acceptance/teardown
+	ACCEPTANCE_TEST_SECRET_TYPE=app make acceptance/kind acceptance/setup acceptance/tests acceptance/teardown
+	ACCEPTANCE_TEST_DEPLOYMENT_TOOL=helm ACCEPTANCE_TEST_SECRET_TYPE=token make acceptance/kind acceptance/setup acceptance/tests acceptance/teardown
+	ACCEPTANCE_TEST_DEPLOYMENT_TOOL=helm ACCEPTANCE_TEST_SECRET_TYPE=app make acceptance/kind acceptance/setup acceptance/tests acceptance/teardown

-acceptance/setup:
+acceptance/kind:
 	kind create cluster --name acceptance
 	kubectl cluster-info --context kind-acceptance
+
+acceptance/setup:
 	kubectl apply --validate=false -f https://github.com/jetstack/cert-manager/releases/download/v1.0.4/cert-manager.yaml	#kubectl create namespace actions-runner-system
 	kubectl -n cert-manager wait deploy/cert-manager-cainjector --for condition=available --timeout 60s
 	kubectl -n cert-manager wait deploy/cert-manager-webhook --for condition=available --timeout 60s
 	kubectl -n cert-manager wait deploy/cert-manager --for condition=available --timeout 60s
-	kubectl create namespace actions-runner-system
+	kubectl create namespace actions-runner-system || true
 	# Adhocly wait for some time until cert-manager's admission webhook gets ready
 	sleep 5

--- a/README.md
+++ b/README.md
@ -404,6 +404,32 @@ spec:
      group: NewGroup
 ```

+## Using EKS IAM role for service accounts
+
+`actions-runner-controller` v0.15.0 or later has support for EKS IAM role for service accounts.
+
+As similar as for regular pods and deployments, you firstly need an existing service account with the IAM role associated.
+Create one using e.g. `eksctl`. You can refer to [the EKS documentation](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) for more details.
+
+Once you set up the service account, all you need is to add `serviceAccountName` and `fsGroup` to any pods that uses
+the IAM-role enabled service account.
+
+For `RunnerDeployment`, you can set those two fields under the runner spec at `RunnerDeployment.Spec.Template`:
+
+```yaml
+apiVersion: actions.summerwind.dev/v1alpha1
+kind: RunnerDeployment
+metadata:
+  name: example-runnerdeploy
+spec:
+  template:
+    spec:
+      repository: USER/REO
+      serviceAccountName: my-service-account
+      securityContext:
+        fsGroup: 1447
+```
+
 ## Software installed in the runner image

 The GitHub hosted runners include a large amount of pre-installed software packages. For Ubuntu 18.04, this list can be found at <https://github.com/actions/virtual-environments/blob/master/images/linux/Ubuntu1804-README.md>
@ -458,7 +484,10 @@ If you'd like to modify the controller to fork or contribute, I'd suggest using
 the acceptance test:

 ```shell
-NAME=$DOCKER_USER/actions-runner-controller VERSION=dev \
+# This sets `VERSION` envvar to some appropriate value
+. hack/make-env.sh
+
+NAME=$DOCKER_USER/actions-runner-controller \
  GITHUB_TOKEN=*** \
  APP_ID=*** \
  PRIVATE_KEY_FILE_PATH=path/to/pem/file \
@ -474,6 +503,19 @@ The test creates a one-off `kind` cluster, deploys `cert-manager` and `actions-r
 creates a `RunnerDeployment` custom resource for a public Git repository to confirm that the
 controller is able to bring up a runner pod with the actions runner registration token installed.

+If you prefer to test in a non-kind cluster, you can instead run:
+
+```shell script
+KUBECONFIG=path/to/kubeconfig \
+NAME=$DOCKER_USER/actions-runner-controller \
+  GITHUB_TOKEN=*** \
+  APP_ID=*** \
+  PRIVATE_KEY_FILE_PATH=path/to/pem/file \
+  INSTALLATION_ID=*** \
+  ACCEPTANCE_TEST_SECRET_TYPE=token \
+  make docker-build docker-push \
+       acceptance/setup acceptance/tests
+```
 # Alternatives

 The following is a list of alternative solutions that may better fit you depending on your use-case:
--- a/acceptance/checks.sh
+++ b/acceptance/checks.sh
@ -24,6 +24,6 @@ echo Found pod ${pod_name}.

 echo Waiting for pod ${runner_name} to become ready... 1>&2

-kubectl wait pod/${runner_name} --for condition=ready --timeout 120s
+kubectl wait pod/${runner_name} --for condition=ready --timeout 180s

 echo All tests passed. 1>&2
--- a/acceptance/deploy.sh
+++ b/acceptance/deploy.sh
@ -32,7 +32,7 @@ else
  kubectl apply \
    -n actions-runner-system \
    -f release/actions-runner-controller.yaml
-  kubectl -n actions-runner-system wait deploy/controller-manager --for condition=available
+  kubectl -n actions-runner-system wait deploy/controller-manager --for condition=available --timeout 60s
 fi

 # Adhocly wait for some time until actions-runner-controller's admission webhook gets ready
--- a/controllers/runner_controller.go
+++ b/controllers/runner_controller.go
@ -19,7 +19,7 @@ package controllers
 import (
 	"context"
 	"fmt"
-	"reflect"
+	"github.com/summerwind/actions-runner-controller/hash"
 	"strings"

 	"github.com/go-logr/logr"
@ -39,6 +39,8 @@ import (
 const (
 	containerName = "runner"
 	finalizerName = "runner.actions.summerwind.dev"
+
+	LabelKeyPodTemplateHash = "pod-template-hash"
 )

 // RunnerReconciler reconciles a Runner object
@ -198,11 +200,12 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 			return ctrl.Result{}, nil
 		}

-		// Filter out token that is changed hourly.
-		currentEnvValues := filterEnvVars(pod.Spec.Containers[0].Env, "RUNNER_TOKEN")
-		newEnvValues := filterEnvVars(newPod.Spec.Containers[0].Env, "RUNNER_TOKEN")
+		// See the `newPod` function called above for more information
+		// about when this hash changes.
+		curHash := pod.Labels[LabelKeyPodTemplateHash]
+		newHash := newPod.Labels[LabelKeyPodTemplateHash]

-		if !runnerBusy && (!reflect.DeepEqual(currentEnvValues, newEnvValues) || pod.Spec.Containers[0].Image != newPod.Spec.Containers[0].Image) {
+		if !runnerBusy && curHash != newHash {
 			restart = true
 		}

@ -363,11 +366,44 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
 	}

 	env = append(env, runner.Spec.Env...)
+
+	labels := map[string]string{}
+
+	for k, v := range runner.Labels {
+		labels[k] = v
+	}
+
+	// This implies that...
+	//
+	// (1) We recreate the runner pod whenever the runner has changes in:
+	// - metadata.labels (excluding "runner-template-hash" added by the parent RunnerReplicaSet
+	// - metadata.annotations
+	// - metadata.spec (including image, env, organization, repository, group, and so on)
+	// - GithubBaseURL setting of the controller (can be configured via GITHUB_ENTERPRISE_URL)
+	//
+	// (2) We don't recreate the runner pod when there are changes in:
+	// - runner.status.registration.token
+	//   - This token expires and changes hourly, but you don't need to recreate the pod due to that.
+	//     It's the opposite.
+	//     An unexpired token is required only when the runner agent is registering itself on launch.
+	//
+	//     In other words, the registered runner doesn't get invalidated on registration token expiration.
+	//     A registered runner's session and the a registration token seem to have two different and independent
+	//     lifecycles.
+	//
+	//     See https://github.com/summerwind/actions-runner-controller/issues/143 for more context.
+	labels[LabelKeyPodTemplateHash] = hash.FNVHashStringObjects(
+		filterLabels(runner.Labels, LabelKeyRunnerTemplateHash),
+		runner.Annotations,
+		runner.Spec,
+		r.GitHubClient.GithubBaseURL,
+	)
+
 	pod := corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:        runner.Name,
 			Namespace:   runner.Namespace,
-			Labels:      runner.Labels,
+			Labels:      labels,
 			Annotations: runner.Annotations,
 		},
 		Spec: corev1.PodSpec{
--- a/controllers/utils.go
+++ b/controllers/utils.go
@ -1,14 +1,13 @@
 package controllers

-import (
-	corev1 "k8s.io/api/core/v1"
-)
+func filterLabels(labels map[string]string, filter string) map[string]string {
+	filtered := map[string]string{}

-func filterEnvVars(envVars []corev1.EnvVar, filter string) (filtered []corev1.EnvVar) {
-	for _, envVar := range envVars {
-		if envVar.Name != filter {
-			filtered = append(filtered, envVar)
+	for k, v := range labels {
+		if k != filter {
+			filtered[k] = v
 		}
 	}
+
 	return filtered
 }
--- a/controllers/utils_test.go
+++ b/controllers/utils_test.go
@ -0,0 +1,34 @@
+package controllers
+
+import (
+	"reflect"
+	"testing"
+)
+
+func Test_filterLabels(t *testing.T) {
+	type args struct {
+		labels map[string]string
+		filter string
+	}
+	tests := []struct {
+		name string
+		args args
+		want map[string]string
+	}{
+		{
+			name: "ok",
+			args: args{
+				labels: map[string]string{LabelKeyRunnerTemplateHash: "abc", LabelKeyPodTemplateHash: "def"},
+				filter: LabelKeyRunnerTemplateHash,
+			},
+			want: map[string]string{LabelKeyPodTemplateHash: "def"},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := filterLabels(tt.args.labels, tt.args.filter); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("filterLabels() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
--- a/hack/make-env.sh
+++ b/hack/make-env.sh
@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+COMMIT=$(git rev-parse HEAD)
+TAG=$(git describe --exact-match --abbrev=0 --tags "${COMMIT}" 2> /dev/null || true)
+BRANCH=$(git branch | grep \* | cut -d ' ' -f2 | sed -e 's/[^a-zA-Z0-9+=._:/-]*//g' || true)
+VERSION=""
+
+if [ -z "$TAG" ]; then
+  [[ -n "$BRANCH" ]] && VERSION="${BRANCH}-"
+	VERSION="${VERSION}${COMMIT:0:8}"
+else
+	VERSION=$TAG
+fi
+
+if [ -n "$(git diff --shortstat 2> /dev/null | tail -n1)" ]; then
+    VERSION="${VERSION}-dirty"
+fi
+
+export VERSION
--- a/hash/fnv.go
+++ b/hash/fnv.go
@ -0,0 +1,17 @@
+package hash
+
+import (
+	"fmt"
+	"hash/fnv"
+	"k8s.io/apimachinery/pkg/util/rand"
+)
+
+func FNVHashStringObjects(objs ...interface{}) string {
+	hash := fnv.New32a()
+
+	for _, obj := range objs {
+		DeepHashObject(hash, obj)
+	}
+
+	return rand.SafeEncodeString(fmt.Sprint(hash.Sum32()))
+}
--- a/hash/hash.go
+++ b/hash/hash.go
@ -0,0 +1,25 @@
+// Copyright 2015 The Kubernetes Authors.
+// hash.go is copied from kubernetes's pkg/util/hash.go
+// See https://github.com/kubernetes/kubernetes/blob/e1c617a88ec286f5f6cb2589d6ac562d095e1068/pkg/util/hash/hash.go#L25-L37
+
+package hash
+
+import (
+	"hash"
+
+	"github.com/davecgh/go-spew/spew"
+)
+
+// DeepHashObject writes specified object to hash using the spew library
+// which follows pointers and prints actual values of the nested objects
+// ensuring the hash does not change when a pointer changes.
+func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) {
+	hasher.Reset()
+	printer := spew.ConfigState{
+		Indent:         " ",
+		SortKeys:       true,
+		DisableMethods: true,
+		SpewKeys:       true,
+	}
+	printer.Fprintf(hasher, "%#v", objectToWrite)
+}