diff --git a/acceptance/deploy_runners.sh b/acceptance/deploy_runners.sh
index 0c0ecfbe..bb8a21d4 100755
--- a/acceptance/deploy_runners.sh
+++ b/acceptance/deploy_runners.sh
@@ -6,6 +6,8 @@ OP=${OP:-apply}
 
 RUNNER_LABEL=${RUNNER_LABEL:-self-hosted}
 
+cat acceptance/testdata/kubernetes_container_mode.envsubst.yaml | NAMESPACE=${RUNNER_NAMESPACE} envsubst | kubectl apply -f -
+
 if [ -n "${TEST_REPO}" ]; then
   if [ "${USE_RUNNERSET}" != "false" ]; then
     cat acceptance/testdata/runnerset.envsubst.yaml | TEST_ENTERPRISE= TEST_ORG= RUNNER_MIN_REPLICAS=${REPO_RUNNER_MIN_REPLICAS} NAME=repo-runnerset envsubst | kubectl ${OP} -f -
diff --git a/acceptance/testdata/kubernetes_container_mode.envsubst.yaml b/acceptance/testdata/kubernetes_container_mode.envsubst.yaml
index 6c7b4465..43d63c0e 100644
--- a/acceptance/testdata/kubernetes_container_mode.envsubst.yaml
+++ b/acceptance/testdata/kubernetes_container_mode.envsubst.yaml
@@ -20,6 +20,10 @@ rules:
 - apiGroups: [""]
   resources: ["secrets"]
   verbs: ["get", "list", "create", "delete"]
+# Needed to report test success by creating a ConfigMap from within a workflow job step
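+# (a job step might report its result with something like the following; the
+# ConfigMap name here is illustrative, only the "test-result-" prefix is assumed:
+#   kubectl create configmap "test-result-${GITHUB_RUN_ID}" --from-literal=status=success -n "${NAMESPACE}")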
+- apiGroups: [""]
+  resources: ["configmaps"]
+  verbs: ["create", "delete"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
@@ -33,7 +37,7 @@ rules:
 apiVersion: v1
 kind: ServiceAccount
 metadata:
-  name: runner
+  name: ${RUNNER_SERVICE_ACCOUNT_NAME}
   namespace: ${NAMESPACE}
 ---
 # To verify it's working, try:
@@ -50,7 +54,7 @@ metadata:
   namespace: ${NAMESPACE}
 subjects:
 - kind: ServiceAccount
-  name: runner
+  name: ${RUNNER_SERVICE_ACCOUNT_NAME}
   namespace: ${NAMESPACE}
 roleRef:
   kind: ClusterRole
@@ -64,7 +68,7 @@ metadata:
   namespace: ${NAMESPACE}
 subjects:
 - kind: ServiceAccount
-  name: runner
+  name: ${RUNNER_SERVICE_ACCOUNT_NAME}
   namespace: ${NAMESPACE}
 roleRef:
   kind: ClusterRole
diff --git a/acceptance/testdata/runnerdeploy.envsubst.yaml b/acceptance/testdata/runnerdeploy.envsubst.yaml
index a76f38d3..69402111 100644
--- a/acceptance/testdata/runnerdeploy.envsubst.yaml
+++ b/acceptance/testdata/runnerdeploy.envsubst.yaml
@@ -49,6 +49,10 @@ spec:
       labels:
         - "${RUNNER_LABEL}"
 
+      env:
+      - name: ROLLING_UPDATE_PHASE
+        value: "${ROLLING_UPDATE_PHASE}"
+
       #
       # Non-standard working directory
      #
@@ -64,6 +68,7 @@ spec:
         resources:
           requests:
             storage: 10Gi
+      serviceAccountName: ${RUNNER_SERVICE_ACCOUNT_NAME}
 ---
 apiVersion: actions.summerwind.dev/v1alpha1
 kind: HorizontalRunnerAutoscaler
diff --git a/acceptance/testdata/runnerset.envsubst.yaml b/acceptance/testdata/runnerset.envsubst.yaml
index 1809f483..f9e6147e 100644
--- a/acceptance/testdata/runnerset.envsubst.yaml
+++ b/acceptance/testdata/runnerset.envsubst.yaml
@@ -112,6 +112,7 @@ spec:
       labels:
         app: ${NAME}
     spec:
+      serviceAccountName: ${RUNNER_SERVICE_ACCOUNT_NAME}
       containers:
       - name: runner
         imagePullPolicy: IfNotPresent
@@ -120,6 +121,8 @@ spec:
           value: "${RUNNER_FEATURE_FLAG_EPHEMERAL}"
         - name: GOMODCACHE
           value: "/home/runner/.cache/go-mod"
+        - name: ROLLING_UPDATE_PHASE
+          value: "${ROLLING_UPDATE_PHASE}"
         # PV-backed runner work dir
         volumeMounts:
         # Comment out the ephemeral work volume if you're going to test the kubernetes container mode
@@ -152,19 +155,19 @@ spec:
           # https://github.com/actions/setup-go/blob/56a61c9834b4a4950dbbf4740af0b8a98c73b768/src/installer.ts#L144
           mountPath: "/opt/hostedtoolcache"
       # Valid only when dockerdWithinRunnerContainer=false
-      - name: docker
-        # PV-backed runner work dir
-        volumeMounts:
-        - name: work
-          mountPath: /runner/_work
-        # Cache docker image layers, in case dockerdWithinRunnerContainer=false
-        - name: var-lib-docker
-          mountPath: /var/lib/docker
-        # image: mumoshu/actions-runner-dind:dev
+      # - name: docker
+      #   # PV-backed runner work dir
+      #   volumeMounts:
+      #   - name: work
+      #     mountPath: /runner/_work
+      #   # Cache docker image layers, in case dockerdWithinRunnerContainer=false
+      #   - name: var-lib-docker
+      #     mountPath: /var/lib/docker
+      #   # image: mumoshu/actions-runner-dind:dev
 
-        # For buildx cache
-        - name: cache
-          mountPath: "/home/runner/.cache"
+      #   # For buildx cache
+      #   - name: cache
+      #     mountPath: "/home/runner/.cache"
       # Comment out the ephemeral work volume if you're going to test the kubernetes container mode
       # volumes:
       # - name: work
diff --git a/controllers/sync_volumes.go b/controllers/sync_volumes.go
index eb959338..9419c27f 100644
--- a/controllers/sync_volumes.go
+++ b/controllers/sync_volumes.go
@@ -75,6 +75,10 @@ func syncPVC(ctx context.Context, c client.Client, log logr.Logger, ns string, p
 
 	log.V(2).Info("Reconciling runner PVC")
 
+	// TODO: We should probably remove PVCs related to a RunnerSet that no longer exists.
+	// Otherwise, a continuously recreated StatefulSet can leave dangling PVCs forever,
+	// which might stress the cluster.
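+	// Until that is implemented, dangling PVCs can be spotted and removed manually,
+	// e.g. (names illustrative):
+	//   kubectl -n <ns> get pvc | grep <runnerset-name>   # then: kubectl -n <ns> delete pvc <name>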
+
 	var sts appsv1.StatefulSet
 	if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: stsName}, &sts); err != nil {
 		if !kerrors.IsNotFound(err) {
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 36faf6bd..4e21edb9 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"path/filepath"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/actions-runner-controller/actions-runner-controller/testing"
@@ -25,6 +26,8 @@ const (
 )
 
 var (
+	// See the below link for maintained versions of cert-manager
+	// https://cert-manager.io/docs/installation/supported-releases/
 	certManagerVersion = "v1.8.2"
 
 	images = []testing.ContainerImage{
@@ -36,6 +39,8 @@ var (
 	}
 
 	testResultCMNamePrefix = "test-result-"
+
+	RunnerVersion = "2.296.0"
 )
 
 // If you're willing to run this test via VS Code "run test" or "debug test",
@@ -119,6 +124,7 @@ func TestE2E(t *testing.T) {
 			t.Fatalf("Failed to parse duration %q: %v", vt, err)
 		}
 	}
+	env.doDockerBuild = os.Getenv("ARC_E2E_DO_DOCKER_BUILD") != ""
 
 	t.Run("build and load images", func(t *testing.T) {
 		env.buildAndLoadImages(t)
@@ -210,12 +216,37 @@ func TestE2E(t *testing.T) {
 			return
 		}
 
+		ctx, cancel := context.WithCancel(context.Background())
+		go func() {
+			for i := 1; ; i++ {
+				select {
+				case <-ctx.Done():
+					t.Logf("Stopping the continuous rolling-update of runners")
+					return
+				default:
+					time.Sleep(60 * time.Second)
+
+					t.Run(fmt.Sprintf("update runners attempt %d", i), func(t *testing.T) {
+						env.deploy(t, RunnerSets, testID, fmt.Sprintf("ROLLING_UPDATE_PHASE=%d", i))
+					})
+				}
+			}
+		}()
+		t.Cleanup(func() {
+			cancel()
+		})
+
 		t.Run("Verify workflow run result", func(t *testing.T) {
 			env.verifyActionsWorkflowRun(t, testID)
 		})
 	})
 
 	t.Run("RunnerDeployments", func(t *testing.T) {
+		if os.Getenv("ARC_E2E_SKIP_RUNNERDEPLOYMENT") != "" {
+			t.Skip("RunnerDeployments test has been skipped due to ARC_E2E_SKIP_RUNNERDEPLOYMENT")
+		}
+
 		var (
 			testID string
 		)
@@ -285,6 +316,27 @@ func TestE2E(t *testing.T) {
 			return
 		}
 
+		ctx, cancel := context.WithCancel(context.Background())
+		go func() {
+			for i := 1; ; i++ {
+				select {
+				case <-ctx.Done():
+					t.Logf("Stopping the continuous rolling-update of runners")
+					return
+				default:
+					time.Sleep(10 * time.Second)
+
+					t.Run(fmt.Sprintf("update runners - attempt %d", i), func(t *testing.T) {
+						env.deploy(t, RunnerDeployments, testID, fmt.Sprintf("ROLLING_UPDATE_PHASE=%d", i))
+					})
+				}
+			}
+		}()
+		t.Cleanup(func() {
+			cancel()
+		})
+
 		t.Run("Verify workflow run result", func(t *testing.T) {
 			env.verifyActionsWorkflowRun(t, testID)
 		})
@@ -315,7 +367,10 @@ type env struct {
 	minReplicas                  int64
 	dockerdWithinRunnerContainer bool
 	rootlessDocker               bool
+	doDockerBuild                bool
 	containerMode                string
+	runnerServiceAccountName     string
+	runnerNamespace              string
 	remoteKubeconfig             string
 	imagePullSecretName          string
 	imagePullPolicy              string
@@ -383,7 +438,7 @@ func buildVars(repo string) vars {
 			Args: []testing.BuildArg{
 				{
 					Name:  "RUNNER_VERSION",
-					Value: "2.294.0",
+					Value: RunnerVersion,
 				},
 			},
 			Image: runnerImage,
@@ -394,7 +449,7 @@ func buildVars(repo string) vars {
 			Args: []testing.BuildArg{
 				{
 					Name:  "RUNNER_VERSION",
-					Value: "2.294.0",
+					Value: RunnerVersion,
 				},
 			},
 			Image: runnerDindImage,
@@ -405,7 +460,7 @@ func buildVars(repo string) vars {
 			Args: []testing.BuildArg{
 				{
 					Name:  "RUNNER_VERSION",
-					Value: "2.294.0",
+					Value: RunnerVersion,
 				},
 			},
 			Image: runnerRootlessDindImage,
@@ -444,6 +499,8 @@ func initTestEnv(t *testing.T, k8sMinorVer string, vars vars) *env {
 	e.testOrgRepo = testing.Getenv(t, "TEST_ORG_REPO", "")
 	e.testEnterprise = testing.Getenv(t, "TEST_ENTERPRISE", "")
 	e.testEphemeral = testing.Getenv(t, "TEST_EPHEMERAL", "")
+	e.runnerServiceAccountName = testing.Getenv(t, "TEST_RUNNER_SERVICE_ACCOUNT_NAME", "")
+	e.runnerNamespace = testing.Getenv(t, "TEST_RUNNER_NAMESPACE", "default")
 	e.remoteKubeconfig = testing.Getenv(t, "ARC_E2E_REMOTE_KUBECONFIG", "")
 	e.imagePullSecretName = testing.Getenv(t, "ARC_E2E_IMAGE_PULL_SECRET_NAME", "")
 	e.vars = vars
@@ -507,9 +564,9 @@ func (e *env) checkGitHubToken(t *testing.T, tok string) error {
 	c := github.NewClient(&http.Client{Transport: transport})
 	aa, res, err := c.Octocat(context.Background(), "hello")
 	if err != nil {
-		b, err := io.ReadAll(res.Body)
-		if err != nil {
-			t.Logf("%v", err)
+		b, ioerr := io.ReadAll(res.Body)
+		if ioerr != nil {
+			t.Logf("%v", ioerr)
 			return err
 		}
 		t.Logf(string(b))
@@ -518,14 +575,42 @@ func (e *env) checkGitHubToken(t *testing.T, tok string) error {
 
 	t.Logf("%s", aa)
 
-	if _, res, err := c.Actions.CreateRegistrationToken(ctx, e.testOrg, e.testOrgRepo); err != nil {
-		b, err := io.ReadAll(res.Body)
-		if err != nil {
-			t.Logf("%v", err)
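+	// Verify that the token can mint registration tokens at every configured scope
+	// (enterprise, org, and repo) so that scope problems surface before runners deploy.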
+	if e.testEnterprise != "" {
+		if _, res, err := c.Enterprise.CreateRegistrationToken(ctx, e.testEnterprise); err != nil {
+			b, ioerr := io.ReadAll(res.Body)
+			if ioerr != nil {
+				t.Logf("%v", ioerr)
+				return err
+			}
+			t.Logf(string(b))
+			return err
+		}
+	}
+
+	if e.testOrg != "" {
+		if _, res, err := c.Actions.CreateOrganizationRegistrationToken(ctx, e.testOrg); err != nil {
+			b, ioerr := io.ReadAll(res.Body)
+			if ioerr != nil {
+				t.Logf("%v", ioerr)
+				return err
+			}
+			t.Logf(string(b))
+			return err
+		}
+	}
+
+	if e.testRepo != "" {
+		s := strings.Split(e.testRepo, "/")
+		owner, repo := s[0], s[1]
+		if _, res, err := c.Actions.CreateRegistrationToken(ctx, owner, repo); err != nil {
+			b, ioerr := io.ReadAll(res.Body)
+			if ioerr != nil {
+				t.Logf("%v", ioerr)
+				return err
+			}
+			t.Logf(string(b))
 			return err
 		}
-		t.Logf(string(b))
-		return err
 	}
 
 	return nil
@@ -620,9 +705,9 @@ func (e *env) installActionsRunnerController(t *testing.T, repo, tag, testID, ch
 	e.RunScript(t, "../../acceptance/deploy.sh", testing.ScriptConfig{Dir: "../..", Env: scriptEnv})
 }
 
-func (e *env) deploy(t *testing.T, kind DeployKind, testID string) {
+func (e *env) deploy(t *testing.T, kind DeployKind, testID string, env ...string) {
 	t.Helper()
-	e.do(t, "apply", kind, testID)
+	e.do(t, "apply", kind, testID, env...)
 }
 
 func (e *env) undeploy(t *testing.T, kind DeployKind, testID string) {
@@ -630,7 +715,7 @@ func (e *env) undeploy(t *testing.T, kind DeployKind, testID string) {
 	e.do(t, "delete", kind, testID)
 }
 
-func (e *env) do(t *testing.T, op string, kind DeployKind, testID string) {
+func (e *env) do(t *testing.T, op string, kind DeployKind, testID string, env ...string) {
 	t.Helper()
 
 	e.createControllerNamespaceAndServiceAccount(t)
@@ -638,7 +723,10 @@ func (e *env) do(t *testing.T, op string, kind DeployKind, testID string) {
 	scriptEnv := []string{
 		"KUBECONFIG=" + e.Kubeconfig,
 		"OP=" + op,
+		"RUNNER_NAMESPACE=" + e.runnerNamespace,
+		"RUNNER_SERVICE_ACCOUNT_NAME=" + e.runnerServiceAccountName,
 	}
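+	// Extra env entries (e.g. ROLLING_UPDATE_PHASE=N) are forwarded to the deploy
+	// scripts, where envsubst renders them into the pod templates; changing the
+	// value between attempts is what triggers a rolling update of the runners.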
+	scriptEnv = append(scriptEnv, env...)
 
 	switch kind {
 	case RunnerSets:
@@ -730,7 +818,7 @@ func (e *env) createControllerNamespaceAndServiceAccount(t *testing.T) {
 func (e *env) installActionsWorkflow(t *testing.T, kind DeployKind, testID string) {
 	t.Helper()
 
-	installActionsWorkflow(t, e.testName+" "+testID, e.runnerLabel(testID), testResultCMNamePrefix, e.repoToCommit, kind, e.testJobs(testID), !e.rootlessDocker)
+	installActionsWorkflow(t, e.testName+" "+testID, e.runnerLabel(testID), testResultCMNamePrefix, e.repoToCommit, kind, e.testJobs(testID), !e.rootlessDocker, e.doDockerBuild)
 }
 
 func (e *env) testJobs(testID string) []job {
@@ -772,7 +860,7 @@ func createTestJobs(id, testResultCMNamePrefix string, numJobs int) []job {
 const Branch = "main"
 
 // useSudo also implies rootful docker and the use of buildx cache export/import
-func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNamePrefix, testRepo string, kind DeployKind, testJobs []job, useSudo bool) {
+func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNamePrefix, testRepo string, kind DeployKind, testJobs []job, useSudo, doDockerBuild bool) {
 	t.Helper()
 
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
@@ -827,32 +915,30 @@ func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNam
 		}
 	}
 
-	steps = append(steps,
-		testing.Step{
-			// This might be the easiest way to handle permissions without use of securityContext
-			// https://stackoverflow.com/questions/50156124/kubernetes-nfs-persistent-volumes-permission-denied#comment107483717_53186320
-			Run: sudo + "chmod 777 -R \"${RUNNER_TOOL_CACHE}\" \"${HOME}/.cache\"",
-		},
-	)
 	if useSudo {
 		steps = append(steps,
+			testing.Step{
+				// This might be the easiest way to handle permissions without use of securityContext
+				// https://stackoverflow.com/questions/50156124/kubernetes-nfs-persistent-volumes-permission-denied#comment107483717_53186320
+				Run: sudo + "chmod 777 -R \"${RUNNER_TOOL_CACHE}\" \"${HOME}/.cache\"",
+			},
 			testing.Step{
 				Run: sudo + "chmod 777 -R \"/var/lib/docker\"",
 			},
+			testing.Step{
+				Run: "ls -lah \"${RUNNER_TOOL_CACHE}\" \"${HOME}/.cache\"",
+			},
+			testing.Step{
+				Run: "ls -lah \"/var/lib/docker\" || echo ls failed.",
+			},
 		)
 	}
 
 	steps = append(steps,
-		testing.Step{
-			// This might be the easiest way to handle permissions without use of securityContext
-			// https://stackoverflow.com/questions/50156124/kubernetes-nfs-persistent-volumes-permission-denied#comment107483717_53186320
-			Run: "ls -lah \"${RUNNER_TOOL_CACHE}\" \"${HOME}/.cache\"",
-		},
-		testing.Step{
-			// This might be the easiest way to handle permissions without use of securityContext
-			// https://stackoverflow.com/questions/50156124/kubernetes-nfs-persistent-volumes-permission-denied#comment107483717_53186320
-			Run: "ls -lah \"/var/lib/docker\" || echo ls failed.",
-		},
 		testing.Step{
 			Uses: "actions/setup-go@v3",
 			With: &testing.With{
@@ -871,75 +957,77 @@ func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNam
 		},
 	)
 
-	if !kubernetesContainerMode {
-		setupBuildXActionWith := &testing.With{
-			BuildkitdFlags: "--debug",
-			Endpoint:       "mycontext",
-			// As the consequence of setting `install: false`, it doesn't install buildx as an alias to `docker build`
-			// so we need to use `docker buildx build` in the next step
-			Install: false,
-		}
-		var dockerBuildCache, dockerfile string
-		if useSudo {
-			// This needs to be set only when rootful docker mode.
-			// When rootless, we need to use the `docker` buildx driver, which doesn't support cache export
-			// so we end up with the below error on docker-build:
-			//   error: cache export feature is currently not supported for docker driver. Please switch to a different driver (eg. "docker buildx create --use")
-			dockerBuildCache = "--cache-from=type=local,src=/home/runner/.cache/buildx " +
-				"--cache-to=type=local,dest=/home/runner/.cache/buildx-new,mode=max "
-			dockerfile = "Dockerfile"
-		} else {
-			setupBuildXActionWith.Driver = "docker"
-			dockerfile = "Dockerfile.nocache"
-		}
-		steps = append(steps,
-			testing.Step{
-				// https://github.com/docker/buildx/issues/413#issuecomment-710660155
-				// To prevent setup-buildx-action from failing with:
-				//   error: could not create a builder instance with TLS data loaded from environment. Please use `docker context create <context-name>` to create a context for current environment and then create a builder instance with `docker buildx create <context-name>`
-				Run: "docker context create mycontext",
-			},
-			testing.Step{
-				Run: "docker context use mycontext",
-			},
-			testing.Step{
-				Name: "Set up Docker Buildx",
-				Uses: "docker/setup-buildx-action@v1",
-				With: setupBuildXActionWith,
-			},
-			testing.Step{
-				Run: "docker buildx build --platform=linux/amd64 " +
-					dockerBuildCache +
-					fmt.Sprintf("-f %s .", dockerfile),
-			},
-		)
-
-		if useSudo {
-			steps = append(steps,
-				testing.Step{
-					// https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#local-cache
-					// See https://github.com/moby/buildkit/issues/1896 for why this is needed
-					Run: "rm -rf /home/runner/.cache/buildx && mv /home/runner/.cache/buildx-new /home/runner/.cache/buildx",
-				},
-				testing.Step{
-					Run: "ls -lah /home/runner/.cache/*",
-				},
-			)
-		}
-	}
-
-	if useSudo {
-		steps = append(steps,
-			testing.Step{
-				Uses: "azure/setup-kubectl@v1",
-				With: &testing.With{
-					Version: "v1.20.2",
-				},
-			},
-			testing.Step{
-				Run: fmt.Sprintf("./test.sh %s %s", t.Name(), j.testArg),
-			},
-		)
-	}
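+	// Docker builds are opt-in via ARC_E2E_DO_DOCKER_BUILD so that the default e2e
+	// run stays fast; the buildx setup and build steps below run only when requested.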
"docker buildx create --use") + dockerBuildCache = "--cache-from=type=local,src=/home/runner/.cache/buildx " + + "--cache-to=type=local,dest=/home/runner/.cache/buildx-new,mode=max " + dockerfile = "Dockerfile" + } else { + setupBuildXActionWith.Driver = "docker" + dockerfile = "Dockerfile.nocache" + } steps = append(steps, testing.Step{ - // https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#local-cache - // See https://github.com/moby/buildkit/issues/1896 for why this is needed - Run: "rm -rf /home/runner/.cache/buildx && mv /home/runner/.cache/buildx-new /home/runner/.cache/buildx", + // https://github.com/docker/buildx/issues/413#issuecomment-710660155 + // To prevent setup-buildx-action from failing with: + // error: could not create a builder instance with TLS data loaded from environment. Please use `docker context create ` to create a context for current environment and then create a builder instance with `docker buildx create ` + Run: "docker context create mycontext", }, testing.Step{ - Run: "ls -lah /home/runner/.cache/*", + Run: "docker context use mycontext", + }, + testing.Step{ + Name: "Set up Docker Buildx", + Uses: "docker/setup-buildx-action@v1", + With: setupBuildXActionWith, + }, + testing.Step{ + Run: "docker buildx build --platform=linux/amd64 " + + dockerBuildCache + + fmt.Sprintf("-f %s .", dockerfile), }, ) } + } + if useSudo { steps = append(steps, testing.Step{ - Uses: "azure/setup-kubectl@v1", - With: &testing.With{ - Version: "v1.20.2", - }, + // https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#local-cache + // See https://github.com/moby/buildkit/issues/1896 for why this is needed + Run: "rm -rf /home/runner/.cache/buildx && mv /home/runner/.cache/buildx-new /home/runner/.cache/buildx", }, testing.Step{ - Run: fmt.Sprintf("./test.sh %s %s", t.Name(), j.testArg), + Run: "ls -lah /home/runner/.cache/*", }, ) } + steps = append(steps, + testing.Step{ + Uses: "azure/setup-kubectl@v1", + With: &testing.With{ + Version: "v1.20.2", + }, + }, + testing.Step{ + Run: fmt.Sprintf("./test.sh %s %s", t.Name(), j.testArg), + }, + ) + wf.Jobs[j.name] = testing.Job{ RunsOn: runnerLabel, Container: container,