diff --git a/.github/workflows/on-push-master-publish-chart.yml b/.github/workflows/on-push-master-publish-chart.yml index d55bc5e2..cea1b4ea 100644 --- a/.github/workflows/on-push-master-publish-chart.yml +++ b/.github/workflows/on-push-master-publish-chart.yml @@ -114,7 +114,7 @@ jobs: git config user.email "$GITHUB_ACTOR@users.noreply.github.com" - name: Run chart-releaser - uses: helm/chart-releaser-action@v1.3.0 + uses: helm/chart-releaser-action@v1.4.0 env: CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/runners.yml b/.github/workflows/runners.yml index 26be32e5..cce9b42e 100644 --- a/.github/workflows/runners.yml +++ b/.github/workflows/runners.yml @@ -15,7 +15,7 @@ on: - '!**.md' env: - RUNNER_VERSION: 2.289.1 + RUNNER_VERSION: 2.289.2 DOCKER_VERSION: 20.10.12 DOCKERHUB_USERNAME: summerwind diff --git a/README.md b/README.md index 6d5d0b45..0c32e8f3 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ ToC: - [Enterprise Runners](#enterprise-runners) - [RunnerDeployments](#runnerdeployments) - [RunnerSets](#runnersets) + - [Persistent Runners](#persistent-runners) - [Autoscaling](#autoscaling) - [Anti-Flapping Configuration](#anti-flapping-configuration) - [Pull Driven Scaling](#pull-driven-scaling) @@ -32,7 +33,6 @@ ToC: - [Runner Groups](#runner-groups) - [Runner Entrypoint Features](#runner-entrypoint-features) - [Using IRSA (IAM Roles for Service Accounts) in EKS](#using-irsa-iam-roles-for-service-accounts-in-eks) - - [Persistent Runners](#persistent-runners) - [Software Installed in the Runner Image](#software-installed-in-the-runner-image) - [Using without cert-manager](#using-without-cert-manager) - [Troubleshooting](#troubleshooting) @@ -226,14 +226,16 @@ By default the controller will look for runners in all namespaces, the watch nam This feature is configured via the controller's `--watch-namespace` flag. When a namespace is provided via this flag, the controller will only monitor runners in that namespace. 
-If you plan on installing all instances of the controller stack into a single namespace you will need to make the names of the resources unique to each stack. In the case of Helm this can be done by giving each install a unique release name, or via the `fullnameOverride` properties. +You can deploy multiple controllers either in a single shared namespace, or in a unique namespace per controller. -Alternatively, you can install each controller stack into its own unique namespace (relative to other controller stacks in the cluster), avoiding the need to uniquely prefix resources. +If you plan on installing all instances of the controller stack into a single namespace there are a few things you need to do for this to work. -When you go to the route of sharing the namespace while giving each a unique Helm release name, you must also ensure the following values are configured correctly: +1. All resources per stack must have a unique name; in the case of Helm this can be done by giving each install a unique release name, or via the `fullnameOverride` properties. +2. `authSecret.name` needs to be unique per stack when each stack is tied to runners in different GitHub organizations and repositories AND you want your GitHub credentials to be narrowly scoped. +3. `leaderElectionId` needs to be unique per stack. If this is not unique to the stack the controller tries to race onto the leader election lock resulting in only one stack working concurrently. Your controller will be stuck with a log message something like this `attempting to acquire leader lease arc-controllers/actions-runner-controller...` +4. The MutatingWebhookConfiguration in each stack must include a namespace selector for that stack's corresponding runner namespace; this is already configured in the Helm chart. -- `authSecret.name` needs be unique per stack when each stack is tied to runners in different GitHub organizations and repositories AND you want your GitHub credentials to narrowly scoped.
-- `leaderElectionId` needs to be unique per stack. If this is not unique to the stack the controller tries to race onto the leader election lock and resulting in only one stack working concurrently. +Alternatively, you can install each controller stack into a unique namespace (relative to other controller stacks in the cluster). Implementing ARC this way avoids the first, second and third pitfalls (you still need to set the corresponding namespace selector for each stack's mutating webhook). ## Usage @@ -365,6 +367,8 @@ example-runnerdeploy2475ht2qbr mumoshu/actions-runner-controller-ci Running > This feature requires controller version => [v0.20.0](https://github.com/actions-runner-controller/actions-runner-controller/releases/tag/v0.20.0) +_Make sure you read the limitations below before using this kind!_ + For scenarios where you require the advantages of a `StatefulSet`, for example persistent storage, ARC implements a runner based on Kubernete's StatefulSets, the RunnerSet. A basic `RunnerSet` would look like this: @@ -448,6 +452,21 @@ Under the hood, `RunnerSet` relies on Kubernetes's `StatefulSet` and Mutating We **Limitations** * For autoscaling the `RunnerSet` kind only supports pull driven scaling or the `workflow_job` event for webhook driven scaling. +* Whilst `RunnerSets` support all runner modes as well as autoscaling, currently PVs are **NOT** automatically cleaned up as they are still bound to their respective PVCs when a runner is deleted by the controller. This has **major** implications when using `RunnerSets` in the standard runner mode, `ephemeral: true`; see [persistent runners](#persistent-runners) for more details. As a result, if you use the default ephemeral configuration or implement autoscaling for your `RunnerSets`, you will get a build-up of PVCs and PVs unless you add some sort of custom solution for cleaning them up. + +### Persistent Runners + +Every runner managed by ARC is "ephemeral" by default.
The life of an ephemeral runner managed by ARC looks like this- ARC creates a runner pod for the runner. As it's an ephemeral runner, the `--ephemeral` flag is passed to the `actions/runner` agent that runs within the `runner` container of the runner pod. + +`--ephemeral` is an `actions/runner` feature that instructs the runner to stop and de-register itself after the first job run. + +Once the ephemeral runner has completed running a workflow job, it stops with a status code of 0, hence the runner pod is marked as completed, removed by ARC. + +As it's removed after a workflow job run, the runner pod is never reused across multiple GitHub Actions workflow jobs, providing you a clean environment per each workflow job. + +Although not generally recommended, it's possible to disable passing `--ephemeral` flag by explicitly setting `ephemeral: false` in the `RunnerDeployment` or `RunnerSet` spec. When disabled, your runner becomes "persistent". A persistent runner does not stop after workflow job ends, and in this mode `actions/runner` is known to clean only runner's work dir after each job. Whilst this can seem helpful it creates a non-deterministic environment which is not ideal for a CI/CD environment. Between runs your actions cache, docker images stored in the `dind` and layer cache, globally installed packages etc are retained across multiple workflow job runs which can cause issues which are hard to debug and inconsistent. + +Persistent runners are available as an option for some edge cases however they are not preferred as they can create challenges around providing a deterministic and secure environment. 
### Autoscaling @@ -666,7 +685,7 @@ The primary benefit of autoscaling on Webhook compared to the pull driven scalin > You can learn the implementation details in [#282](https://github.com/actions-runner-controller/actions-runner-controller/pull/282) -To enable this feature, you firstly need to install the webhook server, currently, only our Helm chart has the ability install it: +To enable this feature, you first need to install the GitHub webhook server. To install via our Helm chart, _[see the values documentation for all configuration options](https://github.com/actions-runner-controller/actions-runner-controller/blob/master/charts/actions-runner-controller/README.md)_ ```console @@ -1309,21 +1328,6 @@ spec: securityContext: fsGroup: 1000 ``` - -### Persistent Runners - -Every runner managed by ARC is "ephemeral" by default. The life of an ephemeral runner managed by ARC looks like this- ARC creates a runner pod for the runner. As it's an ephemeral runner, the `--ephemeral` flag is passed to the `actions/runner` agent that runs within the `runner` container of the runner pod. - -`--ephemeral` is an `actions/runner` feature that instructs the runner to stop and de-register itself after the first job run. - -Once the ephemeral runner has completed running a workflow job, it stops with a status code of 0, hence the runner pod is marked as completed, removed by ARC. - -As it's removed after a workflow job run, the runner pod is never reused across multiple GitHub Actions workflow jobs, providing you a clean environment per each workflow job. - -Although not recommended, it's possible to disable passing `--ephemeral` flag by explicitly setting `ephemeral: false` in the `RunnerDeployment` or `RunnerSet` spec. When disabled, your runner becomes "persistent". A persistent runner does not stop after workflow job ends, and in this mode `actions/runner` is known to clean only runner's work dir after each job. 
That means your runner's environment, including various actions cache, docker images stored in the `dind` and layer cache, is retained across multiple workflow job runs. - -Persistent runners are available as an option for some edge cases however they are not preferred as they can create challenges around providing a deterministic and secure environment. - ### Software Installed in the Runner Image **Cloud Tooling**
diff --git a/api/v1alpha1/runner_types.go b/api/v1alpha1/runner_types.go index 25437987..5ced5fcc 100644 --- a/api/v1alpha1/runner_types.go +++ b/api/v1alpha1/runner_types.go @@ -181,6 +181,9 @@ func (rs *RunnerSpec) ValidateRepository() error { // RunnerStatus defines the observed state of Runner type RunnerStatus struct { + // Turns true only if the runner pod is ready. + // +optional + Ready bool `json:"ready"` // +optional Registration RunnerStatusRegistration `json:"registration"` // +optional diff --git a/api/v1alpha1/runnerdeployment_webhook.go b/api/v1alpha1/runnerdeployment_webhook.go index 3f5626fe..91a034d6 100644 --- a/api/v1alpha1/runnerdeployment_webhook.go +++ b/api/v1alpha1/runnerdeployment_webhook.go @@ -26,7 +26,7 @@ import ( ) // log is for logging in this package. -var runenrDeploymentLog = logf.Log.WithName("runnerdeployment-resource") +var runnerDeploymentLog = logf.Log.WithName("runnerdeployment-resource") func (r *RunnerDeployment) SetupWebhookWithManager(mgr ctrl.Manager) error { return ctrl.NewWebhookManagedBy(mgr). 
@@ -49,13 +49,13 @@ var _ webhook.Validator = &RunnerDeployment{} // ValidateCreate implements webhook.Validator so a webhook will be registered for the type func (r *RunnerDeployment) ValidateCreate() error { - runenrDeploymentLog.Info("validate resource to be created", "name", r.Name) + runnerDeploymentLog.Info("validate resource to be created", "name", r.Name) return r.Validate() } // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type func (r *RunnerDeployment) ValidateUpdate(old runtime.Object) error { - runenrDeploymentLog.Info("validate resource to be updated", "name", r.Name) + runnerDeploymentLog.Info("validate resource to be updated", "name", r.Name) return r.Validate() } diff --git a/charts/actions-runner-controller/Chart.yaml b/charts/actions-runner-controller/Chart.yaml index 48cfc29b..9e89d500 100644 --- a/charts/actions-runner-controller/Chart.yaml +++ b/charts/actions-runner-controller/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.17.0 +version: 0.17.2 # Used as the default manager tag value when no tag property is provided in the values.yaml -appVersion: 0.22.0 +appVersion: 0.22.2 home: https://github.com/actions-runner-controller/actions-runner-controller diff --git a/charts/actions-runner-controller/crds/actions.summerwind.dev_runners.yaml b/charts/actions-runner-controller/crds/actions.summerwind.dev_runners.yaml index 3f85eb1d..01bf48e4 100644 --- a/charts/actions-runner-controller/crds/actions.summerwind.dev_runners.yaml +++ b/charts/actions-runner-controller/crds/actions.summerwind.dev_runners.yaml @@ -5126,6 +5126,9 @@ spec: type: string phase: type: string + ready: + description: Turns true only if the runner pod is ready. 
+ type: boolean reason: type: string registration: diff --git a/charts/actions-runner-controller/docs/UPGRADING.md b/charts/actions-runner-controller/docs/UPGRADING.md index d749849d..9cd22207 100644 --- a/charts/actions-runner-controller/docs/UPGRADING.md +++ b/charts/actions-runner-controller/docs/UPGRADING.md @@ -18,7 +18,7 @@ Due to the above you can't just do a `helm upgrade` to release the latest versio ## Steps -1. Upgrade CRDs +1. Upgrade CRDs, this isn't optional, the CRDs you are using must be those that correspond with the version of the controller you are installing ```shell # REMEMBER TO UPDATE THE CHART_VERSION TO RELEVANT CHART VERISON!!!! diff --git a/charts/actions-runner-controller/templates/webhook_configs.yaml b/charts/actions-runner-controller/templates/webhook_configs.yaml index 93be277f..264d294f 100644 --- a/charts/actions-runner-controller/templates/webhook_configs.yaml +++ b/charts/actions-runner-controller/templates/webhook_configs.yaml @@ -12,6 +12,11 @@ metadata: webhooks: - admissionReviewVersions: - v1beta1 + {{- if .Values.scope.singleNamespace }} + namespaceSelector: + matchLabels: + name: {{ default .Release.Namespace .Values.scope.watchNamespace }} + {{- end }} clientConfig: {{- if .Values.admissionWebHooks.caBundle }} caBundle: {{ quote .Values.admissionWebHooks.caBundle }} @@ -35,6 +40,11 @@ webhooks: sideEffects: None - admissionReviewVersions: - v1beta1 + {{- if .Values.scope.singleNamespace }} + namespaceSelector: + matchLabels: + name: {{ default .Release.Namespace .Values.scope.watchNamespace }} + {{- end }} clientConfig: {{- if .Values.admissionWebHooks.caBundle }} caBundle: {{ .Values.admissionWebHooks.caBundle }} @@ -58,6 +68,11 @@ webhooks: sideEffects: None - admissionReviewVersions: - v1beta1 + {{- if .Values.scope.singleNamespace }} + namespaceSelector: + matchLabels: + name: {{ default .Release.Namespace .Values.scope.watchNamespace }} + {{- end }} clientConfig: {{- if .Values.admissionWebHooks.caBundle }} caBundle: 
{{ .Values.admissionWebHooks.caBundle }} @@ -81,6 +96,11 @@ webhooks: sideEffects: None - admissionReviewVersions: - v1beta1 + {{- if .Values.scope.singleNamespace }} + namespaceSelector: + matchLabels: + name: {{ default .Release.Namespace .Values.scope.watchNamespace }} + {{- end }} clientConfig: {{- if .Values.admissionWebHooks.caBundle }} caBundle: {{ .Values.admissionWebHooks.caBundle }} @@ -117,6 +137,11 @@ metadata: webhooks: - admissionReviewVersions: - v1beta1 + {{- if .Values.scope.singleNamespace }} + namespaceSelector: + matchLabels: + name: {{ default .Release.Namespace .Values.scope.watchNamespace }} + {{- end }} clientConfig: {{- if .Values.admissionWebHooks.caBundle }} caBundle: {{ .Values.admissionWebHooks.caBundle }} @@ -140,6 +165,11 @@ webhooks: sideEffects: None - admissionReviewVersions: - v1beta1 + {{- if .Values.scope.singleNamespace }} + namespaceSelector: + matchLabels: + name: {{ default .Release.Namespace .Values.scope.watchNamespace }} + {{- end }} clientConfig: {{- if .Values.admissionWebHooks.caBundle }} caBundle: {{ .Values.admissionWebHooks.caBundle }} @@ -163,6 +193,11 @@ webhooks: sideEffects: None - admissionReviewVersions: - v1beta1 + {{- if .Values.scope.singleNamespace }} + namespaceSelector: + matchLabels: + name: {{ default .Release.Namespace .Values.scope.watchNamespace }} + {{- end }} clientConfig: {{- if .Values.admissionWebHooks.caBundle }} caBundle: {{ .Values.admissionWebHooks.caBundle }} diff --git a/config/crd/bases/actions.summerwind.dev_runners.yaml b/config/crd/bases/actions.summerwind.dev_runners.yaml index 3f85eb1d..01bf48e4 100644 --- a/config/crd/bases/actions.summerwind.dev_runners.yaml +++ b/config/crd/bases/actions.summerwind.dev_runners.yaml @@ -5126,6 +5126,9 @@ spec: type: string phase: type: string + ready: + description: Turns true only if the runner pod is ready. 
+ type: boolean reason: type: string registration: diff --git a/config/default/gh-webhook-server-auth-proxy-patch.yaml b/config/default/gh-webhook-server-auth-proxy-patch.yaml new file mode 100644 index 00000000..6d01f5da --- /dev/null +++ b/config/default/gh-webhook-server-auth-proxy-patch.yaml @@ -0,0 +1,23 @@ +# This patch injects an HTTP proxy sidecar container that performs RBAC +# authorization against the Kubernetes API using SubjectAccessReviews. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: github-webhook-server +spec: + template: + spec: + containers: + - name: kube-rbac-proxy + image: quay.io/brancz/kube-rbac-proxy:v0.10.0 + args: + - '--secure-listen-address=0.0.0.0:8443' + - '--upstream=http://127.0.0.1:8080/' + - '--logtostderr=true' + - '--v=10' + ports: + - containerPort: 8443 + name: https + - name: github-webhook-server + args: + - '--metrics-addr=127.0.0.1:8080' diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index d44cfad5..4539475e 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -20,19 +20,22 @@ bases: - ../webhook # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. - ../certmanager -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. #- ../prometheus +# [GH_WEBHOOK_SERVER] To enable the GitHub webhook server, uncomment all sections with 'GH_WEBHOOK_SERVER'. +#- ../github-webhook-server patchesStrategicMerge: - # Protect the /metrics endpoint by putting it behind auth. - # Only one of manager_auth_proxy_patch.yaml and - # manager_prometheus_metrics_patch.yaml should be enabled. +# Protect the /metrics endpoint by putting it behind auth. +# Only one of manager_auth_proxy_patch.yaml and +# manager_prometheus_metrics_patch.yaml should be enabled. 
- manager_auth_proxy_patch.yaml - # If you want your controller-manager to expose the /metrics - # endpoint w/o any authn/z, uncomment the following line and - # comment manager_auth_proxy_patch.yaml. - # Only one of manager_auth_proxy_patch.yaml and - # manager_prometheus_metrics_patch.yaml should be enabled. + +# If you want your controller-manager to expose the /metrics +# endpoint w/o any authn/z, uncomment the following line and +# comment manager_auth_proxy_patch.yaml. +# Only one of manager_auth_proxy_patch.yaml and +# manager_prometheus_metrics_patch.yaml should be enabled. #- manager_prometheus_metrics_patch.yaml # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in crd/kustomization.yaml @@ -43,6 +46,10 @@ patchesStrategicMerge: # 'CERTMANAGER' needs to be enabled to use ca injection - webhookcainjection_patch.yaml +# [GH_WEBHOOK_SERVER] To enable the GitHub webhook server, uncomment all sections with 'GH_WEBHOOK_SERVER'. +# Protect the GitHub webhook server metrics endpoint by putting it behind auth. +# - gh-webhook-server-auth-proxy-patch.yaml + # the following config is for teaching kustomize how to do var substitution vars: # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. 
diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index 558812ec..2703a9af 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -23,4 +23,3 @@ spec: args: - "--metrics-addr=127.0.0.1:8080" - "--enable-leader-election" - - "--sync-period=10m" diff --git a/config/github-webhook-server/deployment.yaml b/config/github-webhook-server/deployment.yaml new file mode 100644 index 00000000..b1fe967d --- /dev/null +++ b/config/github-webhook-server/deployment.yaml @@ -0,0 +1,37 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: github-webhook-server + app.kubernetes.io/part-of: actions-runner-controller + name: github-webhook-server +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: github-webhook-server + app.kubernetes.io/part-of: actions-runner-controller + template: + metadata: + labels: + app.kubernetes.io/component: github-webhook-server + app.kubernetes.io/part-of: actions-runner-controller + spec: + containers: + - name: github-webhook-server + image: controller:latest + command: + - '/github-webhook-server' + env: + - name: GITHUB_WEBHOOK_SECRET_TOKEN + valueFrom: + secretKeyRef: + key: github_webhook_secret_token + name: github-webhook-server + optional: true + ports: + - containerPort: 8000 + name: http + protocol: TCP + serviceAccountName: github-webhook-server + terminationGracePeriodSeconds: 10 diff --git a/config/github-webhook-server/kustomization.yaml b/config/github-webhook-server/kustomization.yaml new file mode 100644 index 00000000..b7c92d42 --- /dev/null +++ b/config/github-webhook-server/kustomization.yaml @@ -0,0 +1,12 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +images: + - name: controller + newName: summerwind/actions-runner-controller + newTag: latest + +resources: + - deployment.yaml + - rbac.yaml + - service.yaml diff --git 
a/config/github-webhook-server/rbac.yaml b/config/github-webhook-server/rbac.yaml new file mode 100644 index 00000000..685e8c71 --- /dev/null +++ b/config/github-webhook-server/rbac.yaml @@ -0,0 +1,113 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: github-webhook-server + app.kubernetes.io/part-of: actions-runner-controller + name: github-webhook-server +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: github-webhook-server + app.kubernetes.io/part-of: actions-runner-controller + name: github-webhook-server +rules: + - apiGroups: + - actions.summerwind.dev + resources: + - horizontalrunnerautoscalers + verbs: + - get + - list + - patch + - update + - watch + - apiGroups: + - actions.summerwind.dev + resources: + - horizontalrunnerautoscalers/finalizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - actions.summerwind.dev + resources: + - horizontalrunnerautoscalers/status + verbs: + - get + - patch + - update + - apiGroups: + - actions.summerwind.dev + resources: + - runnersets + verbs: + - get + - list + - watch + - apiGroups: + - actions.summerwind.dev + resources: + - runnerdeployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - actions.summerwind.dev + resources: + - runnerdeployments/finalizers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - actions.summerwind.dev + resources: + - runnerdeployments/status + verbs: + - get + - patch + - update + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: github-webhook-server + 
app.kubernetes.io/part-of: actions-runner-controller + name: github-webhook-server +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: github-webhook-server +subjects: + - kind: ServiceAccount + name: github-webhook-server diff --git a/config/github-webhook-server/service.yaml b/config/github-webhook-server/service.yaml new file mode 100644 index 00000000..00f28a06 --- /dev/null +++ b/config/github-webhook-server/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: github-webhook-server + app.kubernetes.io/part-of: actions-runner-controller + name: github-webhook-server +spec: + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + app.kubernetes.io/component: github-webhook-server + app.kubernetes.io/part-of: actions-runner-controller diff --git a/controllers/pod_runner_token_injector.go b/controllers/pod_runner_token_injector.go index a186b432..5c7dd487 100644 --- a/controllers/pod_runner_token_injector.go +++ b/controllers/pod_runner_token_injector.go @@ -59,9 +59,9 @@ func (t *PodRunnerTokenInjector) Handle(ctx context.Context, req admission.Reque return newEmptyResponse() } - enterprise, okEnterprise := getEnv(runnerContainer, "RUNNER_ENTERPRISE") - repo, okRepo := getEnv(runnerContainer, "RUNNER_REPO") - org, okOrg := getEnv(runnerContainer, "RUNNER_ORG") + enterprise, okEnterprise := getEnv(runnerContainer, EnvVarEnterprise) + repo, okRepo := getEnv(runnerContainer, EnvVarRepo) + org, okOrg := getEnv(runnerContainer, EnvVarOrg) if !okRepo || !okOrg || !okEnterprise { return newEmptyResponse() } diff --git a/controllers/runner_controller.go b/controllers/runner_controller.go index 4b2dad64..0619a06a 100644 --- a/controllers/runner_controller.go +++ b/controllers/runner_controller.go @@ -133,7 +133,9 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr phase = "Created" } - if runner.Status.Phase != phase { + ready := 
runnerPodReady(&pod) + + if runner.Status.Phase != phase || runner.Status.Ready != ready { if pod.Status.Phase == corev1.PodRunning { // Seeing this message, you can expect the runner to become `Running` soon. log.V(1).Info( @@ -144,6 +146,7 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr updated := runner.DeepCopy() updated.Status.Phase = phase + updated.Status.Ready = ready updated.Status.Reason = pod.Status.Reason updated.Status.Message = pod.Status.Message @@ -156,6 +159,18 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr return ctrl.Result{}, nil } +func runnerPodReady(pod *corev1.Pod) bool { + for _, c := range pod.Status.Conditions { + if c.Type != corev1.PodReady { + continue + } + + return c.Status == corev1.ConditionTrue + } + + return false +} + func runnerContainerExitCode(pod *corev1.Pod) *int32 { for _, status := range pod.Status.ContainerStatuses { if status.Name != containerName { diff --git a/controllers/runner_pod_controller.go b/controllers/runner_pod_controller.go index 3a631671..9e9ed9c4 100644 --- a/controllers/runner_pod_controller.go +++ b/controllers/runner_pod_controller.go @@ -18,6 +18,7 @@ package controllers import ( "context" + "errors" "fmt" "time" @@ -64,9 +65,19 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, nil } + var envvars []corev1.EnvVar + for _, container := range runnerPod.Spec.Containers { + if container.Name == "runner" { + envvars = container.Env + } + } + + if len(envvars) == 0 { + return ctrl.Result{}, errors.New("Could not determine env vars for runner Pod") + } + var enterprise, org, repo string - envvars := runnerPod.Spec.Containers[0].Env for _, e := range envvars { switch e.Name { case EnvVarEnterprise: diff --git a/github/github.go b/github/github.go index 6def3f50..b037d8ba 100644 --- a/github/github.go +++ b/github/github.go @@ -153,8 +153,18 @@ func (c *Client) 
GetRegistrationToken(ctx context.Context, enterprise, org, repo key := getRegistrationKey(org, repo, enterprise) rt, ok := c.regTokens[key] - // we like to give runners a chance that are just starting up and may miss the expiration date by a bit - runnerStartupTimeout := 3 * time.Minute + // We'd like to allow the runner just starting up to miss the expiration date by a bit. + // Note that this means that we're going to cache the Create Registration Token API response longer than the + // recommended cache duration. + // + // https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository + // https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-an-organization + // https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-an-enterprise + // https://docs.github.com/en/rest/overview/resources-in-the-rest-api#conditional-requests + // + // This is currently set to 30 minutes as a result of the discussion that took place in the following issue: + // https://github.com/actions-runner-controller/actions-runner-controller/issues/1295 + runnerStartupTimeout := 30 * time.Minute if ok && rt.GetExpiresAt().After(time.Now().Add(runnerStartupTimeout)) { return rt, nil diff --git a/runner/Dockerfile b/runner/Dockerfile index 720b41ec..5b0b353e 100644 --- a/runner/Dockerfile +++ b/runner/Dockerfile @@ -83,7 +83,7 @@ ENV HOME=/home/runner # # If you're willing to uncomment the following line, you'd also need to comment-out the # && curl -L -o runner.tar.gz https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-${ARCH}-${RUNNER_VERSION}.tar.gz \ -# line in the next `RUN` command in this Dockerfile, to avoid overwiding this runner.tar.gz with a remote one. +# line in the next `RUN` command in this Dockerfile, to avoid overwriting this runner.tar.gz with a remote one.
# COPY actions-runner-linux-x64-2.280.3.tar.gz /runnertmp/runner.tar.gz @@ -116,6 +116,9 @@ COPY entrypoint.sh / ENV PATH="${PATH}:${HOME}/.local/bin" ENV ImageOS=ubuntu20 +RUN echo "PATH=${PATH}" > /etc/environment \ + && echo "ImageOS=${ImageOS}" >> /etc/environment + USER runner ENTRYPOINT ["/usr/local/bin/dumb-init", "--"] diff --git a/runner/Dockerfile.dindrunner b/runner/Dockerfile.dindrunner index 00ad5e1f..c1c06e49 100644 --- a/runner/Dockerfile.dindrunner +++ b/runner/Dockerfile.dindrunner @@ -118,6 +118,9 @@ VOLUME /var/lib/docker ENV PATH="${PATH}:${HOME}/.local/bin" ENV ImageOS=ubuntu20 +RUN echo "PATH=${PATH}" > /etc/environment \ + && echo "ImageOS=${ImageOS}" >> /etc/environment + # No group definition, as that makes it harder to run docker. USER runner diff --git a/runner/entrypoint.sh b/runner/entrypoint.sh index ba46e645..cb2c5344 100755 --- a/runner/entrypoint.sh +++ b/runner/entrypoint.sh @@ -151,7 +151,7 @@ cat .runner # https://api.github.com/repos/USER/REPO/actions/runners/171 if [ -z "${UNITTEST:-}" ]; then - mkdir ./externals + mkdir -p ./externals # Hack due to the DinD volumes mv ./externalstmp/* ./externals/ fi diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 293051c3..0d6791c3 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -37,13 +37,23 @@ var ( }, { Dockerfile: "../../runner/Dockerfile", - Args: []testing.BuildArg{}, - Image: runnerImage, + Args: []testing.BuildArg{ + { + Name: "RUNNER_VERSION", + Value: "2.289.2", + }, + }, + Image: runnerImage, }, { Dockerfile: "../../runner/Dockerfile.dindrunner", - Args: []testing.BuildArg{}, - Image: runnerDindImage, + Args: []testing.BuildArg{ + { + Name: "RUNNER_VERSION", + Value: "2.289.2", + }, + }, + Image: runnerDindImage, }, } @@ -58,7 +68,7 @@ var ( } commonScriptEnv = []string{ - "SYNC_PERIOD=" + "10s", + "SYNC_PERIOD=" + "30m", "NAME=" + controllerImageRepo, "VERSION=" + controllerImageTag, "RUNNER_TAG=" + runnerImageTag,