Upgrade resources in-place, causing 1-1 mapping between autoscaling runner set and ephemeral runner set (#4516)

This commit is contained in:
Nikola Jokic 2026-06-09 13:52:37 +02:00 committed by GitHub
parent 0acef229e2
commit 767e58e4b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
31 changed files with 2651 additions and 3255 deletions

View File

@ -29,7 +29,8 @@ jobs:
default-setup:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -52,7 +53,8 @@ jobs:
default-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -74,7 +76,8 @@ jobs:
single-namespace-setup:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -96,7 +99,8 @@ jobs:
single-namespace-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -118,29 +122,8 @@ jobs:
dind-mode-setup:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
ref: ${{github.head_ref}}
- name: Get configure token
id: config-token
uses: peter-murray/workflow-application-token-action@d17e3a9a36850ea89f35db16c1067dd2b68ee343
with:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run dind mode setup test
run: hack/e2e-test.sh dind-mode-setup-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
dind-mode-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -159,10 +142,11 @@ jobs:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
kubernetes-mode-setup:
dind-mode-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -175,16 +159,17 @@ jobs:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run kubernetes mode setup test
run: hack/e2e-test.sh kubernetes-mode-setup-v2
- name: Run dind mode setup test
run: hack/e2e-test.sh dind-mode-setup-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
kubernetes-mode-setup-v2:
kubernetes-mode-setup:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -203,10 +188,11 @@ jobs:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
auth-proxy-setup:
kubernetes-mode-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -219,16 +205,17 @@ jobs:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run single namespace setup test
run: hack/e2e-test.sh single-namespace-setup-v2
- name: Run kubernetes mode setup test
run: hack/e2e-test.sh kubernetes-mode-setup-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
auth-proxy-setup-v2:
auth-proxy-setup:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -247,10 +234,11 @@ jobs:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
anonymous-proxy-setup:
auth-proxy-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -263,16 +251,17 @@ jobs:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run anonymous proxy setup test
run: hack/e2e-test.sh anonymous-proxy-setup-v2
- name: Run single namespace setup test
run: hack/e2e-test.sh single-namespace-setup-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
anonymous-proxy-setup-v2:
anonymous-proxy-setup:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -291,10 +280,11 @@ jobs:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
self-signed-ca-setup:
anonymous-proxy-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -307,16 +297,17 @@ jobs:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run self signed CA setup test
run: hack/e2e-test.sh self-signed-ca-setup-v2
- name: Run anonymous proxy setup test
run: hack/e2e-test.sh anonymous-proxy-setup-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
self-signed-ca-setup-v2:
self-signed-ca-setup:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -335,10 +326,11 @@ jobs:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
update-strategy-tests:
self-signed-ca-setup-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -351,16 +343,17 @@ jobs:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run update strategy test
run: hack/e2e-test.sh update-strategy-v2
- name: Run self signed CA setup test
run: hack/e2e-test.sh self-signed-ca-setup-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
update-strategy-tests-v2:
update-gha-runner-scale-set-tests:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -374,7 +367,30 @@ jobs:
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run update strategy test
run: hack/e2e-test.sh update-strategy
run: hack/e2e-test.sh update-gha-runner-scale-set
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
update-gha-runner-scale-set-tests-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
ref: ${{github.head_ref}}
- name: Get configure token
id: config-token
uses: peter-murray/workflow-application-token-action@d17e3a9a36850ea89f35db16c1067dd2b68ee343
with:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run update strategy test
run: hack/e2e-test.sh update-gha-runner-scale-set-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
@ -382,29 +398,8 @@ jobs:
init-with-min-runners:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
ref: ${{github.head_ref}}
- name: Get configure token
id: config-token
uses: peter-murray/workflow-application-token-action@d17e3a9a36850ea89f35db16c1067dd2b68ee343
with:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run init with min runners test
run: hack/e2e-test.sh init-with-min-runners-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
init-with-min-runners-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.id == github.repository_id
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
@ -423,3 +418,25 @@ jobs:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash
init-with-min-runners-v2:
runs-on: ubuntu-latest
timeout-minutes: 20
if: github.event_name != 'pull_request' ||
github.event.pull_request.head.repo.id == github.repository_id
steps:
- uses: actions/checkout@v6
with:
ref: ${{github.head_ref}}
- name: Get configure token
id: config-token
uses: peter-murray/workflow-application-token-action@d17e3a9a36850ea89f35db16c1067dd2b68ee343
with:
application_id: ${{ secrets.E2E_TESTS_ACCESS_APP_ID }}
application_private_key: ${{ secrets.E2E_TESTS_ACCESS_PK }}
organization: ${{ env.TARGET_ORG }}
- name: Run init with min runners test
run: hack/e2e-test.sh init-with-min-runners-v2
env:
GITHUB_TOKEN: "${{steps.config-token.outputs.token}}"
shell: bash

View File

@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1
import (
"github.com/actions/actions-runner-controller/hash"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@ -24,13 +25,13 @@ import (
// AutoscalingListenerSpec defines the desired state of AutoscalingListener
type AutoscalingListenerSpec struct {
// Required
GitHubConfigUrl string `json:"githubConfigUrl,omitempty"`
GitHubConfigURL string `json:"githubConfigUrl,omitempty"`
// Required
GitHubConfigSecret string `json:"githubConfigSecret,omitempty"`
// Required
RunnerScaleSetId int `json:"runnerScaleSetId,omitempty"`
RunnerScaleSetID int `json:"runnerScaleSetId,omitempty"`
// Required
AutoscalingRunnerSetNamespace string `json:"autoscalingRunnerSetNamespace,omitempty"`
@ -83,6 +84,10 @@ type AutoscalingListenerSpec struct {
RoleBindingMetadata *ResourceMeta `json:"roleBindingMetadata,omitempty"`
}
// Hash returns the string produced by hash.ComputeTemplateHash for this
// AutoscalingListenerSpec.
func (s *AutoscalingListenerSpec) Hash() string {
return hash.ComputeTemplateHash(s)
}
// AutoscalingListenerStatus defines the observed state of AutoscalingListener
type AutoscalingListenerStatus struct{}

View File

@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1
import (
"github.com/actions/actions-runner-controller/hash"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@ -77,7 +78,7 @@ func (er *EphemeralRunner) GitHubConfigSecret() string {
}
func (er *EphemeralRunner) GitHubConfigUrl() string {
return er.Spec.GitHubConfigUrl
return er.Spec.GitHubConfigURL
}
func (er *EphemeralRunner) GitHubProxy() *ProxyConfig {
@ -102,7 +103,7 @@ func (er *EphemeralRunner) VaultProxy() *ProxyConfig {
// EphemeralRunnerSpec defines the desired state of EphemeralRunner
type EphemeralRunnerSpec struct {
// +required
GitHubConfigUrl string `json:"githubConfigUrl,omitempty"`
GitHubConfigURL string `json:"githubConfigUrl,omitempty"`
// +required
GitHubConfigSecret string `json:"githubConfigSecret,omitempty"`
@ -128,6 +129,10 @@ type EphemeralRunnerSpec struct {
corev1.PodTemplateSpec `json:",inline"`
}
// Hash returns the string produced by hash.ComputeTemplateHash for this
// EphemeralRunnerSpec.
func (s *EphemeralRunnerSpec) Hash() string {
return hash.ComputeTemplateHash(s)
}
// EphemeralRunnerStatus defines the observed state of EphemeralRunner
type EphemeralRunnerStatus struct {
// Turns true only if the runner is online.

View File

@ -28,6 +28,9 @@ type EphemeralRunnerSetSpec struct {
PatchID int `json:"patchID"`
// EphemeralRunnerSpec is the spec of the ephemeral runner
EphemeralRunnerSpec EphemeralRunnerSpec `json:"ephemeralRunnerSpec,omitempty"`
// EphemeralRunnerMetadata is the metadata to be applied to all ephemeral runners created by this set.
// If the EphemeralRunnerMetadata is updated, the update applies to new ephemeral runners created after the update,
// but does not apply to existing ephemeral runners.
// +optional
EphemeralRunnerMetadata *ResourceMeta `json:"ephemeralRunnerMetadata,omitempty"`
}
@ -74,12 +77,17 @@ type EphemeralRunnerSet struct {
Status EphemeralRunnerSetStatus `json:"status,omitempty"`
}
// EphemeralRunnerSpecHash computes the hash value of the EphemeralRunnerSpec and returns it as a string.
func (ers *EphemeralRunnerSet) EphemeralRunnerSpecHash() string {
return ers.Spec.EphemeralRunnerSpec.Hash()
}
// GitHubConfigSecret returns the GitHubConfigSecret field of the set's
// EphemeralRunnerSpec.
func (ers *EphemeralRunnerSet) GitHubConfigSecret() string {
return ers.Spec.EphemeralRunnerSpec.GitHubConfigSecret
}
func (ers *EphemeralRunnerSet) GitHubConfigUrl() string {
return ers.Spec.EphemeralRunnerSpec.GitHubConfigUrl
return ers.Spec.EphemeralRunnerSpec.GitHubConfigURL
}
func (ers *EphemeralRunnerSet) GitHubProxy() *ProxyConfig {

View File

@ -59,7 +59,10 @@ spec:
description: EphemeralRunnerSetSpec defines the desired state of EphemeralRunnerSet
properties:
ephemeralRunnerMetadata:
description: ResourceMeta carries metadata common to all internal resources
description: |-
EphemeralRunnerMetadata is the metadata to be applied to all ephemeral runners created by this set.
If the EphemeralRunnerMetadata is updated, the update applies to new ephemeral runners created after the update,
but does not apply to existing ephemeral runners.
properties:
annotations:
additionalProperties:

View File

@ -50,9 +50,6 @@ args:
{{- with .Values.controller.manager.config.runnerMaxConcurrentReconciles }}
- "--runner-max-concurrent-reconciles={{ . }}"
{{- end }}
{{- with .Values.controller.manager.config.updateStrategy }}
- "--update-strategy={{ . }}"
{{- end }}
{{- if .Values.controller.metrics }}
{{- with .Values.controller.metrics }}
- "--listener-metrics-addr={{ .listenerAddr }}"

View File

@ -31,9 +31,6 @@ controller:
# The maximum number of concurrent reconciles which can be run by the EphemeralRunner controller.
runnerMaxConcurrentReconciles: 2
# How the controller handles upgrades with running jobs: "immediate" or "eventual".
updateStrategy: "immediate"
# List of label prefixes that should NOT be propagated to internal resources.
excludeLabelPropagationPrefixes: []
# Example:

View File

@ -59,7 +59,10 @@ spec:
description: EphemeralRunnerSetSpec defines the desired state of EphemeralRunnerSet
properties:
ephemeralRunnerMetadata:
description: ResourceMeta carries metadata common to all internal resources
description: |-
EphemeralRunnerMetadata is the metadata to be applied to all ephemeral runners created by this set.
If the EphemeralRunnerMetadata is updated, the update applies to new ephemeral runners created after the update,
but does not apply to existing ephemeral runners.
properties:
annotations:
additionalProperties:

View File

@ -70,9 +70,6 @@ spec:
{{- with .Values.flags.runnerMaxConcurrentReconciles }}
- "--runner-max-concurrent-reconciles={{ . }}"
{{- end }}
{{- with .Values.flags.updateStrategy }}
- "--update-strategy={{ . }}"
{{- end }}
{{- if .Values.metrics }}
{{- with .Values.metrics }}
- "--listener-metrics-addr={{ .listenerAddr }}"

View File

@ -363,7 +363,6 @@ func TestTemplate_ControllerDeployment_Defaults(t *testing.T) {
"--auto-scaling-runner-set-only",
"--log-level=debug",
"--log-format=text",
"--update-strategy=immediate",
"--metrics-addr=0",
"--listener-metrics-addr=0",
"--listener-metrics-endpoint=",
@ -431,7 +430,6 @@ func TestTemplate_ControllerDeployment_Customize(t *testing.T) {
"topologySpreadConstraints[0].maxSkew": "1",
"topologySpreadConstraints[0].topologyKey": "foo",
"priorityClassName": "test-priority-class",
"flags.updateStrategy": "eventual",
"flags.logLevel": "info",
"flags.logFormat": "json",
"volumes[0].name": "customMount",
@ -516,7 +514,6 @@ func TestTemplate_ControllerDeployment_Customize(t *testing.T) {
"--auto-scaler-image-pull-secrets=dockerhub",
"--log-level=info",
"--log-format=json",
"--update-strategy=eventual",
"--listener-metrics-addr=0",
"--listener-metrics-endpoint=",
"--metrics-addr=0",
@ -645,7 +642,6 @@ func TestTemplate_EnableLeaderElection(t *testing.T) {
"--leader-election-id=test-arc-gha-rs-controller",
"--log-level=debug",
"--log-format=text",
"--update-strategy=immediate",
"--listener-metrics-addr=0",
"--listener-metrics-endpoint=",
"--metrics-addr=0",
@ -687,7 +683,6 @@ func TestTemplate_ControllerDeployment_ForwardImagePullSecrets(t *testing.T) {
"--auto-scaler-image-pull-secrets=ghcr",
"--log-level=debug",
"--log-format=text",
"--update-strategy=immediate",
"--listener-metrics-addr=0",
"--listener-metrics-endpoint=",
"--metrics-addr=0",
@ -778,7 +773,6 @@ func TestTemplate_ControllerDeployment_WatchSingleNamespace(t *testing.T) {
"--log-level=debug",
"--log-format=text",
"--watch-single-namespace=demo",
"--update-strategy=immediate",
"--listener-metrics-addr=0",
"--listener-metrics-endpoint=",
"--metrics-addr=0",

View File

@ -115,22 +115,6 @@ flags:
# It may also increase the load on the API server and the external service (e.g. GitHub API).
runnerMaxConcurrentReconciles: 2
## Defines how the controller should handle upgrades while having running jobs.
##
## The strategies available are:
## - "immediate": (default) The controller will immediately apply the change causing the
## recreation of the listener and ephemeral runner set. This can lead to an
## overprovisioning of runners, if there are pending / running jobs. This should not
## be a problem at a small scale, but it could lead to a significant increase of
## resources if you have a lot of jobs running concurrently.
##
## - "eventual": The controller will remove the listener and ephemeral runner set
## immediately, but will not recreate them (to apply changes) until all
## pending / running jobs have completed.
## This can lead to a longer time to apply the change but it will ensure
## that you don't have any overprovisioning of runners.
updateStrategy: "immediate"
## Defines a list of prefixes that should not be propagated to internal resources.
## This is useful when you have labels that are used for internal purposes and should not be propagated to internal resources.
## See https://github.com/actions/actions-runner-controller/issues/3533 for more information.

View File

@ -15,13 +15,13 @@ scaleset:
## maxRunners is the max number of runners the autoscaling runner set will scale up to.
# maxRunners: 5
# Auth object provides authorization parameters.
# You should apply either:
# 1) secretName referencing the secret containing authorization parameters in the same namespace where the scale set is being installed in
# 2) app object parameters
# 3) githubToken
#
# If more than one of them is set, only one is applied, following the order listed above.
# Auth object provides authorization parameters.
# You should apply either:
# 1) secretName referencing the secret containing authorization parameters in the same namespace where the scale set is being installed in
# 2) app object parameters
# 3) githubToken
#
# If more than one of them is set, only one is applied, following the order listed above.
auth:
url: "" # Required
githubToken: ""
@ -43,7 +43,6 @@ secretResolution:
# - "kubernetes" - use Kubernetes secrets
# - "azureKeyVault" - use Azure Key Vault
type: "kubernetes"
## Proxy settings when type is NOT "kubernetes"
# proxy:
# http:
@ -63,37 +62,37 @@ secretResolution:
# tenant_id: ""
# certificate_path: ""
## Proxy can be used to define proxy settings that will be used by the
## controller, the listener and the runner of this scale set.
# proxy:
# http:
# url: http://proxy.com:1234
# credentialSecretRef: proxy-auth # a secret with `username` and `password` keys
# https:
# url: http://proxy.com:1234
# credentialSecretRef: proxy-auth # a secret with `username` and `password` keys
# noProxy:
# - example.com
# - example.org
## Proxy can be used to define proxy settings that will be used by the
## controller, the listener and the runner of this scale set.
# proxy:
# http:
# url: http://proxy.com:1234
# credentialSecretRef: proxy-auth # a secret with `username` and `password` keys
# https:
# url: http://proxy.com:1234
# credentialSecretRef: proxy-auth # a secret with `username` and `password` keys
# noProxy:
# - example.com
# - example.org
## listenerTemplate is the PodSpec for each listener Pod
## For reference: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec
# listenerPodTemplate:
# spec:
# containers:
# # Use this section to append additional configuration to the listener container.
# # If you change the name of the container, the configuration will not be applied to the listener,
# # and it will be treated as a side-car container.
# - name: listener
# securityContext:
# runAsUser: 1000
# # Use this section to add the configuration of a side-car container.
# # Comment it out or remove it if you don't need it.
# # Spec for this container will be applied as is without any modifications.
# - name: side-car
# image: example-sidecar
## listenerPodTemplate is the pod template (PodSpec under `spec`) for each listener Pod
## For reference: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec
# listenerPodTemplate:
# spec:
# containers:
# # Use this section to append additional configuration to the listener container.
# # If you change the name of the container, the configuration will not be applied to the listener,
# # and it will be treated as a side-car container.
# - name: listener
# securityContext:
# runAsUser: 1000
# # Use this section to add the configuration of a side-car container.
# # Comment it out or remove it if you don't need it.
# # Spec for this container will be applied as is without any modifications.
# - name: side-car
# image: example-sidecar
## Resource object allows modifying resources created by the chart itself
## Resource object allows modifying resources created by the chart itself
resource:
# Specifies metadata that will be applied to all resources managed by ARC
all:
@ -195,10 +194,9 @@ resource:
metadata:
labels: {}
annotations: {}
# TODO: Add more resource customizations when needed
# Template applied for the runner container
# Template applied for the runner container
runner:
# Mode can be used to automatically add configuration for the selected mode
# The available modes are:
@ -288,28 +286,28 @@ runner:
# spec:
# containers: []
## A self-signed CA certificate for communication with the GitHub server can be
## provided using a config map key selector. If `runnerMountPath` is set, for
## each runner pod ARC will:
## - create a `github-server-tls-cert` volume containing the certificate
## specified in `certificateFrom`
## - mount that volume on path `runnerMountPath`/{certificate name}
## - set NODE_EXTRA_CA_CERTS environment variable to that same path
## - set RUNNER_UPDATE_CA_CERTS environment variable to "1" (as of version
## 2.303.0 this will instruct the runner to reload certificates on the host)
##
## If any of the above had already been set by the user in the runner pod
## template, ARC will observe those and not overwrite them.
## Example configuration:
#
# githubServerTLS:
# certificateFrom:
# configMapKeyRef:
# name: config-map-name
# key: ca.crt
# runnerMountPath: /usr/local/share/ca-certificates/
## A self-signed CA certificate for communication with the GitHub server can be
## provided using a config map key selector. If `runnerMountPath` is set, for
## each runner pod ARC will:
## - create a `github-server-tls-cert` volume containing the certificate
## specified in `certificateFrom`
## - mount that volume on path `runnerMountPath`/{certificate name}
## - set NODE_EXTRA_CA_CERTS environment variable to that same path
## - set RUNNER_UPDATE_CA_CERTS environment variable to "1" (as of version
## 2.303.0 this will instruct the runner to reload certificates on the host)
##
## If any of the above had already been set by the user in the runner pod
## template, ARC will observe those and not overwrite them.
## Example configuration:
#
# githubServerTLS:
# certificateFrom:
# configMapKeyRef:
# name: config-map-name
# key: ca.crt
# runnerMountPath: /usr/local/share/ca-certificates/
## controllerServiceAccount is the service account of the controller
## controllerServiceAccount is the service account of the controller
controllerServiceAccount:
namespace: ""
name: ""

View File

@ -59,7 +59,10 @@ spec:
description: EphemeralRunnerSetSpec defines the desired state of EphemeralRunnerSet
properties:
ephemeralRunnerMetadata:
description: ResourceMeta carries metadata common to all internal resources
description: |-
EphemeralRunnerMetadata is the metadata to be applied to all ephemeral runners created by this set.
If the EphemeralRunnerMetadata is updated, the update applies to new ephemeral runners created after the update,
but does not apply to existing ephemeral runners.
properties:
annotations:
additionalProperties:

View File

@ -46,22 +46,13 @@ rules:
- ""
resources:
- secrets
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- actions.github.com
@ -170,15 +161,6 @@ rules:
- rbac.authorization.k8s.io
resources:
- rolebindings
verbs:
- create
- delete
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
verbs:
- create

View File

@ -17,10 +17,10 @@ limitations under the License.
package actionsgithubcom
import (
"bytes"
"context"
"fmt"
"maps"
"reflect"
"time"
"github.com/go-logr/logr"
@ -38,10 +38,8 @@ import (
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1/appconfig"
"github.com/actions/actions-runner-controller/controllers/actions.github.com/metrics"
"github.com/actions/actions-runner-controller/github/actions"
hash "github.com/actions/actions-runner-controller/hash"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
const (
@ -65,9 +63,9 @@ type AutoscalingListenerReconciler struct {
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update
// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create
// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;update
// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles,verbs=create;delete;get;list;watch;update
// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=create;delete;get;list;watch
// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=create;delete;get;list;watch;update
// +kubebuilder:rbac:groups=actions.github.com,resources=autoscalinglisteners,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=actions.github.com,resources=autoscalinglisteners/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=actions.github.com,resources=autoscalinglisteners/finalizers,verbs=update
@ -76,18 +74,19 @@ type AutoscalingListenerReconciler struct {
func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.WithValues("autoscalinglistener", req.NamespacedName)
autoscalingListener := new(v1alpha1.AutoscalingListener)
if err := r.Get(ctx, req.NamespacedName, autoscalingListener); err != nil {
var autoscalingListener v1alpha1.AutoscalingListener
if err := r.Get(ctx, req.NamespacedName, &autoscalingListener); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
original := autoscalingListener.DeepCopy()
if !autoscalingListener.DeletionTimestamp.IsZero() {
if !controllerutil.ContainsFinalizer(autoscalingListener, autoscalingListenerFinalizerName) {
if !controllerutil.ContainsFinalizer(&autoscalingListener, autoscalingListenerFinalizerName) {
return ctrl.Result{}, nil
}
log.Info("Deleting resources")
requeue, err := r.cleanupResources(ctx, autoscalingListener, log)
requeue, err := r.cleanupResources(ctx, &autoscalingListener, log)
if err != nil {
log.Error(err, "Failed to cleanup resources after deletion")
return ctrl.Result{}, err
@ -98,23 +97,19 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
}
log.Info("Removing finalizer")
err = patch(ctx, r.Client, autoscalingListener, func(obj *v1alpha1.AutoscalingListener) {
controllerutil.RemoveFinalizer(obj, autoscalingListenerFinalizerName)
})
if err != nil && !kerrors.IsNotFound(err) {
log.Error(err, "Failed to remove finalizer")
return ctrl.Result{}, err
if controllerutil.RemoveFinalizer(&autoscalingListener, autoscalingListenerFinalizerName) {
if err := r.Patch(ctx, &autoscalingListener, client.MergeFrom(original)); err != nil && !kerrors.IsNotFound(err) {
log.Error(err, "Failed to remove finalizer")
return ctrl.Result{}, err
}
}
log.Info("Successfully removed finalizer after cleanup")
return ctrl.Result{}, nil
}
if !controllerutil.ContainsFinalizer(autoscalingListener, autoscalingListenerFinalizerName) {
log.Info("Adding finalizer")
if err := patch(ctx, r.Client, autoscalingListener, func(obj *v1alpha1.AutoscalingListener) {
controllerutil.AddFinalizer(obj, autoscalingListenerFinalizerName)
}); err != nil {
if controllerutil.AddFinalizer(&autoscalingListener, autoscalingListenerFinalizerName) {
if err := r.Patch(ctx, &autoscalingListener, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to add finalizer")
return ctrl.Result{}, err
}
@ -125,101 +120,243 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
// Check if the AutoscalingRunnerSet exists
var autoscalingRunnerSet v1alpha1.AutoscalingRunnerSet
if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Spec.AutoscalingRunnerSetName}, &autoscalingRunnerSet); err != nil {
log.Error(err, "Failed to find AutoscalingRunnerSet.",
"namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
"name", autoscalingListener.Spec.AutoscalingRunnerSetName)
return ctrl.Result{}, err
}
if err := r.Get(
ctx,
types.NamespacedName{
Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
Name: autoscalingListener.Spec.AutoscalingRunnerSetName,
},
&autoscalingRunnerSet,
); err != nil {
if kerrors.IsNotFound(err) {
log.Info("AutoscalingRunnerSet is not found, deleting autoscaling listener", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", autoscalingListener.Spec.AutoscalingRunnerSetName)
if err := r.Delete(ctx, &autoscalingListener); err != nil {
log.Error(err, "failed to delete autoscaling listener", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
appConfig, err := r.GetAppConfig(ctx, &autoscalingRunnerSet)
if err != nil {
log.Error(
err,
"Failed to get app config for AutoscalingRunnerSet.",
"namespace",
autoscalingRunnerSet.Namespace,
"name",
autoscalingRunnerSet.GitHubConfigSecret,
err, "Failed to find AutoscalingRunnerSet.",
"namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
"name", autoscalingListener.Spec.AutoscalingRunnerSetName,
)
return ctrl.Result{}, err
}
// Make sure the runner scale set listener service account is created for the listener pod in the controller namespace
serviceAccount := new(corev1.ServiceAccount)
if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: autoscalingListener.Name}, serviceAccount); err != nil {
if !kerrors.IsNotFound(err) {
log.Error(err, "Unable to get listener service accounts", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name)
var serviceAccount corev1.ServiceAccount
err := r.Get(
ctx,
types.NamespacedName{
Namespace: autoscalingListener.Namespace,
Name: autoscalingListener.Name,
},
&serviceAccount,
)
switch {
case err == nil:
desiredServiceAccount, err := r.newScaleSetListenerServiceAccount(&autoscalingListener)
if err != nil {
log.Error(err, "Failed to build desired listener service account")
return ctrl.Result{}, err
}
updatedServiceAccount := serviceAccount.DeepCopy()
var shouldUpdate bool
desiredLabels := r.filterAndMergeLabels(serviceAccount.Labels, desiredServiceAccount.Labels)
if !maps.Equal(serviceAccount.Labels, desiredLabels) {
updatedServiceAccount.Labels = desiredLabels
shouldUpdate = true
}
desiredAnnotations := r.mergeAnnotations(serviceAccount.Annotations, desiredServiceAccount.Annotations)
if !maps.Equal(serviceAccount.Annotations, desiredAnnotations) {
updatedServiceAccount.Annotations = desiredAnnotations
shouldUpdate = true
}
if shouldUpdate {
log.Info("Updating listener service account")
if err := r.Update(ctx, updatedServiceAccount); err != nil {
log.Error(err, "Failed to update listener service account")
return ctrl.Result{}, err
}
return ctrl.Result{Requeue: true}, nil
}
case kerrors.IsNotFound(err):
// Create a service account for the listener pod in the controller namespace
log.Info("Creating a service account for the listener pod")
return r.createServiceAccountForListener(ctx, autoscalingListener, log)
return r.createServiceAccountForListener(ctx, &autoscalingListener, log)
default:
log.Error(err, "Unable to get listener service accounts", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name)
return ctrl.Result{}, err
}
// TODO: make sure the service account is up to date
// Make sure the runner scale set listener role is created in the AutoscalingRunnerSet namespace
listenerRole := new(rbacv1.Role)
if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Name}, listenerRole); err != nil {
if !kerrors.IsNotFound(err) {
log.Error(err, "Unable to get listener role", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", autoscalingListener.Name)
return ctrl.Result{}, err
var listenerRole rbacv1.Role
err = r.Get(
ctx,
types.NamespacedName{
Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
Name: autoscalingListener.Name,
},
&listenerRole,
)
switch {
case err == nil:
desiredRole := r.newScaleSetListenerRole(&autoscalingListener)
updatedRole := listenerRole.DeepCopy()
var shouldUpdate bool
desiredLabels := r.filterAndMergeLabels(listenerRole.Labels, desiredRole.Labels)
if !maps.Equal(listenerRole.Labels, desiredLabels) {
updatedRole.Labels = desiredLabels
shouldUpdate = true
}
desiredAnnotations := r.mergeAnnotations(listenerRole.Annotations, desiredRole.Annotations)
if !maps.Equal(listenerRole.Annotations, desiredAnnotations) {
updatedRole.Annotations = desiredAnnotations
shouldUpdate = true
}
if !reflect.DeepEqual(listenerRole.Rules, desiredRole.Rules) {
updatedRole.Rules = desiredRole.Rules
shouldUpdate = true
}
if shouldUpdate {
log.Info("Updating listener role")
if err := r.Update(ctx, updatedRole); err != nil {
log.Error(err, "Failed to update listener role")
return ctrl.Result{}, err
}
return ctrl.Result{Requeue: true}, nil
}
case kerrors.IsNotFound(err):
// Create a role for the listener pod in the AutoScalingRunnerSet namespace
log.Info("Creating a role for the listener pod")
return r.createRoleForListener(ctx, autoscalingListener, log)
}
// Make sure the listener role has the up-to-date rules
existingRuleHash := listenerRole.Labels["role-policy-rules-hash"]
desiredRules := rulesForListenerRole([]string{autoscalingListener.Spec.EphemeralRunnerSetName})
desiredRulesHash := hash.ComputeTemplateHash(&desiredRules)
if existingRuleHash != desiredRulesHash {
log.Info("Updating the listener role with the up-to-date rules")
return r.updateRoleForListener(ctx, listenerRole, desiredRules, desiredRulesHash, log)
return r.createRoleForListener(ctx, &autoscalingListener, log)
default: // error
log.Error(err, "Unable to get listener role", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", autoscalingListener.Name)
return ctrl.Result{}, err
}
// Make sure the runner scale set listener role binding is created
listenerRoleBinding := new(rbacv1.RoleBinding)
if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Name}, listenerRoleBinding); err != nil {
if !kerrors.IsNotFound(err) {
log.Error(err, "Unable to get listener role binding", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", autoscalingListener.Name)
return ctrl.Result{}, err
var listenerRoleBinding rbacv1.RoleBinding
err = r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace, Name: autoscalingListener.Name}, &listenerRoleBinding)
switch {
case err == nil:
desiredRoleBinding := r.newScaleSetListenerRoleBinding(
&autoscalingListener,
&listenerRole,
&serviceAccount,
)
updatedRoleBinding := listenerRoleBinding.DeepCopy()
var shouldUpdate bool
desiredLabels := r.filterAndMergeLabels(listenerRoleBinding.Labels, desiredRoleBinding.Labels)
if !maps.Equal(listenerRoleBinding.Labels, desiredLabels) {
updatedRoleBinding.Labels = desiredLabels
shouldUpdate = true
}
desiredAnnotations := r.mergeAnnotations(listenerRoleBinding.Annotations, desiredRoleBinding.Annotations)
if !maps.Equal(listenerRoleBinding.Annotations, desiredAnnotations) {
updatedRoleBinding.Annotations = desiredAnnotations
shouldUpdate = true
}
if shouldUpdate {
log.Info("Updating listener role binding")
if err := r.Update(ctx, updatedRoleBinding); err != nil {
log.Error(err, "Failed to update listener role binding")
return ctrl.Result{}, err
}
log.Info("Updated listener role binding")
return ctrl.Result{Requeue: true}, nil
}
case kerrors.IsNotFound(err):
// Create a role binding for the listener pod in the AutoScalingRunnerSet namespace
log.Info("Creating a role binding for the service account and role")
return r.createRoleBindingForListener(ctx, autoscalingListener, listenerRole, serviceAccount, log)
return r.createRoleBindingForListener(
ctx,
&autoscalingListener,
&listenerRole,
&serviceAccount,
log,
)
default: // error
log.Error(err, "Unable to get listener role binding", "namespace", autoscalingListener.Spec.AutoscalingRunnerSetNamespace, "name", autoscalingListener.Name)
return ctrl.Result{}, err
}
// Create a secret containing proxy config if specified
if autoscalingListener.Spec.Proxy != nil {
proxySecret := new(corev1.Secret)
if err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: proxyListenerSecretName(autoscalingListener)}, proxySecret); err != nil {
if !kerrors.IsNotFound(err) {
log.Error(err, "Unable to get listener proxy secret", "namespace", autoscalingListener.Namespace, "name", proxyListenerSecretName(autoscalingListener))
var proxySecret corev1.Secret
err := r.Get(
ctx,
types.NamespacedName{
Namespace: autoscalingListener.Namespace,
Name: proxyListenerSecretName(&autoscalingListener),
},
&proxySecret,
)
switch {
case err == nil:
desiredListenerProxy, err := r.newAutoscalingListenerProxySecret(&autoscalingListener, proxySecret.Data)
if err != nil {
log.Error(err, "Failed to build desired listener proxy secret")
return ctrl.Result{}, err
}
updatedProxySecret := proxySecret.DeepCopy()
var shouldUpdate bool
desiredLabels := r.filterAndMergeLabels(proxySecret.Labels, desiredListenerProxy.Labels)
if !maps.Equal(proxySecret.Labels, desiredLabels) {
updatedProxySecret.Labels = desiredLabels
shouldUpdate = true
}
desiredAnnotations := r.mergeAnnotations(proxySecret.Annotations, desiredListenerProxy.Annotations)
if !maps.Equal(proxySecret.Annotations, desiredAnnotations) {
updatedProxySecret.Annotations = desiredAnnotations
shouldUpdate = true
}
if shouldUpdate {
log.Info("Updating listener proxy secret")
if err := r.Update(ctx, updatedProxySecret); err != nil {
log.Error(err, "Failed to update listener proxy secret")
return ctrl.Result{}, err
}
return ctrl.Result{Requeue: true}, nil
}
case kerrors.IsNotFound(err):
// Create a mirror secret for the listener pod in the Controller namespace for listener pod to use
log.Info("Creating a listener proxy secret for the listener pod")
return r.createProxySecret(ctx, autoscalingListener, log)
return r.createProxySecret(ctx, &autoscalingListener, log)
default: // error
log.Error(err, "Unable to get listener proxy secret", "namespace", autoscalingListener.Namespace, "name", proxyListenerSecretName(&autoscalingListener))
return ctrl.Result{}, err
}
}
// TODO: make sure the role binding has the up-to-date role and service account
// Reconcile listener config secret and detect drift
cert := ""
if autoscalingListener.Spec.GitHubServerTLS != nil {
var err error
cert, err = r.certificate(ctx, &autoscalingRunnerSet, autoscalingListener)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to build GitHub server TLS certificate value for listener config: %w", err)
var appConfig *appconfig.AppConfig
getAppConfig := func() (*appconfig.AppConfig, error) {
if appConfig != nil {
return appConfig, nil
}
cfg, err := r.GetAppConfig(ctx, &autoscalingRunnerSet)
if err != nil {
log.Error(
err,
"Failed to get app config for AutoscalingRunnerSet.",
"namespace",
autoscalingRunnerSet.Namespace,
"name",
autoscalingRunnerSet.GitHubConfigSecret,
)
return nil, err
}
appConfig = cfg
return appConfig, nil
}
var metricsConfig *listenerMetricsServerConfig
@ -230,43 +367,169 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
}
}
secretChanged, err := r.reconcileListenerConfigSecret(ctx, autoscalingListener, appConfig, metricsConfig, cert, log)
if err != nil {
log.Error(err, "Failed to reconcile listener config secret")
var listenerConfigSecret corev1.Secret
err = r.Get(
ctx,
types.NamespacedName{
Namespace: autoscalingListener.Namespace,
Name: scaleSetListenerConfigName(&autoscalingListener),
},
&listenerConfigSecret,
)
switch {
case err == nil:
cfg, err := r.GetAppConfig(ctx, &autoscalingRunnerSet)
if err != nil {
return ctrl.Result{}, err
}
cert := ""
if autoscalingListener.Spec.GitHubServerTLS != nil {
cert, err = r.certificate(ctx, &autoscalingRunnerSet, &autoscalingListener)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to build GitHub server TLS certificate value for listener config: %w", err)
}
}
desiredSecret, err := r.newScaleSetListenerConfig(&autoscalingListener, cfg, metricsConfig, cert)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to build listener config secret: %w", err)
}
updatedSecret := listenerConfigSecret.DeepCopy()
var shouldUpdate bool
desiredLabels := r.filterAndMergeLabels(listenerConfigSecret.Labels, desiredSecret.Labels)
if !maps.Equal(listenerConfigSecret.Labels, desiredLabels) {
updatedSecret.Labels = desiredLabels
shouldUpdate = true
}
desiredAnnotations := r.mergeAnnotations(listenerConfigSecret.Annotations, desiredSecret.Annotations)
if !maps.Equal(listenerConfigSecret.Annotations, desiredAnnotations) {
updatedSecret.Annotations = desiredAnnotations
shouldUpdate = true
}
if shouldUpdate {
log.Info("Updating listener config secret", "namespace", updatedSecret.Namespace, "name", updatedSecret.Name)
if err := r.Update(ctx, updatedSecret); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update listener config secret: %w", err)
}
return ctrl.Result{Requeue: true}, nil
}
case kerrors.IsNotFound(err):
cfg, err := getAppConfig()
if err != nil {
return ctrl.Result{}, err
}
cert := ""
if autoscalingListener.Spec.GitHubServerTLS != nil {
cert, err = r.certificate(ctx, &autoscalingRunnerSet, &autoscalingListener)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to build GitHub server TLS certificate value for listener config: %w", err)
}
}
desiredSecret, err := r.newScaleSetListenerConfig(&autoscalingListener, cfg, metricsConfig, cert)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to build listener config secret: %w", err)
}
log.Info("Creating listener config secret", "namespace", desiredSecret.Namespace, "name", desiredSecret.Name)
if err := r.Create(ctx, desiredSecret); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create listener config secret: %w", err)
}
// Requeue to create listener pod with the config secret
return ctrl.Result{Requeue: true}, nil
default:
log.Error(err, "Unable to get listener config secret", "namespace", autoscalingListener.Namespace, "name", scaleSetListenerConfigName(&autoscalingListener))
return ctrl.Result{}, err
}
listenerPod := new(corev1.Pod)
if err := r.Get(
var listenerPod corev1.Pod
err = r.Get(
ctx,
client.ObjectKey{
Namespace: autoscalingListener.Namespace,
Name: autoscalingListener.Name,
},
listenerPod,
); err != nil {
if !kerrors.IsNotFound(err) {
log.Error(err, "Unable to get listener pod", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name)
&listenerPod,
)
switch {
case err == nil:
desiredPod, err := r.newScaleSetListenerPod(
&autoscalingListener,
&listenerConfigSecret,
&serviceAccount,
&listenerRole,
&listenerRoleBinding,
metricsConfig,
)
if err != nil {
log.Error(err, "Failed to build listener pod")
return ctrl.Result{}, err
}
if err := r.publishRunningListener(autoscalingListener, false); err != nil {
shouldReCreate := desiredPod.Annotations[annotationKeyIntegrityHash] != listenerPod.Annotations[annotationKeyIntegrityHash]
if shouldReCreate {
log.Info("Listener pod dependency changed, recreating listener pod")
if err := r.deleteListenerPod(ctx, &autoscalingListener, &listenerPod, log); err != nil {
return ctrl.Result{}, err
}
log.Info("Listener pod is deleted, will recreate with new dependencies")
return ctrl.Result{}, nil
}
updatedPod := listenerPod.DeepCopy()
var shouldUpdate bool
desiredLabels := r.filterAndMergeLabels(listenerPod.Labels, desiredPod.Labels)
if !maps.Equal(listenerPod.Labels, desiredLabels) {
updatedPod.Labels = desiredLabels
shouldUpdate = true
}
desiredAnnotations := r.mergeAnnotations(listenerPod.Annotations, desiredPod.Annotations)
if !maps.Equal(listenerPod.Annotations, desiredAnnotations) {
updatedPod.Annotations = desiredAnnotations
shouldUpdate = true
}
if shouldUpdate {
log.Info("Updating listener pod", "namespace", updatedPod.Namespace, "name", updatedPod.Name)
if err := r.Update(ctx, updatedPod); err != nil {
log.Error(err, "Unable to update listener pod", "namespace", updatedPod.Namespace, "name", updatedPod.Name)
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
case kerrors.IsNotFound(err):
if err := r.publishRunningListener(&autoscalingListener, false); err != nil {
// If publish fails, URL is incorrect which means the listener pod would never be able to start
return ctrl.Result{}, nil
}
// Create a listener pod in the controller namespace
log.Info("Creating a listener pod")
return r.createListenerPod(ctx, &autoscalingRunnerSet, autoscalingListener, serviceAccount, appConfig, log)
desiredPod, err := r.newScaleSetListenerPod(
&autoscalingListener,
&listenerConfigSecret,
&serviceAccount,
&listenerRole,
&listenerRoleBinding,
metricsConfig,
)
if err != nil {
log.Error(err, "Failed to build listener pod")
return ctrl.Result{}, err
}
log.Info("Creating listener pod", "namespace", desiredPod.Namespace, "name", desiredPod.Name)
if err := r.Create(ctx, desiredPod); err != nil {
log.Error(err, "Unable to create listener pod", "namespace", desiredPod.Namespace, "name", desiredPod.Name)
return ctrl.Result{}, err
}
default: // error
log.Error(err, "Unable to get listener pod", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name)
return ctrl.Result{}, err
}
// If listener config secret changed and pod exists, restart the pod
if secretChanged {
log.Info("Listener config secret changed, restarting listener pod")
return ctrl.Result{}, r.deleteListenerPod(ctx, autoscalingListener, listenerPod, log)
}
cs := listenerContainerStatus(listenerPod)
cs := listenerContainerStatus(&listenerPod)
switch {
case listenerPod.Status.Reason == "Evicted":
log.Info(
@ -276,7 +539,7 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
"message", listenerPod.Status.Message,
)
return ctrl.Result{}, r.deleteListenerPod(ctx, autoscalingListener, listenerPod, log)
return ctrl.Result{}, r.deleteListenerPod(ctx, &autoscalingListener, &listenerPod, log)
case cs == nil:
log.Info("Listener pod is not ready", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
@ -290,10 +553,10 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
"message", cs.State.Terminated.Message,
)
return ctrl.Result{}, r.deleteListenerPod(ctx, autoscalingListener, listenerPod, log)
return ctrl.Result{}, r.deleteListenerPod(ctx, &autoscalingListener, &listenerPod, log)
case cs.State.Running != nil:
if err := r.publishRunningListener(autoscalingListener, true); err != nil {
if err := r.publishRunningListener(&autoscalingListener, true); err != nil {
log.Error(err, "Unable to publish running listener", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
// stop reconciling. We should never get to this point but if we do,
// listener won't be able to start up, and the crash from the pod should
@ -428,9 +691,8 @@ func (r *AutoscalingListenerReconciler) cleanupResources(ctx context.Context, au
}
func (r *AutoscalingListenerReconciler) createServiceAccountForListener(ctx context.Context, autoscalingListener *v1alpha1.AutoscalingListener, logger logr.Logger) (ctrl.Result, error) {
newServiceAccount := r.newScaleSetListenerServiceAccount(autoscalingListener)
if err := ctrl.SetControllerReference(autoscalingListener, newServiceAccount, r.Scheme); err != nil {
newServiceAccount, err := r.newScaleSetListenerServiceAccount(autoscalingListener)
if err != nil {
return ctrl.Result{}, err
}
@ -444,156 +706,6 @@ func (r *AutoscalingListenerReconciler) createServiceAccountForListener(ctx cont
return ctrl.Result{}, nil
}
// reconcileListenerConfigSecret brings the listener config secret in line with
// the desired state built by newScaleSetListenerConfig.
//
// The secret is created (with the AutoscalingListener set as its controller
// reference) when it does not exist yet, and updated in place when
// listenerConfigSecretDrifted reports a difference. changed is true whenever a
// create or update was issued successfully, and false when the existing secret
// already matches or when an error is returned.
func (r *AutoscalingListenerReconciler) reconcileListenerConfigSecret(ctx context.Context, autoscalingListener *v1alpha1.AutoscalingListener, appConfig *appconfig.AppConfig, metricsConfig *listenerMetricsServerConfig, cert string, logger logr.Logger) (changed bool, err error) {
	want, err := r.newScaleSetListenerConfig(autoscalingListener, appConfig, metricsConfig, cert)
	if err != nil {
		return false, fmt.Errorf("failed to build listener config secret: %w", err)
	}

	var current corev1.Secret
	key := types.NamespacedName{Name: want.Name, Namespace: want.Namespace}
	err = r.Get(ctx, key, &current)
	switch {
	case err == nil:
		// The secret exists; drift is evaluated below.
	case kerrors.IsNotFound(err):
		if refErr := ctrl.SetControllerReference(autoscalingListener, want, r.Scheme); refErr != nil {
			return false, fmt.Errorf("failed to set controller reference: %w", refErr)
		}

		logger.Info("Creating listener config secret", "namespace", want.Namespace, "name", want.Name)
		if createErr := r.Create(ctx, want); createErr != nil {
			return false, fmt.Errorf("failed to create listener config secret: %w", createErr)
		}
		return true, nil
	default:
		return false, fmt.Errorf("failed to get listener config secret: %w", err)
	}

	// Nothing to do when the stored secret already matches the desired one.
	if !listenerConfigSecretDrifted(&current, want) {
		return false, nil
	}

	// Only data, labels and annotations are overwritten; every other field of
	// the stored object is carried over from the copy.
	patched := current.DeepCopy()
	patched.Data = want.Data
	patched.Labels = want.Labels
	patched.Annotations = want.Annotations

	logger.Info("Updating listener config secret", "namespace", patched.Namespace, "name", patched.Name)
	if updateErr := r.Update(ctx, patched); updateErr != nil {
		return false, fmt.Errorf("failed to update listener config secret: %w", updateErr)
	}
	return true, nil
}
// createListenerPod creates the listener pod for the given AutoscalingListener.
//
// It works in two phases across reconciliations:
//  1. If the listener config secret does not exist yet, it is built, owned by
//     the AutoscalingListener, created, and a requeue is requested.
//  2. Once the secret exists, the pod is built from it (plus the proxy env
//     vars, when a proxy is configured), owned by the AutoscalingListener, and
//     created.
func (r *AutoscalingListenerReconciler) createListenerPod(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, autoscalingListener *v1alpha1.AutoscalingListener, serviceAccount *corev1.ServiceAccount, appConfig *appconfig.AppConfig, logger logr.Logger) (ctrl.Result, error) {
	// Proxy settings are read by the pod from the listener proxy secret; an
	// env var is only added for the proxy fields that are actually set.
	var envs []corev1.EnvVar
	if proxy := autoscalingListener.Spec.Proxy; proxy != nil {
		secretName := proxyListenerSecretName(autoscalingListener)
		fromProxySecret := func(key string) corev1.EnvVar {
			return corev1.EnvVar{
				Name: key,
				ValueFrom: &corev1.EnvVarSource{
					SecretKeyRef: &corev1.SecretKeySelector{
						LocalObjectReference: corev1.LocalObjectReference{Name: secretName},
						Key:                  key,
					},
				},
			}
		}

		if proxy.HTTP != nil {
			envs = append(envs, fromProxySecret("http_proxy"))
		}
		if proxy.HTTPS != nil {
			envs = append(envs, fromProxySecret("https_proxy"))
		}
		if len(proxy.NoProxy) > 0 {
			envs = append(envs, fromProxySecret("no_proxy"))
		}
	}

	var cert string
	if autoscalingListener.Spec.GitHubServerTLS != nil {
		tlsCert, err := r.certificate(ctx, autoscalingRunnerSet, autoscalingListener)
		if err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to create certificate env var for listener: %w", err)
		}
		cert = tlsCert
	}

	// A metrics address of "0" leaves the metrics config nil.
	var metricsConfig *listenerMetricsServerConfig
	if r.ListenerMetricsAddr != "0" {
		metricsConfig = &listenerMetricsServerConfig{
			addr:     r.ListenerMetricsAddr,
			endpoint: r.ListenerMetricsEndpoint,
		}
	}

	configKey := types.NamespacedName{
		Namespace: autoscalingListener.Namespace,
		Name:      scaleSetListenerConfigName(autoscalingListener),
	}
	var podConfig corev1.Secret
	switch err := r.Get(ctx, configKey, &podConfig); {
	case err == nil:
		// Config secret is present; continue to pod creation.
	case kerrors.IsNotFound(err):
		logger.Info("Creating listener config secret")

		configSecret, err := r.newScaleSetListenerConfig(autoscalingListener, appConfig, metricsConfig, cert)
		if err != nil {
			logger.Error(err, "Failed to build listener config secret")
			return ctrl.Result{}, err
		}

		if err := ctrl.SetControllerReference(autoscalingListener, configSecret, r.Scheme); err != nil {
			logger.Error(err, "Failed to set controller reference")
			return ctrl.Result{}, err
		}

		if err := r.Create(ctx, configSecret); err != nil {
			logger.Error(err, "Unable to create listener config secret", "namespace", configSecret.Namespace, "name", configSecret.Name)
			return ctrl.Result{}, err
		}

		// Come back on the next reconcile to create the pod itself.
		return ctrl.Result{Requeue: true}, nil
	default:
		logger.Error(err, "Unable to get listener config secret", "namespace", configKey.Namespace, "name", configKey.Name)
		return ctrl.Result{Requeue: true}, err
	}

	newPod, err := r.newScaleSetListenerPod(autoscalingListener, &podConfig, serviceAccount, metricsConfig, envs...)
	if err != nil {
		logger.Error(err, "Failed to build listener pod")
		return ctrl.Result{}, err
	}

	if err := ctrl.SetControllerReference(autoscalingListener, newPod, r.Scheme); err != nil {
		logger.Error(err, "Failed to set controller reference")
		return ctrl.Result{}, err
	}

	logger.Info("Creating listener pod", "namespace", newPod.Namespace, "name", newPod.Name)
	if err := r.Create(ctx, newPod); err != nil {
		logger.Error(err, "Unable to create listener pod", "namespace", newPod.Namespace, "name", newPod.Name)
		return ctrl.Result{}, err
	}

	logger.Info("Created listener pod", "namespace", newPod.Namespace, "name", newPod.Name)
	return ctrl.Result{}, nil
}
func (r *AutoscalingListenerReconciler) certificate(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, autoscalingListener *v1alpha1.AutoscalingListener) (string, error) {
if autoscalingListener.Spec.GitHubServerTLS.CertificateFrom == nil {
return "", fmt.Errorf("githubServerTLS.certificateFrom is not specified")
@ -645,19 +757,9 @@ func (r *AutoscalingListenerReconciler) createProxySecret(ctx context.Context, a
return ctrl.Result{}, fmt.Errorf("failed to convert proxy config to secret data: %w", err)
}
newProxySecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: proxyListenerSecretName(autoscalingListener),
Namespace: autoscalingListener.Namespace,
Labels: map[string]string{
LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
LabelKeyGitHubScaleSetName: autoscalingListener.Spec.AutoscalingRunnerSetName,
},
},
Data: data,
}
if err := ctrl.SetControllerReference(autoscalingListener, newProxySecret, r.Scheme); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create listener proxy secret: %w", err)
newProxySecret, err := r.newAutoscalingListenerProxySecret(autoscalingListener, data)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to build listener proxy secret: %w", err)
}
logger.Info("Creating listener proxy secret", "namespace", newProxySecret.Namespace, "name", newProxySecret.Name)
@ -684,21 +786,6 @@ func (r *AutoscalingListenerReconciler) createRoleForListener(ctx context.Contex
return ctrl.Result{Requeue: true}, nil
}
// updateRoleForListener replaces the rules of the listener role with
// desiredRules and records desiredRulesHash in the "role-policy-rules-hash"
// label.
//
// The update is applied to a deep copy, so the listenerRole passed by the
// caller is never mutated. On success a requeue is requested; on failure the
// update error is returned as-is.
func (r *AutoscalingListenerReconciler) updateRoleForListener(ctx context.Context, listenerRole *rbacv1.Role, desiredRules []rbacv1.PolicyRule, desiredRulesHash string, logger logr.Logger) (ctrl.Result, error) {
	updatedPatchRole := listenerRole.DeepCopy()
	// A role whose labels were removed carries a nil map; writing into a nil
	// map panics, so make sure there is a map to write the hash into.
	if updatedPatchRole.Labels == nil {
		updatedPatchRole.Labels = make(map[string]string, 1)
	}
	updatedPatchRole.Labels["role-policy-rules-hash"] = desiredRulesHash
	updatedPatchRole.Rules = desiredRules

	logger.Info("Updating listener role in namespace to have the right permission", "namespace", updatedPatchRole.Namespace, "name", updatedPatchRole.Name, "oldRules", listenerRole.Rules, "newRules", updatedPatchRole.Rules)
	if err := r.Update(ctx, updatedPatchRole); err != nil {
		logger.Error(err, "Unable to update listener role", "namespace", updatedPatchRole.Namespace, "name", updatedPatchRole.Name, "rules", updatedPatchRole.Rules)
		return ctrl.Result{}, err
	}

	logger.Info("Updated listener role in namespace to have the right permission", "namespace", updatedPatchRole.Namespace, "name", updatedPatchRole.Name, "rules", updatedPatchRole.Rules)
	return ctrl.Result{Requeue: true}, nil
}
func (r *AutoscalingListenerReconciler) createRoleBindingForListener(ctx context.Context, autoscalingListener *v1alpha1.AutoscalingListener, listenerRole *rbacv1.Role, serviceAccount *corev1.ServiceAccount, logger logr.Logger) (ctrl.Result, error) {
newRoleBinding := r.newScaleSetListenerRoleBinding(autoscalingListener, listenerRole, serviceAccount)
@ -728,7 +815,7 @@ func (r *AutoscalingListenerReconciler) createRoleBindingForListener(ctx context
}
func (r *AutoscalingListenerReconciler) publishRunningListener(autoscalingListener *v1alpha1.AutoscalingListener, isUp bool) error {
githubConfigURL := autoscalingListener.Spec.GitHubConfigUrl
githubConfigURL := autoscalingListener.Spec.GitHubConfigURL
parsedURL, err := actions.ParseGitHubConfigFromURL(githubConfigURL)
if err != nil {
return err
@ -751,29 +838,10 @@ func (r *AutoscalingListenerReconciler) publishRunningListener(autoscalingListen
return nil
}
// listenerConfigSecretDrifted reports whether the existing listener config
// secret differs from the desired one.
//
// Three things are compared, in order: the bytes stored under the
// "config.json" data key, the labels, and the annotations. Any mismatch counts
// as drift; other data keys and metadata fields are not inspected.
func listenerConfigSecretDrifted(existing *corev1.Secret, desired *corev1.Secret) bool {
	return !bytes.Equal(existing.Data["config.json"], desired.Data["config.json"]) ||
		!maps.Equal(existing.Labels, desired.Labels) ||
		!maps.Equal(existing.Annotations, desired.Annotations)
}
// SetupWithManager sets up the controller with the Manager.
func (r *AutoscalingListenerReconciler) SetupWithManager(mgr ctrl.Manager, opts ...Option) error {
r.setSchemeIfUnset(r.Scheme)
labelBasedWatchFunc := func(_ context.Context, obj client.Object) []reconcile.Request {
var requests []reconcile.Request
labels := obj.GetLabels()
@ -786,7 +854,8 @@ func (r *AutoscalingListenerReconciler) SetupWithManager(mgr ctrl.Manager, opts
if !ok {
return nil
}
requests = append(requests,
requests = append(
requests,
reconcile.Request{
NamespacedName: types.NamespacedName{
Name: name,

View File

@ -94,17 +94,43 @@ var _ = Describe("Test AutoScalingListener controller", func() {
ObjectMeta: metav1.ObjectMeta{
Name: "test-asl",
Namespace: autoscalingNS.Name,
Labels: map[string]string{
"arc.test/listener-label": "initial",
},
},
Spec: v1alpha1.AutoscalingListenerSpec{
GitHubConfigUrl: "https://github.com/owner/repo",
GitHubConfigURL: "https://github.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
RunnerScaleSetId: 1,
RunnerScaleSetID: 1,
AutoscalingRunnerSetNamespace: autoscalingRunnerSet.Namespace,
AutoscalingRunnerSetName: autoscalingRunnerSet.Name,
EphemeralRunnerSetName: "test-ers",
MaxRunners: 10,
MinRunners: 1,
Image: "ghcr.io/owner/repo",
ServiceAccountMetadata: &v1alpha1.ResourceMeta{
Annotations: map[string]string{
"arc.test/service-account-annotation": "initial",
},
},
RoleMetadata: &v1alpha1.ResourceMeta{
Annotations: map[string]string{
"arc.test/role-annotation": "initial",
},
},
RoleBindingMetadata: &v1alpha1.ResourceMeta{
Annotations: map[string]string{
"arc.test/role-binding-annotation": "initial",
},
},
ConfigSecretMetadata: &v1alpha1.ResourceMeta{
Labels: map[string]string{
"arc.test/config-secret-label": "initial",
},
Annotations: map[string]string{
"arc.test/config-secret-annotation": "initial",
},
},
},
}
@ -143,7 +169,8 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return created.Finalizers[0], nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListenerFinalizerName), "AutoScalingListener should have a finalizer")
autoscalingListenerTestInterval,
).Should(BeEquivalentTo(autoscalingListenerFinalizerName), "AutoScalingListener should have a finalizer")
// Check if service account is created
serviceAccount := new(corev1.ServiceAccount)
@ -171,7 +198,8 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return role.Rules, nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(BeEquivalentTo(rulesForListenerRole([]string{autoscalingListener.Spec.EphemeralRunnerSetName})), "Role should be created")
autoscalingListenerTestInterval,
).Should(BeEquivalentTo(rulesForListenerRole([]string{autoscalingListener.Spec.EphemeralRunnerSetName})), "Role should be created")
// Check if rolebinding is created
roleBinding := new(rbacv1.RoleBinding)
@ -185,7 +213,8 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return roleBinding.RoleRef.Name, nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListener.Name), "Rolebinding should be created")
autoscalingListenerTestInterval,
).Should(BeEquivalentTo(autoscalingListener.Name), "Rolebinding should be created")
// Check if pod is created
pod := new(corev1.Pod)
@ -199,7 +228,8 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return pod.Name, nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListener.Name), "Pod should be created")
autoscalingListenerTestInterval,
).Should(BeEquivalentTo(autoscalingListener.Name), "Pod should be created")
})
})
@ -292,7 +322,8 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).ShouldNot(Succeed(), "failed to delete service account")
autoscalingListenerTestInterval,
).ShouldNot(Succeed(), "failed to delete service account")
// The AutoScalingListener should be deleted
Eventually(
@ -309,7 +340,8 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).ShouldNot(Succeed(), "failed to delete AutoScalingListener")
autoscalingListenerTestInterval,
).ShouldNot(Succeed(), "failed to delete AutoScalingListener")
})
})
@ -327,7 +359,8 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return pod.Name, nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(BeEquivalentTo(autoscalingListener.Name), "Pod should be created")
autoscalingListenerTestInterval,
).Should(BeEquivalentTo(autoscalingListener.Name), "Pod should be created")
// Update the AutoScalingListener
updated := autoscalingListener.DeepCopy()
@ -347,7 +380,101 @@ var _ = Describe("Test AutoScalingListener controller", func() {
return role.Rules, nil
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(BeEquivalentTo(rulesForListenerRole([]string{updated.Spec.EphemeralRunnerSetName})), "Role should be updated")
autoscalingListenerTestInterval,
).Should(BeEquivalentTo(rulesForListenerRole([]string{updated.Spec.EphemeralRunnerSetName})), "Role should be updated")
})
It("propagates updated listener metadata to owned resources", func() {
assertPropagatedMetadata := func(expected string) {
Eventually(
func(g Gomega) {
serviceAccount := new(corev1.ServiceAccount)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace}, serviceAccount)
g.Expect(err).NotTo(HaveOccurred(), "failed to get ServiceAccount")
g.Expect(serviceAccount.Labels["arc.test/listener-label"]).To(Equal(expected))
g.Expect(serviceAccount.Annotations["arc.test/service-account-annotation"]).To(Equal(expected))
if expected == "updated" {
g.Expect(serviceAccount.Annotations["arc.test/new-service-account-annotation"]).To(Equal("added"))
}
role := new(rbacv1.Role)
err = k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, role)
g.Expect(err).NotTo(HaveOccurred(), "failed to get Role")
g.Expect(role.Labels["arc.test/listener-label"]).To(Equal(expected))
g.Expect(role.Annotations["arc.test/role-annotation"]).To(Equal(expected))
if expected == "updated" {
g.Expect(role.Annotations["arc.test/new-role-annotation"]).To(Equal("added"))
}
roleBinding := new(rbacv1.RoleBinding)
err = k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace}, roleBinding)
g.Expect(err).NotTo(HaveOccurred(), "failed to get RoleBinding")
g.Expect(roleBinding.Labels["arc.test/listener-label"]).To(Equal(expected))
g.Expect(roleBinding.Annotations["arc.test/role-binding-annotation"]).To(Equal(expected))
if expected == "updated" {
g.Expect(roleBinding.Annotations["arc.test/new-role-binding-annotation"]).To(Equal("added"))
}
secret := new(corev1.Secret)
err = k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerConfigName(autoscalingListener), Namespace: autoscalingListener.Namespace}, secret)
g.Expect(err).NotTo(HaveOccurred(), "failed to get config Secret")
g.Expect(secret.Labels["arc.test/config-secret-label"]).To(Equal(expected))
g.Expect(secret.Annotations["arc.test/config-secret-annotation"]).To(Equal(expected))
if expected == "updated" {
g.Expect(secret.Annotations["arc.test/new-config-secret-annotation"]).To(Equal("added"))
}
pod := new(corev1.Pod)
err = k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace}, pod)
g.Expect(err).NotTo(HaveOccurred(), "failed to get Pod")
g.Expect(pod.Labels["arc.test/listener-label"]).To(Equal(expected))
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval,
).Should(Succeed())
}
assertPropagatedMetadata("initial")
current := new(v1alpha1.AutoscalingListener)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingListener.Name, Namespace: autoscalingListener.Namespace}, current)
Expect(err).NotTo(HaveOccurred(), "failed to get AutoScalingListener")
updated := current.DeepCopy()
updated.Labels = map[string]string{
"arc.test/listener-label": "updated",
}
updated.Spec.ServiceAccountMetadata = &v1alpha1.ResourceMeta{
Annotations: map[string]string{
"arc.test/service-account-annotation": "updated",
"arc.test/new-service-account-annotation": "added",
},
}
updated.Spec.RoleMetadata = &v1alpha1.ResourceMeta{
Annotations: map[string]string{
"arc.test/role-annotation": "updated",
"arc.test/new-role-annotation": "added",
},
}
updated.Spec.RoleBindingMetadata = &v1alpha1.ResourceMeta{
Annotations: map[string]string{
"arc.test/role-binding-annotation": "updated",
"arc.test/new-role-binding-annotation": "added",
},
}
updated.Spec.ConfigSecretMetadata = &v1alpha1.ResourceMeta{
Labels: map[string]string{
"arc.test/config-secret-label": "updated",
},
Annotations: map[string]string{
"arc.test/config-secret-annotation": "updated",
"arc.test/new-config-secret-annotation": "added",
},
}
err = k8sClient.Patch(ctx, updated, client.MergeFrom(current))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingListener metadata")
assertPropagatedMetadata("updated")
})
It("It should re-create pod but persist config secret whenever listener container is terminated", func() {
@ -512,9 +639,9 @@ var _ = Describe("Test AutoScalingListener customization", func() {
Namespace: autoscalingNS.Name,
},
Spec: v1alpha1.AutoscalingListenerSpec{
GitHubConfigUrl: "https://github.com/owner/repo",
GitHubConfigURL: "https://github.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
RunnerScaleSetId: 1,
RunnerScaleSetID: 1,
AutoscalingRunnerSetNamespace: autoscalingRunnerSet.Namespace,
AutoscalingRunnerSetName: autoscalingRunnerSet.Name,
EphemeralRunnerSetName: "test-ers",
@ -771,9 +898,9 @@ var _ = Describe("Test AutoScalingListener controller with proxy", func() {
Namespace: autoscalingNS.Name,
},
Spec: v1alpha1.AutoscalingListenerSpec{
GitHubConfigUrl: "https://github.com/owner/repo",
GitHubConfigURL: "https://github.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
RunnerScaleSetId: 1,
RunnerScaleSetID: 1,
AutoscalingRunnerSetNamespace: autoscalingRunnerSet.Namespace,
AutoscalingRunnerSetName: autoscalingRunnerSet.Name,
EphemeralRunnerSetName: "test-ers",
@ -908,7 +1035,8 @@ var _ = Describe("Test AutoScalingListener controller with proxy", func() {
}), "no_proxy environment variable not found")
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(Succeed(), "failed to create listener pod with proxy details")
autoscalingListenerTestInterval,
).Should(Succeed(), "failed to create listener pod with proxy details")
// Delete the AutoScalingListener
err = k8sClient.Delete(ctx, autoscalingListener)
@ -925,7 +1053,8 @@ var _ = Describe("Test AutoScalingListener controller with proxy", func() {
g.Expect(kerrors.IsNotFound(err)).To(BeTrue())
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(Succeed(), "failed to delete secret with proxy details")
autoscalingListenerTestInterval,
).Should(Succeed(), "failed to delete secret with proxy details")
})
})
@ -973,9 +1102,9 @@ var _ = Describe("Test AutoScalingListener controller with template modification
Namespace: autoscalingNS.Name,
},
Spec: v1alpha1.AutoscalingListenerSpec{
GitHubConfigUrl: "https://github.com/owner/repo",
GitHubConfigURL: "https://github.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
RunnerScaleSetId: 1,
RunnerScaleSetID: 1,
AutoscalingRunnerSetNamespace: autoscalingRunnerSet.Namespace,
AutoscalingRunnerSetName: autoscalingRunnerSet.Name,
EphemeralRunnerSetName: "test-ers",
@ -1063,7 +1192,8 @@ var _ = Describe("Test AutoScalingListener controller with template modification
g.Expect(pod.ObjectMeta.Labels).To(HaveKeyWithValue("test-label-key", "test-label-value"), "pod labels should be copied from runner set template")
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(Succeed(), "failed to create listener pod with proxy details")
autoscalingListenerTestInterval,
).Should(Succeed(), "failed to create listener pod with proxy details")
// Delete the AutoScalingListener
err := k8sClient.Delete(ctx, autoscalingListener)
@ -1080,7 +1210,8 @@ var _ = Describe("Test AutoScalingListener controller with template modification
g.Expect(kerrors.IsNotFound(err)).To(BeTrue())
},
autoscalingListenerTestTimeout,
autoscalingListenerTestInterval).Should(Succeed(), "failed to delete secret with proxy details")
autoscalingListenerTestInterval,
).Should(Succeed(), "failed to delete secret with proxy details")
})
})
@ -1177,7 +1308,7 @@ var _ = Describe("Test GitHub Server TLS configuration", func() {
Namespace: autoscalingNS.Name,
},
Spec: v1alpha1.AutoscalingListenerSpec{
GitHubConfigUrl: "https://github.com/owner/repo",
GitHubConfigURL: "https://github.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
GitHubServerTLS: &v1alpha1.TLSConfig{
CertificateFrom: &v1alpha1.TLSCertificateSource{
@ -1189,7 +1320,7 @@ var _ = Describe("Test GitHub Server TLS configuration", func() {
},
},
},
RunnerScaleSetId: 1,
RunnerScaleSetID: 1,
AutoscalingRunnerSetNamespace: autoscalingRunnerSet.Namespace,
AutoscalingRunnerSetName: autoscalingRunnerSet.Name,
EphemeralRunnerSetName: "test-ers",
@ -1240,7 +1371,8 @@ var _ = Describe("Test GitHub Server TLS configuration", func() {
BeEquivalentTo(string(cert)),
"GITHUB_SERVER_ROOT_CA should be the rootCA.crt",
)
}).
},
).
WithTimeout(autoscalingRunnerSetTestTimeout).
WithPolling(autoscalingListenerTestInterval).
Should(Succeed(), "failed to create pod with volume and env variable")

View File

@ -19,7 +19,7 @@ package actionsgithubcom
import (
"context"
"fmt"
"sort"
"maps"
"strconv"
"strings"
"time"
@ -28,6 +28,7 @@ import (
"github.com/actions/actions-runner-controller/build"
"github.com/actions/scaleset"
"github.com/go-logr/logr"
"github.com/google/go-cmp/cmp"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
@ -42,35 +43,10 @@ import (
)
const (
annotationKeyRunnerSpecHash = "actions.github.com/runner-spec-hash"
// annotationKeyValuesHash is hash of the entire values json.
// This is used to determine if the values have changed, so we can
// re-create listener.
annotationKeyValuesHash = "actions.github.com/values-hash"
annotationKeyChangeHash = "actions.github.com/change-hash"
autoscalingRunnerSetFinalizerName = "autoscalingrunnerset.actions.github.com/finalizer"
runnerScaleSetIDAnnotationKey = "runner-scale-set-id"
)
type UpdateStrategy string
// Defines how the controller should handle upgrades while having running jobs.
const (
// "immediate": (default) The controller will immediately apply the change causing the
// recreation of the listener and ephemeral runner set. This can lead to an
// overprovisioning of runners, if there are pending / running jobs. This should not
// be a problem at a small scale, but it could lead to a significant increase of
// resources if you have a lot of jobs running concurrently.
UpdateStrategyImmediate = UpdateStrategy("immediate")
// "eventual": The controller will remove the listener and ephemeral runner set
// immediately, but will not recreate them (to apply changes) until all
// pending / running jobs have completed.
// This can lead to a longer time to apply the change but it will ensure
// that you don't have any overprovisioning of runners.
UpdateStrategyEventual = UpdateStrategy("eventual")
)
// AutoscalingRunnerSetReconciler reconciles a AutoscalingRunnerSet object
type AutoscalingRunnerSetReconciler struct {
client.Client
@ -79,7 +55,6 @@ type AutoscalingRunnerSetReconciler struct {
ControllerNamespace string
DefaultRunnerScaleSetListenerImage string
DefaultRunnerScaleSetListenerImagePullSecrets []string
UpdateStrategy UpdateStrategy
ResourceBuilder
}
@ -95,18 +70,19 @@ type AutoscalingRunnerSetReconciler struct {
func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.WithValues("autoscalingrunnerset", req.NamespacedName)
autoscalingRunnerSet := new(v1alpha1.AutoscalingRunnerSet)
if err := r.Get(ctx, req.NamespacedName, autoscalingRunnerSet); err != nil {
var autoscalingRunnerSet v1alpha1.AutoscalingRunnerSet
if err := r.Get(ctx, req.NamespacedName, &autoscalingRunnerSet); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
original := autoscalingRunnerSet.DeepCopy()
if !autoscalingRunnerSet.DeletionTimestamp.IsZero() {
if !controllerutil.ContainsFinalizer(autoscalingRunnerSet, autoscalingRunnerSetFinalizerName) {
if !controllerutil.ContainsFinalizer(&autoscalingRunnerSet, autoscalingRunnerSetFinalizerName) {
return ctrl.Result{}, nil
}
log.Info("Deleting resources")
done, err := r.cleanUpResources(ctx, autoscalingRunnerSet, nil, log)
done, err := r.cleanUpResources(ctx, &autoscalingRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up resources during deletion")
return ctrl.Result{}, err
@ -118,18 +94,17 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl
}, nil
}
if err := r.removeFinalizersFromDependentResources(ctx, autoscalingRunnerSet, log); err != nil {
if err := r.removeFinalizersFromDependentResources(ctx, &autoscalingRunnerSet, log); err != nil {
log.Error(err, "Failed to remove finalizers on dependent resources")
return ctrl.Result{}, err
}
log.Info("Removing finalizer")
err = patch(ctx, r.Client, autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) {
controllerutil.RemoveFinalizer(obj, autoscalingRunnerSetFinalizerName)
})
if err != nil && !kerrors.IsNotFound(err) {
log.Error(err, "Failed to update autoscaling runner set without finalizer")
return ctrl.Result{}, err
if controllerutil.RemoveFinalizer(&autoscalingRunnerSet, autoscalingRunnerSetFinalizerName) {
log.Info("Removing finalizer")
if err := r.Patch(ctx, &autoscalingRunnerSet, client.MergeFrom(original)); err != nil && !kerrors.IsNotFound(err) {
log.Error(err, "Failed to update autoscaling runner set without finalizer")
return ctrl.Result{}, err
}
}
log.Info("Successfully removed finalizer after cleanup")
@ -137,7 +112,7 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl
}
if !v1alpha1.IsVersionAllowed(autoscalingRunnerSet.Labels[LabelKeyKubernetesVersion], build.Version) {
if err := r.Delete(ctx, autoscalingRunnerSet); err != nil {
if err := r.Delete(ctx, &autoscalingRunnerSet); err != nil {
log.Error(
err, "Failed to delete autoscaling runner set on version mismatch",
"buildVersion", build.Version,
@ -154,178 +129,267 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl
return ctrl.Result{}, nil
}
if !controllerutil.ContainsFinalizer(autoscalingRunnerSet, autoscalingRunnerSetFinalizerName) {
if controllerutil.AddFinalizer(&autoscalingRunnerSet, autoscalingRunnerSetFinalizerName) {
log.Info("Adding finalizer")
if err := patch(ctx, r.Client, autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) {
controllerutil.AddFinalizer(obj, autoscalingRunnerSetFinalizerName)
}); err != nil {
log.Error(err, "Failed to update autoscaling runner set with finalizer added")
if err := r.Patch(ctx, &autoscalingRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update autoscaling runner set with finalizer")
return ctrl.Result{}, err
}
log.Info("Successfully added finalizer")
}
if targetHash := autoscalingRunnerSet.Hash(); autoscalingRunnerSet.Annotations[annotationKeyChangeHash] != targetHash {
if err := patch(ctx, r.Client, autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) {
if obj.Annotations == nil {
obj.Annotations = map[string]string{}
}
obj.Annotations[annotationKeyChangeHash] = targetHash
}); err != nil {
log.Error(err, "Failed to update autoscaling runner set with change hash annotation")
return ctrl.Result{}, err
}
if err := r.updateStatus(ctx, autoscalingRunnerSet, nil, v1alpha1.AutoscalingRunnerSetPhasePending, log); err != nil {
log.Error(err, "Failed to update autoscaling runner set status to pending")
return ctrl.Result{}, err
}
}
outdated := autoscalingRunnerSet.Status.Phase == v1alpha1.AutoscalingRunnerSetPhaseOutdated
if !outdated {
scaleSetIDRaw, ok := autoscalingRunnerSet.Annotations[runnerScaleSetIDAnnotationKey]
if !ok {
// Need to create a new runner scale set on Actions service
log.Info("Runner scale set id annotation does not exist. Creating a new runner scale set.")
return r.createRunnerScaleSet(ctx, autoscalingRunnerSet, log)
}
if id, err := strconv.Atoi(scaleSetIDRaw); err != nil || id <= 0 {
log.Info("Runner scale set id annotation is not an id, or is <= 0. Creating a new runner scale set.")
// something modified the scaleSetId. Try to create one
return r.createRunnerScaleSet(ctx, autoscalingRunnerSet, log)
}
// Make sure the runner group of the scale set is up to date
currentRunnerGroupName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerGroupName]
if !ok || (len(autoscalingRunnerSet.Spec.RunnerGroup) > 0 && !strings.EqualFold(currentRunnerGroupName, autoscalingRunnerSet.Spec.RunnerGroup)) {
log.Info("AutoScalingRunnerSet runner group changed. Updating the runner scale set.")
return r.updateRunnerScaleSetRunnerGroup(ctx, autoscalingRunnerSet, log)
}
// Make sure the runner scale set name is up to date
currentRunnerScaleSetName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerScaleSetName]
if !ok || (len(autoscalingRunnerSet.Spec.RunnerScaleSetName) > 0 && !strings.EqualFold(currentRunnerScaleSetName, autoscalingRunnerSet.Spec.RunnerScaleSetName)) {
log.Info("AutoScalingRunnerSet runner scale set name changed. Updating the runner scale set.")
return r.updateRunnerScaleSetName(ctx, autoscalingRunnerSet, log)
}
}
existingRunnerSets, err := r.listEphemeralRunnerSets(ctx, autoscalingRunnerSet)
if err != nil {
log.Error(err, "Failed to list existing ephemeral runner sets")
return ctrl.Result{}, err
}
latestRunnerSet := existingRunnerSets.latest()
if latestRunnerSet == nil && !outdated {
log.Info("Latest runner set does not exist. Creating a new runner set.")
return r.createEphemeralRunnerSet(ctx, autoscalingRunnerSet, log)
}
for _, runnerSet := range existingRunnerSets.all() {
log.Info("Find existing ephemeral runner set", "name", runnerSet.Name, "specHash", runnerSet.Annotations[annotationKeyRunnerSpecHash])
}
outdated = outdated || (latestRunnerSet != nil && latestRunnerSet.Status.Phase == v1alpha1.EphemeralRunnerSetPhaseOutdated)
// Make sure the AutoscalingListener is up and running in the controller namespace
listener := new(v1alpha1.AutoscalingListener)
listenerFound := true
if err := r.Get(ctx, client.ObjectKey{Namespace: r.ControllerNamespace, Name: scaleSetListenerName(autoscalingRunnerSet)}, listener); err != nil {
if !kerrors.IsNotFound(err) {
log.Error(err, "Failed to get AutoscalingListener resource")
return ctrl.Result{}, err
}
listenerFound = false
log.Info("AutoscalingListener does not exist.")
}
if outdated {
log.Info("Ephemeral runner set is outdated")
if autoscalingRunnerSet.Status.Phase != v1alpha1.AutoscalingRunnerSetPhaseOutdated {
if err := r.updateStatus(ctx, autoscalingRunnerSet, latestRunnerSet, v1alpha1.AutoscalingRunnerSetPhaseOutdated, log); err != nil {
log.Error(err, "Failed to update autoscaling runner set status to outdated")
return ctrl.Result{}, err
}
}
done, err := r.cleanUpResources(ctx, autoscalingRunnerSet, latestRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up resources for outdated runner set")
return ctrl.Result{}, err
}
if done {
return ctrl.Result{}, nil
}
log.Info("Waiting for resources to be cleaned up for outdated runner set")
return ctrl.Result{
RequeueAfter: 5 * time.Second,
}, nil
}
// Our listener pod is out of date, so we need to delete it to get a new recreate.
listenerValuesHashChanged := listener.Annotations[annotationKeyValuesHash] != autoscalingRunnerSet.Annotations[annotationKeyValuesHash]
listenerSpecHashChanged := listener.Annotations[annotationKeyRunnerSpecHash] != autoscalingRunnerSet.ListenerSpecHash()
if listenerFound && (listenerValuesHashChanged ||
listenerSpecHashChanged ||
latestRunnerSet == nil ||
listener.Spec.EphemeralRunnerSetName != latestRunnerSet.Name) {
log.Info("RunnerScaleSetListener is out of date. Deleting it so that it is recreated", "name", listener.Name)
if err := r.Delete(ctx, listener); err != nil {
log.Error(err, "Failed to delete AutoscalingListener resource")
return ctrl.Result{}, err
}
log.Info("Deleted RunnerScaleSetListener since existing one is out of date")
return ctrl.Result{}, nil
}
if latestRunnerSet.Annotations[annotationKeyRunnerSpecHash] != autoscalingRunnerSet.RunnerSetSpecHash() {
if r.drainingJobs(&latestRunnerSet.Status) {
log.Info("Latest runner set spec hash does not match the current autoscaling runner set. Waiting for the running and pending runners to finish:", "running", latestRunnerSet.Status.RunningEphemeralRunners, "pending", latestRunnerSet.Status.PendingEphemeralRunners)
log.Info("Scaling down the number of desired replicas to 0")
// We are in the process of draining the jobs. The listener has been deleted and the ephemeral runner set replicas
// need to scale down to 0
err := patch(ctx, r.Client, latestRunnerSet, func(obj *v1alpha1.EphemeralRunnerSet) {
obj.Spec.Replicas = 0
obj.Spec.PatchID = 0
})
if err != nil {
log.Error(err, "Failed to patch runner set to set desired count to 0")
}
// Something has changed, we need to re-apply the pending phase and change hash annotation to trigger the update of runner scale set and listener.
if targetHash := autoscalingRunnerSet.Hash(); autoscalingRunnerSet.Annotations[annotationKeyIntegrityHash] != targetHash {
// TODO: apply the version label
original := autoscalingRunnerSet.DeepCopy()
if autoscalingRunnerSet.Annotations == nil {
autoscalingRunnerSet.Annotations = map[string]string{}
}
autoscalingRunnerSet.Annotations[annotationKeyIntegrityHash] = targetHash
if err := r.Patch(ctx, &autoscalingRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update autoscaling runner set with new change hash and pending phase")
return ctrl.Result{}, err
}
log.Info("Latest runner set spec hash does not match the current autoscaling runner set. Creating a new runner set")
return r.createEphemeralRunnerSet(ctx, autoscalingRunnerSet, log)
original = autoscalingRunnerSet.DeepCopy()
autoscalingRunnerSet.Status.Phase = v1alpha1.AutoscalingRunnerSetPhasePending
if err := r.Status().Patch(ctx, &autoscalingRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update autoscaling runner set status with pending phase")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
oldRunnerSets := existingRunnerSets.old()
if len(oldRunnerSets) > 0 {
log.Info("Cleanup old ephemeral runner sets", "count", len(oldRunnerSets))
err := r.deleteEphemeralRunnerSets(ctx, oldRunnerSets, log)
outdated := autoscalingRunnerSet.Status.Phase == v1alpha1.AutoscalingRunnerSetPhaseOutdated
if outdated {
log.Info("Autoscaling runner set is in outdated phase, removing the listener")
done, err := r.cleanupListener(ctx, &autoscalingRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up old runner sets")
log.Error(err, "Failed to clean up listener")
return ctrl.Result{}, err
}
}
if !done {
log.Info("Waiting for listener to be cleaned up for the outdated runner set")
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
var ephemeralRunnerSet v1alpha1.EphemeralRunnerSet
err = r.Get(
ctx,
types.NamespacedName{
Namespace: autoscalingRunnerSet.Namespace,
Name: autoscalingRunnerSet.Name,
},
&ephemeralRunnerSet,
)
switch {
case kerrors.IsNotFound(err):
// If the ephemeral runner set is not found, something removed the ephemeral runner set. The ephemeral runner set should
// not be removed by the controller once it is outdated. However, if the ephemeral runner set is removed, it means no ephemeral
// runners should be running (or at least no ephemeral runners associated with the ephemeral runner set).
// Therefore, this state is acceptable, because the update to the autoscaling runner set will trigger the loop
// that will eventually create a new ephemeral runner set.
log.Info("Ephemeral runner set is not found. Ignoring the state until the autoscaling runner set is updated")
return ctrl.Result{}, nil
case err != nil:
log.Error(err, "Failed to get ephemeral runner set for the outdated runner set")
return ctrl.Result{}, err
default:
if !ephemeralRunnerSet.DeletionTimestamp.IsZero() {
// Same as NotFound case, ignore.
return ctrl.Result{}, nil
}
original := ephemeralRunnerSet.DeepCopy()
ephemeralRunnerSet.Spec.Replicas = 0
ephemeralRunnerSet.Spec.PatchID = 0
if err := r.Patch(ctx, &ephemeralRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to patch ephemeral runner set with 0 replicas and reset patch ID for the outdated runner set")
return ctrl.Result{}, err
}
// Make sure the AutoscalingListener is up and running in the controller namespace
if !listenerFound {
if r.drainingJobs(&latestRunnerSet.Status) {
log.Info("Creating a new AutoscalingListener is waiting for the running and pending runners to finish. Waiting for the running and pending runners to finish:", "running", latestRunnerSet.Status.RunningEphemeralRunners, "pending", latestRunnerSet.Status.PendingEphemeralRunners)
return ctrl.Result{}, nil
}
log.Info("Creating a new AutoscalingListener for the runner set", "ephemeralRunnerSetName", latestRunnerSet.Name)
return r.createAutoScalingListenerForRunnerSet(ctx, autoscalingRunnerSet, latestRunnerSet, log)
}
if err := r.updateStatus(ctx, autoscalingRunnerSet, latestRunnerSet, v1alpha1.AutoscalingRunnerSetPhaseRunning, log); err != nil {
if shouldCreateScaleSet(&autoscalingRunnerSet) {
log.Info("Creating runner scale set")
return r.createRunnerScaleSet(ctx, &autoscalingRunnerSet, log)
}
// Make sure the runner group of the scale set is up to date
currentRunnerGroupName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerGroupName]
if !ok || (len(autoscalingRunnerSet.Spec.RunnerGroup) > 0 && !strings.EqualFold(currentRunnerGroupName, autoscalingRunnerSet.Spec.RunnerGroup)) {
log.Info("AutoScalingRunnerSet runner group changed. Updating the runner scale set.")
return r.updateRunnerScaleSetRunnerGroup(ctx, &autoscalingRunnerSet, log)
}
// Make sure the runner scale set name is up to date
currentRunnerScaleSetName, ok := autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerScaleSetName]
if !ok || (len(autoscalingRunnerSet.Spec.RunnerScaleSetName) > 0 && !strings.EqualFold(currentRunnerScaleSetName, autoscalingRunnerSet.Spec.RunnerScaleSetName)) {
log.Info("AutoScalingRunnerSet runner scale set name changed. Updating the runner scale set.")
return r.updateRunnerScaleSetName(ctx, &autoscalingRunnerSet, log)
}
var ephemeralRunnerSet v1alpha1.EphemeralRunnerSet
err := r.Get(
ctx,
types.NamespacedName{
Namespace: autoscalingRunnerSet.Namespace,
Name: autoscalingRunnerSet.Name,
},
&ephemeralRunnerSet,
)
switch {
case kerrors.IsNotFound(err):
log.Info("Creating ephemeral runner set")
return r.createEphemeralRunnerSet(ctx, &autoscalingRunnerSet, log)
case err != nil:
log.Error(err, "Failed to get ephemeral runner")
return ctrl.Result{}, err
case ephemeralRunnerSet.Status.Phase == v1alpha1.EphemeralRunnerSetPhaseOutdated && autoscalingRunnerSet.Status.Phase == v1alpha1.AutoscalingRunnerSetPhaseRunning:
// Runners are outdated. We need to stop the listener so it stops getting new jobs.
log.Info("Ephemeral runner set is outdated. Cleaning up resources for the outdated runner set")
done, err := r.cleanupListener(ctx, &autoscalingRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up listener for outdated ephemeral runner set")
return ctrl.Result{}, err
}
if !done {
log.Info("Waiting for listener to be cleaned up for the outdated ephemeral runner set")
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
// Then, we need to remove the ephemeral runner set to force scale-down. The ephemeral runner set
// will eventually remove all runners as soon as possible.
//
// The scale set should not be removed yet, since user did not explicitly remove the scale set (or the autoscaling runner set)
// Therefore, the autoscaling runner set should stay in outdated state until the spec is updated,
// or until the autoscaling runner set is removed.
done, err = r.cleanupEphemeralRunnerSet(ctx, &autoscalingRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up ephemeral runner set for outdated runner set")
return ctrl.Result{}, err
}
if !done {
log.Info("Waiting for ephemeral runner set to be cleaned up for the outdated runner set")
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
log.Info("Successfully cleaned up resources for the outdated runner set")
return ctrl.Result{}, nil
default:
desired, err := r.newEphemeralRunnerSet(&autoscalingRunnerSet)
if err != nil {
log.Error(err, "Failed to generate ephemeral runner set spec")
return ctrl.Result{}, nil
}
if ephemeralRunnerSet.Annotations[annotationKeyIntegrityHash] != desired.Annotations[annotationKeyIntegrityHash] {
// When runners are actively processing jobs, defer the spec update:
// delete the listener to stop accepting new jobs, but leave the ERS
// (and its running pods) untouched until all jobs have drained.
if ephemeralRunnerSet.Status.RunningEphemeralRunners+ephemeralRunnerSet.Status.PendingEphemeralRunners > 0 {
log.Info("Ephemeral runner set spec changed but runners are still active; deleting listener to stop new jobs")
if _, err := r.cleanupListener(ctx, &autoscalingRunnerSet, log); err != nil {
log.Error(err, "Failed to clean up listener while waiting for runners to drain")
return ctrl.Result{}, err
}
return ctrl.Result{RequeueAfter: 1 * time.Second}, nil
}
original := ephemeralRunnerSet.DeepCopy()
ephemeralRunnerSet.Spec.EphemeralRunnerMetadata = desired.Spec.EphemeralRunnerMetadata
ephemeralRunnerSet.Spec.EphemeralRunnerSpec = desired.Spec.EphemeralRunnerSpec
ephemeralRunnerSet.Labels = r.filterAndMergeLabels(ephemeralRunnerSet.Labels, desired.Labels)
ephemeralRunnerSet.Annotations = r.mergeAnnotations(ephemeralRunnerSet.Annotations, desired.Annotations)
log.Info("Updating ephemeral runner set spec to match the desired spec")
if err := r.Patch(ctx, &ephemeralRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to patch ephemeral runner set to match the desired spec")
return ctrl.Result{}, err
}
log.Info("Successfully patched ephemeral runner set spec")
return ctrl.Result{}, nil
}
ephemeralRunnerMetadataModified := !cmp.Equal(ephemeralRunnerSet.Spec.EphemeralRunnerMetadata, desired.Spec.EphemeralRunnerMetadata)
ephemeralRunnerLabelsModified := !maps.Equal(ephemeralRunnerSet.Labels, desired.Labels)
ephemeralRunnerAnnotationsModified := !maps.Equal(ephemeralRunnerSet.Annotations, desired.Annotations)
if ephemeralRunnerLabelsModified || ephemeralRunnerAnnotationsModified || ephemeralRunnerMetadataModified {
original := ephemeralRunnerSet.DeepCopy()
ephemeralRunnerSet.Labels = r.filterAndMergeLabels(ephemeralRunnerSet.Labels, desired.Labels)
ephemeralRunnerSet.Annotations = r.mergeAnnotations(ephemeralRunnerSet.Annotations, desired.Annotations)
ephemeralRunnerSet.Spec.EphemeralRunnerMetadata = desired.Spec.EphemeralRunnerMetadata
log.Info("Updating ephemeral runner set metadata to match desired labels and annotations")
if err := r.Patch(ctx, &ephemeralRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to patch ephemeral runner set metadata to match desired labels and annotations")
return ctrl.Result{}, err
}
log.Info("Successfully patched ephemeral runner set metadata")
return ctrl.Result{}, nil
}
}
var listener v1alpha1.AutoscalingListener
err = r.Get(
ctx,
types.NamespacedName{
Namespace: r.ControllerNamespace,
Name: scaleSetListenerName(&autoscalingRunnerSet),
},
&listener,
)
switch {
case kerrors.IsNotFound(err):
log.Info("AutoscalingListener does not exist, creating autoscaling listener")
return r.createAutoScalingListenerForRunnerSet(ctx, &autoscalingRunnerSet, &ephemeralRunnerSet, log)
case err != nil:
log.Error(err, "Failed to get AutoscalingListener resource")
return ctrl.Result{}, err
default:
desired, err := r.newAutoscalingListener(
&autoscalingRunnerSet,
&ephemeralRunnerSet,
r.ControllerNamespace,
r.DefaultRunnerScaleSetListenerImage,
nil, // TODO: remove
)
if err != nil {
log.Error(err, "Failed to generate AutoscalingListener spec")
return ctrl.Result{}, nil
}
if !cmp.Equal(listener.Spec, desired.Spec) ||
!cmp.Equal(listener.Labels, desired.Labels) ||
!cmp.Equal(listener.Annotations, desired.Annotations) {
log.Info("Deleting AutoscalingListener to re-create with updated spec")
if err := r.Delete(ctx, &listener); err != nil {
log.Error(err, "Failed to delete AutoscalingListener for re-creation")
return ctrl.Result{}, err
}
log.Info("Deleted AutoscalingListener, will re-create on next reconcile")
return ctrl.Result{}, nil
}
}
log.Info("Autoscaling runner set is up to date and ready")
if err := r.updateStatus(
ctx,
&autoscalingRunnerSet,
&ephemeralRunnerSet,
v1alpha1.AutoscalingRunnerSetPhaseRunning,
log,
); err != nil {
log.Error(err, "Failed to update autoscaling runner set status to running")
return ctrl.Result{}, err
}
@ -333,7 +397,7 @@ func (r *AutoscalingRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl
return ctrl.Result{}, nil
}
func (r *AutoscalingRunnerSetReconciler) cleanUpResources(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, latestRunnerSet *v1alpha1.EphemeralRunnerSet, log logr.Logger) (bool, error) {
func (r *AutoscalingRunnerSetReconciler) cleanUpResources(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, log logr.Logger) (bool, error) {
log.Info("Deleting the listener")
done, err := r.cleanupListener(ctx, autoscalingRunnerSet, log)
if err != nil {
@ -347,7 +411,7 @@ func (r *AutoscalingRunnerSetReconciler) cleanUpResources(ctx context.Context, a
}
log.Info("deleting ephemeral runner sets")
done, err = r.cleanupEphemeralRunnerSets(ctx, autoscalingRunnerSet, log)
done, err = r.cleanupEphemeralRunnerSet(ctx, autoscalingRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up ephemeral runner sets")
return false, err
@ -371,39 +435,41 @@ func (r *AutoscalingRunnerSetReconciler) cleanUpResources(ctx context.Context, a
func (r *AutoscalingRunnerSetReconciler) updateStatus(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, phase v1alpha1.AutoscalingRunnerSetPhase, log logr.Logger) error {
countDiff := ephemeralRunnerSet != nil && ephemeralRunnerSet.Status.CurrentReplicas != autoscalingRunnerSet.Status.CurrentRunners
phaseDiff := phase != autoscalingRunnerSet.Status.Phase
if countDiff || phaseDiff {
if err := patchSubResource(ctx, r.Status(), autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) {
obj.Status.Phase = phase
var ephemeralRunnerSetStatus v1alpha1.EphemeralRunnerSetStatus
if ephemeralRunnerSet != nil {
ephemeralRunnerSetStatus = ephemeralRunnerSet.Status
}
obj.Status.CurrentRunners = ephemeralRunnerSetStatus.CurrentReplicas
obj.Status.PendingEphemeralRunners = ephemeralRunnerSetStatus.PendingEphemeralRunners
obj.Status.RunningEphemeralRunners = ephemeralRunnerSetStatus.RunningEphemeralRunners
obj.Status.FailedEphemeralRunners = ephemeralRunnerSetStatus.FailedEphemeralRunners
}); err != nil {
log.Error(err, "Failed to update autoscaling runner set status with current runner count")
return err
}
if !countDiff && !phaseDiff {
return nil
}
return nil
}
// Prevents overprovisioning of runners.
// We reach this code path when runner scale set has been patched with a new runner spec but there are still running ephemeral runners.
// The safest approach is to wait for the running ephemeral runners to finish before creating a new runner set.
func (r *AutoscalingRunnerSetReconciler) drainingJobs(latestRunnerSetStatus *v1alpha1.EphemeralRunnerSetStatus) bool {
if r.UpdateStrategy == UpdateStrategyEventual && ((latestRunnerSetStatus.RunningEphemeralRunners + latestRunnerSetStatus.PendingEphemeralRunners) > 0) {
return true
original := autoscalingRunnerSet.DeepCopy()
if phaseDiff {
autoscalingRunnerSet.Status.Phase = phase
}
return false
if countDiff && ephemeralRunnerSet != nil {
autoscalingRunnerSet.Status.CurrentRunners = ephemeralRunnerSet.Status.CurrentReplicas
autoscalingRunnerSet.Status.PendingEphemeralRunners = ephemeralRunnerSet.Status.PendingEphemeralRunners
autoscalingRunnerSet.Status.RunningEphemeralRunners = ephemeralRunnerSet.Status.RunningEphemeralRunners
autoscalingRunnerSet.Status.FailedEphemeralRunners = ephemeralRunnerSet.Status.FailedEphemeralRunners
}
if err := r.Status().Patch(ctx, autoscalingRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to patch autoscaling runner set status")
return err
}
return nil
}
func (r *AutoscalingRunnerSetReconciler) cleanupListener(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, logger logr.Logger) (done bool, err error) {
logger.Info("Cleaning up the listener")
var listener v1alpha1.AutoscalingListener
err = r.Get(ctx, client.ObjectKey{Namespace: r.ControllerNamespace, Name: scaleSetListenerName(autoscalingRunnerSet)}, &listener)
err = r.Get(
ctx,
client.ObjectKey{
Namespace: r.ControllerNamespace,
Name: scaleSetListenerName(autoscalingRunnerSet),
},
&listener,
)
switch {
case err == nil:
if listener.DeletionTimestamp.IsZero() {
@ -421,39 +487,32 @@ func (r *AutoscalingRunnerSetReconciler) cleanupListener(ctx context.Context, au
return true, nil
}
func (r *AutoscalingRunnerSetReconciler) cleanupEphemeralRunnerSets(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, logger logr.Logger) (done bool, err error) {
logger.Info("Cleaning up ephemeral runner sets")
runnerSets, err := r.listEphemeralRunnerSets(ctx, autoscalingRunnerSet)
if err != nil {
return false, fmt.Errorf("failed to list ephemeral runner sets: %w", err)
}
if runnerSets.empty() {
logger.Info("All ephemeral runner sets are deleted")
return true, nil
func (r *AutoscalingRunnerSetReconciler) cleanupEphemeralRunnerSet(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, logger logr.Logger) (done bool, err error) {
logger.Info("Cleaning up ephemeral runner set")
var ers v1alpha1.EphemeralRunnerSet
err = r.Get(
ctx,
client.ObjectKey{
Namespace: autoscalingRunnerSet.Namespace,
Name: autoscalingRunnerSet.Name,
},
&ers,
)
switch {
case err == nil:
if ers.DeletionTimestamp.IsZero() {
logger.Info("Deleting the ephemeral runner set")
if err := r.Delete(ctx, &ers); err != nil {
return false, fmt.Errorf("failed to delete ephemeral runner set: %w", err)
}
}
return false, nil
case !kerrors.IsNotFound(err):
return false, fmt.Errorf("failed to get ephemeral runner set: %w", err)
}
logger.Info("Deleting all ephemeral runner sets", "count", runnerSets.count())
if err := r.deleteEphemeralRunnerSets(ctx, runnerSets.all(), logger); err != nil {
return false, fmt.Errorf("failed to delete ephemeral runner sets: %w", err)
}
return false, nil
}
func (r *AutoscalingRunnerSetReconciler) deleteEphemeralRunnerSets(ctx context.Context, oldRunnerSets []v1alpha1.EphemeralRunnerSet, logger logr.Logger) error {
for i := range oldRunnerSets {
rs := &oldRunnerSets[i]
// already deleted but contains finalizer so it still exists
if !rs.DeletionTimestamp.IsZero() {
logger.Info("Skip ephemeral runner set since it is already marked for deletion", "name", rs.Name)
continue
}
logger.Info("Deleting ephemeral runner set", "name", rs.Name)
if err := r.Delete(ctx, rs); err != nil {
return fmt.Errorf("failed to delete EphemeralRunnerSet resource: %w", err)
}
logger.Info("Deleted ephemeral runner set", "name", rs.Name)
}
return nil
logger.Info("Ephemeral runner set is deleted")
return true, nil
}
func (r *AutoscalingRunnerSetReconciler) removeFinalizersFromDependentResources(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, logger logr.Logger) error {
@ -612,11 +671,11 @@ func (r *AutoscalingRunnerSetReconciler) updateRunnerScaleSetRunnerGroup(ctx con
}
logger.Info("Updating runner scale set name and runner group name as annotations")
if err := patch(ctx, r.Client, autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) {
obj.Annotations[AnnotationKeyGitHubRunnerGroupName] = updatedRunnerScaleSet.RunnerGroupName
obj.Annotations[AnnotationKeyGitHubRunnerScaleSetName] = updatedRunnerScaleSet.Name
}); err != nil {
logger.Error(err, "Failed to update runner group name annotation")
original := autoscalingRunnerSet.DeepCopy()
autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerGroupName] = updatedRunnerScaleSet.RunnerGroupName
autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerScaleSetName] = updatedRunnerScaleSet.Name
if err := r.Patch(ctx, autoscalingRunnerSet, client.MergeFrom(original)); err != nil {
logger.Error(err, "Failed to update runner group name and runner scale set name annotation")
return ctrl.Result{}, err
}
@ -649,9 +708,9 @@ func (r *AutoscalingRunnerSetReconciler) updateRunnerScaleSetName(ctx context.Co
}
logger.Info("Updating runner scale set name as an annotation")
if err := patch(ctx, r.Client, autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) {
obj.Annotations[AnnotationKeyGitHubRunnerScaleSetName] = updatedRunnerScaleSet.Name
}); err != nil {
original := autoscalingRunnerSet.DeepCopy()
autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerScaleSetName] = updatedRunnerScaleSet.Name
if err := r.Patch(ctx, autoscalingRunnerSet, client.MergeFrom(original)); err != nil {
logger.Error(err, "Failed to update runner scale set name annotation")
return ctrl.Result{}, err
}
@ -669,7 +728,7 @@ func (r *AutoscalingRunnerSetReconciler) deleteRunnerScaleSet(ctx context.Contex
//
// 2. The scale set has been deleted by the controller.
// In that case, the controller will clean up annotation because the scale set does not exist anymore.
// Removal of the scale set id is also useful because permission cleanup will eventually lose permission
// Removal of the scale set id is also useful because permission cleanup will later lose permission
// assigned to it on a GitHub secret, causing actions client from secret to result in permission denied
//
// 3. Annotation is removed manually.
@ -699,11 +758,11 @@ func (r *AutoscalingRunnerSetReconciler) deleteRunnerScaleSet(ctx context.Contex
return err
}
err = patch(ctx, r.Client, autoscalingRunnerSet, func(obj *v1alpha1.AutoscalingRunnerSet) {
delete(obj.Annotations, runnerScaleSetIDAnnotationKey)
})
if err != nil {
logger.Error(err, "Failed to patch autoscaling runner set with annotation removed", "annotation", runnerScaleSetIDAnnotationKey)
original := autoscalingRunnerSet.DeepCopy()
delete(autoscalingRunnerSet.Annotations, runnerScaleSetIDAnnotationKey)
if err := r.Patch(ctx, autoscalingRunnerSet, client.MergeFrom(original)); err != nil {
logger.Error(err, "Failed to remove runner scale set ID annotation after deleting the runner scale set", "runnerScaleSetId", runnerScaleSetID)
return err
}
@ -718,11 +777,6 @@ func (r *AutoscalingRunnerSetReconciler) createEphemeralRunnerSet(ctx context.Co
return ctrl.Result{}, err
}
if err := ctrl.SetControllerReference(autoscalingRunnerSet, desiredRunnerSet, r.Scheme); err != nil {
log.Error(err, "Failed to set controller reference to a new EphemeralRunnerSet")
return ctrl.Result{}, err
}
log.Info("Creating a new EphemeralRunnerSet resource")
if err := r.Create(ctx, desiredRunnerSet); err != nil {
log.Error(err, "Failed to create EphemeralRunnerSet resource")
@ -741,7 +795,13 @@ func (r *AutoscalingRunnerSetReconciler) createAutoScalingListenerForRunnerSet(c
})
}
autoscalingListener, err := r.newAutoScalingListener(autoscalingRunnerSet, ephemeralRunnerSet, r.ControllerNamespace, r.DefaultRunnerScaleSetListenerImage, imagePullSecrets)
autoscalingListener, err := r.newAutoscalingListener(
autoscalingRunnerSet,
ephemeralRunnerSet,
r.ControllerNamespace,
r.DefaultRunnerScaleSetListenerImage,
imagePullSecrets,
)
if err != nil {
log.Error(err, "Could not create AutoscalingListener spec")
return ctrl.Result{}, err
@ -757,17 +817,20 @@ func (r *AutoscalingRunnerSetReconciler) createAutoScalingListenerForRunnerSet(c
return ctrl.Result{}, nil
}
func (r *AutoscalingRunnerSetReconciler) listEphemeralRunnerSets(ctx context.Context, autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet) (*EphemeralRunnerSets, error) {
list := new(v1alpha1.EphemeralRunnerSetList)
if err := r.List(ctx, list, client.InNamespace(autoscalingRunnerSet.Namespace), client.MatchingFields{resourceOwnerKey: autoscalingRunnerSet.Name}); err != nil {
return nil, fmt.Errorf("failed to list ephemeral runner sets: %w", err)
// TODO: change that
func shouldCreateScaleSet(autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet) bool {
scaleSetIDRaw, ok := autoscalingRunnerSet.Annotations[runnerScaleSetIDAnnotationKey]
if !ok {
return true
}
return &EphemeralRunnerSets{list: list}, nil
id, err := strconv.Atoi(scaleSetIDRaw)
return err != nil || id <= 0
}
// SetupWithManager sets up the controller with the Manager.
func (r *AutoscalingRunnerSetReconciler) SetupWithManager(mgr ctrl.Manager, opts ...Option) error {
r.ResourceBuilder.setSchemeIfUnset(r.Scheme)
return builderWithOptions(
ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.AutoscalingRunnerSet{}).
@ -829,12 +892,13 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeKubernetesModeRol
c.logger.Info("Kubernetes mode role binding finalizer has already been removed", "name", roleBindingName)
return
}
err = patch(ctx, c.client, roleBinding, func(obj *rbacv1.RoleBinding) {
controllerutil.RemoveFinalizer(obj, AutoscalingRunnerSetCleanupFinalizerName)
})
if err != nil {
c.err = fmt.Errorf("failed to patch kubernetes mode role binding without finalizer: %w", err)
return
original := roleBinding.DeepCopy()
if controllerutil.RemoveFinalizer(roleBinding, AutoscalingRunnerSetCleanupFinalizerName) {
if err = c.client.Patch(ctx, roleBinding, client.MergeFrom(original)); err != nil {
c.err = fmt.Errorf("failed to patch kubernetes mode role binding without finalizer: %w", err)
return
}
}
c.logger.Info("Removed finalizer from container mode kubernetes role binding", "name", roleBindingName)
return
@ -871,12 +935,12 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeKubernetesModeRol
c.logger.Info("Kubernetes mode role finalizer has already been removed", "name", roleName)
return
}
err = patch(ctx, c.client, role, func(obj *rbacv1.Role) {
controllerutil.RemoveFinalizer(obj, AutoscalingRunnerSetCleanupFinalizerName)
})
if err != nil {
c.err = fmt.Errorf("failed to patch kubernetes mode role without finalizer: %w", err)
return
original := role.DeepCopy()
if controllerutil.RemoveFinalizer(role, AutoscalingRunnerSetCleanupFinalizerName) {
if err = c.client.Patch(ctx, role, client.MergeFrom(original)); err != nil {
c.err = fmt.Errorf("failed to patch kubernetes mode role without finalizer: %w", err)
return
}
}
c.logger.Info("Removed finalizer from container mode kubernetes role")
return
@ -914,13 +978,14 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeKubernetesModeSer
c.logger.Info("Kubernetes mode service account finalizer has already been removed", "name", serviceAccountName)
return
}
err = patch(ctx, c.client, serviceAccount, func(obj *corev1.ServiceAccount) {
controllerutil.RemoveFinalizer(obj, AutoscalingRunnerSetCleanupFinalizerName)
})
if err != nil {
c.err = fmt.Errorf("failed to patch kubernetes mode service account without finalizer: %w", err)
return
original := serviceAccount.DeepCopy()
if controllerutil.RemoveFinalizer(serviceAccount, AutoscalingRunnerSetCleanupFinalizerName) {
if err = c.client.Patch(ctx, serviceAccount, client.MergeFrom(original)); err != nil {
c.err = fmt.Errorf("failed to patch kubernetes mode service account without finalizer: %w", err)
return
}
}
c.logger.Info("Removed finalizer from container mode kubernetes service account")
return
case kerrors.IsNotFound(err):
@ -950,20 +1015,29 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeNoPermissionServi
c.logger.Info("Removing finalizer from no permission service account", "name", serviceAccountName)
serviceAccount := new(corev1.ServiceAccount)
err := c.client.Get(ctx, types.NamespacedName{Name: serviceAccountName, Namespace: c.autoscalingRunnerSet.Namespace}, serviceAccount)
err := c.client.Get(
ctx,
types.NamespacedName{
Name: serviceAccountName,
Namespace: c.autoscalingRunnerSet.Namespace,
},
serviceAccount,
)
switch {
case err == nil:
if !controllerutil.ContainsFinalizer(serviceAccount, AutoscalingRunnerSetCleanupFinalizerName) {
c.logger.Info("No permission service account finalizer has already been removed", "name", serviceAccountName)
return
}
err = patch(ctx, c.client, serviceAccount, func(obj *corev1.ServiceAccount) {
controllerutil.RemoveFinalizer(obj, AutoscalingRunnerSetCleanupFinalizerName)
})
if err != nil {
c.err = fmt.Errorf("failed to patch service account without finalizer: %w", err)
return
original := serviceAccount.DeepCopy()
if controllerutil.RemoveFinalizer(serviceAccount, AutoscalingRunnerSetCleanupFinalizerName) {
if err = c.client.Patch(ctx, serviceAccount, client.MergeFrom(original)); err != nil {
c.err = fmt.Errorf("failed to patch no permission service account without finalizer: %w", err)
return
}
}
c.logger.Info("Removed finalizer from no permission service account", "name", serviceAccountName)
return
case kerrors.IsNotFound(err):
@ -1000,12 +1074,13 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeGitHubSecretFinal
c.logger.Info("GitHub secret finalizer has already been removed", "name", githubSecretName)
return
}
err = patch(ctx, c.client, githubSecret, func(obj *corev1.Secret) {
controllerutil.RemoveFinalizer(obj, AutoscalingRunnerSetCleanupFinalizerName)
})
if err != nil {
c.err = fmt.Errorf("failed to patch GitHub secret without finalizer: %w", err)
return
original := githubSecret.DeepCopy()
if controllerutil.RemoveFinalizer(githubSecret, AutoscalingRunnerSetCleanupFinalizerName) {
if err = c.client.Patch(ctx, githubSecret, client.MergeFrom(original)); err != nil {
c.err = fmt.Errorf("failed to patch GitHub secret without finalizer: %w", err)
return
}
}
c.logger.Info("Removed finalizer from GitHub secret", "name", githubSecretName)
return
@ -1039,17 +1114,14 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeManagerRoleBindin
err := c.client.Get(ctx, types.NamespacedName{Name: managerRoleBindingName, Namespace: c.autoscalingRunnerSet.Namespace}, roleBinding)
switch {
case err == nil:
if !controllerutil.ContainsFinalizer(roleBinding, AutoscalingRunnerSetCleanupFinalizerName) {
c.logger.Info("Manager role binding finalizer has already been removed", "name", managerRoleBindingName)
return
}
err = patch(ctx, c.client, roleBinding, func(obj *rbacv1.RoleBinding) {
controllerutil.RemoveFinalizer(obj, AutoscalingRunnerSetCleanupFinalizerName)
})
if err != nil {
c.err = fmt.Errorf("failed to patch manager role binding without finalizer: %w", err)
return
original := roleBinding.DeepCopy()
if controllerutil.RemoveFinalizer(roleBinding, AutoscalingRunnerSetCleanupFinalizerName) {
if err = c.client.Patch(ctx, roleBinding, client.MergeFrom(original)); err != nil {
c.err = fmt.Errorf("failed to patch manager role binding without finalizer: %w", err)
return
}
}
c.logger.Info("Removed finalizer from manager role binding", "name", managerRoleBindingName)
return
case kerrors.IsNotFound(err):
@ -1082,16 +1154,12 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeManagerRoleFinali
err := c.client.Get(ctx, types.NamespacedName{Name: managerRoleName, Namespace: c.autoscalingRunnerSet.Namespace}, role)
switch {
case err == nil:
if !controllerutil.ContainsFinalizer(role, AutoscalingRunnerSetCleanupFinalizerName) {
c.logger.Info("Manager role finalizer has already been removed", "name", managerRoleName)
return
}
err = patch(ctx, c.client, role, func(obj *rbacv1.Role) {
controllerutil.RemoveFinalizer(obj, AutoscalingRunnerSetCleanupFinalizerName)
})
if err != nil {
c.err = fmt.Errorf("failed to patch manager role without finalizer: %w", err)
return
original := role.DeepCopy()
if controllerutil.RemoveFinalizer(role, AutoscalingRunnerSetCleanupFinalizerName) {
if err := c.client.Patch(ctx, role, client.MergeFrom(original)); err != nil {
c.err = fmt.Errorf("failed to patch manager role without finalizer: %w", err)
return
}
}
c.logger.Info("Removed finalizer from manager role", "name", managerRoleName)
return
@ -1103,54 +1171,3 @@ func (c *autoscalingRunnerSetFinalizerDependencyCleaner) removeManagerRoleFinali
return
}
}
// EphemeralRunnerSets wraps an EphemeralRunnerSetList and offers helpers
// (latest, old, all, empty, count) for inspecting the listed runner sets.
//
// NOTE: if this logic should be used for other resources,
// consider using generics
type EphemeralRunnerSets struct {
// list is the wrapped list; it may be nil, in which case empty() reports true.
list *v1alpha1.EphemeralRunnerSetList
// sorted is consulted by latest() and old() to decide whether sort() must run first.
sorted bool
}
// latest returns a deep copy of the first item after ordering the list with
// sort() (newest creation timestamp first), or nil when there are no items.
func (rs *EphemeralRunnerSets) latest() *v1alpha1.EphemeralRunnerSet {
	if !rs.empty() {
		// Order the items lazily, only when the flag says it is still needed.
		if !rs.sorted {
			rs.sort()
		}
		newest := &rs.list.Items[0]
		return newest.DeepCopy()
	}
	return nil
}
// old returns deep copies of every item except the first one after ordering
// the list with sort() (newest creation timestamp first). It returns nil when
// there are no items and an empty slice when there is exactly one.
func (rs *EphemeralRunnerSets) old() []v1alpha1.EphemeralRunnerSet {
	if rs.empty() {
		return nil
	}
	if !rs.sorted {
		rs.sort()
	}
	// Copy the whole list first so the returned slice never aliases rs.list.
	return rs.list.DeepCopy().Items[1:]
}
// all returns deep copies of every item in the wrapped list, in its current
// order, or nil when there are no items.
func (rs *EphemeralRunnerSets) all() []v1alpha1.EphemeralRunnerSet {
	if !rs.empty() {
		// Hand out copies so callers cannot mutate the wrapped list.
		return rs.list.DeepCopy().Items
	}
	return nil
}
// empty reports whether there is nothing to work with: either no list was
// provided at all, or the list holds zero items.
func (rs *EphemeralRunnerSets) empty() bool {
	if rs.list == nil {
		return true
	}
	return len(rs.list.Items) == 0
}
// sort orders the wrapped items in place by creation timestamp, newest first,
// and records that the list is now ordered so latest() and old() do not
// re-sort it on every call.
//
// The wrapped list must be non-nil; latest() and old() guard this by checking
// empty() before calling sort().
func (rs *EphemeralRunnerSets) sort() {
	sort.Slice(rs.list.Items, func(i, j int) bool {
		return rs.list.Items[i].GetCreationTimestamp().After(rs.list.Items[j].GetCreationTimestamp().Time)
	})
	// Previously the flag was never set, which made the `sorted` check in
	// latest()/old() ineffective and caused a full re-sort on each call.
	rs.sorted = true
}
// count returns the number of items in the wrapped list.
//
// A nil list counts as zero items, matching empty(), instead of panicking on
// a nil pointer dereference.
func (rs *EphemeralRunnerSets) count() int {
	if rs.list == nil {
		return 0
	}
	return len(rs.list.Items)
}

View File

@ -115,7 +115,9 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() {
Name: "test-asrs",
Namespace: autoscalingNS.Name,
Labels: map[string]string{
LabelKeyKubernetesVersion: buildVersion,
LabelKeyKubernetesVersion: buildVersion,
"arc.test/label-drift": "initial",
"arc.test/spec-update-label": "initial",
},
},
Spec: v1alpha1.AutoscalingRunnerSetSpec{
@ -124,6 +126,19 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() {
MaxRunners: &max,
MinRunners: &min,
RunnerGroup: "testgroup",
EphemeralRunnerSetMetadata: &v1alpha1.ResourceMeta{
Annotations: map[string]string{
"arc.test/metadata-annotation": "initial",
},
},
EphemeralRunnerMetadata: &v1alpha1.ResourceMeta{
Labels: map[string]string{
"arc.test/runner-metadata-label": "initial",
},
Annotations: map[string]string{
"arc.test/runner-metadata-annotation": "initial",
},
},
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
@ -442,8 +457,7 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() {
})
Context("When updating a new AutoScalingRunnerSet", func() {
It("It should re-create EphemeralRunnerSet and Listener as needed when updating AutoScalingRunnerSet", func() {
// Wait till the listener is created
It("updates EphemeralRunnerSet when the runner image changes without touching the Listener", func() {
listener := new(v1alpha1.AutoscalingListener)
Eventually(
func() error {
@ -452,161 +466,315 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() {
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed(), "Listener should be created")
originalListenerUID := listener.UID
originalListenerResourceVersion := listener.ResourceVersion
runnerSetList := new(v1alpha1.EphemeralRunnerSetList)
err := k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace))
Expect(err).NotTo(HaveOccurred(), "failed to list EphemeralRunnerSet")
Expect(len(runnerSetList.Items)).To(Equal(1), "There should be 1 EphemeralRunnerSet")
runnerSet := runnerSetList.Items[0]
runnerSet := new(v1alpha1.EphemeralRunnerSet)
Eventually(
func() error {
return k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, runnerSet)
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed(), "EphemeralRunnerSet should be created")
originalRunnerSetUID := runnerSet.UID
originalRunnerSetHash := runnerSet.Annotations[annotationKeyIntegrityHash]
// Update the AutoScalingRunnerSet.Spec.Template
// This should trigger re-creation of EphemeralRunnerSet and Listener
patched := autoscalingRunnerSet.DeepCopy()
patched.Spec.Template.Spec.PriorityClassName = "test-priority-class"
if patched.Annotations == nil {
patched.Annotations = make(map[string]string)
patched.Spec.Template.Spec.Containers[0].Image = "ghcr.io/actions/runner:updated"
err := k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet")
Eventually(
func(g Gomega) {
current := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
g.Expect(current.UID).To(Equal(originalRunnerSetUID), "EphemeralRunnerSet should be updated in place")
g.Expect(current.Spec.EphemeralRunnerSpec.PodTemplateSpec.Spec.Containers[0].Image).To(Equal("ghcr.io/actions/runner:updated"))
g.Expect(current.Annotations[annotationKeyIntegrityHash]).NotTo(Equal(originalRunnerSetHash), "EphemeralRunnerSet spec hash should change")
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
Consistently(
func(g Gomega) {
current := new(v1alpha1.AutoscalingListener)
err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get Listener")
g.Expect(current.UID).To(Equal(originalListenerUID), "Listener should not be recreated")
g.Expect(current.ResourceVersion).To(Equal(originalListenerResourceVersion), "Listener should not be updated")
},
time.Second*5,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
})
It("recreates only the Listener when max runners changes", func() {
listener := new(v1alpha1.AutoscalingListener)
Eventually(
func() error {
return k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener)
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed(), "Listener should be created")
originalListenerUID := listener.UID
runnerSet := new(v1alpha1.EphemeralRunnerSet)
Eventually(
func() error {
return k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, runnerSet)
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed(), "EphemeralRunnerSet should be created")
originalRunnerSetUID := runnerSet.UID
originalRunnerSetHash := runnerSet.Annotations[annotationKeyIntegrityHash]
patched := autoscalingRunnerSet.DeepCopy()
max := 20
patched.Spec.MaxRunners = &max
err := k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet")
Eventually(
func(g Gomega) {
current := new(v1alpha1.AutoscalingListener)
err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get Listener")
g.Expect(current.UID).NotTo(Equal(originalListenerUID), "Listener should be recreated")
g.Expect(current.Spec.MaxRunners).To(Equal(max))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
Consistently(
func(g Gomega) {
current := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
g.Expect(current.UID).To(Equal(originalRunnerSetUID), "EphemeralRunnerSet should not be recreated")
g.Expect(current.Annotations[annotationKeyIntegrityHash]).To(Equal(originalRunnerSetHash), "EphemeralRunnerSet spec should not change")
},
time.Second*5,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
})
It("updates EphemeralRunnerSet labels when only AutoScalingRunnerSet labels change", func() {
runnerSet := new(v1alpha1.EphemeralRunnerSet)
Eventually(
func() (string, error) {
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, runnerSet)
if err != nil {
return "", err
}
return runnerSet.Labels["arc.test/label-drift"], nil
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Equal("initial"), "EphemeralRunnerSet should start with the predefined label")
patched := autoscalingRunnerSet.DeepCopy()
patched.Labels["arc.test/label-drift"] = "updated"
err := k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet labels")
Eventually(
func() (string, error) {
current := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
if err != nil {
return "", err
}
return current.Labels["arc.test/label-drift"], nil
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Equal("updated"), "EphemeralRunnerSet should be patched with label-only drift")
})
It("updates EphemeralRunnerSet annotations when only EphemeralRunnerSet metadata annotations change", func() {
runnerSet := new(v1alpha1.EphemeralRunnerSet)
Eventually(
func() (string, error) {
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, runnerSet)
if err != nil {
return "", err
}
return runnerSet.Annotations["arc.test/metadata-annotation"], nil
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Equal("initial"), "EphemeralRunnerSet should start with the predefined annotation")
patched := autoscalingRunnerSet.DeepCopy()
patched.Spec.EphemeralRunnerSetMetadata.Annotations["arc.test/metadata-annotation"] = "updated"
patched.Spec.EphemeralRunnerSetMetadata.Annotations["arc.test/new-metadata-annotation"] = "added"
err := k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet EphemeralRunnerSet metadata")
Eventually(
func(g Gomega) {
current := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
g.Expect(current.Annotations["arc.test/metadata-annotation"]).To(Equal("updated"))
g.Expect(current.Annotations["arc.test/new-metadata-annotation"]).To(Equal("added"))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed(), "EphemeralRunnerSet should be patched with annotation-only metadata drift")
})
It("updates EphemeralRunnerSet runner metadata when only EphemeralRunner metadata changes", func() {
runnerSet := new(v1alpha1.EphemeralRunnerSet)
Eventually(
func(g Gomega) {
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, runnerSet)
g.Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
g.Expect(runnerSet.Spec.EphemeralRunnerMetadata).NotTo(BeNil())
g.Expect(runnerSet.Spec.EphemeralRunnerMetadata.Labels["arc.test/runner-metadata-label"]).To(Equal("initial"))
g.Expect(runnerSet.Spec.EphemeralRunnerMetadata.Annotations["arc.test/runner-metadata-annotation"]).To(Equal("initial"))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
patched := autoscalingRunnerSet.DeepCopy()
patched.Spec.EphemeralRunnerMetadata.Labels["arc.test/runner-metadata-label"] = "updated"
patched.Spec.EphemeralRunnerMetadata.Annotations["arc.test/runner-metadata-annotation"] = "updated"
patched.Spec.EphemeralRunnerMetadata.Annotations["arc.test/new-runner-metadata-annotation"] = "added"
err := k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet EphemeralRunner metadata")
Eventually(
func(g Gomega) {
current := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
g.Expect(current.Spec.EphemeralRunnerMetadata).NotTo(BeNil())
g.Expect(current.Spec.EphemeralRunnerMetadata.Labels["arc.test/runner-metadata-label"]).To(Equal("updated"))
g.Expect(current.Spec.EphemeralRunnerMetadata.Annotations["arc.test/runner-metadata-annotation"]).To(Equal("updated"))
g.Expect(current.Spec.EphemeralRunnerMetadata.Annotations["arc.test/new-runner-metadata-annotation"]).To(Equal("added"))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
})
It("propagates latest labels while updating EphemeralRunnerSet spec", func() {
runnerSet := new(v1alpha1.EphemeralRunnerSet)
Eventually(
func() (string, error) {
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, runnerSet)
if err != nil {
return "", err
}
return runnerSet.Labels["arc.test/spec-update-label"], nil
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Equal("initial"), "EphemeralRunnerSet should start with the predefined label")
patched := autoscalingRunnerSet.DeepCopy()
patched.Labels["arc.test/spec-update-label"] = "updated"
patched.Spec.EphemeralRunnerSetMetadata.Annotations["arc.test/metadata-annotation"] = "updated"
patched.Spec.EphemeralRunnerSetMetadata.Annotations["arc.test/new-spec-update-annotation"] = "added"
patched.Spec.Template.Spec.Containers[0].Image = "ghcr.io/actions/runner:updated-with-label"
err := k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet spec and labels")
Eventually(
func(g Gomega) {
current := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
g.Expect(current.Spec.EphemeralRunnerSpec.PodTemplateSpec.Spec.Containers[0].Image).To(Equal("ghcr.io/actions/runner:updated-with-label"))
g.Expect(current.Labels["arc.test/spec-update-label"]).To(Equal("updated"))
g.Expect(current.Annotations["arc.test/metadata-annotation"]).To(Equal("updated"))
g.Expect(current.Annotations["arc.test/new-spec-update-annotation"]).To(Equal("added"))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
})
It("updates EphemeralRunnerSet and Listener when the GitHub config secret changes", func() {
updatedSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "github-config-secret-updated",
Namespace: autoscalingRunnerSet.Namespace,
},
Data: map[string][]byte{
"github_token": []byte(defaultGitHubToken),
},
}
patched.Annotations[annotationKeyValuesHash] = "test-hash"
err = k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet")
autoscalingRunnerSet = patched.DeepCopy()
err := k8sClient.Create(ctx, updatedSecret)
Expect(err).NotTo(HaveOccurred(), "failed to create updated GitHub config secret")
// We should create a new EphemeralRunnerSet and delete the old one, eventually, we will have only one EphemeralRunnerSet
listener := new(v1alpha1.AutoscalingListener)
Eventually(
func() (string, error) {
runnerSetList := new(v1alpha1.EphemeralRunnerSetList)
err := k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace))
if err != nil {
return "", err
}
if len(runnerSetList.Items) != 1 {
return "", fmt.Errorf("We should have only 1 EphemeralRunnerSet, but got %v", len(runnerSetList.Items))
}
return runnerSetList.Items[0].Annotations[annotationKeyRunnerSpecHash], nil
func() error {
return k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener)
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).ShouldNot(BeEquivalentTo(runnerSet.Annotations[annotationKeyRunnerSpecHash]), "New EphemeralRunnerSet should be created")
).Should(Succeed(), "Listener should be created")
originalListenerUID := listener.UID
// We should create a new listener
runnerSet := new(v1alpha1.EphemeralRunnerSet)
Eventually(
func() (string, error) {
listener := new(v1alpha1.AutoscalingListener)
err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener)
if err != nil {
return "", err
}
return listener.Spec.EphemeralRunnerSetName, nil
func() error {
return k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, runnerSet)
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).ShouldNot(BeEquivalentTo(runnerSet.Name), "New Listener should be created")
).Should(Succeed(), "EphemeralRunnerSet should be created")
originalRunnerSetUID := runnerSet.UID
// Only update the Spec for the AutoScalingListener
// This should trigger re-creation of the Listener only
runnerSetList = new(v1alpha1.EphemeralRunnerSetList)
err = k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace))
Expect(err).NotTo(HaveOccurred(), "failed to list EphemeralRunnerSet")
Expect(len(runnerSetList.Items)).To(Equal(1), "There should be 1 EphemeralRunnerSet")
runnerSet = runnerSetList.Items[0]
listener = new(v1alpha1.AutoscalingListener)
err = k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener)
Expect(err).NotTo(HaveOccurred(), "failed to get Listener")
patched = autoscalingRunnerSet.DeepCopy()
min := 10
patched.Spec.MinRunners = &min
patched := autoscalingRunnerSet.DeepCopy()
patched.Spec.GitHubConfigSecret = updatedSecret.Name
err = k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet")
// We should not re-create a new EphemeralRunnerSet
Consistently(
func() (string, error) {
runnerSetList := new(v1alpha1.EphemeralRunnerSetList)
err := k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace))
if err != nil {
return "", err
}
if len(runnerSetList.Items) != 1 {
return "", fmt.Errorf("We should have only 1 EphemeralRunnerSet, but got %v", len(runnerSetList.Items))
}
return string(runnerSetList.Items[0].UID), nil
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(BeEquivalentTo(string(runnerSet.UID)), "New EphemeralRunnerSet should not be created")
// We should only re-create a new listener
Eventually(
func() (string, error) {
listener := new(v1alpha1.AutoscalingListener)
err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener)
if err != nil {
return "", err
}
return string(listener.UID), nil
func(g Gomega) {
current := new(v1alpha1.AutoscalingRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get AutoScalingRunnerSet")
g.Expect(current.Spec.GitHubConfigSecret).To(Equal(updatedSecret.Name))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).ShouldNot(BeEquivalentTo(string(listener.UID)), "New Listener should be created")
).Should(Succeed())
// Only update the values hash for the autoscaling runner set
// This should trigger re-creation of the Listener only
runnerSetList = new(v1alpha1.EphemeralRunnerSetList)
err = k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace))
Expect(err).NotTo(HaveOccurred(), "failed to list EphemeralRunnerSet")
Expect(len(runnerSetList.Items)).To(Equal(1), "There should be 1 EphemeralRunnerSet")
runnerSet = runnerSetList.Items[0]
listener = new(v1alpha1.AutoscalingListener)
err = k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener)
Expect(err).NotTo(HaveOccurred(), "failed to get Listener")
patched = autoscalingRunnerSet.DeepCopy()
patched.Annotations[annotationKeyValuesHash] = "hash-changes"
err = k8sClient.Patch(ctx, patched, client.MergeFrom(autoscalingRunnerSet))
Expect(err).NotTo(HaveOccurred(), "failed to patch AutoScalingRunnerSet")
// We should not re-create a new EphemeralRunnerSet
Consistently(
func() (string, error) {
runnerSetList := new(v1alpha1.EphemeralRunnerSetList)
err := k8sClient.List(ctx, runnerSetList, client.InNamespace(autoscalingRunnerSet.Namespace))
if err != nil {
return "", err
}
if len(runnerSetList.Items) != 1 {
return "", fmt.Errorf("We should have only 1 EphemeralRunnerSet, but got %v", len(runnerSetList.Items))
}
return string(runnerSetList.Items[0].UID), nil
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(BeEquivalentTo(string(runnerSet.UID)), "New EphemeralRunnerSet should not be created")
// We should only re-create a new listener
Eventually(
func() (string, error) {
listener := new(v1alpha1.AutoscalingListener)
err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, listener)
if err != nil {
return "", err
}
return string(listener.UID), nil
func(g Gomega) {
current := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: autoscalingRunnerSet.Name, Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
g.Expect(current.UID).To(Equal(originalRunnerSetUID), "EphemeralRunnerSet should be updated in place")
g.Expect(current.Spec.EphemeralRunnerSpec.GitHubConfigSecret).To(Equal(updatedSecret.Name))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).ShouldNot(BeEquivalentTo(string(listener.UID)), "New Listener should be created")
).Should(Succeed())
Eventually(
func(g Gomega) {
current := new(v1alpha1.AutoscalingListener)
err := k8sClient.Get(ctx, client.ObjectKey{Name: scaleSetListenerName(autoscalingRunnerSet), Namespace: autoscalingRunnerSet.Namespace}, current)
g.Expect(err).NotTo(HaveOccurred(), "failed to get Listener")
g.Expect(current.UID).NotTo(Equal(originalListenerUID), "Listener should be recreated")
g.Expect(current.Spec.GitHubConfigSecret).To(Equal(updatedSecret.Name))
},
autoscalingRunnerSetTestTimeout,
autoscalingRunnerSetTestInterval,
).Should(Succeed())
})
It("It should update RunnerScaleSet's runner group on service when it changes", func() {
@ -670,9 +838,7 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() {
})
Context("When updating an AutoscalingRunnerSet with running or pending jobs", func() {
It("It should wait for running and pending jobs to finish before applying the update. Update Strategy is set to eventual.", func() {
// Switch update strategy to eventual (drain jobs )
controller.UpdateStrategy = UpdateStrategyEventual
It("It should wait for running and pending jobs to finish before applying the update.", func() {
// Wait till the listener is created
listener := new(v1alpha1.AutoscalingListener)
Eventually(
@ -740,7 +906,7 @@ var _ = Describe("Test AutoScalingRunnerSet controller", Ordered, func() {
if patched.Annotations == nil {
patched.Annotations = make(map[string]string)
}
patched.Annotations[annotationKeyValuesHash] = "testgroup2"
patched.Annotations[annotationKeyIntegrityHash] = "testgroup2"
patched.Spec.Template.Spec = corev1.PodSpec{
Containers: []corev1.Container{
{

View File

@ -1,32 +0,0 @@
package actionsgithubcom
import (
"context"
kclient "sigs.k8s.io/controller-runtime/pkg/client"
)
// kubernetesObject is the type constraint used by the generic patch helpers
// in this file: the type must satisfy kclient.Object and provide a DeepCopy
// method that returns T, so a snapshot can be taken before mutation.
type kubernetesObject[T kclient.Object] interface {
kclient.Object
DeepCopy() T
}
// patcher is the minimal interface the patch helper needs: a Patch method
// that accepts kclient.PatchOption values.
type patcher interface {
Patch(ctx context.Context, obj kclient.Object, patch kclient.Patch, opts ...kclient.PatchOption) error
}
// patch applies update to obj in place and submits the change through
// client.Patch as a merge patch computed against a deep copy of obj taken
// before update ran. It returns whatever error client.Patch returns.
func patch[T kubernetesObject[T]](ctx context.Context, client patcher, obj T, update func(obj T)) error {
	// Snapshot first: the merge patch is built from this pre-mutation copy.
	mergePatch := kclient.MergeFrom(obj.DeepCopy())
	update(obj)
	return client.Patch(ctx, obj, mergePatch)
}
// subResourcePatcher is the minimal interface the patchSubResource helper
// needs: a Patch method that accepts kclient.SubResourcePatchOption values.
type subResourcePatcher interface {
Patch(ctx context.Context, obj kclient.Object, patch kclient.Patch, opts ...kclient.SubResourcePatchOption) error
}
// patchSubResource applies update to obj in place and submits the change
// through the sub-resource client's Patch as a merge patch computed against a
// deep copy of obj taken before update ran. It returns whatever error
// client.Patch returns.
func patchSubResource[T kubernetesObject[T]](ctx context.Context, client subResourcePatcher, obj T, update func(obj T)) error {
	// Snapshot first: the merge patch is built from this pre-mutation copy.
	mergePatch := kclient.MergeFrom(obj.DeepCopy())
	update(obj)
	return client.Patch(ctx, obj, mergePatch)
}

View File

@ -79,19 +79,20 @@ const maxFailures = 5
func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.WithValues("ephemeralrunner", req.NamespacedName)
ephemeralRunner := new(v1alpha1.EphemeralRunner)
if err := r.Get(ctx, req.NamespacedName, ephemeralRunner); err != nil {
var ephemeralRunner v1alpha1.EphemeralRunner
if err := r.Get(ctx, req.NamespacedName, &ephemeralRunner); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
original := ephemeralRunner.DeepCopy()
if !ephemeralRunner.DeletionTimestamp.IsZero() {
if !controllerutil.ContainsFinalizer(ephemeralRunner, ephemeralRunnerFinalizerName) {
if !controllerutil.ContainsFinalizer(&ephemeralRunner, ephemeralRunnerFinalizerName) {
return ctrl.Result{}, nil
}
if controllerutil.ContainsFinalizer(ephemeralRunner, ephemeralRunnerActionsFinalizerName) {
if controllerutil.ContainsFinalizer(&ephemeralRunner, ephemeralRunnerActionsFinalizerName) {
log.Info("Trying to clean up runner from the service")
ok, err := r.cleanupRunnerFromService(ctx, ephemeralRunner, log)
ok, err := r.cleanupRunnerFromService(ctx, &ephemeralRunner, log)
if err != nil {
log.Error(err, "Failed to clean up runner from service")
return ctrl.Result{}, err
@ -102,16 +103,18 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
log.Info("Runner is cleaned up from the service, removing finalizer")
if err := patch(ctx, r.Client, ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
controllerutil.RemoveFinalizer(obj, ephemeralRunnerActionsFinalizerName)
}); err != nil {
return ctrl.Result{}, err
if controllerutil.RemoveFinalizer(&ephemeralRunner, ephemeralRunnerActionsFinalizerName) {
log.Info("Removed finalizer from ephemeral runner")
if err := r.Patch(ctx, &ephemeralRunner, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update ephemeral runner after removing finalizer")
return ctrl.Result{}, err
}
}
log.Info("Removed finalizer from ephemeral runner")
}
log.Info("Finalizing ephemeral runner")
err := r.cleanupResources(ctx, ephemeralRunner, log)
err := r.cleanupResources(ctx, &ephemeralRunner, log)
if err != nil {
log.Error(err, "Failed to clean up ephemeral runner owned resources")
return ctrl.Result{}, err
@ -119,7 +122,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if ephemeralRunner.HasContainerHookConfigured() {
log.Info("Runner has container hook configured, cleaning up container hook resources")
err = r.cleanupContainerHooksResources(ctx, ephemeralRunner, log)
err = r.cleanupContainerHooksResources(ctx, &ephemeralRunner, log)
if err != nil {
log.Error(err, "Failed to clean up container hooks resources")
return ctrl.Result{}, err
@ -127,12 +130,12 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
log.Info("Removing finalizer")
err = patch(ctx, r.Client, ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
controllerutil.RemoveFinalizer(obj, ephemeralRunnerFinalizerName)
})
if err != nil && !kerrors.IsNotFound(err) {
log.Error(err, "Failed to update ephemeral runner without the finalizer")
return ctrl.Result{}, err
if controllerutil.RemoveFinalizer(&ephemeralRunner, ephemeralRunnerFinalizerName) {
log.Info("Removed finalizer from ephemeral runner")
if err := r.Patch(ctx, &ephemeralRunner, client.MergeFrom(original)); client.IgnoreNotFound(err) != nil {
log.Error(err, "Failed to update ephemeral runner after removing finalizer")
return ctrl.Result{}, err
}
}
log.Info("Successfully removed finalizer after cleanup")
@ -141,7 +144,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if ephemeralRunner.IsDone() {
log.Info("Cleaning up resources after after ephemeral runner termination", "phase", ephemeralRunner.Status.Phase)
err := r.cleanupResources(ctx, ephemeralRunner, log)
err := r.cleanupResources(ctx, &ephemeralRunner, log)
if err != nil {
log.Error(err, "Failed to clean up ephemeral runner owned resources")
return ctrl.Result{}, err
@ -153,15 +156,17 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
return ctrl.Result{}, nil
}
addFinalizers := !controllerutil.ContainsFinalizer(ephemeralRunner, ephemeralRunnerFinalizerName) || !controllerutil.ContainsFinalizer(ephemeralRunner, ephemeralRunnerActionsFinalizerName)
addFinalizers := !controllerutil.ContainsFinalizer(&ephemeralRunner, ephemeralRunnerFinalizerName) || !controllerutil.ContainsFinalizer(&ephemeralRunner, ephemeralRunnerActionsFinalizerName)
if addFinalizers {
log.Info("Adding finalizers")
if err := patch(ctx, r.Client, ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
controllerutil.AddFinalizer(obj, ephemeralRunnerFinalizerName)
controllerutil.AddFinalizer(obj, ephemeralRunnerActionsFinalizerName)
}); err != nil {
log.Error(err, "Failed to update with finalizer set")
return ctrl.Result{}, err
var addedFinalizers bool
addedFinalizers = addedFinalizers || controllerutil.AddFinalizer(&ephemeralRunner, ephemeralRunnerFinalizerName)
addedFinalizers = addedFinalizers || controllerutil.AddFinalizer(&ephemeralRunner, ephemeralRunnerActionsFinalizerName)
if addedFinalizers {
if err := r.Patch(ctx, &ephemeralRunner, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update with finalizer set")
return ctrl.Result{}, err
}
}
log.Info("Successfully added finalizers")
}
@ -173,12 +178,12 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
return ctrl.Result{}, err
}
jitConfig, err := r.createRunnerJitConfig(ctx, ephemeralRunner, log)
jitConfig, err := r.createRunnerJitConfig(ctx, &ephemeralRunner, log)
switch {
case err == nil:
// create secret if not created
log.Info("Creating new ephemeral runner secret for jitconfig.")
jitSecret, err := r.createSecret(ctx, ephemeralRunner, jitConfig, log)
jitSecret, err := r.createSecret(ctx, &ephemeralRunner, jitConfig, log)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create secret: %w", err)
}
@ -190,7 +195,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
return ctrl.Result{Requeue: true}, nil
case errors.Is(err, fatalError):
log.Info("JIT config cannot be created for this ephemeral runner, issuing delete", "error", err.Error())
if err := r.Delete(ctx, ephemeralRunner); err != nil {
if err := r.Delete(ctx, &ephemeralRunner); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to delete the ephemeral runner: %w", err)
}
log.Info("Request to delete ephemeral runner has been issued")
@ -215,20 +220,19 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
runnerName := string(secret.Data["runnerName"])
if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
obj.Status.RunnerID = runnerID
obj.Status.RunnerName = runnerName
}); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %w", err)
}
original := ephemeralRunner.DeepCopy()
ephemeralRunner.Status.RunnerID = runnerID
ephemeralRunner.Status.RunnerName = runnerName
if err := r.Status().Patch(ctx, &ephemeralRunner, client.MergeFrom(original)); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName: %w", err)
}
log.Info("Updated ephemeral runner status with runnerId and runnerName")
}
if len(ephemeralRunner.Status.Failures) > maxFailures {
log.Info(fmt.Sprintf("EphemeralRunner has failed more than %d times. Deleting ephemeral runner so it can be re-created", maxFailures))
if err := r.Delete(ctx, ephemeralRunner); err != nil {
if err := r.Delete(ctx, &ephemeralRunner); err != nil {
log.Error(fmt.Errorf("failed to delete ephemeral runner after %d failures: %w", maxFailures, err), "Failed to delete ephemeral runner")
return ctrl.Result{}, err
}
@ -242,7 +246,8 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
nextReconciliation := lastFailure.Add(backoffDuration)
if !lastFailure.IsZero() && now.Before(&metav1.Time{Time: nextReconciliation}) {
requeueAfter := nextReconciliation.Sub(now.Time)
log.Info("Backing off the next reconciliation due to failure",
log.Info(
"Backing off the next reconciliation due to failure",
"lastFailure", lastFailure,
"nextReconciliation", nextReconciliation,
"requeueAfter", requeueAfter,
@ -261,7 +266,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
log.Info("Ephemeral runner pod does not exist. Creating new ephemeral runner")
result, err := r.createPod(ctx, ephemeralRunner, secret, log)
result, err := r.createPod(ctx, &ephemeralRunner, secret, log)
switch {
case err == nil:
return result, nil
@ -271,7 +276,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
case kerrors.IsInvalid(err):
log.Error(err, "Failed to create a pod due to unrecoverable failure")
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
if err := r.markAsFailed(ctx, &ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
log.Error(err, "Failed to set ephemeral runner to phase Failed")
return ctrl.Result{}, err
}
@ -283,7 +288,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
switch {
case isResourceQuotaExceeded && isAboutToExpire:
log.Error(err, "Failed to create a pod due to resource quota exceeded and the ephemeral runner is about to expire; re-creating the ephemeral runner")
if err := r.Delete(ctx, ephemeralRunner); err != nil {
if err := r.Delete(ctx, &ephemeralRunner); err != nil {
log.Error(err, "Failed to delete the ephemeral runner")
return ctrl.Result{}, err
}
@ -298,7 +303,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
log.Error(err, "Failed to create a pod due to unrecoverable failure")
errMessage := fmt.Sprintf("Failed to create the pod: %v", err)
if err := r.markAsFailed(ctx, ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
if err := r.markAsFailed(ctx, &ephemeralRunner, errMessage, ReasonInvalidPodFailure, log); err != nil {
log.Error(err, "Failed to set ephemeral runner to phase Failed")
return ctrl.Result{}, err
}
@ -312,7 +317,8 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
cs := runnerContainerStatus(pod)
switch {
case pod.Status.Phase == corev1.PodFailed: // All containers are stopped
log.Info("Pod is in failed phase, inspecting runner container status",
log.Info(
"Pod is in failed phase, inspecting runner container status",
"podReason", pod.Status.Reason,
"podMessage", pod.Status.Message,
"podConditions", pod.Status.Conditions,
@ -321,7 +327,7 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
// Therefore, we should try to restart it.
if cs == nil || cs.State.Terminated == nil {
log.Info("Runner container does not have state set, deleting pod as failed so it can be restarted")
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log)
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, &ephemeralRunner, pod, log)
}
switch cs.State.Terminated.ExitCode {
@ -331,13 +337,13 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
// If the runner container exits with 0, we assume that the runner has finished successfully.
// If side-car container exits with non-zero, it shouldn't affect the runner. Runner exit code
// drives the controller's inference of whether the job has succeeded or failed.
if err := r.Delete(ctx, ephemeralRunner); err != nil {
if err := r.Delete(ctx, &ephemeralRunner); err != nil {
log.Error(err, "Failed to delete ephemeral runner after successful completion")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
case 7:
if err := r.markAsOutdated(ctx, ephemeralRunner, log); err != nil {
if err := r.markAsOutdated(ctx, &ephemeralRunner, log); err != nil {
log.Error(err, "Failed to set ephemeral runner to phase Outdated")
return ctrl.Result{}, err
}
@ -349,13 +355,14 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
"Ephemeral runner container has failed, and runner container termination exit code is non-zero",
"containerTerminatedState", cs.State.Terminated,
)
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log)
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, &ephemeralRunner, pod, log)
case initContainerFailed(pod):
log.Info("Pod has a failed init container, deleting pod as failed so it can be restarted",
log.Info(
"Pod has a failed init container, deleting pod as failed so it can be restarted",
"initContainerStatuses", pod.Status.InitContainerStatuses,
)
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log)
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, &ephemeralRunner, pod, log)
case cs == nil:
// starting, no container state yet
@ -364,14 +371,14 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
case cs.State.Terminated == nil: // container is not terminated and pod phase is not failed, so runner is still running
log.Info("Runner container is still running; updating ephemeral runner status")
if err := r.updateRunStatusFromPod(ctx, ephemeralRunner, pod, log); err != nil {
if err := r.updateRunStatusFromPod(ctx, &ephemeralRunner, pod, log); err != nil {
log.Info("Failed to update ephemeral runner status. Requeue to not miss this event")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
case cs.State.Terminated.ExitCode == 7: // outdated
if err := r.markAsOutdated(ctx, ephemeralRunner, log); err != nil {
if err := r.markAsOutdated(ctx, &ephemeralRunner, log); err != nil {
log.Error(err, "Failed to set ephemeral runner to phase Outdated")
return ctrl.Result{}, err
}
@ -379,11 +386,11 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
case cs.State.Terminated.ExitCode != 0: // failed
log.Info("Ephemeral runner container failed", "exitCode", cs.State.Terminated.ExitCode)
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, ephemeralRunner, pod, log)
return ctrl.Result{}, r.deleteEphemeralRunnerOrPod(ctx, &ephemeralRunner, pod, log)
default: // succeeded
log.Info("Ephemeral runner has finished successfully, deleting ephemeral runner", "exitCode", cs.State.Terminated.ExitCode)
if err := r.Delete(ctx, ephemeralRunner); err != nil {
if err := r.Delete(ctx, &ephemeralRunner); err != nil {
log.Error(err, "Failed to delete ephemeral runner after successful completion")
return ctrl.Result{}, err
}
@ -568,11 +575,12 @@ func (r *EphemeralRunnerReconciler) cleanupRunnerLinkedSecrets(ctx context.Conte
func (r *EphemeralRunnerReconciler) markAsFailed(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, errMessage string, reason string, log logr.Logger) error {
log.Info("Updating ephemeral runner status to Failed")
if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
obj.Status.Phase = v1alpha1.EphemeralRunnerPhaseFailed
obj.Status.Reason = reason
obj.Status.Message = errMessage
}); err != nil {
original := ephemeralRunner.DeepCopy()
ephemeralRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseFailed
ephemeralRunner.Status.Reason = reason
ephemeralRunner.Status.Message = errMessage
if err := r.Status().Patch(ctx, ephemeralRunner, client.MergeFrom(original)); err != nil {
return fmt.Errorf("failed to update ephemeral runner status Phase/Message: %w", err)
}
@ -588,11 +596,12 @@ func (r *EphemeralRunnerReconciler) markAsFailed(ctx context.Context, ephemeralR
func (r *EphemeralRunnerReconciler) markAsOutdated(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) error {
log.Info("Updating ephemeral runner status to Outdated")
if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
obj.Status.Phase = v1alpha1.EphemeralRunnerPhaseOutdated
obj.Status.Reason = "Outdated"
obj.Status.Message = "Runner is deprecated"
}); err != nil {
original := ephemeralRunner.DeepCopy()
ephemeralRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseOutdated
ephemeralRunner.Status.Reason = "Outdated"
ephemeralRunner.Status.Message = "Runner is deprecated"
if err := r.Status().Patch(ctx, ephemeralRunner, client.MergeFrom(original)); err != nil {
return fmt.Errorf("failed to update ephemeral runner status Phase/Message: %w", err)
}
@ -616,16 +625,17 @@ func (r *EphemeralRunnerReconciler) deletePodAsFailed(ctx context.Context, ephem
}
log.Info("Updating ephemeral runner status to track the failure count")
if err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
if obj.Status.Failures == nil {
obj.Status.Failures = make(map[string]metav1.Time)
}
obj.Status.Failures[string(pod.UID)] = metav1.Now()
obj.Status.Ready = false
obj.Status.Reason = pod.Status.Reason
obj.Status.Message = pod.Status.Message
}); err != nil {
return fmt.Errorf("failed to update ephemeral runner status: failed attempts: %w", err)
original := ephemeralRunner.DeepCopy()
if ephemeralRunner.Status.Failures == nil {
ephemeralRunner.Status.Failures = make(map[string]metav1.Time)
}
ephemeralRunner.Status.Failures[string(pod.UID)] = metav1.Now()
ephemeralRunner.Status.Ready = false
ephemeralRunner.Status.Reason = pod.Status.Reason
ephemeralRunner.Status.Message = pod.Status.Message
if err := r.Status().Patch(ctx, ephemeralRunner, client.MergeFrom(original)); err != nil {
return fmt.Errorf("failed to update ephemeral runner status with failure count: %w", err)
}
log.Info("EphemeralRunner pod is deleted and status is updated with failure count")
@ -744,10 +754,9 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
}
log.Info("Creating new pod for ephemeral runner")
newPod := r.newEphemeralRunnerPod(runner, secret, envs...)
if err := ctrl.SetControllerReference(runner, newPod, r.Scheme); err != nil {
log.Error(err, "Failed to set controller reference to a new pod")
newPod, err := r.newEphemeralRunnerPod(runner, secret, envs...)
if err != nil {
log.Error(err, "Failed to build new pod")
return ctrl.Result{}, err
}
@ -761,7 +770,7 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
"runnerScaleSetId", runner.Spec.RunnerScaleSetID,
"runnerName", runner.Status.RunnerName,
"runnerId", runner.Status.RunnerID,
"configUrl", runner.Spec.GitHubConfigUrl,
"configUrl", runner.Spec.GitHubConfigURL,
"podName", newPod.Name)
return ctrl.Result{}, nil
@ -769,10 +778,9 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, jitConfig *scaleset.RunnerScaleSetJitRunnerConfig, log logr.Logger) (*corev1.Secret, error) {
log.Info("Creating new secret for ephemeral runner")
jitSecret := r.newEphemeralRunnerJitSecret(runner, jitConfig)
if err := ctrl.SetControllerReference(runner, jitSecret, r.Scheme); err != nil {
return nil, fmt.Errorf("failed to set controller reference: %w", err)
jitSecret, err := r.newEphemeralRunnerJitSecret(runner, jitConfig)
if err != nil {
return nil, fmt.Errorf("failed to build jit secret: %w", err)
}
log.Info("Created new secret spec for ephemeral runner")
@ -818,13 +826,13 @@ func (r *EphemeralRunnerReconciler) updateRunStatusFromPod(ctx context.Context,
"statusMessage", pod.Status.Message,
"ready", ready,
)
err := patchSubResource(ctx, r.Status(), ephemeralRunner, func(obj *v1alpha1.EphemeralRunner) {
obj.Status.Phase = phase
obj.Status.Ready = ready
obj.Status.Reason = pod.Status.Reason
obj.Status.Message = pod.Status.Message
})
if err != nil {
original := ephemeralRunner.DeepCopy()
ephemeralRunner.Status.Phase = phase
ephemeralRunner.Status.Ready = ready
ephemeralRunner.Status.Reason = pod.Status.Reason
ephemeralRunner.Status.Message = pod.Status.Message
if err := r.Status().Patch(ctx, ephemeralRunner, client.MergeFrom(original)); err != nil {
return fmt.Errorf("failed to update runner status for Phase/Reason/Message/Ready: %w", err)
}
@ -850,6 +858,8 @@ func (r *EphemeralRunnerReconciler) deleteRunnerFromService(ctx context.Context,
// SetupWithManager sets up the controller with the Manager.
func (r *EphemeralRunnerReconciler) SetupWithManager(mgr ctrl.Manager, opts ...Option) error {
r.ResourceBuilder.setSchemeIfUnset(r.Scheme)
return builderWithOptions(
ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.EphemeralRunner{}).

View File

@ -41,7 +41,7 @@ func newExampleRunner(name, namespace, configSecretName string) *v1alpha1.Epheme
Namespace: namespace,
},
Spec: v1alpha1.EphemeralRunnerSpec{
GitHubConfigUrl: "https://github.com/owner/repo",
GitHubConfigURL: "https://github.com/owner/repo",
GitHubConfigSecret: configSecretName,
RunnerScaleSetID: 1,
PodTemplateSpec: corev1.PodTemplateSpec{
@ -198,12 +198,13 @@ var _ = Describe("EphemeralRunner", func() {
Expect(err).To(BeNil(), "failed to delete pod")
pod = new(corev1.Pod)
Eventually(func() (bool, error) {
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
return false, err
}
return true, nil
},
Eventually(
func() (bool, error) {
if err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, pod); err != nil {
return false, err
}
return true, nil
},
ephemeralRunnerTimeout,
ephemeralRunnerInterval,
).Should(BeEquivalentTo(true))
@ -547,9 +548,10 @@ var _ = Describe("EphemeralRunner", func() {
It("It should mark as failed when job is not assigned and pod is failed", func() {
er := new(v1alpha1.EphemeralRunner)
Eventually(func() error {
return k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, er)
},
Eventually(
func() error {
return k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunner.Name, Namespace: ephemeralRunner.Namespace}, er)
},
ephemeralRunnerTimeout,
ephemeralRunnerInterval,
).Should(Succeed(), "failed to get ephemeral runner")
@ -1362,7 +1364,7 @@ var _ = Describe("EphemeralRunner", func() {
Expect(err).NotTo(HaveOccurred(), "failed to create secret credentials")
ephemeralRunner := newExampleRunner("test-runner", autoScalingNS.Name, configSecret.Name)
ephemeralRunner.Spec.GitHubConfigUrl = "http://example.com/org/repo"
ephemeralRunner.Spec.GitHubConfigURL = "http://example.com/org/repo"
ephemeralRunner.Spec.Proxy = &v1alpha1.ProxyConfig{
HTTP: &v1alpha1.ProxyServerConfig{
Url: proxy.URL,
@ -1524,7 +1526,7 @@ var _ = Describe("EphemeralRunner", func() {
}
ephemeralRunner := newExampleRunner("test-runner", autoScalingNS.Name, configSecret.Name)
ephemeralRunner.Spec.GitHubConfigUrl = server.URL + "/my-org"
ephemeralRunner.Spec.GitHubConfigURL = server.URL + "/my-org"
ephemeralRunner.Spec.GitHubServerTLS = &v1alpha1.TLSConfig{
CertificateFrom: &v1alpha1.TLSCertificateSource{
ConfigMapKeyRef: &corev1.ConfigMapKeySelector{

View File

@ -17,11 +17,14 @@ limitations under the License.
package actionsgithubcom
import (
"bytes"
"context"
"errors"
"fmt"
"maps"
"sort"
"strconv"
"time"
"github.com/actions/actions-runner-controller/apis/actions.github.com/v1alpha1"
"github.com/actions/actions-runner-controller/controllers/actions.github.com/metrics"
@ -32,7 +35,6 @@ import (
"go.uber.org/multierr"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
@ -42,17 +44,16 @@ import (
)
const (
ephemeralRunnerSetFinalizerName = "ephemeralrunner.actions.github.com/finalizer"
// EphemeralRunnerSetFinalizerName is the finalizer name used in EphemeralRunnerSet resource to protect the cleanup process of the child ephemeral runners and proxy secret.
EphemeralRunnerSetFinalizerName = "ephemeralrunnerset.actions.github.com/finalizer"
)
// EphemeralRunnerSetReconciler reconciles a EphemeralRunnerSet object
type EphemeralRunnerSetReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
Log logr.Logger
Scheme *runtime.Scheme
PublishMetrics bool
ResourceBuilder
}
@ -77,19 +78,20 @@ type EphemeralRunnerSetReconciler struct {
func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.WithValues("ephemeralrunnerset", req.NamespacedName)
ephemeralRunnerSet := new(v1alpha1.EphemeralRunnerSet)
if err := r.Get(ctx, req.NamespacedName, ephemeralRunnerSet); err != nil {
var ephemeralRunnerSet v1alpha1.EphemeralRunnerSet
if err := r.Get(ctx, req.NamespacedName, &ephemeralRunnerSet); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
original := ephemeralRunnerSet.DeepCopy()
// A resource whose deletion has been requested does not need to be reconciled.
if !ephemeralRunnerSet.DeletionTimestamp.IsZero() {
if !controllerutil.ContainsFinalizer(ephemeralRunnerSet, ephemeralRunnerSetFinalizerName) {
if !controllerutil.ContainsFinalizer(&ephemeralRunnerSet, EphemeralRunnerSetFinalizerName) {
return ctrl.Result{}, nil
}
log.Info("Deleting resources")
done, err := r.cleanUpEphemeralRunners(ctx, ephemeralRunnerSet, log)
done, err := r.cleanUpEphemeralRunners(ctx, &ephemeralRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up EphemeralRunners")
return ctrl.Result{}, err
@ -99,25 +101,33 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
return ctrl.Result{}, nil
}
log.Info("Removing finalizer")
if err := patch(ctx, r.Client, ephemeralRunnerSet, func(obj *v1alpha1.EphemeralRunnerSet) {
controllerutil.RemoveFinalizer(obj, ephemeralRunnerSetFinalizerName)
}); err != nil && !kerrors.IsNotFound(err) {
log.Error(err, "Failed to update ephemeral runner set with removed finalizer")
done, err = r.cleanUpEphemeralRunnerSetProxySecret(ctx, &ephemeralRunnerSet, log)
if err != nil {
log.Error(err, "Failed to clean up EphemeralRunnerSet proxy secret")
return ctrl.Result{}, err
}
if !done {
log.Info("Waiting for proxy secret to be deleted")
return ctrl.Result{RequeueAfter: 1 * time.Second}, nil
}
log.Info("Removing finalizer")
if controllerutil.RemoveFinalizer(&ephemeralRunnerSet, EphemeralRunnerSetFinalizerName) {
if err := r.Patch(ctx, &ephemeralRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update ephemeral runner set with removed finalizer")
return ctrl.Result{}, err
}
}
log.Info("Successfully removed finalizer after cleanup")
return ctrl.Result{}, nil
}
// Add finalizer if not present
if !controllerutil.ContainsFinalizer(ephemeralRunnerSet, ephemeralRunnerSetFinalizerName) {
if controllerutil.AddFinalizer(&ephemeralRunnerSet, EphemeralRunnerSetFinalizerName) {
log.Info("Adding finalizer")
if err := patch(ctx, r.Client, ephemeralRunnerSet, func(obj *v1alpha1.EphemeralRunnerSet) {
controllerutil.AddFinalizer(obj, ephemeralRunnerSetFinalizerName)
}); err != nil {
log.Error(err, "Failed to update ephemeral runner set with finalizer added")
if err := r.Patch(ctx, &ephemeralRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update ephemeral runner set with new finalizer")
return ctrl.Result{}, err
}
@ -125,34 +135,57 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
return ctrl.Result{}, nil
}
if ephemeralRunnerSet.Status.Phase == v1alpha1.EphemeralRunnerSetPhaseOutdated {
log.Info("ephemeral runner set is outdated, waiting for autoscaling runner set to remove it")
// If hash spec has changed, delete idle ephemeral runners
// in order to apply the change to the runners that did not yet receive a job.
ephemeralRunnerIntegrityHash := ephemeralRunnerSetIntegrityHash(&ephemeralRunnerSet)
if ephemeralRunnerSet.Annotations[annotationKeyIntegrityHash] != ephemeralRunnerIntegrityHash {
log.Info("EphemeralRunnerSpec has changed, deleting idle ephemeral runners to apply the new spec")
if _, err := r.cleanUpEphemeralRunners(ctx, &ephemeralRunnerSet, log); err != nil {
log.Error(err, "Failed to clean up EphemeralRunners")
return ctrl.Result{}, err
}
if _, _, err := r.reconcileEphemeralRunnerSetProxySecret(ctx, &ephemeralRunnerSet, log); err != nil {
log.Error(err, "Failed to update EphemeralRunnerSet proxy secret")
return ctrl.Result{}, err
}
log.Info("Updating EphemeralRunnerSet with new spec hash")
original := ephemeralRunnerSet.DeepCopy()
if ephemeralRunnerSet.Annotations == nil {
ephemeralRunnerSet.Annotations = make(map[string]string)
}
ephemeralRunnerSet.Annotations[annotationKeyIntegrityHash] = ephemeralRunnerIntegrityHash
if err := r.Patch(ctx, &ephemeralRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update ephemeral runner set with new spec hash")
return ctrl.Result{}, err
}
log.Info("Updated ephemeral runner set with new spec hash")
return ctrl.Result{}, nil
}
// Create proxy secret if not present
if ephemeralRunnerSet.Spec.EphemeralRunnerSpec.Proxy != nil {
proxySecret := new(corev1.Secret)
if err := r.Get(ctx, types.NamespacedName{Namespace: ephemeralRunnerSet.Namespace, Name: proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet)}, proxySecret); err != nil {
if !kerrors.IsNotFound(err) {
log.Error(err, "Unable to get ephemeralRunnerSet proxy secret", "namespace", ephemeralRunnerSet.Namespace, "name", proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet))
return ctrl.Result{}, err
}
// Create a compiled secret for the runner pods in the runnerset namespace
log.Info("Creating a ephemeralRunnerSet proxy secret for the runner pods")
if err := r.createProxySecret(ctx, ephemeralRunnerSet, log); err != nil {
log.Error(err, "Unable to create ephemeralRunnerSet proxy secret", "namespace", ephemeralRunnerSet.Namespace, "set-name", ephemeralRunnerSet.Name)
return ctrl.Result{}, err
}
if ephemeralRunnerSet.Status.Phase == v1alpha1.EphemeralRunnerSetPhaseOutdated {
if _, err := r.cleanUpEphemeralRunners(ctx, &ephemeralRunnerSet, log); err != nil {
log.Error(err, "Failed to clean up EphemeralRunners")
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
// Create or update proxy secret if needed
if _, updated, err := r.reconcileEphemeralRunnerSetProxySecret(ctx, &ephemeralRunnerSet, log); err != nil {
log.Error(err, "Unable to reconcile ephemeralRunnerSet proxy secret", "namespace", ephemeralRunnerSet.Namespace, "name", proxyEphemeralRunnerSetSecretName(&ephemeralRunnerSet))
return ctrl.Result{}, err
} else if updated {
return ctrl.Result{RequeueAfter: 1 * time.Second}, nil
}
// Find all EphemeralRunners in the matching namespace that are owned by this EphemeralRunnerSet.
ephemeralRunnerList := new(v1alpha1.EphemeralRunnerList)
var ephemeralRunnerList v1alpha1.EphemeralRunnerList
if err := r.List(
ctx,
ephemeralRunnerList,
&ephemeralRunnerList,
client.InNamespace(req.Namespace),
client.MatchingFields{resourceOwnerKey: req.Name},
); err != nil {
@ -160,9 +193,10 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
return ctrl.Result{}, err
}
ephemeralRunnersByState := newEphemeralRunnersByStates(ephemeralRunnerList)
ephemeralRunnersByState := newEphemeralRunnersByStates(&ephemeralRunnerList)
log.Info("Ephemeral runner counts",
log.Info(
"Ephemeral runner counts",
"outdated", len(ephemeralRunnersByState.outdated),
"pending", len(ephemeralRunnersByState.pending),
"running", len(ephemeralRunnersByState.running),
@ -172,7 +206,7 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
)
if r.PublishMetrics {
githubConfigURL := ephemeralRunnerSet.Spec.EphemeralRunnerSpec.GitHubConfigUrl
githubConfigURL := ephemeralRunnerSet.Spec.EphemeralRunnerSpec.GitHubConfigURL
parsedURL, err := actions.ParseGitHubConfigFromURL(githubConfigURL)
if err != nil {
log.Error(err, "Github Config URL is invalid", "URL", githubConfigURL)
@ -206,7 +240,7 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
case total < ephemeralRunnerSet.Spec.Replicas: // Handle scale up
count := ephemeralRunnerSet.Spec.Replicas - total
log.Info("Creating new ephemeral runners (scale up)", "count", count)
if err := r.createEphemeralRunners(ctx, ephemeralRunnerSet, count, log); err != nil {
if err := r.createEphemeralRunners(ctx, &ephemeralRunnerSet, count, log); err != nil {
log.Error(err, "failed to make ephemeral runner")
return ctrl.Result{}, err
}
@ -221,7 +255,7 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
log.Info("Deleting ephemeral runners (scale down)", "count", count)
if err := r.deleteIdleEphemeralRunners(
ctx,
ephemeralRunnerSet,
&ephemeralRunnerSet,
ephemeralRunnersByState.pending,
ephemeralRunnersByState.running,
count,
@ -233,10 +267,11 @@ func (r *EphemeralRunnerSetReconciler) Reconcile(ctx context.Context, req ctrl.R
}
}
return ctrl.Result{}, r.updateStatus(ctx, ephemeralRunnerSet, ephemeralRunnersByState, log)
return ctrl.Result{}, r.updateStatus(ctx, &ephemeralRunnerSet, ephemeralRunnersByState, log)
}
func (r *EphemeralRunnerSetReconciler) updateStatus(ctx context.Context, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, state *ephemeralRunnersByState, log logr.Logger) error {
original := ephemeralRunnerSet.DeepCopy()
total := state.scaleTotal()
var phase v1alpha1.EphemeralRunnerSetPhase
switch {
@ -257,15 +292,13 @@ func (r *EphemeralRunnerSetReconciler) updateStatus(ctx context.Context, ephemer
// Update the status if needed.
if ephemeralRunnerSet.Status != desiredStatus {
log.Info("Updating status with current runners count", "count", total)
ephemeralRunnerSet := ephemeralRunnerSet.DeepCopy()
ephemeralRunnerSet.Status.CurrentReplicas = -1 // ALWAYS update current replicas
if err := patchSubResource(ctx, r.Status(), ephemeralRunnerSet, func(obj *v1alpha1.EphemeralRunnerSet) {
obj.Status = desiredStatus
}); err != nil {
log.Error(err, "Failed to update status with current runners count")
ephemeralRunnerSet.Status = desiredStatus
if err := r.Status().Patch(ctx, ephemeralRunnerSet, client.MergeFrom(original)); err != nil {
log.Error(err, "Failed to update EphemeralRunnerSet status")
return err
}
log.Info("Updated EphemeralRunnerSet status", "status", ephemeralRunnerSet.Status)
}
return nil
}
@ -323,7 +356,8 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte
ephemeralRunnerState := newEphemeralRunnersByStates(ephemeralRunnerList)
log.Info("Clean up runner counts",
log.Info(
"Clean up runner counts",
"pending", len(ephemeralRunnerState.pending),
"running", len(ephemeralRunnerState.running),
"finished", len(ephemeralRunnerState.finished),
@ -359,7 +393,7 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte
log.Info("Cleanup pending or running ephemeral runners")
errs = errs[0:0]
for _, ephemeralRunner := range append(ephemeralRunnerState.pending, ephemeralRunnerState.running...) {
for _, ephemeralRunner := range ephemeralRunnerState.pending {
log.Info("Removing the ephemeral runner from the service", "name", ephemeralRunner.Name)
_, err := r.deleteEphemeralRunnerWithActionsClient(ctx, ephemeralRunner, actionsClient, log)
if err != nil {
@ -367,6 +401,24 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte
}
}
for _, ephemeralRunner := range ephemeralRunnerState.running {
if ephemeralRunner.HasJob() {
log.Info(
"Skipping ephemeral runner since it is running a job",
"name", ephemeralRunner.Name,
"workflowRunId", ephemeralRunner.Status.WorkflowRunID,
"jobId", ephemeralRunner.Status.JobID,
)
continue
}
log.Info("Removing the idle ephemeral runner from the service", "name", ephemeralRunner.Name)
_, err := r.deleteEphemeralRunnerWithActionsClient(ctx, ephemeralRunner, actionsClient, log)
if err != nil {
errs = append(errs, err)
}
}
if len(errs) > 0 {
mergedErrs := multierr.Combine(errs...)
log.Error(mergedErrs, "Failed to remove ephemeral runners from the service")
@ -376,22 +428,124 @@ func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunners(ctx context.Conte
return false, nil
}
// cleanUpEphemeralRunnerSetProxySecret removes the proxy secret that belongs to the given
// EphemeralRunnerSet.
//
// done is true once there is nothing left to remove: either the set has no proxy configuration,
// or the secret can no longer be found. When the secret still exists, a delete is issued and
// done is false, so completion is only reported by a later call that observes the secret as
// missing. Any other lookup or delete failure is logged and returned as err.
func (r *EphemeralRunnerSetReconciler) cleanUpEphemeralRunnerSetProxySecret(ctx context.Context, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, log logr.Logger) (done bool, err error) {
	// No proxy configured means no proxy secret to clean up.
	if ephemeralRunnerSet.Spec.EphemeralRunnerSpec.Proxy == nil {
		return true, nil
	}

	secretKey := types.NamespacedName{
		Namespace: ephemeralRunnerSet.Namespace,
		Name:      proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
	}
	existing := new(corev1.Secret)
	getErr := r.Get(ctx, secretKey, existing)

	if kerrors.IsNotFound(getErr) {
		log.Info("Proxy secret already deleted")
		return true, nil
	}
	if getErr != nil {
		log.Error(
			getErr,
			"Unable to get ephemeralRunnerSet proxy secret",
			"namespace", ephemeralRunnerSet.Namespace,
			"name", secretKey.Name,
		)
		return false, getErr
	}

	// The secret is still present: request its deletion. A NotFound on delete is tolerated
	// because it means the secret disappeared between the Get and the Delete.
	log.Info("Deleting proxy secret")
	if deleteErr := r.Delete(ctx, existing); deleteErr != nil && !kerrors.IsNotFound(deleteErr) {
		log.Error(deleteErr, "Failed to delete proxy secret")
		return false, deleteErr
	}
	log.Info("Deleted proxy secret")

	// Deliberately not done yet; the next call confirms the secret is gone.
	return false, nil
}
// reconcileEphemeralRunnerSetProxySecret makes sure the proxy secret of the given
// EphemeralRunnerSet exists and matches the set's current proxy configuration.
//
// Outcomes:
//   - no proxy configured: returns (nil, false, nil) without touching the cluster;
//   - secret missing: it is created through createProxySecret and (nil, false, nil) is returned;
//   - secret present and already matching in data, labels and annotations: the fetched secret is
//     returned with updated == false;
//   - secret present but different: the secret is updated and returned with updated == true.
//
// Any failure while reading, building, creating or updating the secret is returned as err.
func (r *EphemeralRunnerSetReconciler) reconcileEphemeralRunnerSetProxySecret(ctx context.Context, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, log logr.Logger) (secret *corev1.Secret, updated bool, err error) {
	proxyConfig := ephemeralRunnerSet.Spec.EphemeralRunnerSpec.Proxy
	if proxyConfig == nil {
		return nil, false, nil
	}

	secretKey := types.NamespacedName{
		Namespace: ephemeralRunnerSet.Namespace,
		Name:      proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
	}
	current := new(corev1.Secret)
	getErr := r.Get(ctx, secretKey, current)

	if kerrors.IsNotFound(getErr) {
		// Create a compiled secret for the runner pods in the runnerset namespace
		log.Info("Creating a ephemeralRunnerSet proxy secret for the runner pods")
		if createErr := r.createProxySecret(ctx, ephemeralRunnerSet, log); createErr != nil {
			return nil, false, fmt.Errorf("failed to create ephemeralRunnerSet proxy secret: %w", createErr)
		}
		return nil, false, nil
	}
	if getErr != nil {
		return nil, false, getErr
	}

	// Resolves secrets referenced by the proxy configuration from the runner set's namespace.
	lookupSecret := func(name string) (*corev1.Secret, error) {
		referenced := new(corev1.Secret)
		lookupErr := r.Get(ctx, types.NamespacedName{Namespace: ephemeralRunnerSet.Namespace, Name: name}, referenced)
		return referenced, lookupErr
	}
	secretData, convertErr := proxyConfig.ToSecretData(lookupSecret)
	if convertErr != nil {
		return nil, false, fmt.Errorf("failed to convert proxy config to secret data: %w", convertErr)
	}

	desired, buildErr := r.newEphemeralRunnerSetProxySecret(ephemeralRunnerSet, secretData)
	if buildErr != nil {
		return nil, false, fmt.Errorf("failed to build desired ephemeralRunnerSet proxy secret: %w", buildErr)
	}

	// Changes are staged on a copy, so the fetched object stays untouched when nothing differs.
	staged := current.DeepCopy()
	changed := false

	if !maps.EqualFunc(current.Data, desired.Data, bytes.Equal) {
		staged.Data = desired.Data
		changed = true
	}

	mergedLabels := r.filterAndMergeLabels(current.Labels, desired.Labels)
	if !maps.Equal(current.Labels, mergedLabels) {
		staged.Labels = mergedLabels
		changed = true
	}

	mergedAnnotations := r.mergeAnnotations(current.Annotations, desired.Annotations)
	if !maps.Equal(current.Annotations, mergedAnnotations) {
		staged.Annotations = mergedAnnotations
		changed = true
	}

	if !changed {
		return current, false, nil
	}

	log.Info("Updating ephemeralRunnerSet proxy secret")
	if updateErr := r.Update(ctx, staged); updateErr != nil {
		return nil, false, fmt.Errorf("failed to update ephemeralRunnerSet proxy secret: %w", updateErr)
	}
	return staged, true, nil
}
// createEphemeralRunners provisions `count` number of v1alpha1.EphemeralRunner resources in the cluster.
func (r *EphemeralRunnerSetReconciler) createEphemeralRunners(ctx context.Context, runnerSet *v1alpha1.EphemeralRunnerSet, count int, log logr.Logger) error {
// Track multiple errors at once and return the bundle.
errs := make([]error, 0)
for i := range count {
ephemeralRunner := r.newEphemeralRunner(runnerSet)
if runnerSet.Spec.EphemeralRunnerSpec.Proxy != nil {
ephemeralRunner.Spec.ProxySecretRef = proxyEphemeralRunnerSetSecretName(runnerSet)
}
// Make sure that we own the resource we create.
if err := ctrl.SetControllerReference(runnerSet, ephemeralRunner, r.Scheme); err != nil {
log.Error(err, "failed to set controller reference on ephemeral runner")
ephemeralRunner, err := r.newEphemeralRunner(runnerSet)
if err != nil {
log.Error(err, "failed to build ephemeral runner")
errs = append(errs, err)
continue
}
if runnerSet.Spec.EphemeralRunnerSpec.Proxy != nil {
ephemeralRunner.Spec.ProxySecretRef = proxyEphemeralRunnerSetSecretName(runnerSet)
}
log.Info("Creating new ephemeral runner", "progress", i+1, "total", count)
if err := r.Create(ctx, ephemeralRunner); err != nil {
@ -416,21 +570,9 @@ func (r *EphemeralRunnerSetReconciler) createProxySecret(ctx context.Context, ep
return fmt.Errorf("failed to convert proxy config to secret data: %w", err)
}
runnerPodProxySecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
Namespace: ephemeralRunnerSet.Namespace,
Labels: map[string]string{
LabelKeyGitHubScaleSetName: ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetName],
LabelKeyGitHubScaleSetNamespace: ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetNamespace],
},
},
Data: proxySecretData,
}
// Make sure that we own the resource we create.
if err := ctrl.SetControllerReference(ephemeralRunnerSet, runnerPodProxySecret, r.Scheme); err != nil {
log.Error(err, "failed to set controller reference on proxy secret")
runnerPodProxySecret, err := r.newEphemeralRunnerSetProxySecret(ephemeralRunnerSet, proxySecretData)
if err != nil {
log.Error(err, "failed to build proxy secret")
return err
}
@ -523,6 +665,8 @@ func (r *EphemeralRunnerSetReconciler) deleteEphemeralRunnerWithActionsClient(ct
// SetupWithManager sets up the controller with the Manager.
func (r *EphemeralRunnerSetReconciler) SetupWithManager(mgr ctrl.Manager, opts ...Option) error {
r.setSchemeIfUnset(r.Scheme)
return builderWithOptions(
ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.EphemeralRunnerSet{}).

View File

@ -71,10 +71,13 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
ObjectMeta: metav1.ObjectMeta{
Name: "test-asrs",
Namespace: autoscalingNS.Name,
Annotations: map[string]string{
"arc.test/runner-set-annotation": "initial",
},
},
Spec: v1alpha1.EphemeralRunnerSetSpec{
EphemeralRunnerSpec: v1alpha1.EphemeralRunnerSpec{
GitHubConfigUrl: "https://github.com/owner/repo",
GitHubConfigURL: "https://github.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
RunnerScaleSetID: 100,
PodTemplateSpec: corev1.PodTemplateSpec{
@ -113,7 +116,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return created.Finalizers[0], nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(ephemeralRunnerSetFinalizerName), "EphemeralRunnerSet should have a finalizer")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(EphemeralRunnerSetFinalizerName), "EphemeralRunnerSet should have a finalizer")
// Check that the number of ephemeral runners stays at 0
Consistently(
@ -126,7 +130,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(0), "No EphemeralRunner should be created")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(0), "No EphemeralRunner should be created")
// Check that the status stays at 0
Consistently(
@ -140,7 +145,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return int(runnerSet.Status.CurrentReplicas), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(0), "EphemeralRunnerSet status should be 0")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(0), "EphemeralRunnerSet status should be 0")
// Scaling up the EphemeralRunnerSet
updated := created.DeepCopy()
@ -178,7 +184,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(5), "5 EphemeralRunner should be created")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(5), "5 EphemeralRunner should be created")
// Check if the status is updated
Eventually(
@ -192,7 +199,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return int(runnerSet.Status.CurrentReplicas), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(5), "EphemeralRunnerSet status should be 5")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(5), "EphemeralRunnerSet status should be 5")
})
})
@ -238,7 +246,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(5), "5 EphemeralRunner should be created")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(5), "5 EphemeralRunner should be created")
// Delete the EphemeralRunnerSet
err = k8sClient.Delete(ctx, created)
@ -255,7 +264,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(0), "All EphemeralRunner should be deleted")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(0), "All EphemeralRunner should be deleted")
// Check if the EphemeralRunnerSet is deleted
Eventually(
@ -273,7 +283,8 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
return fmt.Errorf("EphemeralRunnerSet is not deleted")
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(Succeed(), "EphemeralRunnerSet should be deleted")
ephemeralRunnerSetTestInterval,
).Should(Succeed(), "EphemeralRunnerSet should be deleted")
})
})
@ -304,6 +315,64 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
).Should(BeEquivalentTo(5), "5 EphemeralRunner should be created")
})
// Verifies that annotations set on the EphemeralRunnerSet are copied to the
// EphemeralRunners it creates, and that an annotation change on the set only
// shows up on runners created after the change: the runner created earlier is
// expected to keep the value it was created with.
It("propagates updated EphemeralRunnerSet annotations to newly created EphemeralRunners", func() {
ers := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunnerSet.Name, Namespace: ephemeralRunnerSet.Namespace}, ers)
Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
// Step 1: scale to a single replica while the set still carries its
// original annotations.
updated := ers.DeepCopy()
updated.Spec.Replicas = 1
updated.Spec.PatchID = 0
err = k8sClient.Patch(ctx, updated, client.MergeFrom(ers))
Expect(err).NotTo(HaveOccurred(), "failed to scale EphemeralRunnerSet")
runnerList := new(v1alpha1.EphemeralRunnerList)
// Wait until exactly one runner exists and carries the "initial" value.
Eventually(
func(g Gomega) {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
g.Expect(err).NotTo(HaveOccurred(), "failed to list EphemeralRunners")
g.Expect(runnerList.Items).To(HaveLen(1))
g.Expect(runnerList.Items[0].Annotations["arc.test/runner-set-annotation"]).To(Equal("initial"))
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(Succeed())
// Step 2: re-fetch the set so the merge patch is computed against its
// current state, then change one annotation, add another, and scale to two
// replicas with a new patch ID in the same patch.
ers = new(v1alpha1.EphemeralRunnerSet)
err = k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunnerSet.Name, Namespace: ephemeralRunnerSet.Namespace}, ers)
Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
updated = ers.DeepCopy()
// NOTE(review): writes into updated.Annotations directly, so this assumes
// the fetched set has a non-nil annotation map (the assertion on "initial"
// above implies the set was created with annotations).
updated.Annotations["arc.test/runner-set-annotation"] = "updated"
updated.Annotations["arc.test/new-runner-set-annotation"] = "added"
updated.Spec.Replicas = 2
updated.Spec.PatchID = 1
err = k8sClient.Patch(ctx, updated, client.MergeFrom(ers))
Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunnerSet annotations")
// Expect two runners: one still annotated "initial" and one annotated
// "updated" that also carries the newly added annotation.
Eventually(
func(g Gomega) {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
g.Expect(err).NotTo(HaveOccurred(), "failed to list EphemeralRunners")
g.Expect(runnerList.Items).To(HaveLen(2))
// Count runners per value of the propagated annotation.
annotationsByValue := map[string]int{}
var updatedRunnerHasNewAnnotation bool
for _, runner := range runnerList.Items {
annotationsByValue[runner.Annotations["arc.test/runner-set-annotation"]]++
if runner.Annotations["arc.test/runner-set-annotation"] == "updated" && runner.Annotations["arc.test/new-runner-set-annotation"] == "added" {
updatedRunnerHasNewAnnotation = true
}
}
g.Expect(annotationsByValue["initial"]).To(Equal(1))
g.Expect(annotationsByValue["updated"]).To(Equal(1))
g.Expect(updatedRunnerHasNewAnnotation).To(BeTrue())
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(Succeed())
})
It("Should scale up when patch ID changes", func() {
ers := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunnerSet.Name, Namespace: ephemeralRunnerSet.Namespace}, ers)
@ -910,6 +979,115 @@ var _ = Describe("Test EphemeralRunnerSet controller", func() {
).Should(BeNil(), "2 EphemeralRunner should be created and none should be in Succeeded phase")
})
// Verifies the behavior expected when the EphemeralRunnerSet spec changes
// while runners exist: runners without a job are replaced by runners built
// from the new spec, while a runner that has a job is left untouched with its
// original spec.
It("Should delete idle runners, keep busy runners, and create new runners when the spec changes", func() {
ers := new(v1alpha1.EphemeralRunnerSet)
err := k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunnerSet.Name, Namespace: ephemeralRunnerSet.Namespace}, ers)
Expect(err).NotTo(HaveOccurred(), "failed to get EphemeralRunnerSet")
// Step 1: scale the set to three replicas and wait for three runners.
updated := ers.DeepCopy()
updated.Spec.Replicas = 3
updated.Spec.PatchID = 0
err = k8sClient.Patch(ctx, updated, client.MergeFrom(ers))
Expect(err).NotTo(HaveOccurred(), "failed to update EphemeralRunnerSet")
runnerList := new(v1alpha1.EphemeralRunnerList)
Eventually(
func() (int, error) {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return -1, err
}
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(3), "3 EphemeralRunner should be created")
// Step 2: mark the first two runners as running with runner IDs 101 and 102
// but no job assigned; these are the "idle" runners. Their names are
// remembered so their deletion can be asserted later.
idleRunnerNames := map[string]struct{}{}
for i := 0; i < 2; i++ {
idleRunner := runnerList.Items[i].DeepCopy()
idleRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning
idleRunner.Status.RunnerID = i + 101
err = k8sClient.Status().Patch(ctx, idleRunner, client.MergeFrom(&runnerList.Items[i]))
Expect(err).NotTo(HaveOccurred(), "failed to update idle EphemeralRunner")
idleRunnerNames[idleRunner.Name] = struct{}{}
}
// Step 3: mark the third runner as running with a job ID and workflow run
// ID set; this is the "busy" runner.
busyRunner := runnerList.Items[2].DeepCopy()
busyRunner.Status.Phase = v1alpha1.EphemeralRunnerPhaseRunning
busyRunner.Status.RunnerID = 103
busyRunner.Status.JobID = "job-1"
busyRunner.Status.WorkflowRunID = 9001
err = k8sClient.Status().Patch(ctx, busyRunner, client.MergeFrom(&runnerList.Items[2]))
Expect(err).NotTo(HaveOccurred(), "failed to update busy EphemeralRunner")
busyRunnerName := busyRunner.Name
// Step 4: re-fetch the set so the merge patch is computed against its
// current state, then change the runner container image.
ers = new(v1alpha1.EphemeralRunnerSet)
err = k8sClient.Get(ctx, client.ObjectKey{Name: ephemeralRunnerSet.Name, Namespace: ephemeralRunnerSet.Namespace}, ers)
Expect(err).NotTo(HaveOccurred(), "failed to re-fetch EphemeralRunnerSet")
updated = ers.DeepCopy()
updated.Spec.EphemeralRunnerSpec.PodTemplateSpec.Spec.Containers[0].Image = "ghcr.io/actions/runner:new"
err = k8sClient.Patch(ctx, updated, client.MergeFrom(ers))
Expect(err).NotTo(HaveOccurred(), "failed to patch EphemeralRunnerSet with new spec")
// Step 5: poll until there are again three runners, none of them one of the
// idle runners, the busy runner is still present with its job and original
// image, and the remaining two runners use the new image.
Eventually(
func() error {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return err
}
if len(runnerList.Items) != 3 {
return fmt.Errorf("expected 3 runners after spec update, got %d", len(runnerList.Items))
}
busyRunnerFound := false
newSpecRunnerCount := 0
for _, runner := range runnerList.Items {
// Any surviving idle runner means the replacement has not happened yet.
if _, ok := idleRunnerNames[runner.Name]; ok {
return fmt.Errorf("expected idle runner %s to be deleted", runner.Name)
}
if runner.Name == busyRunnerName {
busyRunnerFound = true
if !runner.HasJob() {
return fmt.Errorf("expected remaining runner to still be busy")
}
// NOTE(review): indexes Containers[0] without the length check used
// for new runners below; assumes the busy runner always has a container.
if runner.Spec.PodTemplateSpec.Spec.Containers[0].Image != "ghcr.io/actions/runner" {
return fmt.Errorf("expected busy runner to keep original image, got %s", runner.Spec.PodTemplateSpec.Spec.Containers[0].Image)
}
continue
}
// Every other runner must have been created from the updated spec.
if len(runner.Spec.PodTemplateSpec.Spec.Containers) == 0 {
return fmt.Errorf("new runner has empty container spec")
}
if runner.Spec.PodTemplateSpec.Spec.Containers[0].Image != "ghcr.io/actions/runner:new" {
return fmt.Errorf("expected new runner image to be updated, got %s", runner.Spec.PodTemplateSpec.Spec.Containers[0].Image)
}
newSpecRunnerCount++
}
if !busyRunnerFound {
return fmt.Errorf("expected busy runner %s to remain", busyRunnerName)
}
if newSpecRunnerCount != 2 {
return fmt.Errorf("expected 2 runners with updated spec, got %d", newSpecRunnerCount)
}
return nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(BeNil(), "busy runner should stay while idle runners are replaced with the updated spec")
})
It("Should update status on Ephemeral Runner state changes", func() {
created := new(v1alpha1.EphemeralRunnerSet)
Eventually(
@ -1144,7 +1322,7 @@ var _ = Describe("Test EphemeralRunnerSet controller with proxy settings", func(
Spec: v1alpha1.EphemeralRunnerSetSpec{
Replicas: 1,
EphemeralRunnerSpec: v1alpha1.EphemeralRunnerSpec{
GitHubConfigUrl: "http://example.com/owner/repo",
GitHubConfigURL: "http://example.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
RunnerScaleSetID: 100,
Proxy: &v1alpha1.ProxyConfig{
@ -1175,29 +1353,30 @@ var _ = Describe("Test EphemeralRunnerSet controller with proxy settings", func(
err = k8sClient.Create(ctx, ephemeralRunnerSet)
Expect(err).NotTo(HaveOccurred(), "failed to create EphemeralRunnerSet")
Eventually(func(g Gomega) {
// Compiled / flattened proxy secret should exist at this point
actualProxySecret := &corev1.Secret{}
err = k8sClient.Get(ctx, client.ObjectKey{
Namespace: autoscalingNS.Name,
Name: proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
}, actualProxySecret)
g.Expect(err).NotTo(HaveOccurred(), "failed to get compiled / flattened proxy secret")
secretFetcher := func(name string) (*corev1.Secret, error) {
secret := &corev1.Secret{}
Eventually(
func(g Gomega) {
// Compiled / flattened proxy secret should exist at this point
actualProxySecret := &corev1.Secret{}
err = k8sClient.Get(ctx, client.ObjectKey{
Namespace: autoscalingNS.Name,
Name: name,
}, secret)
return secret, err
}
Name: proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
}, actualProxySecret)
g.Expect(err).NotTo(HaveOccurred(), "failed to get compiled / flattened proxy secret")
// Assert that the proxy secret is created with the correct values
expectedData, err := ephemeralRunnerSet.Spec.EphemeralRunnerSpec.Proxy.ToSecretData(secretFetcher)
g.Expect(err).NotTo(HaveOccurred(), "failed to get proxy secret data")
g.Expect(actualProxySecret.Data).To(Equal(expectedData))
},
secretFetcher := func(name string) (*corev1.Secret, error) {
secret := &corev1.Secret{}
err = k8sClient.Get(ctx, client.ObjectKey{
Namespace: autoscalingNS.Name,
Name: name,
}, secret)
return secret, err
}
// Assert that the proxy secret is created with the correct values
expectedData, err := ephemeralRunnerSet.Spec.EphemeralRunnerSpec.Proxy.ToSecretData(secretFetcher)
g.Expect(err).NotTo(HaveOccurred(), "failed to get proxy secret data")
g.Expect(actualProxySecret.Data).To(Equal(expectedData))
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(Succeed(), "compiled / flattened proxy secret should exist")
@ -1250,34 +1429,37 @@ var _ = Describe("Test EphemeralRunnerSet controller with proxy settings", func(
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval).Should(BeEquivalentTo(1), "1 EphemeralRunner should exist")
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(1), "1 EphemeralRunner should exist")
// Delete the EphemeralRunnerSet
err = k8sClient.Delete(ctx, ephemeralRunnerSet)
Expect(err).NotTo(HaveOccurred(), "failed to delete EphemeralRunnerSet")
Eventually(func(g Gomega) (int, error) {
runnerList := new(v1alpha1.EphemeralRunnerList)
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return -1, err
}
return len(runnerList.Items), nil
},
Eventually(
func(g Gomega) (int, error) {
runnerList := new(v1alpha1.EphemeralRunnerList)
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return -1, err
}
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(0), "EphemeralRunners should be deleted")
// Assert that the proxy secret is deleted
Eventually(func(g Gomega) {
proxySecret := &corev1.Secret{}
err = k8sClient.Get(ctx, client.ObjectKey{
Namespace: autoscalingNS.Name,
Name: proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
}, proxySecret)
g.Expect(err).To(HaveOccurred(), "proxy secret should be deleted")
g.Expect(kerrors.IsNotFound(err)).To(BeTrue(), "proxy secret should be deleted")
},
Eventually(
func(g Gomega) {
proxySecret := &corev1.Secret{}
err = k8sClient.Get(ctx, client.ObjectKey{
Namespace: autoscalingNS.Name,
Name: proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
}, proxySecret)
g.Expect(err).To(HaveOccurred(), "proxy secret should be deleted")
g.Expect(kerrors.IsNotFound(err)).To(BeTrue(), "proxy secret should be deleted")
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(Succeed(), "proxy secret should be deleted")
@ -1323,7 +1505,7 @@ var _ = Describe("Test EphemeralRunnerSet controller with proxy settings", func(
Spec: v1alpha1.EphemeralRunnerSetSpec{
Replicas: 1,
EphemeralRunnerSpec: v1alpha1.EphemeralRunnerSpec{
GitHubConfigUrl: "http://example.com/owner/repo",
GitHubConfigURL: "http://example.com/owner/repo",
GitHubConfigSecret: configSecret.Name,
RunnerScaleSetID: 100,
Proxy: &v1alpha1.ProxyConfig{
@ -1350,14 +1532,15 @@ var _ = Describe("Test EphemeralRunnerSet controller with proxy settings", func(
Expect(err).NotTo(HaveOccurred(), "failed to create EphemeralRunnerSet")
runnerList := new(v1alpha1.EphemeralRunnerList)
Eventually(func() (int, error) {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return -1, err
}
Eventually(
func() (int, error) {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return -1, err
}
return len(runnerList.Items), nil
},
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(1), "failed to create ephemeral runner")
@ -1443,7 +1626,7 @@ var _ = Describe("Test EphemeralRunnerSet controller with custom root CA", func(
Spec: v1alpha1.EphemeralRunnerSetSpec{
Replicas: 1,
EphemeralRunnerSpec: v1alpha1.EphemeralRunnerSpec{
GitHubConfigUrl: "https://github.example.com/api/v3",
GitHubConfigURL: "https://github.example.com/api/v3",
GitHubConfigSecret: configSecret.Name,
GitHubServerTLS: &v1alpha1.TLSConfig{
CertificateFrom: &v1alpha1.TLSCertificateSource{
@ -1474,14 +1657,15 @@ var _ = Describe("Test EphemeralRunnerSet controller with custom root CA", func(
Expect(err).NotTo(HaveOccurred(), "failed to create EphemeralRunnerSet")
runnerList := new(v1alpha1.EphemeralRunnerList)
Eventually(func() (int, error) {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return -1, err
}
Eventually(
func() (int, error) {
err := listEphemeralRunnersAndRemoveFinalizers(ctx, k8sClient, runnerList, ephemeralRunnerSet.Namespace)
if err != nil {
return -1, err
}
return len(runnerList.Items), nil
},
return len(runnerList.Items), nil
},
ephemeralRunnerSetTestTimeout,
ephemeralRunnerSetTestInterval,
).Should(BeEquivalentTo(1), "failed to create ephemeral runner")
@ -1504,9 +1688,9 @@ func listEphemeralRunnersAndRemoveFinalizers(ctx context.Context, k8sClient clie
liveItems := make([]v1alpha1.EphemeralRunner, 0)
for _, item := range list.Items {
if !item.DeletionTimestamp.IsZero() {
if err := patch(ctx, k8sClient, &item, func(runner *v1alpha1.EphemeralRunner) {
runner.Finalizers = []string{}
}); err != nil {
original := item.DeepCopy()
item.Finalizers = []string{}
if err := k8sClient.Patch(ctx, &item, client.MergeFrom(original)); err != nil {
return err
}
continue

View File

@ -14,6 +14,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/config"
"sigs.k8s.io/controller-runtime/pkg/manager"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
)
const defaultGitHubToken = "gh_token"
@ -54,6 +55,9 @@ func createNamespace(t ginkgo.GinkgoTInterface, client client.Client) (*corev1.N
Controller: config.Controller{
SkipNameValidation: ptr.To(true),
},
Metrics: metricsserver.Options{
BindAddress: "0",
},
Cache: cache.Options{
DefaultNamespaces: map[string]cache.Config{
ns.Name: {},

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,9 @@ import (
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)
// secret constants
@ -43,6 +46,15 @@ var commonLabelKeys = [...]string{
LabelKeyGitHubRepository,
}
// annotationKeyIntegrityHash is the annotation key under which a hash of
// the important fields of a resource is stored. The hash is used to
// determine if more drastic action should be taken for that resource.
//
// For example, annotations/labels are not something that should modify
// the behavior of a resource, while a change in spec is. Therefore, the
// hash should cover the spec fields (and not labels or annotations) so
// that only behavior-relevant modifications change it.
const annotationKeyIntegrityHash = "actions.github.com/integrity-hash"
const labelValueKubernetesPartOf = "gha-runner-scale-set"
var (
@ -83,9 +95,27 @@ type SecretResolver interface {
type ResourceBuilder struct {
ExcludeLabelPropagationPrefixes []string
SecretResolver
Scheme *runtime.Scheme
}
func (b *ResourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, namespace, image string, imagePullSecrets []corev1.LocalObjectReference) (*v1alpha1.AutoscalingListener, error) {
// setSchemeIfUnset stores scheme on the builder unless a scheme has already
// been configured. An existing scheme is never overwritten.
func (b *ResourceBuilder) setSchemeIfUnset(scheme *runtime.Scheme) {
	if b.Scheme != nil {
		// A scheme is already configured; keep it.
		return
	}
	b.Scheme = scheme
}
// setControllerReference marks owner as the controlling owner of object by
// delegating to ctrl.SetControllerReference with the builder's scheme.
//
// When the builder has no scheme yet, a fallback scheme containing the
// v1alpha1 types is created and cached on the builder. The fallback is only
// stored after registration succeeds, so a failed AddToScheme never leaves a
// partially initialized scheme behind for later calls to pick up.
//
// It returns the error from registering the v1alpha1 types or from
// ctrl.SetControllerReference.
func (b *ResourceBuilder) setControllerReference(owner client.Object, object client.Object) error {
	if b.Scheme == nil {
		scheme := runtime.NewScheme()
		if err := v1alpha1.AddToScheme(scheme); err != nil {
			return err
		}
		// Publish the scheme only once it is fully populated.
		b.Scheme = scheme
	}

	return ctrl.SetControllerReference(owner, object, b.Scheme)
}
func (b *ResourceBuilder) newAutoscalingListener(autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet, ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, namespace, image string, imagePullSecrets []corev1.LocalObjectReference) (*v1alpha1.AutoscalingListener, error) {
runnerScaleSetID, err := strconv.Atoi(autoscalingRunnerSet.Annotations[runnerScaleSetIDAnnotationKey])
if err != nil {
return nil, err
@ -100,6 +130,28 @@ func (b *ResourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1.
effectiveMinRunners = *autoscalingRunnerSet.Spec.MinRunners
}
spec := v1alpha1.AutoscalingListenerSpec{
GitHubConfigURL: autoscalingRunnerSet.Spec.GitHubConfigUrl,
GitHubConfigSecret: autoscalingRunnerSet.Spec.GitHubConfigSecret,
VaultConfig: autoscalingRunnerSet.VaultConfig(),
RunnerScaleSetID: runnerScaleSetID,
AutoscalingRunnerSetNamespace: autoscalingRunnerSet.Namespace,
AutoscalingRunnerSetName: autoscalingRunnerSet.Name,
EphemeralRunnerSetName: ephemeralRunnerSet.Name,
MinRunners: effectiveMinRunners,
MaxRunners: effectiveMaxRunners,
Image: image,
ImagePullSecrets: imagePullSecrets,
Proxy: autoscalingRunnerSet.Spec.Proxy,
GitHubServerTLS: autoscalingRunnerSet.Spec.GitHubServerTLS,
Metrics: autoscalingRunnerSet.Spec.ListenerMetrics,
Template: autoscalingRunnerSet.Spec.ListenerTemplate,
ServiceAccountMetadata: autoscalingRunnerSet.Spec.ListenerServiceAccountMetadata,
RoleMetadata: autoscalingRunnerSet.Spec.ListenerRoleMetadata,
RoleBindingMetadata: autoscalingRunnerSet.Spec.ListenerRoleBindingMetadata,
ConfigSecretMetadata: autoscalingRunnerSet.Spec.ListenerConfigSecretMetadata,
}
labels := b.filterAndMergeLabels(autoscalingRunnerSet.Labels, map[string]string{
LabelKeyGitHubScaleSetNamespace: autoscalingRunnerSet.Namespace,
LabelKeyGitHubScaleSetName: autoscalingRunnerSet.Name,
@ -113,8 +165,7 @@ func (b *ResourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1.
}
annotations := map[string]string{
annotationKeyRunnerSpecHash: autoscalingRunnerSet.ListenerSpecHash(),
annotationKeyValuesHash: autoscalingRunnerSet.Annotations[annotationKeyValuesHash],
annotationKeyIntegrityHash: spec.Hash(),
}
if autoscalingRunnerSet.Spec.AutoscalingListenerMetadata != nil {
@ -129,27 +180,7 @@ func (b *ResourceBuilder) newAutoScalingListener(autoscalingRunnerSet *v1alpha1.
Labels: labels,
Annotations: annotations,
},
Spec: v1alpha1.AutoscalingListenerSpec{
GitHubConfigUrl: autoscalingRunnerSet.Spec.GitHubConfigUrl,
GitHubConfigSecret: autoscalingRunnerSet.Spec.GitHubConfigSecret,
VaultConfig: autoscalingRunnerSet.VaultConfig(),
RunnerScaleSetId: runnerScaleSetID,
AutoscalingRunnerSetNamespace: autoscalingRunnerSet.Namespace,
AutoscalingRunnerSetName: autoscalingRunnerSet.Name,
EphemeralRunnerSetName: ephemeralRunnerSet.Name,
MinRunners: effectiveMinRunners,
MaxRunners: effectiveMaxRunners,
Image: image,
ImagePullSecrets: imagePullSecrets,
Proxy: autoscalingRunnerSet.Spec.Proxy,
GitHubServerTLS: autoscalingRunnerSet.Spec.GitHubServerTLS,
Metrics: autoscalingRunnerSet.Spec.ListenerMetrics,
Template: autoscalingRunnerSet.Spec.ListenerTemplate,
ServiceAccountMetadata: autoscalingRunnerSet.Spec.ListenerServiceAccountMetadata,
RoleMetadata: autoscalingRunnerSet.Spec.ListenerRoleMetadata,
RoleBindingMetadata: autoscalingRunnerSet.Spec.ListenerRoleBindingMetadata,
ConfigSecretMetadata: autoscalingRunnerSet.Spec.ListenerConfigSecretMetadata,
},
Spec: spec,
}
return autoscalingListener, nil
@ -187,12 +218,12 @@ func (b *ResourceBuilder) newScaleSetListenerConfig(autoscalingListener *v1alpha
}
config := ghalistenerconfig.Config{
ConfigureURL: autoscalingListener.Spec.GitHubConfigUrl,
ConfigureURL: autoscalingListener.Spec.GitHubConfigURL,
EphemeralRunnerSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
EphemeralRunnerSetName: autoscalingListener.Spec.EphemeralRunnerSetName,
MaxRunners: autoscalingListener.Spec.MaxRunners,
MinRunners: autoscalingListener.Spec.MinRunners,
RunnerScaleSetID: autoscalingListener.Spec.RunnerScaleSetId,
RunnerScaleSetID: autoscalingListener.Spec.RunnerScaleSetID,
RunnerScaleSetName: autoscalingListener.Spec.AutoscalingRunnerSetName,
ServerRootCA: cert,
LogLevel: scaleSetListenerLogLevel,
@ -230,12 +261,12 @@ func (b *ResourceBuilder) newScaleSetListenerConfig(autoscalingListener *v1alpha
labels = b.filterAndMergeLabels(autoscalingListener.Spec.ConfigSecretMetadata.Labels, nil)
}
var annotations map[string]string
annotations := make(map[string]string)
if autoscalingListener.Spec.ConfigSecretMetadata != nil && len(autoscalingListener.Spec.ConfigSecretMetadata.Annotations) > 0 {
annotations = autoscalingListener.Spec.ConfigSecretMetadata.Annotations
}
return &corev1.Secret{
desiredSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: scaleSetListenerConfigName(autoscalingListener),
Namespace: autoscalingListener.Namespace,
@ -245,17 +276,90 @@ func (b *ResourceBuilder) newScaleSetListenerConfig(autoscalingListener *v1alpha
Data: map[string][]byte{
"config.json": buf.Bytes(),
},
}, nil
}
desiredSecret.Annotations[annotationKeyIntegrityHash] = scaleSetListenerConfigIntegrityHash(desiredSecret)
if err := b.setControllerReference(autoscalingListener, desiredSecret); err != nil {
return nil, fmt.Errorf("failed to set controller reference for listener config secret: %w", err)
}
return desiredSecret, nil
}
func (b *ResourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.AutoscalingListener, podConfig *corev1.Secret, serviceAccount *corev1.ServiceAccount, metricsConfig *listenerMetricsServerConfig, envs ...corev1.EnvVar) (*corev1.Pod, error) {
listenerEnv := []corev1.EnvVar{
// scaleSetListenerConfigIntegrityHash returns the integrity hash of the
// listener config secret. Only secret.Data is fed into the hash; the
// secret's metadata (name, labels, annotations) is not part of it.
func scaleSetListenerConfigIntegrityHash(secret *corev1.Secret) string {
// NOTE(review): the hash may depend on this type's name and field layout
// (depends on hash.ComputeTemplateHash, not visible here) — confirm before
// renaming or reordering fields.
type data struct {
Data map[string][]byte `json:"data,omitempty"`
}
d := data{
Data: secret.Data,
}
return hash.ComputeTemplateHash(&d)
}
func (b *ResourceBuilder) newScaleSetListenerPod(
autoscalingListener *v1alpha1.AutoscalingListener,
podConfig *corev1.Secret,
serviceAccount *corev1.ServiceAccount,
role *rbacv1.Role,
roleBinding *rbacv1.RoleBinding,
metricsConfig *listenerMetricsServerConfig,
) (*corev1.Pod, error) {
envs := []corev1.EnvVar{
{
Name: "LISTENER_CONFIG_PATH",
Value: "/etc/gha-listener/config.json",
},
}
listenerEnv = append(listenerEnv, envs...)
if autoscalingListener.Spec.Proxy != nil {
httpURL := corev1.EnvVar{
Name: "http_proxy",
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: proxyListenerSecretName(autoscalingListener),
},
Key: "http_proxy",
},
},
}
if autoscalingListener.Spec.Proxy.HTTP != nil {
envs = append(envs, httpURL)
}
httpsURL := corev1.EnvVar{
Name: "https_proxy",
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: proxyListenerSecretName(autoscalingListener),
},
Key: "https_proxy",
},
},
}
if autoscalingListener.Spec.Proxy.HTTPS != nil {
envs = append(envs, httpsURL)
}
noProxy := corev1.EnvVar{
Name: "no_proxy",
ValueFrom: &corev1.EnvVarSource{
SecretKeyRef: &corev1.SecretKeySelector{
LocalObjectReference: corev1.LocalObjectReference{
Name: proxyListenerSecretName(autoscalingListener),
},
Key: "no_proxy",
},
},
}
if len(autoscalingListener.Spec.Proxy.NoProxy) > 0 {
envs = append(envs, noProxy)
}
}
var ports []corev1.ContainerPort
if metricsConfig != nil && len(metricsConfig.addr) != 0 {
@ -276,7 +380,7 @@ func (b *ResourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
{
Name: autoscalingListenerContainerName,
Image: autoscalingListener.Spec.Image,
Env: listenerEnv,
Env: envs,
Command: []string{
scaleSetListenerEntrypoint,
},
@ -314,23 +418,28 @@ func (b *ResourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
APIVersion: "v1",
},
ObjectMeta: metav1.ObjectMeta{
Name: autoscalingListener.Name,
Namespace: autoscalingListener.Namespace,
Labels: labels,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: v1alpha1.GroupVersion.String(),
Kind: "AutoscalingListener",
UID: autoscalingListener.GetUID(),
Name: autoscalingListener.GetName(),
Controller: new(true),
BlockOwnerDeletion: new(true),
},
},
Name: autoscalingListener.Name,
Namespace: autoscalingListener.Namespace,
Labels: labels,
Annotations: make(map[string]string),
},
Spec: podSpec,
}
newRunnerScaleSetListenerPod.Annotations[annotationKeyIntegrityHash] = scaleSetListenerPodIntegrity(
newRunnerScaleSetListenerPod,
autoscalingListener,
podConfig,
serviceAccount,
role,
roleBinding,
metricsConfig,
)
if err := b.setControllerReference(autoscalingListener, newRunnerScaleSetListenerPod); err != nil {
return nil, fmt.Errorf("failed to set controller reference for listener pod: %w", err)
}
if autoscalingListener.Spec.Template != nil {
mergeListenerPodWithTemplate(newRunnerScaleSetListenerPod, autoscalingListener.Spec.Template)
}
@ -338,6 +447,38 @@ func (b *ResourceBuilder) newScaleSetListenerPod(autoscalingListener *v1alpha1.A
return newRunnerScaleSetListenerPod, nil
}
// scaleSetListenerPodIntegrity returns the integrity hash of the listener
// pod. The hash combines the pod's spec with the integrity-hash annotations
// read from the AutoscalingListener, the config secret, the service account,
// the role and the role binding, plus the metrics server configuration.
//
// A resource that does not carry the integrity-hash annotation contributes
// an empty string, because the value is read with a plain map lookup.
//
// NOTE(review): pod, autoscalingListener, podConfig, serviceAccount, role and
// roleBinding are dereferenced without nil checks, so callers must pass
// non-nil values; metricsConfig is only stored and may be nil.
func scaleSetListenerPodIntegrity(
pod *corev1.Pod,
autoscalingListener *v1alpha1.AutoscalingListener,
podConfig *corev1.Secret,
serviceAccount *corev1.ServiceAccount,
role *rbacv1.Role,
roleBinding *rbacv1.RoleBinding,
metricsConfig *listenerMetricsServerConfig,
) string {
// NOTE(review): the hash may depend on this type's name and field layout
// (depends on hash.ComputeTemplateHash, not visible here) — confirm before
// renaming or reordering fields.
type data struct {
ListenerPodSpec *corev1.PodSpec `json:"listenerPodSpec,omitempty"`
AutoscalingListenerIntegrityHash string `json:"autoscalingListenerIntegrityHash"`
ConfigSecretIntegrityHash string `json:"configSecretIntegrityHash"`
ServiceAccountIntegrityHash string `json:"serviceAccountIntegrityHash"`
RoleIntegrityHash string `json:"roleIntegrityHash"`
RoleBindingIntegrityHash string `json:"roleBindingIntegrityHash"`
MetricsConfig *listenerMetricsServerConfig `json:"metricsConfig,omitempty"`
}
d := data{
ListenerPodSpec: &pod.Spec,
AutoscalingListenerIntegrityHash: autoscalingListener.Annotations[annotationKeyIntegrityHash],
ConfigSecretIntegrityHash: podConfig.Annotations[annotationKeyIntegrityHash],
ServiceAccountIntegrityHash: serviceAccount.Annotations[annotationKeyIntegrityHash],
RoleIntegrityHash: role.Annotations[annotationKeyIntegrityHash],
RoleBindingIntegrityHash: roleBinding.Annotations[annotationKeyIntegrityHash],
MetricsConfig: metricsConfig,
}
return hash.ComputeTemplateHash(&d)
}
func mergeListenerPodWithTemplate(pod *corev1.Pod, tmpl *corev1.PodTemplateSpec) {
if pod.Annotations == nil {
pod.Annotations = make(map[string]string)
@ -455,7 +596,7 @@ func mergeListenerContainer(base, from *corev1.Container) {
base.TTY = from.TTY
}
func (b *ResourceBuilder) newScaleSetListenerServiceAccount(autoscalingListener *v1alpha1.AutoscalingListener) *corev1.ServiceAccount {
func (b *ResourceBuilder) newScaleSetListenerServiceAccount(autoscalingListener *v1alpha1.AutoscalingListener) (*corev1.ServiceAccount, error) {
base := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: autoscalingListener.Name,
@ -464,6 +605,7 @@ func (b *ResourceBuilder) newScaleSetListenerServiceAccount(autoscalingListener
LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
LabelKeyGitHubScaleSetName: autoscalingListener.Spec.AutoscalingRunnerSetName,
}),
Annotations: make(map[string]string),
},
}
@ -472,22 +614,40 @@ func (b *ResourceBuilder) newScaleSetListenerServiceAccount(autoscalingListener
base.Annotations = b.mergeAnnotations(autoscalingListener.Spec.ServiceAccountMetadata.Annotations, base.Annotations)
}
return base
base.Annotations[annotationKeyIntegrityHash] = scaleSetListenerServiceAccountIntegrityHash(base)
if err := b.setControllerReference(autoscalingListener, base); err != nil {
return nil, fmt.Errorf("failed to set controller reference for listener service account: %w", err)
}
return base, nil
}
// scaleSetListenerServiceAccountIntegrityHash returns the integrity hash of
// the listener service account. The hash covers the account's Secrets,
// ImagePullSecrets and AutomountServiceAccountToken fields; metadata such as
// labels and annotations is not part of it.
func scaleSetListenerServiceAccountIntegrityHash(sa *corev1.ServiceAccount) string {
// NOTE(review): the hash may depend on this type's name and field layout
// (depends on hash.ComputeTemplateHash, not visible here) — confirm before
// renaming or reordering fields.
type data struct {
Secrets []corev1.ObjectReference `json:"secrets"`
ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets"`
AutomountServiceAccountToken *bool `json:"automountServiceAccountToken"`
}
d := data{
Secrets: sa.Secrets,
ImagePullSecrets: sa.ImagePullSecrets,
AutomountServiceAccountToken: sa.AutomountServiceAccountToken,
}
return hash.ComputeTemplateHash(&d)
}
func (b *ResourceBuilder) newScaleSetListenerRole(autoscalingListener *v1alpha1.AutoscalingListener) *rbacv1.Role {
rules := rulesForListenerRole([]string{autoscalingListener.Spec.EphemeralRunnerSetName})
rulesHash := hash.ComputeTemplateHash(&rules)
labels := b.filterAndMergeLabels(autoscalingListener.Labels, map[string]string{
LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
LabelKeyGitHubScaleSetName: autoscalingListener.Spec.AutoscalingRunnerSetName,
labelKeyListenerNamespace: autoscalingListener.Namespace,
labelKeyListenerName: autoscalingListener.Name,
"role-policy-rules-hash": rulesHash,
})
var annotations map[string]string
annotations := make(map[string]string)
if autoscalingListener.Spec.RoleMetadata != nil {
labels = b.filterAndMergeLabels(autoscalingListener.Spec.RoleMetadata.Labels, labels)
annotations = b.mergeAnnotations(autoscalingListener.Spec.RoleMetadata.Annotations, nil)
@ -500,18 +660,31 @@ func (b *ResourceBuilder) newScaleSetListenerRole(autoscalingListener *v1alpha1.
Labels: labels,
Annotations: annotations,
},
Rules: rules,
Rules: rulesForListenerRole([]string{autoscalingListener.Spec.EphemeralRunnerSetName}),
}
newRole.Annotations[annotationKeyIntegrityHash] = scaleSetRoleIntegrityHash(newRole)
return newRole
}
// scaleSetRoleIntegrityHash returns the hash of the role's policy rules.
// Only role.Rules is hashed; the role's metadata does not influence the
// result.
//
// NOTE(review): the result is stored in an annotation, so it presumably has
// to stay stable across releases; confirm how hash.ComputeTemplateHash
// serializes its argument before renaming the local type or its fields.
func scaleSetRoleIntegrityHash(role *rbacv1.Role) string {
// data is the exact payload that gets hashed.
type data struct {
Rules []rbacv1.PolicyRule `json:"rules"`
}
d := data{
Rules: role.Rules,
}
return hash.ComputeTemplateHash(&d)
}
func (b *ResourceBuilder) newScaleSetListenerRoleBinding(autoscalingListener *v1alpha1.AutoscalingListener, listenerRole *rbacv1.Role, serviceAccount *corev1.ServiceAccount) *rbacv1.RoleBinding {
roleRef := rbacv1.RoleRef{
Kind: "Role",
Name: listenerRole.Name,
}
roleRefHash := hash.ComputeTemplateHash(&roleRef)
subjects := []rbacv1.Subject{
{
@ -520,19 +693,15 @@ func (b *ResourceBuilder) newScaleSetListenerRoleBinding(autoscalingListener *v1
Name: serviceAccount.Name,
},
}
subjectHash := hash.ComputeTemplateHash(&subjects)
labels := b.filterAndMergeLabels(autoscalingListener.Labels, map[string]string{
LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
LabelKeyGitHubScaleSetName: autoscalingListener.Spec.AutoscalingRunnerSetName,
labelKeyListenerNamespace: autoscalingListener.Namespace,
labelKeyListenerName: autoscalingListener.Name,
"role-binding-role-ref-hash": roleRefHash,
"role-binding-subject-hash": subjectHash,
})
var annotations map[string]string
annotations := make(map[string]string)
if autoscalingListener.Spec.RoleBindingMetadata != nil {
labels = b.filterAndMergeLabels(autoscalingListener.Spec.RoleBindingMetadata.Labels, labels)
annotations = autoscalingListener.Spec.RoleBindingMetadata.Annotations
@ -549,15 +718,45 @@ func (b *ResourceBuilder) newScaleSetListenerRoleBinding(autoscalingListener *v1
Subjects: subjects,
}
newRoleBinding.Annotations[annotationKeyIntegrityHash] = scaleSetListenerRoleBindingIntegrityHash(newRoleBinding)
return newRoleBinding
}
// scaleSetListenerRoleBindingIntegrityHash returns the hash of the role
// binding's RoleRef and Subjects. The binding's metadata is not part of the
// hashed payload.
//
// NOTE(review): the result is stored in an annotation, so it presumably has
// to stay stable across releases; confirm how hash.ComputeTemplateHash
// serializes its argument before renaming the local type or its fields.
func scaleSetListenerRoleBindingIntegrityHash(rb *rbacv1.RoleBinding) string {
// data is the exact payload that gets hashed.
type data struct {
RoleRef rbacv1.RoleRef `json:"roleRef"`
Subjects []rbacv1.Subject `json:"subjects"`
}
d := data{
RoleRef: rb.RoleRef,
Subjects: rb.Subjects,
}
return hash.ComputeTemplateHash(&d)
}
func (b *ResourceBuilder) newEphemeralRunnerSet(autoscalingRunnerSet *v1alpha1.AutoscalingRunnerSet) (*v1alpha1.EphemeralRunnerSet, error) {
runnerScaleSetID, err := strconv.Atoi(autoscalingRunnerSet.Annotations[runnerScaleSetIDAnnotationKey])
if err != nil {
return nil, err
}
runnerSpecHash := autoscalingRunnerSet.RunnerSetSpecHash()
spec := v1alpha1.EphemeralRunnerSetSpec{
Replicas: 0,
EphemeralRunnerSpec: v1alpha1.EphemeralRunnerSpec{
RunnerScaleSetID: runnerScaleSetID,
GitHubConfigURL: autoscalingRunnerSet.Spec.GitHubConfigUrl,
GitHubConfigSecret: autoscalingRunnerSet.Spec.GitHubConfigSecret,
Proxy: autoscalingRunnerSet.Spec.Proxy,
GitHubServerTLS: autoscalingRunnerSet.Spec.GitHubServerTLS,
PodTemplateSpec: autoscalingRunnerSet.Spec.Template,
VaultConfig: autoscalingRunnerSet.VaultConfig(),
EphemeralRunnerConfigSecretMetadata: autoscalingRunnerSet.Spec.EphemeralRunnerConfigSecretMetadata,
},
EphemeralRunnerMetadata: autoscalingRunnerSet.Spec.EphemeralRunnerMetadata,
}
labels := b.filterAndMergeLabels(autoscalingRunnerSet.Labels, map[string]string{
LabelKeyKubernetesPartOf: labelValueKubernetesPartOf,
@ -574,7 +773,6 @@ func (b *ResourceBuilder) newEphemeralRunnerSet(autoscalingRunnerSet *v1alpha1.A
annotations := map[string]string{
AnnotationKeyGitHubRunnerGroupName: autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerGroupName],
AnnotationKeyGitHubRunnerScaleSetName: autoscalingRunnerSet.Annotations[AnnotationKeyGitHubRunnerScaleSetName],
annotationKeyRunnerSpecHash: runnerSpecHash,
}
if autoscalingRunnerSet.Spec.EphemeralRunnerSetMetadata != nil {
@ -585,41 +783,70 @@ func (b *ResourceBuilder) newEphemeralRunnerSet(autoscalingRunnerSet *v1alpha1.A
newEphemeralRunnerSet := &v1alpha1.EphemeralRunnerSet{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{
GenerateName: autoscalingRunnerSet.Name + "-",
Namespace: autoscalingRunnerSet.Namespace,
Labels: labels,
Annotations: annotations,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: v1alpha1.GroupVersion.String(),
Kind: "AutoscalingRunnerSet",
UID: autoscalingRunnerSet.GetUID(),
Name: autoscalingRunnerSet.GetName(),
Controller: new(true),
BlockOwnerDeletion: new(true),
},
},
},
Spec: v1alpha1.EphemeralRunnerSetSpec{
Replicas: 0,
EphemeralRunnerSpec: v1alpha1.EphemeralRunnerSpec{
RunnerScaleSetID: runnerScaleSetID,
GitHubConfigUrl: autoscalingRunnerSet.Spec.GitHubConfigUrl,
GitHubConfigSecret: autoscalingRunnerSet.Spec.GitHubConfigSecret,
Proxy: autoscalingRunnerSet.Spec.Proxy,
GitHubServerTLS: autoscalingRunnerSet.Spec.GitHubServerTLS,
PodTemplateSpec: autoscalingRunnerSet.Spec.Template,
VaultConfig: autoscalingRunnerSet.VaultConfig(),
EphemeralRunnerConfigSecretMetadata: autoscalingRunnerSet.Spec.EphemeralRunnerConfigSecretMetadata,
},
EphemeralRunnerMetadata: autoscalingRunnerSet.Spec.EphemeralRunnerMetadata,
Name: autoscalingRunnerSet.Name,
Namespace: autoscalingRunnerSet.Namespace,
Labels: labels,
Annotations: annotations,
},
Spec: spec,
}
newEphemeralRunnerSet.Annotations[annotationKeyIntegrityHash] = ephemeralRunnerSetIntegrityHash(newEphemeralRunnerSet)
if err := b.setControllerReference(autoscalingRunnerSet, newEphemeralRunnerSet); err != nil {
return nil, fmt.Errorf("failed to set controller reference for ephemeral runner set: %w", err)
}
return newEphemeralRunnerSet, nil
}
func (b *ResourceBuilder) newEphemeralRunner(ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet) *v1alpha1.EphemeralRunner {
// ephemeralRunnerSetIntegrityHash returns the hash of the runner set's
// Spec.EphemeralRunnerSpec. Other parts of the spec (for example Replicas)
// and the object's metadata are not included in the hashed payload.
//
// NOTE(review): the result is stored in an annotation, so it presumably has
// to stay stable across releases; confirm how hash.ComputeTemplateHash
// serializes its argument before renaming the local type or its fields.
func ephemeralRunnerSetIntegrityHash(ers *v1alpha1.EphemeralRunnerSet) string {
// data is the exact payload that gets hashed.
type data struct {
EphemeralRunnerSpec v1alpha1.EphemeralRunnerSpec `json:"ephemeralRunnerSpec"`
}
d := data{
EphemeralRunnerSpec: ers.Spec.EphemeralRunnerSpec,
}
return hash.ComputeTemplateHash(&d)
}
// newAutoscalingListenerProxySecret builds the proxy secret for the given
// listener. The secret lives in the listener's namespace, carries the scale
// set name/namespace labels taken from the listener spec, is stamped with an
// integrity-hash annotation computed from the secret, and has the listener
// set as its controller owner.
//
// It returns an error when the controller reference cannot be set.
func (b *ResourceBuilder) newAutoscalingListenerProxySecret(autoscalingListener *v1alpha1.AutoscalingListener, data map[string][]byte) (*corev1.Secret, error) {
	meta := metav1.ObjectMeta{
		Name:      proxyListenerSecretName(autoscalingListener),
		Namespace: autoscalingListener.Namespace,
		Labels: map[string]string{
			LabelKeyGitHubScaleSetNamespace: autoscalingListener.Spec.AutoscalingRunnerSetNamespace,
			LabelKeyGitHubScaleSetName:      autoscalingListener.Spec.AutoscalingRunnerSetName,
		},
		// Sized for the single integrity-hash entry written below.
		Annotations: make(map[string]string, 1),
	}

	secret := &corev1.Secret{
		ObjectMeta: meta,
		Data:       data,
	}
	secret.Annotations[annotationKeyIntegrityHash] = autoscalingListenerProxySecretIntegrityHash(secret)

	err := b.setControllerReference(autoscalingListener, secret)
	if err != nil {
		return nil, fmt.Errorf("failed to set controller reference for listener proxy secret: %w", err)
	}

	return secret, nil
}
// autoscalingListenerProxySecretIntegrityHash returns the hash of the
// secret's Data map. Labels, annotations and other metadata are not part of
// the hashed payload.
//
// NOTE(review): the result is stored in an annotation, so it presumably has
// to stay stable across releases; confirm how hash.ComputeTemplateHash
// serializes its argument before renaming the local type or its fields.
func autoscalingListenerProxySecretIntegrityHash(secret *corev1.Secret) string {
// data is the exact payload that gets hashed.
type data struct {
Data map[string][]byte `json:"data"`
}
d := data{
Data: secret.Data,
}
return hash.ComputeTemplateHash(&d)
}
func (b *ResourceBuilder) newEphemeralRunner(ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet) (*v1alpha1.EphemeralRunner, error) {
labels := make(map[string]string, len(ephemeralRunnerSet.Labels))
maps.Copy(labels, ephemeralRunnerSet.Labels)
labels[LabelKeyKubernetesComponent] = "runner"
@ -633,7 +860,7 @@ func (b *ResourceBuilder) newEphemeralRunner(ephemeralRunnerSet *v1alpha1.Epheme
annotations = b.mergeAnnotations(ephemeralRunnerSet.Spec.EphemeralRunnerMetadata.Annotations, annotations)
}
return &v1alpha1.EphemeralRunner{
ephemeralRunner := &v1alpha1.EphemeralRunner{
ObjectMeta: metav1.ObjectMeta{
GenerateName: ephemeralRunnerSet.Name + "-runner-",
Namespace: ephemeralRunnerSet.Namespace,
@ -643,22 +870,17 @@ func (b *ResourceBuilder) newEphemeralRunner(ephemeralRunnerSet *v1alpha1.Epheme
ephemeralRunnerFinalizerName,
ephemeralRunnerActionsFinalizerName,
},
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: v1alpha1.GroupVersion.String(),
Kind: "EphemeralRunnerSet",
UID: ephemeralRunnerSet.GetUID(),
Name: ephemeralRunnerSet.GetName(),
Controller: new(true),
BlockOwnerDeletion: new(true),
},
},
},
Spec: ephemeralRunnerSet.Spec.EphemeralRunnerSpec,
}
if err := b.setControllerReference(ephemeralRunnerSet, ephemeralRunner); err != nil {
return nil, fmt.Errorf("failed to set controller reference for ephemeral runner: %w", err)
}
return ephemeralRunner, nil
}
func (b *ResourceBuilder) newEphemeralRunnerPod(runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, envs ...corev1.EnvVar) *corev1.Pod {
func (b *ResourceBuilder) newEphemeralRunnerPod(runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, envs ...corev1.EnvVar) (*corev1.Pod, error) {
var newPod corev1.Pod
annotations := make(map[string]string, len(runner.Annotations)+len(runner.Spec.Annotations))
@ -681,16 +903,6 @@ func (b *ResourceBuilder) newEphemeralRunnerPod(runner *v1alpha1.EphemeralRunner
Namespace: runner.Namespace,
Labels: labels,
Annotations: annotations,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: v1alpha1.GroupVersion.String(),
Kind: "EphemeralRunner",
UID: runner.GetUID(),
Name: runner.GetName(),
Controller: new(true),
BlockOwnerDeletion: new(true),
},
},
}
newPod.ObjectMeta = objectMeta
@ -727,10 +939,14 @@ func (b *ResourceBuilder) newEphemeralRunnerPod(runner *v1alpha1.EphemeralRunner
newPod.Spec.Containers = append(newPod.Spec.Containers, c)
}
return &newPod
if err := b.setControllerReference(runner, &newPod); err != nil {
return nil, fmt.Errorf("failed to set controller reference for ephemeral runner pod: %w", err)
}
return &newPod, nil
}
func (b *ResourceBuilder) newEphemeralRunnerJitSecret(ephemeralRunner *v1alpha1.EphemeralRunner, jitConfig *scaleset.RunnerScaleSetJitRunnerConfig) *corev1.Secret {
func (b *ResourceBuilder) newEphemeralRunnerJitSecret(ephemeralRunner *v1alpha1.EphemeralRunner, jitConfig *scaleset.RunnerScaleSetJitRunnerConfig) (*corev1.Secret, error) {
var (
labels map[string]string
annotations map[string]string
@ -741,7 +957,7 @@ func (b *ResourceBuilder) newEphemeralRunnerJitSecret(ephemeralRunner *v1alpha1.
annotations = ephemeralRunner.Spec.EphemeralRunnerConfigSecretMetadata.Annotations
}
return &corev1.Secret{
jitSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: ephemeralRunner.Name,
Namespace: ephemeralRunner.Namespace,
@ -755,10 +971,50 @@ func (b *ResourceBuilder) newEphemeralRunnerJitSecret(ephemeralRunner *v1alpha1.
"scaleSetId": []byte(strconv.Itoa(jitConfig.Runner.RunnerScaleSetID)),
},
}
if err := b.setControllerReference(ephemeralRunner, jitSecret); err != nil {
return nil, fmt.Errorf("failed to set controller reference for ephemeral runner jit secret: %w", err)
}
return jitSecret, nil
}
// newEphemeralRunnerSetProxySecret builds the proxy secret for the given
// ephemeral runner set. The secret lives in the runner set's namespace,
// copies the scale set name/namespace labels from the runner set, is stamped
// with an integrity-hash annotation computed from the secret, and has the
// runner set as its controller owner.
//
// It returns an error when the controller reference cannot be set.
func (b *ResourceBuilder) newEphemeralRunnerSetProxySecret(ephemeralRunnerSet *v1alpha1.EphemeralRunnerSet, data map[string][]byte) (*corev1.Secret, error) {
	meta := metav1.ObjectMeta{
		Name:      proxyEphemeralRunnerSetSecretName(ephemeralRunnerSet),
		Namespace: ephemeralRunnerSet.Namespace,
		Labels: map[string]string{
			LabelKeyGitHubScaleSetName:      ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetName],
			LabelKeyGitHubScaleSetNamespace: ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetNamespace],
		},
		// Sized for the single integrity-hash entry written below.
		Annotations: make(map[string]string, 1),
	}

	secret := &corev1.Secret{
		ObjectMeta: meta,
		Data:       data,
	}
	secret.Annotations[annotationKeyIntegrityHash] = ephemeralRunnerSetProxySecretZIdentityHash(secret)

	err := b.setControllerReference(ephemeralRunnerSet, secret)
	if err != nil {
		return nil, fmt.Errorf("failed to set controller reference for ephemeral runner set proxy secret: %w", err)
	}

	return secret, nil
}
// ephemeralRunnerSetProxySecretZIdentityHash returns the hash of the
// secret's Data map. Labels, annotations and other metadata are not part of
// the hashed payload.
//
// NOTE(review): the "ZIdentityHash" suffix differs from the "IntegrityHash"
// suffix used by the sibling helpers; it looks like a typo, but renaming
// requires updating every caller in the same change.
//
// NOTE(review): the result is stored in an annotation, so it presumably has
// to stay stable across releases; confirm how hash.ComputeTemplateHash
// serializes its argument before renaming the local type or its fields.
func ephemeralRunnerSetProxySecretZIdentityHash(secret *corev1.Secret) string {
// data is the exact payload that gets hashed.
type data struct {
Data map[string][]byte `json:"data"`
}
d := data{
Data: secret.Data,
}
return hash.ComputeTemplateHash(&d)
}
func scaleSetListenerConfigName(autoscalingListener *v1alpha1.AutoscalingListener) string {
return fmt.Sprintf("%s-config", autoscalingListener.Name)
return autoscalingListener.Name + "-config"
}
func hashSuffix(namespace, runnerGroup, configURL string) string {

View File

@ -113,7 +113,7 @@ func TestMetadataPropagation(t *testing.T) {
assert.Equal(t, labelValueKubernetesPartOf, ephemeralRunnerSet.Labels[LabelKeyKubernetesPartOf])
assert.Equal(t, "runner-set", ephemeralRunnerSet.Labels[LabelKeyKubernetesComponent])
assert.Equal(t, autoscalingRunnerSet.Labels[LabelKeyKubernetesVersion], ephemeralRunnerSet.Labels[LabelKeyKubernetesVersion])
assert.NotEmpty(t, ephemeralRunnerSet.Annotations[annotationKeyRunnerSpecHash])
assert.NotEmpty(t, ephemeralRunnerSet.Annotations[annotationKeyIntegrityHash])
assert.Equal(t, autoscalingRunnerSet.Name, ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetName])
assert.Equal(t, autoscalingRunnerSet.Namespace, ephemeralRunnerSet.Labels[LabelKeyGitHubScaleSetNamespace])
assert.Equal(t, "", ephemeralRunnerSet.Labels[LabelKeyGitHubEnterprise])
@ -125,12 +125,12 @@ func TestMetadataPropagation(t *testing.T) {
assert.Equal(t, "ephemeral-runner-set-label", ephemeralRunnerSet.Labels["test.com/ephemeral-runner-set-label"])
assert.Equal(t, "ephemeral-runner-set-annotation", ephemeralRunnerSet.Annotations["test.com/ephemeral-runner-set-annotation"])
listener, err := b.newAutoScalingListener(&autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
listener, err := b.newAutoscalingListener(&autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
require.NoError(t, err)
assert.Equal(t, labelValueKubernetesPartOf, listener.Labels[LabelKeyKubernetesPartOf])
assert.Equal(t, "runner-scale-set-listener", listener.Labels[LabelKeyKubernetesComponent])
assert.Equal(t, autoscalingRunnerSet.Labels[LabelKeyKubernetesVersion], listener.Labels[LabelKeyKubernetesVersion])
assert.NotEmpty(t, ephemeralRunnerSet.Annotations[annotationKeyRunnerSpecHash])
assert.NotEmpty(t, ephemeralRunnerSet.Annotations[annotationKeyIntegrityHash])
assert.Equal(t, autoscalingRunnerSet.Name, listener.Labels[LabelKeyGitHubScaleSetName])
assert.Equal(t, autoscalingRunnerSet.Namespace, listener.Labels[LabelKeyGitHubScaleSetNamespace])
assert.Equal(t, "", listener.Labels[LabelKeyGitHubEnterprise])
@ -145,7 +145,8 @@ func TestMetadataPropagation(t *testing.T) {
assert.NotContains(t, listener.Labels, "directly.excluded.org/label")
assert.Equal(t, "not-excluded-value", listener.Labels["directly.excluded.org/arbitrary"])
listenerServiceAccount := b.newScaleSetListenerServiceAccount(listener)
listenerServiceAccount, err := b.newScaleSetListenerServiceAccount(listener)
require.NoError(t, err)
assert.Equal(t, "listener-service-account-label", listenerServiceAccount.Labels["test.com/listener-service-account-label"])
assert.Equal(t, "listener-service-account-annotation", listenerServiceAccount.Annotations["test.com/listener-service-account-annotation"])
@ -157,11 +158,18 @@ func TestMetadataPropagation(t *testing.T) {
assert.Equal(t, "listener-role-binding-label", listenerRoleBinding.Labels["test.com/listener-role-binding-label"])
assert.Equal(t, "listener-role-binding-annotation", listenerRoleBinding.Annotations["test.com/listener-role-binding-annotation"])
listenerPod, err := b.newScaleSetListenerPod(listener, &corev1.Secret{}, listenerServiceAccount, nil)
listenerPod, err := b.newScaleSetListenerPod(
listener,
&corev1.Secret{},
listenerServiceAccount,
listenerRole,
listenerRoleBinding,
nil,
)
require.NoError(t, err)
assert.Equal(t, listenerPod.Labels, listener.Labels)
ephemeralRunner := b.newEphemeralRunner(ephemeralRunnerSet)
ephemeralRunner, err := b.newEphemeralRunner(ephemeralRunnerSet)
require.NoError(t, err)
for _, key := range commonLabelKeys {
@ -176,7 +184,7 @@ func TestMetadataPropagation(t *testing.T) {
assert.Equal(t, "ephemeral-runner-label", ephemeralRunner.Labels["test.com/ephemeral-runner-label"])
assert.Equal(t, "ephemeral-runner-annotation", ephemeralRunner.Annotations["test.com/ephemeral-runner-annotation"])
runnerSecret := b.newEphemeralRunnerJitSecret(ephemeralRunner, &scaleset.RunnerScaleSetJitRunnerConfig{
runnerSecret, err := b.newEphemeralRunnerJitSecret(ephemeralRunner, &scaleset.RunnerScaleSetJitRunnerConfig{
Runner: &scaleset.RunnerReference{
ID: 1,
Name: "test",
@ -184,15 +192,44 @@ func TestMetadataPropagation(t *testing.T) {
},
EncodedJITConfig: "",
})
require.NoError(t, err)
assert.Equal(t, "ephemeral-runner-config-secret-label", runnerSecret.Labels["test.com/ephemeral-runner-config-secret-label"])
assert.Equal(t, "ephemeral-runner-config-secret-annotation", runnerSecret.Annotations["test.com/ephemeral-runner-config-secret-annotation"])
pod := b.newEphemeralRunnerPod(ephemeralRunner, runnerSecret)
pod, err := b.newEphemeralRunnerPod(ephemeralRunner, runnerSecret)
require.NoError(t, err)
for key := range ephemeralRunner.Labels {
assert.Equal(t, ephemeralRunner.Labels[key], pod.Labels[key])
}
}
// TestEphemeralRunnerSetProxySecretZIdentityHash verifies that the proxy
// secret built for an ephemeral runner set is annotated with the hash of its
// own data, and that changing the data yields a different hash.
func TestEphemeralRunnerSetProxySecretZIdentityHash(t *testing.T) {
	const (
		scaleSetName      = "test-scale-set"
		scaleSetNamespace = "test-ns"
	)

	runnerSet := &v1alpha1.EphemeralRunnerSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      scaleSetName,
			Namespace: scaleSetNamespace,
			Labels: map[string]string{
				LabelKeyGitHubScaleSetName:      scaleSetName,
				LabelKeyGitHubScaleSetNamespace: scaleSetNamespace,
			},
		},
	}
	proxyData := map[string][]byte{
		"http_proxy": []byte("http://proxy.example.com"),
	}

	builder := ResourceBuilder{}
	secret, err := builder.newEphemeralRunnerSetProxySecret(runnerSet, proxyData)
	require.NoError(t, err)

	// The stored annotation must match a fresh hash of the same secret.
	storedHash := secret.Annotations[annotationKeyIntegrityHash]
	assert.NotEmpty(t, storedHash)
	assert.Equal(t, ephemeralRunnerSetProxySecretZIdentityHash(secret), storedHash)

	// Mutating the data on a copy must produce a different hash.
	mutated := secret.DeepCopy()
	mutated.Data["http_proxy"] = []byte("http://updated-proxy.example.com")
	assert.NotEqual(t, storedHash, ephemeralRunnerSetProxySecretZIdentityHash(mutated))
}
func TestGitHubURLTrimLabelValues(t *testing.T) {
enterprise := strings.Repeat("a", 64)
organization := strings.Repeat("b", 64)
@ -229,7 +266,7 @@ func TestGitHubURLTrimLabelValues(t *testing.T) {
assert.True(t, strings.HasSuffix(ephemeralRunnerSet.Labels[LabelKeyGitHubOrganization], trimLabelVauleSuffix))
assert.True(t, strings.HasSuffix(ephemeralRunnerSet.Labels[LabelKeyGitHubRepository], trimLabelVauleSuffix))
listener, err := b.newAutoScalingListener(autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
listener, err := b.newAutoscalingListener(autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
require.NoError(t, err)
assert.Len(t, listener.Labels[LabelKeyGitHubEnterprise], 0)
assert.Len(t, listener.Labels[LabelKeyGitHubOrganization], 63)
@ -252,7 +289,7 @@ func TestGitHubURLTrimLabelValues(t *testing.T) {
assert.Len(t, ephemeralRunnerSet.Labels[LabelKeyGitHubOrganization], 0)
assert.Len(t, ephemeralRunnerSet.Labels[LabelKeyGitHubRepository], 0)
listener, err := b.newAutoScalingListener(autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
listener, err := b.newAutoscalingListener(autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
require.NoError(t, err)
assert.Len(t, listener.Labels[LabelKeyGitHubEnterprise], 63)
assert.True(t, strings.HasSuffix(ephemeralRunnerSet.Labels[LabelKeyGitHubEnterprise], trimLabelVauleSuffix))
@ -276,7 +313,7 @@ func TestOwnershipRelationships(t *testing.T) {
runnerScaleSetIDAnnotationKey: "1",
AnnotationKeyGitHubRunnerGroupName: "test-group",
AnnotationKeyGitHubRunnerScaleSetName: "test-scale-set",
annotationKeyValuesHash: "test-hash",
annotationKeyIntegrityHash: "test-hash",
},
},
Spec: v1alpha1.AutoscalingRunnerSetSpec{
@ -292,13 +329,23 @@ func TestOwnershipRelationships(t *testing.T) {
require.NoError(t, err)
// Create and test Listener Pod ownership
listener, err := b.newAutoScalingListener(&autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
listener, err := b.newAutoscalingListener(&autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
require.NoError(t, err)
listener.UID = "test-listener-uid"
listenerServiceAccount := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{Name: "test-sa"},
}
listenerPod, err := b.newScaleSetListenerPod(listener, &corev1.Secret{}, listenerServiceAccount, nil)
listenerServiceAccount, err := b.newScaleSetListenerServiceAccount(listener)
require.NoError(t, err)
listenerRole := b.newScaleSetListenerRole(listener)
listenerRoleBinding := b.newScaleSetListenerRoleBinding(listener, listenerRole, listenerServiceAccount)
listenerPod, err := b.newScaleSetListenerPod(
listener,
&corev1.Secret{},
listenerServiceAccount,
listenerRole,
listenerRoleBinding,
nil,
)
require.NoError(t, err)
require.Len(t, listenerPod.OwnerReferences, 1, "Listener Pod should have exactly one owner reference")
@ -321,7 +368,8 @@ func TestOwnershipRelationships(t *testing.T) {
assert.Equal(t, true, *ownerRef.BlockOwnerDeletion, "BlockOwnerDeletion flag should be true")
// Create EphemeralRunner
ephemeralRunner := b.newEphemeralRunner(ephemeralRunnerSet)
ephemeralRunner, err := b.newEphemeralRunner(ephemeralRunnerSet)
require.NoError(t, err)
// Test EphemeralRunner ownership
require.Len(t, ephemeralRunner.OwnerReferences, 1, "EphemeralRunner should have exactly one owner reference")
@ -339,7 +387,8 @@ func TestOwnershipRelationships(t *testing.T) {
Name: "test-secret",
},
}
pod := b.newEphemeralRunnerPod(ephemeralRunner, runnerSecret)
pod, err := b.newEphemeralRunnerPod(ephemeralRunner, runnerSecret)
require.NoError(t, err)
// Test EphemeralRunnerPod ownership
require.Len(t, pod.OwnerReferences, 1, "EphemeralRunnerPod should have exactly one owner reference")
@ -376,17 +425,23 @@ func TestListenerPodNodeSelector(t *testing.T) {
ephemeralRunnerSet, err := b.newEphemeralRunnerSet(&autoscalingRunnerSet)
require.NoError(t, err)
listener, err := b.newAutoScalingListener(&autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
listener, err := b.newAutoscalingListener(&autoscalingRunnerSet, ephemeralRunnerSet, autoscalingRunnerSet.Namespace, "test:latest", nil)
require.NoError(t, err)
listenerServiceAccount := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
}
listenerServiceAccount, err := b.newScaleSetListenerServiceAccount(listener)
require.NoError(t, err)
listenerRole := b.newScaleSetListenerRole(listener)
listenerRoleBinding := b.newScaleSetListenerRoleBinding(listener, listenerRole, listenerServiceAccount)
t.Run("default listener pod has linux nodeSelector", func(t *testing.T) {
pod, err := b.newScaleSetListenerPod(listener, &corev1.Secret{}, listenerServiceAccount, nil)
pod, err := b.newScaleSetListenerPod(
listener,
&corev1.Secret{},
listenerServiceAccount,
listenerRole,
listenerRoleBinding,
nil,
)
require.NoError(t, err)
require.NotNil(t, pod.Spec.NodeSelector)
assert.Equal(t, "linux", pod.Spec.NodeSelector[LabelKeyKubernetesOS],
@ -397,7 +452,14 @@ func TestListenerPodNodeSelector(t *testing.T) {
listenerNoTemplate := listener.DeepCopy()
listenerNoTemplate.Spec.Template = nil
pod, err := b.newScaleSetListenerPod(listenerNoTemplate, &corev1.Secret{}, listenerServiceAccount, nil)
pod, err := b.newScaleSetListenerPod(
listenerNoTemplate,
&corev1.Secret{},
listenerServiceAccount,
listenerRole,
listenerRoleBinding,
nil,
)
require.NoError(t, err)
require.NotNil(t, pod.Spec.NodeSelector)
assert.Equal(t, "linux", pod.Spec.NodeSelector[LabelKeyKubernetesOS],
@ -415,7 +477,14 @@ func TestListenerPodNodeSelector(t *testing.T) {
},
}
pod, err := b.newScaleSetListenerPod(listenerWithTemplate, &corev1.Secret{}, listenerServiceAccount, nil)
pod, err := b.newScaleSetListenerPod(
listenerWithTemplate,
&corev1.Secret{},
listenerServiceAccount,
listenerRole,
listenerRoleBinding,
nil,
)
require.NoError(t, err)
require.NotNil(t, pod.Spec.NodeSelector,
"linux nodeSelector should not be cleared by template with nil nodeSelector")
@ -434,7 +503,14 @@ func TestListenerPodNodeSelector(t *testing.T) {
},
}
pod, err := b.newScaleSetListenerPod(listenerWithTemplate, &corev1.Secret{}, listenerServiceAccount, nil)
pod, err := b.newScaleSetListenerPod(
listenerWithTemplate,
&corev1.Secret{},
listenerServiceAccount,
listenerRole,
listenerRoleBinding,
nil,
)
require.NoError(t, err)
require.NotNil(t, pod.Spec.NodeSelector)
assert.Equal(t, "linux", pod.Spec.NodeSelector[LabelKeyKubernetesOS])
@ -450,7 +526,14 @@ func TestListenerPodNodeSelector(t *testing.T) {
},
}
pod, err := b.newScaleSetListenerPod(listenerWithTemplate, &corev1.Secret{}, listenerServiceAccount, nil)
pod, err := b.newScaleSetListenerPod(
listenerWithTemplate,
&corev1.Secret{},
listenerServiceAccount,
listenerRole,
listenerRoleBinding,
nil,
)
require.NoError(t, err)
// An explicitly set empty map is non-nil, so it overrides the default.
// This is intentional: the user explicitly opted out of nodeSelector constraints.

15
main.go
View File

@ -91,7 +91,6 @@ func main() {
autoScalingRunnerSetOnly bool
enableLeaderElection bool
disableAdmissionWebhook bool
updateStrategy string
leaderElectionID string
port int
syncPeriod time.Duration
@ -160,7 +159,6 @@ func main() {
flag.StringVar(&logLevel, "log-level", logging.LogLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
flag.StringVar(&logFormat, "log-format", "text", `The log format. Valid options are "text" and "json". Defaults to "text"`)
flag.BoolVar(&autoScalingRunnerSetOnly, "auto-scaling-runner-set-only", false, "Make controller only reconcile AutoRunnerScaleSet object.")
flag.StringVar(&updateStrategy, "update-strategy", "immediate", `Resources reconciliation strategy on upgrade with running/pending jobs. Valid values are: "immediate", "eventual". Defaults to "immediate".`)
flag.Var(&autoScalerImagePullSecrets, "auto-scaler-image-pull-secrets", "The default image-pull secret name for auto-scaler listener container.")
flag.IntVar(&k8sClientRateLimiterQPS, "k8s-client-rate-limiter-qps", 20, "The QPS value of the K8s client rate limiter.")
flag.IntVar(&k8sClientRateLimiterBurst, "k8s-client-rate-limiter-burst", 30, "The burst value of the K8s client rate limiter.")
@ -211,14 +209,6 @@ func main() {
defaultNamespaces[watchSingleNamespace] = cache.Config{}
defaultNamespaces[managerNamespace] = cache.Config{}
}
switch updateStrategy {
case "eventual", "immediate":
log.Info(`Update strategy set to:`, "updateStrategy", updateStrategy)
default:
log.Info(`Update strategy not recognized. Defaulting to "immediately"`, "updateStrategy", updateStrategy)
updateStrategy = "immediate"
}
}
if actionsgithubcom.SetListenerLoggingParameters(logLevel, logFormat) {
@ -312,6 +302,7 @@ func main() {
rb := actionsgithubcom.ResourceBuilder{
ExcludeLabelPropagationPrefixes: excludeLabelPropagationPrefixes,
SecretResolver: secretResolver,
Scheme: mgr.GetScheme(),
}
log.Info("Resource builder initializing")
@ -320,7 +311,8 @@ func main() {
switch workqueueRateLimiter {
case "typed_rate_limiter":
log.Info("Using typed rate limiter (per-item only, no global token bucket)")
controllerOpts = append(controllerOpts,
controllerOpts = append(
controllerOpts,
actionsgithubcom.WithTypedRateLimiter(workqueue.DefaultTypedItemBasedRateLimiter[reconcile.Request]()),
)
case "bucket_rate_limiter", "":
@ -336,7 +328,6 @@ func main() {
Scheme: mgr.GetScheme(),
ControllerNamespace: managerNamespace,
DefaultRunnerScaleSetListenerImage: managerImage,
UpdateStrategy: actionsgithubcom.UpdateStrategy(updateStrategy),
DefaultRunnerScaleSetListenerImagePullSecrets: autoScalerImagePullSecrets,
ResourceBuilder: rb,
}).SetupWithManager(mgr, controllerOpts...); err != nil {

View File

@ -10,6 +10,10 @@ export TARGET_ORG="${TARGET_ORG:-actions-runner-controller}"
export TARGET_REPO="${TARGET_REPO:-arc_e2e_test_dummy}"
export IMAGE_NAME="${IMAGE_NAME:-arc-test-image}"
# Writes a log line to stderr, prefixed with the current UTC timestamp in
# square brackets. All arguments are joined into a single message.
function log() {
    printf '[%s] %s\n' "$(date -u +%FT%T%Z)" "$*" >&2
}
# Trims a single pair of matching surrounding quotes from the provided string.
# Examples:
# trim_quotes '"1.2.3"' -> 1.2.3
@ -56,7 +60,7 @@ function chart_version() {
version="$(trim_quotes "${version}" | tr -d "[:space:]")"
if [[ -z "${version}" ]]; then
echo "Failed to extract version from ${chart_yaml} via yq" >&2
log "Failed to extract version from ${chart_yaml} via yq"
return 1
fi
printf '%s\n' "${version}"
@ -65,14 +69,14 @@ function chart_version() {
function ensure_version_set() {
if [[ -z "${VERSION:-}" ]]; then
echo 'VERSION is not set. Set it in the test, e.g. export VERSION="$(chart_version path/to/Chart.yaml)".' >&2
log 'VERSION is not set. Set it in the test, e.g. export VERSION="$(chart_version path/to/Chart.yaml)".'
return 1
fi
# Defensive: if a tool produced quoted output, normalize it before using in tags/args.
export VERSION="$(printf '%s' "${VERSION}" | tr -d "\"'[:space:]")"
if [[ -z "${VERSION}" ]]; then
echo "VERSION resolved to an empty value" >&2
log "VERSION resolved to an empty value"
return 1
fi
@ -86,7 +90,7 @@ export COMMIT_SHA
function build_image() {
ensure_version_set || return 1
echo "Building ARC image ${IMAGE}"
log "Building ARC image ${IMAGE}"
cd "${ROOT_DIR}" || exit 1
@ -103,46 +107,46 @@ function build_image() {
function create_cluster() {
ensure_version_set || return 1
echo "Deleting minikube cluster if exists"
log "Deleting minikube cluster if exists"
minikube delete || true
echo "Creating minikube cluster"
log "Creating minikube cluster"
minikube start --driver=docker --container-runtime=docker --wait=all
echo "Verifying ns works"
log "Verifying ns works"
if ! minikube ssh "nslookup github.com >/dev/null 2>&1"; then
echo "Nameserver configuration failed"
log "Nameserver configuration failed"
exit 1
fi
echo "Loading image into minikube cluster"
log "Loading image into minikube cluster"
minikube image load "${IMAGE}"
echo "Loading runner image into minikube cluster"
log "Loading runner image into minikube cluster"
minikube image load "ghcr.io/actions/actions-runner:latest"
}
function delete_cluster() {
echo "Deleting minikube cluster"
log "Deleting minikube cluster"
minikube delete
}
function log_arc() {
echo "ARC logs"
log "ARC logs"
kubectl logs -n "${NAMESPACE}" -l "app.kubernetes.io/part-of=gha-rs-controller,app.kubernetes.io/component=controller-manager"
}
function wait_for_arc() {
echo "Waiting for ARC to be ready"
log "Waiting for ARC to be ready"
local count=0
while true; do
POD_NAME=$(kubectl get pods -n "${NAMESPACE}" -l "app.kubernetes.io/part-of=gha-rs-controller,app.kubernetes.io/component=controller-manager" -o name 2>/dev/null || true)
if [ -n "$POD_NAME" ]; then
echo "Pod found: $POD_NAME"
log "Pod found: $POD_NAME"
break
fi
if [ "$count" -ge 60 ]; then
echo "Timeout waiting for controller pod with labels app.kubernetes.io/part-of=gha-rs-controller,app.kubernetes.io/component=controller-manager"
log "Timeout waiting for controller pod with labels app.kubernetes.io/part-of=gha-rs-controller,app.kubernetes.io/component=controller-manager"
return 1
fi
sleep 1
@ -159,12 +163,12 @@ function wait_for_scale_set() {
while true; do
POD_NAME=$(kubectl get pods -n "${NAMESPACE}" -l "actions.github.com/scale-set-name=${NAME}" -o name)
if [ -n "$POD_NAME" ]; then
echo "Pod found: ${POD_NAME}"
log "Pod found: ${POD_NAME}"
break
fi
if [ "$count" -ge 60 ]; then
echo "Timeout waiting for listener pod with label actions.github.com/scale-set-name=${NAME}"
log "Timeout waiting for listener pod with label actions.github.com/scale-set-name=${NAME}"
return 1
fi
@ -176,8 +180,10 @@ function wait_for_scale_set() {
}
# Uninstalls the Helm release ${INSTALLATION_NAME} from ${NAMESPACE}, then waits
# (40s timeout) for the autoscalingrunnersets labelled
# app.kubernetes.io/instance=${INSTALLATION_NAME} to be deleted.
# Reads INSTALLATION_NAME and NAMESPACE from the environment; the return status
# is that of the final `kubectl wait`.
function cleanup_scale_set() {
  local helm_release="${INSTALLATION_NAME}"
  local target_ns="${NAMESPACE}"

  log "Uninstalling Helm release ${helm_release}"
  helm uninstall "${helm_release}" \
    --namespace "${target_ns}" \
    --debug

  log "Waiting for autoscaling runner sets to be deleted"
  kubectl wait \
    --timeout=40s \
    --for=delete autoscalingrunnersets \
    -n "${target_ns}" \
    -l "app.kubernetes.io/instance=${helm_release}"
}
@ -197,7 +203,24 @@ function print_results() {
fi
}
function run_workflow() {
# Prints the workflow run id contained in the text passed as $1.
#
# A line made up solely of digits (surrounding whitespace ignored) takes
# precedence; when no such line exists, the digits following the first
# "actions/runs/" occurrence are used instead.
# Returns 1, after logging the offending input, when neither form is present.
function extract_run_id() {
  local workflow_output="$1"
  # Patterns are kept in variables so bash treats them as regexes, not literals.
  local digits_only_re='^[[:space:]]*([0-9]+)[[:space:]]*$'
  local run_url_re='actions/runs/([0-9]+)'
  local bare_id=""
  local url_id=""
  local line

  while IFS= read -r line; do
    if [[ "${line}" =~ ${digits_only_re} ]]; then
      # First digits-only line wins outright; nothing later can override it.
      bare_id="${BASH_REMATCH[1]}"
      break
    fi
    # Remember only the first URL-style match as the fallback.
    if [[ -z "${url_id}" && "${line}" =~ ${run_url_re} ]]; then
      url_id="${BASH_REMATCH[1]}"
    fi
  done <<<"${workflow_output}"

  local run_id="${bare_id:-${url_id}}"
  if [[ -z "${run_id}" ]]; then
    log "Failed to extract run id from output: ${workflow_output}"
    return 1
  fi

  printf '%s\n' "${run_id}"
}
function start_workflow() {
local repo
repo="${TARGET_ORG}/${TARGET_REPO}"
@ -240,50 +263,74 @@ function run_workflow() {
fi
if [[ -z "${workflow_id}" ]]; then
echo "Workflow not found in ${repo}: ${WORKFLOW_FILE}" >&2
echo "Available workflows in ${repo}:" >&2
log "Workflow not found in ${repo}: ${WORKFLOW_FILE}" >&2
log "Available workflows in ${repo}:" >&2
gh workflow list -R "${repo}" --limit 50 || true
echo "Hint: set TARGET_ORG/TARGET_REPO to a repo that contains the workflow on its default branch, or set WORKFLOW_FILE to a valid workflow name/id/filename." >&2
log "Hint: set TARGET_ORG/TARGET_REPO to a repo that contains the workflow on its default branch, or set WORKFLOW_FILE to a valid workflow name/id/filename." >&2
return 1
fi
echo "Resolved workflow id: ${workflow_id} (ref: ${WORKFLOW_REF})"
log "Resolved workflow id: ${workflow_id} (ref: ${WORKFLOW_REF})"
local queue_time
queue_time="$(date -u +%FT%TZ)"
echo "Running workflow ${WORKFLOW_FILE}"
gh workflow run -R "${repo}" "${workflow_id}" --ref "${WORKFLOW_REF}" -f arc_name="${SCALE_SET_NAME}" || return 1
log "Running workflow ${WORKFLOW_FILE}"
local workflow_output
workflow_output="$(gh workflow run -R "${repo}" "${workflow_id}" --ref "${WORKFLOW_REF}" -f arc_name="${SCALE_SET_NAME}")" || return 1
if [[ -n "${workflow_output}" ]]; then
log "${workflow_output}"
fi
echo "Waiting for run to start"
log "Waiting for run to start"
local count=0
local run_id=
local run_id_output=
while true; do
if [[ "${count}" -ge 12 ]]; then
echo "Timeout waiting for run to start"
log "Timeout waiting for run to start"
return 1
fi
run_id=$(gh run list -R "${repo}" --workflow "${workflow_id}" --created ">${queue_time}" --json "name,databaseId" --jq ".[] | select(.name | contains(\"${SCALE_SET_NAME}\")) | .databaseId")
echo "Run ID: ${run_id}"
run_id_output=$(gh run list -R "${repo}" --workflow "${workflow_id}" --created ">${queue_time}" --json "name,databaseId" --jq ".[] | select(.name | contains(\"${SCALE_SET_NAME}\")) | .databaseId")
if [[ -n "${run_id_output}" ]]; then
run_id=$(extract_run_id "${run_id_output}" || true)
fi
log "Run ID: ${run_id}"
if [ -n "$run_id" ]; then
echo "Run found!"
log "Run found!"
break
fi
echo "Run not found yet, waiting 5 seconds"
log "Run not found yet, waiting 5 seconds"
sleep 5
count=$((count + 1))
done
echo "Waiting for run to complete"
echo "${run_id}"
}
# Watches the GitHub Actions run whose id is passed as $1 in the repository
# ${TARGET_ORG}/${TARGET_REPO} using `gh run watch --exit-status`.
# Returns 1 when that command exits non-zero.
# NOTE(review): the echo/log pairs below look like old and new diff lines
# rendered together - confirm which one is live.
function wait_for_run_completion() {
local run_id="$1"
local repo="${TARGET_ORG}/${TARGET_REPO}"
log "Waiting for run ${run_id} to complete"
# Both stdout and stderr of gh are discarded; only the exit status is used.
gh run watch "${run_id}" -R "${repo}" --exit-status &>/dev/null
# Must stay immediately after the gh call so $? still refers to it.
local status=$?
if [[ "${status}" -ne 0 ]]; then
echo "Run failed with exit code ${status}"
log "Run failed with exit code ${status}"
return 1
fi
echo "Run completed successfully"
log "Run completed successfully"
}
# Starts the workflow and then waits for the resulting run to finish.
# The run id is whatever start_workflow writes to stdout. Returns 1 when
# start_workflow fails, otherwise the status of wait_for_run_completion.
function run_workflow() {
  local run_id

  # Declared separately above so the assignment's status is start_workflow's.
  run_id=$(start_workflow) || {
    log "Failed to start workflow"
    return 1
  }

  wait_for_run_completion "${run_id}"
}
function retry() {
@ -295,10 +342,10 @@ function retry() {
until "$@"; do
if [[ $n -ge $retries ]]; then
echo "Attempt $n failed! No more retries left."
log "Attempt $n failed! No more retries left."
return 1
else
echo "Attempt $n failed! Retrying in $delay seconds..."
log "Attempt $n failed! Retrying in $delay seconds..."
sleep "$delay"
n=$((n + 1))
fi
@ -306,7 +353,7 @@ function retry() {
}
function install_openebs() {
echo "Install openebs/dynamic-localpv-provisioner"
log "Install openebs/dynamic-localpv-provisioner"
helm repo add openebs https://openebs.github.io/openebs
helm repo update
helm install openebs openebs/openebs -n openebs --create-namespace

View File

@ -19,13 +19,14 @@ WORKFLOW_FILE="arc-test-sleepy-matrix.yaml"
ARC_NAME="arc"
ARC_NAMESPACE="arc-systems"
UPGRADE_MARKER="e2e-upgrade-${SCALE_SET_NAME}-$(date +%s)"
function install_arc() {
echo "Installing ARC"
helm install "${ARC_NAME}" \
--namespace "${ARC_NAMESPACE}" \
--create-namespace \
--set controller.manager.container.image="${IMAGE_NAME}:${IMAGE_TAG}" \
--set controller.manager.config.updateStrategy="eventual" \
"${ROOT_DIR}/charts/gha-runner-scale-set-controller-experimental" \
--debug
@ -44,6 +45,8 @@ function install_scale_set() {
--set controllerServiceAccount.namespace="${ARC_NAMESPACE}" \
--set auth.url="https://github.com/${TARGET_ORG}/${TARGET_REPO}" \
--set auth.githubToken="${GITHUB_TOKEN}" \
--set scaleset.name="${SCALE_SET_NAME}" \
--set scaleset.minRunners=5 \
"${ROOT_DIR}/charts/gha-runner-scale-set-experimental" \
--version="${VERSION}" \
--debug
@ -56,87 +59,75 @@ function install_scale_set() {
# Runs `helm upgrade` on the release ${SCALE_SET_NAME} in ${SCALE_SET_NAMESPACE}
# using the gha-runner-scale-set-experimental chart under ${ROOT_DIR}/charts.
# Besides the auth and controller service account settings it overrides the
# runner container image, command and a TEST env entry, and sets the
# runner.pod.metadata.labels entry e2e.arc/upgrade-marker to ${UPGRADE_MARKER}.
# NOTE(review): both runner.env[0].* and runner.container.env[0].* are set
# below. This may be the removed and added lines of a diff shown together -
# confirm against the chart's values which key is the live one.
# No comments are placed inside the command: it is a single backslash-continued
# line and an interleaved comment would cut it short.
function upgrade_scale_set() {
echo "Upgrading scale set ${SCALE_SET_NAME}/${SCALE_SET_NAMESPACE}"
echo "Generated upgrade marker: ${UPGRADE_MARKER}"
helm upgrade "${SCALE_SET_NAME}" \
--namespace "${SCALE_SET_NAMESPACE}" \
--set controllerServiceAccount.name="${ARC_NAME}-gha-rs-controller" \
--set controllerServiceAccount.namespace="${ARC_NAMESPACE}" \
--set auth.url="https://github.com/${TARGET_ORG}/${TARGET_REPO}" \
--set auth.githubToken="${GITHUB_TOKEN}" \
--set scaleset.name="${SCALE_SET_NAME}" \
--set runner.container.image="ghcr.io/actions/actions-runner:latest" \
--set runner.container.command={"/home/runner/run.sh"} \
--set runner.env[0].name="TEST" \
--set runner.env[0].value="E2E TESTS" \
--set runner.container.env[0].name="TEST" \
--set runner.container.env[0].value="E2E TESTS" \
--set "runner.pod.metadata.labels.e2e\.arc/upgrade-marker=${UPGRADE_MARKER}" \
"${ROOT_DIR}/charts/gha-runner-scale-set-experimental" \
--version="${VERSION}" \
--debug
}
function assert_listener_deleted() {
function assert_idle_pod_recreated() {
echo "Waiting for idle pod recreation"
local count=0
while true; do
LISTENER_COUNT="$(kubectl get pods -l actions.github.com/scale-set-name="${SCALE_SET_NAME}" -n "${ARC_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RUNNERS_COUNT="$(kubectl get pods -l app.kubernetes.io/component=runner -n "${SCALE_SET_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RESOURCES="$(kubectl get pods -A)"
if [ "${LISTENER_COUNT}" -eq 0 ]; then
echo "Listener has been deleted"
echo "${RESOURCES}"
return 0
fi
if [ "${count}" -ge 60 ]; then
echo "Timeout waiting for listener to be deleted"
echo "${RESOURCES}"
while true; do
local pods
if ! pods=$(kubectl get pods -n "${SCALE_SET_NAMESPACE}" -l "actions.github.com/scale-set-name=${SCALE_SET_NAME},e2e.arc/upgrade-marker=${UPGRADE_MARKER}" -o jsonpath='{.items[*].metadata.name}'); then
echo "Failed to get pods: $pods"
return 1
fi
echo "Waiting for listener to be deleted"
echo "Listener count: ${LISTENER_COUNT} target: 0 | Runners count: ${RUNNERS_COUNT} target: 3"
sleep 1
count=$((count + 1))
done
}
function assert_listener_recreated() {
count=0
while true; do
LISTENER_COUNT="$(kubectl get pods -l actions.github.com/scale-set-name="${SCALE_SET_NAME}" -n "${ARC_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RUNNERS_COUNT="$(kubectl get pods -l app.kubernetes.io/component=runner -n "${SCALE_SET_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RESOURCES="$(kubectl get pods -A)"
if [ "${LISTENER_COUNT}" -eq 1 ]; then
echo "Listener is up!"
echo "${RESOURCES}"
if [[ -n "$pods" ]]; then
echo "Found idle pod with upgrade marker: $pods"
return 0
fi
if [ "${count}" -ge 120 ]; then
echo "Timeout waiting for listener to be recreated"
echo "${RESOURCES}"
if ((count >= 30)); then
echo "Timeout waiting for idle pod recreation after upgrade"
return 1
fi
echo "Waiting for listener to be recreated"
echo "Listener count: ${LISTENER_COUNT} target: 1 | Runners count: ${RUNNERS_COUNT} target: 0"
sleep 1
count=$((count + 1))
echo "No idle pod with upgrade marker found yet, retrying... ($((count + 1))/30)"
sleep 10
((count++))
done
}
function main() {
local failed=()
local run_id=""
build_image
create_cluster
install_arc
install_scale_set
WORKFLOW_FILE="${WORKFLOW_FILE}" SCALE_SET_NAME="${SCALE_SET_NAME}" run_workflow || failed+=("run_workflow")
upgrade_scale_set || failed+=("upgrade_scale_set")
assert_listener_deleted || failed+=("assert_listener_deleted")
assert_listener_recreated || failed+=("assert_listener_recreated")
if ! run_id=$(WORKFLOW_FILE="${WORKFLOW_FILE}" SCALE_SET_NAME="${SCALE_SET_NAME}" start_workflow); then
failed+=("run_workflow")
fi
assert_idle_pod_recreated || failed+=("assert_idle_pod_recreated")
if [[ -n "${run_id}" ]]; then
wait_for_run_completion "${run_id}" || failed+=("wait_for_run_completion")
fi
INSTALLATION_NAME="${SCALE_SET_NAME}" NAMESPACE="${SCALE_SET_NAMESPACE}" cleanup_scale_set || failed+=("cleanup_scale_set")

View File

@ -19,6 +19,8 @@ WORKFLOW_FILE="arc-test-sleepy-matrix.yaml"
ARC_NAME="arc"
ARC_NAMESPACE="arc-systems"
UPGRADE_MARKER="e2e-upgrade-${SCALE_SET_NAME}-$(date +%s)"
function install_arc() {
echo "Installing ARC"
helm install "${ARC_NAME}" \
@ -26,7 +28,6 @@ function install_arc() {
--create-namespace \
--set image.repository="${IMAGE_NAME}" \
--set image.tag="${IMAGE_TAG}" \
--set flags.updateStrategy="eventual" \
"${ROOT_DIR}/charts/gha-runner-scale-set-controller" \
--debug
@ -43,6 +44,7 @@ function install_scale_set() {
--create-namespace \
--set githubConfigUrl="https://github.com/${TARGET_ORG}/${TARGET_REPO}" \
--set githubConfigSecret.github_token="${GITHUB_TOKEN}" \
--set minRunners=1 \
"${ROOT_DIR}/charts/gha-runner-scale-set" \
--debug
@ -54,86 +56,77 @@ function install_scale_set() {
# Runs `helm upgrade` on the release ${SCALE_SET_NAME} in ${SCALE_SET_NAMESPACE}
# using the gha-runner-scale-set chart under ${ROOT_DIR}/charts.
# It keeps minRunners at 1, overrides the first pod-template container (name,
# image, command and a TEST env entry) and sets the template.metadata.labels
# entry e2e.arc/upgrade-marker to ${UPGRADE_MARKER}.
# NOTE(review): this span may mix removed and added diff lines (for example the
# PATCH_APPLIED_TIME capture) - confirm which lines are live.
# No comments are placed inside the command: it is a single backslash-continued
# line and an interleaved comment would cut it short.
function upgrade_scale_set() {
echo "Upgrading scale set ${SCALE_SET_NAME}/${SCALE_SET_NAMESPACE}"
echo "Generated upgrade marker: ${UPGRADE_MARKER}"
# UTC timestamp taken just before the upgrade. It is not declared local, so it
# remains set for the rest of the script after this function returns.
PATCH_APPLIED_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
echo "Captured PATCH_APPLIED_TIME: ${PATCH_APPLIED_TIME}"
helm upgrade "${SCALE_SET_NAME}" \
--namespace "${SCALE_SET_NAMESPACE}" \
--set githubConfigUrl="https://github.com/${TARGET_ORG}/${TARGET_REPO}" \
--set githubConfigSecret.github_token="${GITHUB_TOKEN}" \
--set minRunners=1 \
--set template.spec.containers[0].name="runner" \
--set template.spec.containers[0].image="ghcr.io/actions/actions-runner:latest" \
--set template.spec.containers[0].command={"/home/runner/run.sh"} \
--set template.spec.containers[0].env[0].name="TEST" \
--set template.spec.containers[0].env[0].value="E2E TESTS" \
--set "template.metadata.labels.e2e\.arc/upgrade-marker=${UPGRADE_MARKER}" \
"${ROOT_DIR}/charts/gha-runner-scale-set" \
--version="${VERSION}" \
--debug
}
function assert_listener_deleted() {
function assert_idle_pod_recreated() {
echo "Waiting for idle pod recreation"
local count=0
while true; do
LISTENER_COUNT="$(kubectl get pods -l actions.github.com/scale-set-name="${SCALE_SET_NAME}" -n "${ARC_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RUNNERS_COUNT="$(kubectl get pods -l app.kubernetes.io/component=runner -n "${SCALE_SET_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RESOURCES="$(kubectl get pods -A)"
if [ "${LISTENER_COUNT}" -eq 0 ]; then
echo "Listener has been deleted"
echo "${RESOURCES}"
return 0
fi
if [ "${count}" -ge 60 ]; then
echo "Timeout waiting for listener to be deleted"
echo "${RESOURCES}"
while true; do
local pods
if ! pods=$(kubectl get pods -n "${SCALE_SET_NAMESPACE}" -l "actions.github.com/scale-set-name=${SCALE_SET_NAME},e2e.arc/upgrade-marker=${UPGRADE_MARKER}" -o jsonpath='{.items[*].metadata.name}'); then
echo "Failed to get pods: $pods"
return 1
fi
echo "Waiting for listener to be deleted"
echo "Listener count: ${LISTENER_COUNT} target: 0 | Runners count: ${RUNNERS_COUNT} target: 3"
sleep 1
count=$((count + 1))
done
}
function assert_listener_recreated() {
count=0
while true; do
LISTENER_COUNT="$(kubectl get pods -l actions.github.com/scale-set-name="${SCALE_SET_NAME}" -n "${ARC_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RUNNERS_COUNT="$(kubectl get pods -l app.kubernetes.io/component=runner -n "${SCALE_SET_NAMESPACE}" --field-selector=status.phase=Running -o=jsonpath='{.items}' | jq 'length')"
RESOURCES="$(kubectl get pods -A)"
if [ "${LISTENER_COUNT}" -eq 1 ]; then
echo "Listener is up!"
echo "${RESOURCES}"
if [[ -n "$pods" ]]; then
echo "Found idle pod with upgrade marker: $pods"
return 0
fi
if [ "${count}" -ge 120 ]; then
echo "Timeout waiting for listener to be recreated"
echo "${RESOURCES}"
if ((count >= 30)); then
echo "Timeout waiting for idle pod recreation after upgrade"
return 1
fi
echo "Waiting for listener to be recreated"
echo "Listener count: ${LISTENER_COUNT} target: 1 | Runners count: ${RUNNERS_COUNT} target: 0"
sleep 1
count=$((count + 1))
echo "No idle pod with upgrade marker found yet, retrying... ($((count + 1))/30)"
sleep 10
((count++))
done
}
function main() {
local failed=()
local run_id=""
build_image
create_cluster
install_arc
install_scale_set
WORKFLOW_FILE="${WORKFLOW_FILE}" SCALE_SET_NAME="${SCALE_SET_NAME}" run_workflow || failed+=("run_workflow")
upgrade_scale_set || failed+=("upgrade_scale_set")
assert_listener_deleted || failed+=("assert_listener_deleted")
assert_listener_recreated || failed+=("assert_listener_recreated")
if ! run_id=$(WORKFLOW_FILE="${WORKFLOW_FILE}" SCALE_SET_NAME="${SCALE_SET_NAME}" start_workflow); then
failed+=("run_workflow")
fi
assert_idle_pod_recreated || failed+=("assert_idle_pod_recreated")
if [[ -n "${run_id}" ]]; then
wait_for_run_completion "${run_id}" || failed+=("wait_for_run_completion")
fi
INSTALLATION_NAME="${SCALE_SET_NAME}" NAMESPACE="${SCALE_SET_NAMESPACE}" cleanup_scale_set || failed+=("cleanup_scale_set")