feat: Support for scaling from/to zero (#465)
This is an attempt to support scaling from/to zero. The basic idea is that we create a one-off "registration-only" runner pod when a RunnerReplicaSet is scaled to zero, so that there is one "offline" runner, which lets GitHub Actions queue jobs instead of discarding them. GitHub Actions appears to throw a new job away immediately when there are no runners at all. Generally, having runners of any status, `busy`, `idle`, or `offline`, prevents GitHub Actions from failing jobs. But retaining `busy` or `idle` runners means we need to keep runner pods running, which conflicts with our desire to scale to/from zero, hence we retain `offline` runners. In this change, I enhanced the runnerreplicaset controller to create a registration-only runner at the very beginning of its reconciliation logic, only when a runnerreplicaset is scaled to zero. The runner controller creates the registration-only runner pod, waits for it to become "offline", and then removes the runner pod. The runner on GitHub stays `offline` until the runner resource on K8s is deleted. As we remove the registration-only runner pod as soon as it registers, this doesn't block cluster-autoscaler. Related to #447
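For reference, here is a minimal sketch (not part of this commit) of the naming and annotation convention the diff below uses for the registration-only runner. The constant and helper mirror the ones added in this change; the RunnerReplicaSet name is made up for illustration:

```go
package main

import "fmt"

// Mirrors the annotation key and naming helper added by this change
// (see the runner and runnerreplicaset controller diffs below).
const annotationKeyRegistrationOnly = "actions-runner-controller/registration-only"

func registrationOnlyRunnerNameFor(rsName string) string {
	return rsName + "-registration-only"
}

func main() {
	// When a RunnerReplicaSet (hypothetically named "example-runnerdeploy-abcde")
	// is scaled to zero, the controller creates one Runner resource like this:
	name := registrationOnlyRunnerNameFor("example-runnerdeploy-abcde")
	annotations := map[string]string{annotationKeyRegistrationOnly: "true"}

	fmt.Printf("runner name: %s, annotations: %v\n", name, annotations)
}
```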
This commit is contained in:
parent 7e766282aa
commit dbd7b486d2

Makefile (17 changed lines)
@@ -113,7 +113,7 @@ generate: controller-gen
	$(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths="./..."

# Build the docker image
docker-build: test
docker-build:
	docker build . -t ${NAME}:${VERSION}
	docker build runner -t ${RUNNER_NAME}:${VERSION} --build-arg TARGETPLATFORM=$(shell arch)

@@ -172,7 +172,7 @@ acceptance/pull: docker-build

acceptance/setup:
	kubectl apply --validate=false -f https://github.com/jetstack/cert-manager/releases/download/v1.0.4/cert-manager.yaml #kubectl create namespace actions-runner-system
	kubectl -n cert-manager wait deploy/cert-manager-cainjector --for condition=available --timeout 60s
	kubectl -n cert-manager wait deploy/cert-manager-cainjector --for condition=available --timeout 90s
	kubectl -n cert-manager wait deploy/cert-manager-webhook --for condition=available --timeout 60s
	kubectl -n cert-manager wait deploy/cert-manager --for condition=available --timeout 60s
	kubectl create namespace actions-runner-system || true

@@ -230,6 +230,7 @@ OS_NAME := $(shell uname -s | tr A-Z a-z)

# find or download etcd
etcd:
ifeq (, $(shell which etcd))
ifeq (, $(wildcard $(TEST_ASSETS)/etcd))
	@{ \
	set -xe ;\

@@ -247,9 +248,13 @@ ETCD_BIN=$(TEST_ASSETS)/etcd
else
ETCD_BIN=$(TEST_ASSETS)/etcd
endif
else
ETCD_BIN=$(shell which etcd)
endif

# find or download kube-apiserver
kube-apiserver:
ifeq (, $(shell which kube-apiserver))
ifeq (, $(wildcard $(TEST_ASSETS)/kube-apiserver))
	@{ \
	set -xe ;\

@@ -267,10 +272,13 @@ KUBE_APISERVER_BIN=$(TEST_ASSETS)/kube-apiserver
else
KUBE_APISERVER_BIN=$(TEST_ASSETS)/kube-apiserver
endif

else
KUBE_APISERVER_BIN=$(shell which kube-apiserver)
endif

# find or download kubectl
kubectl:
ifeq (, $(shell which kubectl))
ifeq (, $(wildcard $(TEST_ASSETS)/kubectl))
	@{ \
	set -xe ;\

@@ -288,3 +296,6 @@ KUBECTL_BIN=$(TEST_ASSETS)/kubectl
else
KUBECTL_BIN=$(TEST_ASSETS)/kubectl
endif
else
KUBECTL_BIN=$(shell which kubectl)
endif
README.md (62 changed lines)
@@ -17,6 +17,7 @@ ToC:
- [Organization Runners](#organization-runners)
- [Enterprise Runners](#enterprise-runners)
- [Runner Deployments](#runnerdeployments)
- [Note on scaling to/from 0](#note-on-scaling-to-from-zero)
- [Autoscaling](#autoscaling)
- [Faster Autoscaling with GitHub Webhook](#faster-autoscaling-with-github-webhook)
- [Runner with DinD](#runner-with-dind)

@@ -279,6 +280,22 @@ example-runnerdeploy2475h595fr mumoshu/actions-runner-controller-ci Running
example-runnerdeploy2475ht2qbr mumoshu/actions-runner-controller-ci Running
```

##### Note on scaling to/from 0

You can either delete the runner deployment, or update it to have `replicas: 0`, so that there are zero runner pods in the cluster. This, in combination with e.g. `cluster-autoscaler`, lets you save on infrastructure costs when there is no need to run Actions jobs.

```yaml
# runnerdeployment.yaml
apiVersion: actions.summerwind.dev/v1alpha1
kind: RunnerDeployment
metadata:
  name: example-runnerdeploy
spec:
  replicas: 0
```

The implication of setting `replicas: 0` instead of deleting the runner deployment is that you can let GitHub Actions queue jobs until there are one or more runners. See [#465](https://github.com/actions-runner-controller/actions-runner-controller/pull/465) for more information.
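(Not part of the README diff: for illustration, the same `replicas: 0` change can also be applied programmatically. A minimal sketch using client-go's dynamic client, assuming the default kubeconfig location; the `default` namespace and the `example-runnerdeploy` name come from the example above and are placeholders.)

```go
package main

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Load kubeconfig from the default location (~/.kube/config); adjust as needed.
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}

	dyn, err := dynamic.NewForConfig(cfg)
	if err != nil {
		panic(err)
	}

	// RunnerDeployment is a CRD, so it is addressed by group/version/resource.
	gvr := schema.GroupVersionResource{
		Group:    "actions.summerwind.dev",
		Version:  "v1alpha1",
		Resource: "runnerdeployments",
	}

	// Merge-patch spec.replicas to 0, equivalent to editing the YAML shown above.
	patch := []byte(`{"spec":{"replicas":0}}`)
	if _, err := dyn.Resource(gvr).Namespace("default").Patch(
		context.TODO(), "example-runnerdeploy", types.MergePatchType, patch, metav1.PatchOptions{},
	); err != nil {
		panic(err)
	}
}
```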

#### Autoscaling

__**IMPORTANT: Due to limitations / a bug with GitHub's [routing engine](https://docs.github.com/en/actions/hosting-your-own-runners/using-self-hosted-runners-in-a-workflow#routing-precedence-for-self-hosted-runners), autoscaling does NOT work correctly with RunnerDeployments that target the enterprise level. Scaling activity works as expected; however, jobs fail to get assigned to the scaled-out replicas. This was explored in issue [#470](https://github.com/actions-runner-controller/actions-runner-controller/issues/470). Once GitHub resolves the issue with their backend service, we expect the solution to be able to support autoscaled enterprise runnerdeployments without any additional changes.**__

@@ -816,5 +833,50 @@ KUBECONFIG=path/to/kubeconfig \
acceptance/tests
```

**Development Tips**

If you've already deployed actions-runner-controller and only want to recreate pods to use the newer image, you can run:

```
NAME=$DOCKER_USER/actions-runner-controller \
  make docker-build docker-push && \
  kubectl -n actions-runner-system delete po $(kubectl -n actions-runner-system get po -ojsonpath={.items[*].metadata.name})
```

Similarly, if you'd like to recreate runner pods with the newer runner image, run:

```
NAME=$DOCKER_USER/actions-runner make \
  -C runner docker-{build,push}-ubuntu && \
  (kubectl get po -ojsonpath={.items[*].metadata.name} | xargs -n1 kubectl delete po)
```

**Runner Tests**<br />
A set of example pipelines (./acceptance/pipelines) is provided in this repository which you can use to validate that your runners are working as expected. When raising a PR, please run the relevant suites to prove your change hasn't broken anything.

**Running Ginkgo Tests**

You can run the integration test suite that is written in Ginkgo with:

```bash
make test-with-deps
```

This will first install a few binaries required to set up the integration test environment and then run `go test` to start the Ginkgo tests.

If you don't want to use `make`, like when you're running tests from your IDE, install the required binaries to `/usr/local/kubebuilder/bin`. That's the directory in which controller-runtime's `envtest` framework locates the binaries.

```bash
sudo mkdir -p /usr/local/kubebuilder/bin
make kube-apiserver etcd
sudo mv test-assets/{etcd,kube-apiserver} /usr/local/kubebuilder/bin/
go test -v -run TestAPIs github.com/summerwind/actions-runner-controller/controllers
```

To run Ginkgo tests selectively, set the pattern of target test names to `GINKGO_FOCUS`.
All the Ginkgo tests that match `GINKGO_FOCUS` will be run.

```bash
GINKGO_FOCUS='[It] should create a new Runner resource from the specified template, add a another Runner on replicas increased, and removes all the replicas when set to 0' \
  go test -v -run TestAPIs github.com/summerwind/actions-runner-controller/controllers
```
@@ -7,6 +7,10 @@ spec:
  template:
    spec:
      repository: ${TEST_REPO}

      #
      # Custom runner image
      #
      image: ${RUNNER_NAME}:${VERSION}
      imagePullPolicy: IfNotPresent

@@ -21,3 +25,13 @@ spec:
      # Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
      #
      #dockerMTU: 1450

      # Runner group
      # labels:
      #   - "mylabel 1"
      #   - "mylabel 2"

      #
      # Non-standard working directory
      #
      # workDir: "/"
@@ -4,5 +4,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
images:
- name: controller
  newName: summerwind/actions-runner-controller
  newTag: latest
  newName: mumoshu/actions-runner-controller
  newTag: dev
@@ -446,9 +446,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
			ExpectCreate(ctx, rd, "test RunnerDeployment")
			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
		}

@@ -554,9 +551,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
			ExpectCreate(ctx, rd, "test RunnerDeployment")
			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
		}

@@ -595,9 +589,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {

			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
		}

@@ -606,9 +597,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
			env.SendOrgCheckRunEvent("test", "valid", "pending", "created")
			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
		}

@@ -616,9 +604,8 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
		{
			env.SendOrgCheckRunEvent("test", "valid", "pending", "created")
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event")
			env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
		}

		env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
	})

	It("should create and scale user's repository runners on pull_request event", func() {

@@ -884,9 +871,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
			ExpectCreate(ctx, rd, "test RunnerDeployment")
			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 1)
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(1, "count of fake list runners")
		}

@@ -930,9 +914,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {

			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1)
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3)
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
		}

@@ -941,9 +922,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
			env.SendUserCheckRunEvent("test", "valid", "pending", "created")
			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 4, "runners after first webhook event")
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(4, "count of fake list runners")
		}

@@ -951,9 +929,8 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
		{
			env.SendUserCheckRunEvent("test", "valid", "pending", "created")
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 5, "runners after second webhook event")
			env.ExpectRegisteredNumberCountEventuallyEquals(5, "count of fake list runners")
		}

		env.ExpectRegisteredNumberCountEventuallyEquals(5, "count of fake list runners")
	})

	It("should create and scale user's repository runners only on check_run event", func() {

@@ -1045,9 +1022,6 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
			env.SendUserCheckRunEvent("test", "valid", "pending", "created")
			ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook")
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event")
		}

		{
			env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners")
		}

@@ -1055,9 +1029,8 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() {
		{
			env.SendUserCheckRunEvent("test", "valid", "pending", "created")
			ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event")
			env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
		}

		env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners")
	})

})
@@ -48,6 +48,9 @@ const (
	LabelKeyPodTemplateHash = "pod-template-hash"

	retryDelayOnGitHubAPIRateLimitError = 30 * time.Second

	// This is an annotation internal to actions-runner-controller and can change in backward-incompatible ways
	annotationKeyRegistrationOnly = "actions-runner-controller/registration-only"
)

// RunnerReconciler reconciles a Runner object

@@ -145,6 +148,34 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
		return ctrl.Result{}, nil
	}

	registrationOnly := metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly)
	if registrationOnly && runner.Status.Phase != "" {
		// At this point we are sure that the registration-only runner has successfully been configured and
		// is of `offline` status, because we set runner.Status.Phase to that of the runner pod only after
		// successful registration.

		var pod corev1.Pod
		if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
			if !kerrors.IsNotFound(err) {
				log.Info(fmt.Sprintf("Retrying soon as we failed to get registration-only runner pod: %v", err))

				return ctrl.Result{Requeue: true}, nil
			}
		} else if err := r.Delete(ctx, &pod); err != nil {
			if !kerrors.IsNotFound(err) {
				log.Info(fmt.Sprintf("Retrying soon as we failed to delete registration-only runner pod: %v", err))

				return ctrl.Result{Requeue: true}, nil
			}
		}

		log.Info("Successfully deleted registration-only runner pod to free node and cluster resources")

		// Return here to not recreate the deleted pod, because recreating it is a waste of cluster and node resources,
		// and also defeats the original purpose of scale-from/to-zero we're trying to implement by using the registration-only runner.
		return ctrl.Result{}, nil
	}

	var pod corev1.Pod
	if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
		if !kerrors.IsNotFound(err) {

@@ -221,20 +252,33 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {

	// If pod has ended up succeeded we need to restart it
	// Happens e.g. when dind is in runner and run completes
	restart := pod.Status.Phase == corev1.PodSucceeded
	stopped := pod.Status.Phase == corev1.PodSucceeded

	if pod.Status.Phase == corev1.PodRunning {
		for _, status := range pod.Status.ContainerStatuses {
			if status.Name != containerName {
				continue
			}
	if !stopped {
		if pod.Status.Phase == corev1.PodRunning {
			for _, status := range pod.Status.ContainerStatuses {
				if status.Name != containerName {
					continue
				}

			if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
				restart = true
				if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
					stopped = true
				}
			}
		}
	}

	restart := stopped

	if registrationOnly && stopped {
		restart = false

		log.Info(
			"Observed that registration-only runner for scaling-from-zero has successfully stopped. " +
				"Unlike other pods, this one will be recreated only when runner spec changes.",
		)
	}

	if updated, err := r.updateRegistrationToken(ctx, runner); err != nil {
		return ctrl.Result{}, err
	} else if updated {

@@ -247,11 +291,21 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
		return ctrl.Result{}, err
	}

	if registrationOnly {
		newPod.Spec.Containers[0].Env = append(
			newPod.Spec.Containers[0].Env,
			corev1.EnvVar{
				Name:  "RUNNER_REGISTRATION_ONLY",
				Value: "true",
			},
		)
	}

	var registrationRecheckDelay time.Duration

	// all checks done below only decide whether a restart is needed
	// if a restart was already decided before, there is no need for the checks
	// saving API calls and scary{ log messages
	// saving API calls and scary log messages
	if !restart {
		registrationCheckInterval := time.Minute
		if r.RegistrationRecheckInterval > 0 {

@@ -356,7 +410,14 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
			)
		}
	} else if offline {
		if registrationDidTimeout {
		if registrationOnly {
			log.Info(
				"Observed that registration-only runner for scaling-from-zero has successfully been registered.",
				"podCreationTimestamp", pod.CreationTimestamp,
				"currentTime", currentTime,
				"configuredRegistrationTimeout", registrationTimeout,
			)
		} else if registrationDidTimeout {
			log.Info(
				"Already existing GitHub runner still appears offline . "+
					"Recreating the pod to see if it resolves the issue. "+

@@ -375,7 +436,7 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
		}
	}

	if (notFound || offline) && !registrationDidTimeout {
	if (notFound || (offline && !registrationOnly)) && !registrationDidTimeout {
		registrationRecheckJitter := 10 * time.Second
		if r.RegistrationRecheckJitter > 0 {
			registrationRecheckJitter = r.RegistrationRecheckJitter

@@ -566,6 +627,14 @@ func (r *RunnerReconciler) newPod(runner v1alpha1.Runner) (corev1.Pod, error) {
		},
	}

	if metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly) {
		env = append(env, corev1.EnvVar{
			Name:  "RUNNER_REGISTRATION_ONLY",
			Value: "true",
		},
		)
	}

	env = append(env, runner.Spec.Env...)

	labels := map[string]string{}
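To summarize the control flow this diff adds to the runner controller, here is a condensed, hedged sketch. It is illustrative only; the names and structure are simplified from the diff above and are not the actual reconciler code:

```go
package main

import "fmt"

// Condensed from the runner controller diff above; an illustration, not the real types.
type runnerState struct {
	registrationOnly bool   // true when the registration-only annotation is set
	phase            string // runner.Status.Phase, set only after the pod has registered
	podSucceeded     bool   // pod succeeded, or the runner container exited with code 0
}

// nextAction sketches what the reconciler decides for a runner pod.
func nextAction(s runnerState) string {
	if s.registrationOnly && s.phase != "" {
		// Registration finished: delete the pod and keep the offline runner on GitHub.
		return "delete pod, do not recreate"
	}
	if s.registrationOnly && s.podSucceeded {
		// The registration-only pod exited cleanly; do not restart it.
		return "leave stopped"
	}
	if s.podSucceeded {
		// Regular runners are restarted after a completed run (e.g. with dind).
		return "restart pod"
	}
	return "keep running"
}

func main() {
	fmt.Println(nextAction(runnerState{registrationOnly: true, phase: "Completed"}))
	fmt.Println(nextAction(runnerState{podSucceeded: true}))
}
```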
@@ -188,7 +188,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
		return ctrl.Result{}, err
	}

	// Do we old runner replica sets that should eventually deleted?
	// Do we have old runner replica sets that should eventually be deleted?
	if len(oldSets) > 0 {
		readyReplicas := newestSet.Status.ReadyReplicas
@@ -68,6 +68,65 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
		return ctrl.Result{}, nil
	}

	registrationOnlyRunnerNeeded := rs.Spec.Replicas != nil && *rs.Spec.Replicas == 0
	registrationOnlyRunner := v1alpha1.Runner{}
	registrationOnlyRunnerNsName := req.NamespacedName
	registrationOnlyRunnerNsName.Name = registrationOnlyRunnerNameFor(rs.Name)

	registrationOnlyRunnerExists := false
	if err := r.Get(
		ctx,
		registrationOnlyRunnerNsName,
		&registrationOnlyRunner,
	); err != nil {
		if !kerrors.IsNotFound(err) {
			return ctrl.Result{}, err
		}
	} else {
		registrationOnlyRunnerExists = true
	}

	if registrationOnlyRunnerNeeded {
		if registrationOnlyRunnerExists {
			if registrationOnlyRunner.Status.Phase == "" {
				log.Info("Still waiting for the registration-only runner to be registered")

				return ctrl.Result{}, nil
			}
		} else {
			// A registration-only runner does not exist and is needed, hence create it.

			runnerForScaleFromToZero, err := r.newRunner(rs)
			if err != nil {
				return ctrl.Result{}, fmt.Errorf("failed to create runner for scale from/to zero: %v", err)
			}

			runnerForScaleFromToZero.ObjectMeta.Name = registrationOnlyRunnerNsName.Name
			runnerForScaleFromToZero.ObjectMeta.GenerateName = ""
			runnerForScaleFromToZero.ObjectMeta.Labels = nil
			metav1.SetMetaDataAnnotation(&runnerForScaleFromToZero.ObjectMeta, annotationKeyRegistrationOnly, "true")

			if err := r.Client.Create(ctx, &runnerForScaleFromToZero); err != nil {
				log.Error(err, "Failed to create runner for scale from/to zero")

				return ctrl.Result{}, err
			}

			// We can continue to delete runner pods only after the
			// registration-only runner gets registered.
			return ctrl.Result{}, nil
		}
	} else {
		// A registration-only runner exists and is not needed, hence delete it.
		if registrationOnlyRunnerExists {
			if err := r.Client.Delete(ctx, &registrationOnlyRunner); err != nil {
				log.Error(err, "Retrying soon because we failed to delete registration-only runner")

				return ctrl.Result{Requeue: true}, nil
			}
		}
	}

	selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
	if err != nil {
		return ctrl.Result{}, err

@@ -95,7 +154,7 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
	for _, r := range allRunners.Items {
		// This guard is required to avoid the RunnerReplicaSet created by the controller v0.17.0 or before
		// to not treat all the runners in the namespace as its children.
		if metav1.IsControlledBy(&r, &rs) {
		if metav1.IsControlledBy(&r, &rs) && !metav1.HasAnnotation(r.ObjectMeta, annotationKeyRegistrationOnly) {
			myRunners = append(myRunners, r)

			available += 1

@@ -265,3 +324,7 @@ func (r *RunnerReplicaSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
		Named(name).
		Complete(r)
}

func registrationOnlyRunnerNameFor(rsName string) string {
	return rsName + "-registration-only"
}
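The RunnerReplicaSet-side decision added above can be summarized by the following sketch. It is simplified from the diff and not the actual controller code; the string results only describe what the reconciler does next:

```go
package main

import "fmt"

// Illustration of the decision the RunnerReplicaSet reconciler makes for the
// registration-only runner (simplified from the diff above).
func reconcileRegistrationOnlyRunner(desiredReplicas int, exists bool, registered bool) string {
	if desiredReplicas == 0 {
		if !exists {
			return "create the <rs-name>-registration-only runner and return"
		}
		if !registered {
			return "wait until the registration-only runner is registered"
		}
		return "proceed to scale the managed runners down to zero"
	}
	if exists {
		return "delete the registration-only runner (no longer needed)"
	}
	return "proceed with normal scaling"
}

func main() {
	fmt.Println(reconcileRegistrationOnlyRunner(0, false, false))
	fmt.Println(reconcileRegistrationOnlyRunner(3, true, true))
}
```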
@@ -2,6 +2,7 @@ package controllers

import (
	"context"
	"fmt"
	"math/rand"
	"net/http/httptest"
	"time"

@@ -262,8 +263,36 @@ var _ = Context("Inside of a new namespace", func() {

		Eventually(
			func() int {
				err := k8sClient.List(ctx, &runners, client.InNamespace(ns.Name))
				if err != nil {
				selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
					MatchLabels: map[string]string{
						"foo": "bar",
					},
				})
				Expect(err).ToNot(HaveOccurred())

				var regOnly actionsv1alpha1.Runner
				if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: registrationOnlyRunnerNameFor(name)}, &regOnly); err != nil {
					logf.Log.Info(fmt.Sprintf("Failed getting registration-only runner in test: %v", err))
					return -1
				} else {
					updated := regOnly.DeepCopy()
					updated.Status.Phase = "Completed"

					if err := k8sClient.Status().Patch(ctx, updated, client.MergeFrom(&regOnly)); err != nil {
						logf.Log.Info(fmt.Sprintf("Failed updating registration-only runner in test: %v", err))
						return -1
					}

					runnersList.Add(&github.Runner{
						ID:     pointer.Int64Ptr(1001),
						Name:   pointer.StringPtr(regOnly.Name),
						OS:     pointer.StringPtr("linux"),
						Status: pointer.StringPtr("offline"),
						Busy:   pointer.BoolPtr(false),
					})
				}

				if err := k8sClient.List(ctx, &runners, client.InNamespace(ns.Name), client.MatchingLabelsSelector{Selector: selector}); err != nil {
					logf.Log.Error(err, "list runners")
					return -1
				}
@@ -56,6 +56,37 @@ cd /runner
  --labels "${RUNNER_LABELS}" \
  --work "${RUNNER_WORKDIR}"

if [ -f /runner/.runner ]; then
  echo Runner has successfully been configured with the following data.
  cat /runner/.runner
  # Note: the `.runner` file's content should be something like the below:
  #
  # $ cat /runner/.runner
  # {
  #   "agentId": 117, #=> corresponds to the ID of the runner
  #   "agentName": "THE_RUNNER_POD_NAME",
  #   "poolId": 1,
  #   "poolName": "Default",
  #   "serverUrl": "https://pipelines.actions.githubusercontent.com/SOME_RANDOM_ID",
  #   "gitHubUrl": "https://github.com/USER/REPO",
  #   "workFolder": "/some/work/dir" #=> corresponds to Runner.Spec.WorkDir
  # }
  #
  # Especially `agentId` is important, as other than listing all the runners in the repo,
  # this is the only chance we have to get the exact runner ID, which can be useful for further
  # GitHub API calls like the below. Note that 117 is the agentId seen above.
  # curl \
  #   -H "Accept: application/vnd.github.v3+json" \
  #   -H "Authorization: bearer ${GITHUB_TOKEN}"
  #   https://api.github.com/repos/USER/REPO/actions/runners/117
fi

if [ -n "${RUNNER_REGISTRATION_ONLY}" ]; then
  echo
  echo "This runner is configured to be registration-only. Exiting without starting the runner service..."
  exit 0
fi

mkdir ./externals
# Hack due to the DinD volumes
mv ./externalstmp/* ./externals/
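The comment block above points at the GitHub REST endpoint for fetching a single self-hosted runner by its `agentId`. For illustration, a minimal sketch of the same call from Go; `USER/REPO` and the runner ID `117` are placeholders taken from the comment, and the token is read from `GITHUB_TOKEN` as in the curl example:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	// Same endpoint as the curl example in the entrypoint comment above.
	req, err := http.NewRequest("GET", "https://api.github.com/repos/USER/REPO/actions/runners/117", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Accept", "application/vnd.github.v3+json")
	req.Header.Set("Authorization", "bearer "+os.Getenv("GITHUB_TOKEN"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Status)
	fmt.Println(string(body))
}
```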