Merge branch 'master' into improve-logs

commit eff0c7364f
@@ -15,7 +15,7 @@ on:
       - '!**.md'
 
 env:
-  RUNNER_VERSION: 2.287.1
+  RUNNER_VERSION: 2.288.0
   DOCKER_VERSION: 20.10.12
   DOCKERHUB_USERNAME: summerwind
 
@@ -37,7 +37,10 @@ fi
 
 tool=${ACCEPTANCE_TEST_DEPLOYMENT_TOOL}
 
+TEST_ID=${TEST_ID:-default}
+
 if [ "${tool}" == "helm" ]; then
+  set -v
   helm upgrade --install actions-runner-controller \
     charts/actions-runner-controller \
     -n actions-runner-system \
@@ -46,7 +49,10 @@ if [ "${tool}" == "helm" ]; then
     --set authSecret.create=false \
     --set image.repository=${NAME} \
     --set image.tag=${VERSION} \
+    --set podAnnotations.test-id=${TEST_ID} \
+    --set githubWebhookServer.podAnnotations.test-id=${TEST_ID} \
     -f ${VALUES_FILE}
+  set +v
   # To prevent `CustomResourceDefinition.apiextensions.k8s.io "runners.actions.summerwind.dev" is invalid: metadata.annotations: Too long: must have at most 262144 bytes`
   # errors
   kubectl create -f charts/actions-runner-controller/crds || kubectl replace -f charts/actions-runner-controller/crds
@@ -77,7 +83,7 @@ else
 fi
 
 if [ -n "${TEST_ORG}" ]; then
-  cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= NAME=org-runnerdeploy envsubst | kubectl apply -f -
+  cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= RUNNER_MIN_REPLICAS=${ORG_RUNNER_MIN_REPLICAS} NAME=org-runnerdeploy envsubst | kubectl apply -f -
 
   if [ -n "${TEST_ORG_GROUP}" ]; then
     cat acceptance/testdata/runnerdeploy.envsubst.yaml | TEST_ENTERPRISE= TEST_REPO= TEST_GROUP=${TEST_ORG_GROUP} NAME=orggroup-runnerdeploy envsubst | kubectl apply -f -
@@ -28,6 +28,7 @@ spec:
       ## Replace `mumoshu/actions-runner-dind:dev` with your dind image
       #dockerdWithinRunnerContainer: true
       #image: mumoshu/actions-runner-dind:dev
+      dockerdWithinRunnerContainer: ${RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER}
 
       #
       # Set the MTU used by dockerd-managed network interfaces (including docker-build-ubuntu)
@@ -56,6 +57,7 @@ spec:
   scaleUpTriggers:
   - githubEvent: {}
     amount: 1
-    duration: "1m"
-  minReplicas: 0
+    duration: "10m"
+  minReplicas: ${RUNNER_MIN_REPLICAS}
   maxReplicas: 10
+  scaleDownDelaySecondsAfterScaleOut: ${RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT}
@@ -1,7 +1,8 @@
 # Set actions-runner-controller settings for testing
 githubAPICacheDuration: 10s
+logLevel: "-3"
 githubWebhookServer:
-  logLevel: debug
+  logLevel: "-3"
   enabled: true
   labels: {}
   replicaCount: 1
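Worth noting: the new log levels are quoted numeric strings rather than names like `debug`. This works because controller-runtime's zap integration accepts raw zap levels, and zap level -n corresponds to logr's V(n), so `"-3"` enables V(1) through V(3) output. A minimal sketch of that mapping, patterned after the zap setup removed from main.go later in this commit (illustrative only, not part of this commit):

```go
package main

import (
	zaplib "go.uber.org/zap"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

func main() {
	logger := zap.New(func(o *zap.Options) {
		// zap level -3 enables everything logged via logr at V(0)..V(3).
		lvl := zaplib.NewAtomicLevelAt(-3)
		o.Level = &lvl
	})
	ctrl.SetLogger(logger)

	ctrl.Log.Info("always visible")              // V(0)
	ctrl.Log.V(3).Info("only visible at \"-3\"") // debug detail
}
```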
@@ -107,6 +107,9 @@ type CapacityReservation struct {
 	Name           string      `json:"name,omitempty"`
 	ExpirationTime metav1.Time `json:"expirationTime,omitempty"`
 	Replicas       int         `json:"replicas,omitempty"`
+
+	// +optional
+	EffectiveTime metav1.Time `json:"effectiveTime,omitempty"`
 }
 
 type ScaleTargetRef struct {
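A quick sketch of how the new field is intended to be populated, mirroring the `tryScale` change further down in this commit (illustrative only; `actionsv1alpha1` is this repo's API package):

```go
package main

import (
	"fmt"
	"time"

	actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	now := time.Now()

	// EffectiveTime records when the scale-up was requested, while
	// ExpirationTime bounds how long the extra capacity is reserved.
	r := actionsv1alpha1.CapacityReservation{
		EffectiveTime:  metav1.Time{Time: now},
		ExpirationTime: metav1.Time{Time: now.Add(10 * time.Minute)},
		Replicas:       1,
	}

	fmt.Printf("reservation of %d replica(s), effective %v, expires %v\n",
		r.Replicas, r.EffectiveTime, r.ExpirationTime)
}
```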
@@ -31,6 +31,14 @@ type RunnerDeploymentSpec struct {
 	// +nullable
 	Replicas *int `json:"replicas,omitempty"`
 
+	// EffectiveTime is the time the upstream controller requested to sync Replicas.
+	// It is usually populated by the webhook-based autoscaler via HRA.
+	// The value is inherited by the RunnerReplicaSet(s) and used to prevent ephemeral runners from being unnecessarily recreated.
+	//
+	// +optional
+	// +nullable
+	EffectiveTime *metav1.Time `json:"effectiveTime"`
+
 	// +optional
 	// +nullable
 	Selector *metav1.LabelSelector `json:"selector"`
@@ -26,6 +26,15 @@ type RunnerReplicaSetSpec struct {
 	// +nullable
 	Replicas *int `json:"replicas,omitempty"`
 
+	// EffectiveTime is the time the upstream controller requested to sync Replicas.
+	// It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment.
+	// The value is used to prevent the runnerreplicaset controller from unnecessarily recreating ephemeral runners
+	// based on a potentially outdated Replicas value.
+	//
+	// +optional
+	// +nullable
+	EffectiveTime *metav1.Time `json:"effectiveTime"`
+
 	// +optional
 	// +nullable
 	Selector *metav1.LabelSelector `json:"selector"`
@@ -47,6 +47,7 @@ func (in *CacheEntry) DeepCopy() *CacheEntry {
 func (in *CapacityReservation) DeepCopyInto(out *CapacityReservation) {
 	*out = *in
 	in.ExpirationTime.DeepCopyInto(&out.ExpirationTime)
+	in.EffectiveTime.DeepCopyInto(&out.EffectiveTime)
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CapacityReservation.
@@ -498,6 +499,10 @@ func (in *RunnerDeploymentSpec) DeepCopyInto(out *RunnerDeploymentSpec) {
 		*out = new(int)
 		**out = **in
 	}
+	if in.EffectiveTime != nil {
+		in, out := &in.EffectiveTime, &out.EffectiveTime
+		*out = (*in).DeepCopy()
+	}
 	if in.Selector != nil {
 		in, out := &in.Selector, &out.Selector
 		*out = new(metav1.LabelSelector)
@@ -812,6 +817,10 @@ func (in *RunnerReplicaSetSpec) DeepCopyInto(out *RunnerReplicaSetSpec) {
 		*out = new(int)
 		**out = **in
 	}
+	if in.EffectiveTime != nil {
+		in, out := &in.EffectiveTime, &out.EffectiveTime
+		*out = (*in).DeepCopy()
+	}
 	if in.Selector != nil {
 		in, out := &in.Selector, &out.Selector
 		*out = new(metav1.LabelSelector)
@@ -33,6 +33,7 @@ All additional docs are kept in the `docs/` folder, this README is solely for do
 | `authSecret.github_basicauth_username` | Username for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API | |
 | `authSecret.github_basicauth_password` | Password for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API | |
 | `dockerRegistryMirror` | The default Docker Registry Mirror used by runners. | |
+| `hostNetwork` | The "hostNetwork" of the controller container | false |
 | `image.repository` | The "repository/image" of the controller container | summerwind/actions-runner-controller |
 | `image.tag` | The tag of the controller container | |
 | `image.actionsRunnerRepositoryAndTag` | The "repository/image" of the actions runner container | summerwind/actions-runner:latest |
@@ -49,6 +49,9 @@ spec:
                   items:
                     description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
                     properties:
+                      effectiveTime:
+                        format: date-time
+                        type: string
                       expirationTime:
                         format: date-time
                         type: string
@@ -48,6 +48,11 @@ spec:
             spec:
               description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
               properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. The value is inherited by the RunnerReplicaSet(s) and used to prevent ephemeral runners from being unnecessarily recreated.
+                  format: date-time
+                  nullable: true
+                  type: string
                 replicas:
                   nullable: true
                   type: integer
@@ -45,6 +45,11 @@ spec:
             spec:
               description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
               properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment. The value is used to prevent the runnerreplicaset controller from unnecessarily recreating ephemeral runners based on a potentially outdated Replicas value.
+                  format: date-time
+                  nullable: true
+                  type: string
                 replicas:
                   nullable: true
                   type: integer
@@ -14,6 +14,7 @@ spec:
     metadata:
       {{- with .Values.podAnnotations }}
       annotations:
+        kubectl.kubernetes.io/default-logs-container: "manager"
         {{- toYaml . | nindent 8 }}
       {{- end }}
       labels:
@@ -199,3 +200,6 @@ spec:
       topologySpreadConstraints:
         {{- toYaml . | nindent 8 }}
      {{- end }}
+      {{- if .Values.hostNetwork }}
+      hostNetwork: {{ .Values.hostNetwork }}
+      {{- end }}
@@ -15,6 +15,7 @@ spec:
     metadata:
       {{- with .Values.githubWebhookServer.podAnnotations }}
       annotations:
+        kubectl.kubernetes.io/default-logs-container: "github-webhook-server"
         {{- toYaml . | nindent 8 }}
       {{- end }}
       labels:
@@ -165,6 +165,10 @@ admissionWebHooks:
   {}
   #caBundle: "Ci0tLS0tQk...<base64-encoded PEM bundle containing the CA that signed the webhook's serving certificate>...tLS0K"
 
+# There may be alternatives to setting `hostNetwork: true`, see
+# https://github.com/actions-runner-controller/actions-runner-controller/issues/1005#issuecomment-993097155
+#hostNetwork: true
+
 githubWebhookServer:
   enabled: false
   replicaCount: 1
@@ -29,15 +29,14 @@ import (
 	actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
 	"github.com/actions-runner-controller/actions-runner-controller/controllers"
 	"github.com/actions-runner-controller/actions-runner-controller/github"
+	"github.com/actions-runner-controller/actions-runner-controller/logging"
 	"github.com/kelseyhightower/envconfig"
-	zaplib "go.uber.org/zap"
 	"k8s.io/apimachinery/pkg/runtime"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/exec"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/oidc"
 	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 	// +kubebuilder:scaffold:imports
 )
@@ -47,11 +46,6 @@ var (
 )
 
 const (
-	logLevelDebug = "debug"
-	logLevelInfo  = "info"
-	logLevelWarn  = "warn"
-	logLevelError = "error"
-
 	webhookSecretTokenEnvName = "GITHUB_WEBHOOK_SECRET_TOKEN"
 )
@@ -97,7 +91,7 @@ func main() {
 	flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
 		"Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
 	flag.DurationVar(&syncPeriod, "sync-period", 10*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled. When you use autoscaling, set to a lower value like 10 minute, because this corresponds to the minimum time to react on demand change")
-	flag.StringVar(&logLevel, "log-level", logLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
+	flag.StringVar(&logLevel, "log-level", logging.LogLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`)
 	flag.StringVar(&webhookSecretToken, "github-webhook-secret-token", "", "The personal access token of GitHub.")
 	flag.StringVar(&c.Token, "github-token", c.Token, "The personal access token of GitHub.")
 	flag.Int64Var(&c.AppID, "github-app-id", c.AppID, "The application ID of GitHub App.")
@@ -126,23 +120,7 @@ func main() {
 		setupLog.Info("-watch-namespace is %q. Only HorizontalRunnerAutoscalers in %q are watched, cached, and considered as scale targets.")
 	}
 
-	logger := zap.New(func(o *zap.Options) {
-		switch logLevel {
-		case logLevelDebug:
-			o.Development = true
-			lvl := zaplib.NewAtomicLevelAt(-2) // maps to logr's V(2)
-			o.Level = &lvl
-		case logLevelInfo:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel)
-			o.Level = &lvl
-		case logLevelWarn:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel)
-			o.Level = &lvl
-		case logLevelError:
-			lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel)
-			o.Level = &lvl
-		}
-	})
+	logger := logging.NewLogger(logLevel)
 
 	ctrl.SetLogger(logger)
@@ -152,6 +130,8 @@ func main() {
 	// That is, all runner groups managed by ARC are assumed to be visible to any repositories,
 	// which is wrong when you have one or more non-default runner groups in your organization or enterprise.
 	if len(c.Token) > 0 || (c.AppID > 0 && c.AppInstallationID > 0 && c.AppPrivateKey != "") || (len(c.BasicauthUsername) > 0 && len(c.BasicauthPassword) > 0) {
+		c.Log = &logger
+
 		ghClient, err = c.NewClient()
 		if err != nil {
 			fmt.Fprintln(os.Stderr, "Error: Client creation failed.", err)
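The new `logging` package itself isn't included in this hunk. Based on the code it replaces and the `logging.NewLogger(logLevel)` / `logging.LogLevelDebug` call sites, it plausibly looks something like the following; treat this as an assumption, not the actual file (in particular the numeric-level parsing that would make the chart's `logLevel: "-3"` work):

```go
package logging

import (
	"strconv"

	"github.com/go-logr/logr"
	zaplib "go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

const (
	LogLevelDebug = "debug"
	LogLevelInfo  = "info"
	LogLevelWarn  = "warn"
	LogLevelError = "error"
)

// NewLogger builds a logr.Logger for the given level name. Hypothetically it
// would also accept numeric zap levels like "-3" (zap level -n == logr V(n)).
func NewLogger(logLevel string) logr.Logger {
	return zap.New(func(o *zap.Options) {
		switch logLevel {
		case LogLevelDebug:
			o.Development = true
			lvl := zaplib.NewAtomicLevelAt(-2) // logr V(2), as in the removed code
			o.Level = &lvl
		case LogLevelInfo:
			lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel)
			o.Level = &lvl
		case LogLevelWarn:
			lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel)
			o.Level = &lvl
		case LogLevelError:
			lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel)
			o.Level = &lvl
		default:
			// Assumed behavior: interpret anything else as a raw zap level.
			if i, err := strconv.Atoi(logLevel); err == nil {
				lvl := zaplib.NewAtomicLevelAt(zapcore.Level(i))
				o.Level = &lvl
			}
		}
	})
}
```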
@@ -49,6 +49,9 @@ spec:
                   items:
                     description: CapacityReservation specifies the number of replicas temporarily added to the scale target until ExpirationTime.
                     properties:
+                      effectiveTime:
+                        format: date-time
+                        type: string
                       expirationTime:
                         format: date-time
                         type: string
@@ -48,6 +48,11 @@ spec:
             spec:
               description: RunnerDeploymentSpec defines the desired state of RunnerDeployment
               properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA. The value is inherited by the RunnerReplicaSet(s) and used to prevent ephemeral runners from being unnecessarily recreated.
+                  format: date-time
+                  nullable: true
+                  type: string
                 replicas:
                   nullable: true
                   type: integer
@@ -45,6 +45,11 @@ spec:
             spec:
               description: RunnerReplicaSetSpec defines the desired state of RunnerReplicaSet
               properties:
+                effectiveTime:
+                  description: EffectiveTime is the time the upstream controller requested to sync Replicas. It is usually populated by the webhook-based autoscaler via HRA and RunnerDeployment. The value is used to prevent the runnerreplicaset controller from unnecessarily recreating ephemeral runners based on a potentially outdated Replicas value.
+                  format: date-time
+                  nullable: true
+                  type: string
                 replicas:
                   nullable: true
                   type: integer
@@ -768,8 +768,10 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScale(ctx context.
 	capacityReservations := getValidCapacityReservations(copy)
 
 	if amount > 0 {
+		now := time.Now()
 		copy.Spec.CapacityReservations = append(capacityReservations, v1alpha1.CapacityReservation{
-			ExpirationTime: metav1.Time{Time: time.Now().Add(target.ScaleUpTrigger.Duration.Duration)},
+			EffectiveTime:  metav1.Time{Time: now},
+			ExpirationTime: metav1.Time{Time: now.Add(target.ScaleUpTrigger.Duration.Duration)},
 			Replicas:       amount,
 		})
 	} else if amount < 0 {
@@ -788,10 +790,16 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) tryScale(ctx context.
 		copy.Spec.CapacityReservations = reservations
 	}
 
-	autoscaler.Log.Info(
+	before := len(target.HorizontalRunnerAutoscaler.Spec.CapacityReservations)
+	expired := before - len(capacityReservations)
+	after := len(copy.Spec.CapacityReservations)
+
+	autoscaler.Log.V(1).Info(
 		fmt.Sprintf("Patching hra %s for capacityReservations update", target.HorizontalRunnerAutoscaler.Name),
-		"before", target.HorizontalRunnerAutoscaler.Spec.CapacityReservations,
-		"after", copy.Spec.CapacityReservations,
+		"before", before,
+		"expired", expired,
+		"amount", amount,
+		"after", after,
 	)
 
 	if err := autoscaler.Client.Patch(ctx, copy, client.MergeFrom(&target.HorizontalRunnerAutoscaler)); err != nil {
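The reworked log line reports counts instead of dumping whole reservation lists, which keeps high-churn webhook logs readable. The `expired` count falls out of the fact that `getValidCapacityReservations` (whose body is not part of this diff) drops reservations past their ExpirationTime. A hypothetical sketch, just to make the arithmetic concrete:

```go
package controllers

import (
	"time"

	"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
)

// Hypothetical sketch; the real getValidCapacityReservations is not shown in
// this diff. Conceptually it keeps only reservations that haven't expired,
// so: expired == len(before filtering) - len(after filtering).
func getValidCapacityReservations(hra *v1alpha1.HorizontalRunnerAutoscaler) []v1alpha1.CapacityReservation {
	now := time.Now()

	var valid []v1alpha1.CapacityReservation
	for _, r := range hra.Spec.CapacityReservations {
		if r.ExpirationTime.Time.After(now) {
			valid = append(valid, r) // still contributing replicas
		}
	}

	return valid
}
```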
@@ -99,11 +99,33 @@ func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(ctx context.Context, re
 		return r.reconcile(ctx, req, log, hra, st, func(newDesiredReplicas int) error {
 			currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
 
+			ephemeral := rd.Spec.Template.Spec.Ephemeral == nil || *rd.Spec.Template.Spec.Ephemeral
+
+			var effectiveTime *time.Time
+
+			for _, r := range hra.Spec.CapacityReservations {
+				t := r.EffectiveTime
+				if effectiveTime == nil || effectiveTime.Before(t.Time) {
+					effectiveTime = &t.Time
+				}
+			}
+
 			// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
 			if currentDesiredReplicas != newDesiredReplicas {
 				copy := rd.DeepCopy()
 				copy.Spec.Replicas = &newDesiredReplicas
 
+				if ephemeral && effectiveTime != nil {
+					copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
+				}
+
+				if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
+					return fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
+				}
+			} else if ephemeral && effectiveTime != nil {
+				copy := rd.DeepCopy()
+				copy.Spec.EffectiveTime = &metav1.Time{Time: *effectiveTime}
+
 				if err := r.Client.Patch(ctx, copy, client.MergeFrom(&rd)); err != nil {
 					return fmt.Errorf("patching runnerdeployment to have %d replicas: %w", newDesiredReplicas, err)
 				}
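The new loop propagates the most recent EffectiveTime among the HRA's capacity reservations down to the RunnerDeployment, and only for ephemeral runners, since that is the case where recreating runners too eagerly loses work. The same logic extracted as a standalone sketch for clarity (hypothetical helper name, not part of this commit):

```go
package controllers

import (
	"time"

	"github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1"
)

// latestEffectiveTime mirrors the inline loop above: it returns the newest
// EffectiveTime across all capacity reservations, i.e. the timestamp the
// downstream RunnerReplicaSet can compare against before recreating
// ephemeral runners.
func latestEffectiveTime(reservations []v1alpha1.CapacityReservation) *time.Time {
	var latest *time.Time

	for _, r := range reservations {
		t := r.EffectiveTime.Time
		if latest == nil || latest.Before(t) {
			latest = &t
		}
	}

	return latest
}
```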
@@ -110,6 +110,8 @@ func SetupIntegrationTest(ctx2 context.Context) *testEnvironment {
 			Name:                        controllerName("runner"),
 			RegistrationRecheckInterval: time.Millisecond,
 			RegistrationRecheckJitter:   time.Millisecond,
+			UnregistrationTimeout:       1 * time.Second,
+			UnregistrationRetryDelay:    1 * time.Second,
 		}
 		err = runnerController.SetupWithManager(mgr)
 		Expect(err).NotTo(HaveOccurred(), "failed to setup runner controller")
@@ -72,6 +72,9 @@ type RunnerReconciler struct {
 	Name                        string
 	RegistrationRecheckInterval time.Duration
 	RegistrationRecheckJitter   time.Duration
+
+	UnregistrationTimeout    time.Duration
+	UnregistrationRetryDelay time.Duration
 }
 
 // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
@@ -110,8 +113,23 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
 			return ctrl.Result{}, nil
 		}
 	} else {
+		var p *corev1.Pod
+
+		{
+			var pod corev1.Pod
+			if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
+				if !kerrors.IsNotFound(err) {
+					log.Info(fmt.Sprintf("Retrying soon as we failed to get registration-only runner pod: %v", err))
+
+					return ctrl.Result{Requeue: true}, nil
+				}
+			} else {
+				p = &pod
+			}
+		}
+
 		// Request to remove a runner. DeletionTimestamp was set in the runner - we need to unregister runner
-		return r.processRunnerDeletion(runner, ctx, log)
+		return r.processRunnerDeletion(runner, ctx, log, p)
 	}
 
 	registrationOnly := metav1.HasAnnotation(runner.ObjectMeta, annotationKeyRegistrationOnly)
@@ -159,20 +177,27 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
 
 	// If pod has ended up succeeded we need to restart it
 	// Happens e.g. when dind is in runner and run completes
-	stopped := pod.Status.Phase == corev1.PodSucceeded
+	stopped := runnerPodOrContainerIsStopped(&pod)
 
-	if !stopped {
-		if pod.Status.Phase == corev1.PodRunning {
-			for _, status := range pod.Status.ContainerStatuses {
-				if status.Name != containerName {
-					continue
-				}
-
-				if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
-					stopped = true
-				}
-			}
-		}
+	ephemeral := runner.Spec.Ephemeral == nil || *runner.Spec.Ephemeral
+
+	if stopped && ephemeral {
+		log.V(1).Info("Ephemeral runner has been stopped successfully. Marking this runner for deletion.")
+
+		// This is the key to making ephemeral runners work reliably with webhook-based autoscaling.
+		// See https://github.com/actions-runner-controller/actions-runner-controller/issues/911#issuecomment-1046161384 for more context.
+		//
+		// In the next reconciliation loop, this triggers a runner unregistration.
+		// (Note that the unregistration can fail safely because an ephemeral runner usually unregisters itself from GitHub, but we do it just for confirmation)
+		//
+		// See the code path above that is executed when `runner.ObjectMeta.DeletionTimestamp.IsZero()` isn't true,
+		// which handles the unregistration, the removal of the completed pod, and so on.
+		if err := r.Delete(ctx, &runner); err != nil {
+			log.V(1).Error(err, "Retrying to mark this runner for deletion in 10 seconds.")
+			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
+		}
+
+		return ctrl.Result{Requeue: true}, nil
 	}
 
 	restart := stopped
@@ -404,64 +429,53 @@ func (r *RunnerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
 		return ctrl.Result{}, nil
 	}
 
-	// Try to delete current pod if recreation is needed
-	safeToDeletePod := false
-	ok, err := r.unregisterRunner(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
-	if err != nil {
-		log.Error(err, "Failed to unregister runner before deleting the pod.", "runner", runner.Name)
-	} else {
-		// `r.unregisterRunner()` will returns `false, nil` if the runner is not found on GitHub.
-		if !ok {
-			log.Info("Runner no longer exists on GitHub", "runner", runner.Name)
-		}
-
-		safeToDeletePod = true
-	}
-
-	if safeToDeletePod {
-		// Only delete the pod if we successfully unregistered the runner or the runner is already deleted from the service.
-		// This should help us avoid race condition between runner pickup job after we think the runner is not busy.
-		if err := r.Delete(ctx, &pod); err != nil {
-			log.Error(err, "Failed to delete pod resource")
-			return ctrl.Result{}, err
-		}
-
-		r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Deleted pod '%s'", newPod.Name))
-		log.Info("Deleted runner pod", "repository", runner.Spec.Repository)
-	}
+	updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name, &pod)
+	if res != nil {
+		return *res, err
+	}
+
+	// Only delete the pod if we successfully unregistered the runner or the runner is already deleted from the service.
+	// This should help us avoid a race condition where the runner picks up a job after we've concluded that it is not busy.
+	if err := r.Delete(ctx, updatedPod); err != nil {
+		log.Error(err, "Failed to delete pod resource")
+		return ctrl.Result{}, err
+	}
+
+	r.Recorder.Event(&runner, corev1.EventTypeNormal, "PodDeleted", fmt.Sprintf("Deleted pod '%s'", newPod.Name))
+	log.Info("Deleted runner pod", "repository", runner.Spec.Repository)
 
 	return ctrl.Result{}, nil
 }
 
-func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger) (reconcile.Result, error) {
+func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool {
+	// If pod has ended up succeeded we need to restart it
+	// Happens e.g. when dind is in runner and run completes
+	stopped := pod.Status.Phase == corev1.PodSucceeded
+
+	if !stopped {
+		if pod.Status.Phase == corev1.PodRunning {
+			for _, status := range pod.Status.ContainerStatuses {
+				if status.Name != containerName {
+					continue
+				}
+
+				if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
+					stopped = true
+				}
+			}
+		}
+	}
+
+	return stopped
+}
+
+func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger, pod *corev1.Pod) (reconcile.Result, error) {
 	finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName)
 
 	if removed {
-		if len(runner.Status.Registration.Token) > 0 {
-			ok, err := r.unregisterRunner(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
-			if err != nil {
-				if errors.Is(err, &gogithub.RateLimitError{}) {
-					// We log the underlying error when we failed calling GitHub API to list or unregisters,
-					// or the runner is still busy.
-					log.Error(
-						err,
-						fmt.Sprintf(
-							"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls",
-							retryDelayOnGitHubAPIRateLimitError,
-						),
-					)
-
-					return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
-				}
-
-				return ctrl.Result{}, err
-			}
-
-			if !ok {
-				log.V(1).Info("Runner no longer exists on GitHub")
-			}
-		} else {
-			log.V(1).Info("Runner was never registered on GitHub")
-		}
+		_, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name, pod)
+		if res != nil {
+			return *res, err
+		}
 
 		newRunner := runner.DeepCopy()
@@ -478,6 +492,24 @@ func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx con
 	return ctrl.Result{}, nil
 }
 
+func (r *RunnerReconciler) unregistrationTimeout() time.Duration {
+	unregistrationTimeout := DefaultUnregistrationTimeout
+
+	if r.UnregistrationTimeout > 0 {
+		unregistrationTimeout = r.UnregistrationTimeout
+	}
+	return unregistrationTimeout
+}
+
+func (r *RunnerReconciler) unregistrationRetryDelay() time.Duration {
+	retryDelay := DefaultUnregistrationRetryDelay
+
+	if r.UnregistrationRetryDelay > 0 {
+		retryDelay = r.UnregistrationRetryDelay
+	}
+	return retryDelay
+}
+
 func (r *RunnerReconciler) processRunnerPodDeletion(ctx context.Context, runner v1alpha1.Runner, log logr.Logger, pod corev1.Pod) (reconcile.Result, error) {
 	deletionTimeout := 1 * time.Minute
 	currentTime := time.Now()
|  | @ -0,0 +1,249 @@ | ||||||
|  | package controllers | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"context" | ||||||
|  | 	"errors" | ||||||
|  | 	"fmt" | ||||||
|  | 	"time" | ||||||
|  | 
 | ||||||
|  | 	"github.com/actions-runner-controller/actions-runner-controller/github" | ||||||
|  | 	"github.com/go-logr/logr" | ||||||
|  | 	gogithub "github.com/google/go-github/v39/github" | ||||||
|  | 	corev1 "k8s.io/api/core/v1" | ||||||
|  | 	ctrl "sigs.k8s.io/controller-runtime" | ||||||
|  | 	"sigs.k8s.io/controller-runtime/pkg/client" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | const ( | ||||||
|  | 	unregistrationCompleteTimestamp = "unregistration-complete-timestamp" | ||||||
|  | 	unregistrationStartTimestamp    = "unregistration-start-timestamp" | ||||||
|  | 
 | ||||||
|  | 	// DefaultUnregistrationTimeout is the duration until ARC gives up retrying the combo of ListRunners API (to detect the runner ID by name)
 | ||||||
|  | 	// and RemoveRunner API (to actually unregister the runner) calls.
 | ||||||
|  | 	// This needs to be longer than 60 seconds because a part of the combo, the ListRunners API, seems to use the Cache-Control header of max-age=60s
 | ||||||
|  | 	// and that instructs our cache library httpcache to cache responses for 60 seconds, which results in ARC unable to see the runner in the ListRunners response
 | ||||||
|  | 	// up to 60 seconds (or even more depending on the situation).
 | ||||||
|  | 	DefaultUnregistrationTimeout = 60 * time.Second | ||||||
|  | 
 | ||||||
|  | 	// This can be any value but a larger value can make an unregistration timeout longer than configured in practice.
 | ||||||
|  | 	DefaultUnregistrationRetryDelay = 30 * time.Second | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | // tickRunnerGracefulStop reconciles the runner and the runner pod in a way so that
 | ||||||
|  | // we can delete the runner pod without disrupting a workflow job.
 | ||||||
|  | //
 | ||||||
|  | // This function returns a non-nil pointer to corev1.Pod as the first return value
 | ||||||
|  | // if the runner is considered to have gracefully stopped, hence it's pod is safe for deletion.
 | ||||||
|  | //
 | ||||||
|  | // It's a "tick" operation so a graceful stop can take multiple calls to complete.
 | ||||||
|  | // This function is designed to complete a length graceful stop process in a unblocking way.
 | ||||||
|  | // When it wants to be retried later, the function returns a non-nil *ctrl.Result as the second return value, may or may not populating the error in the second return value.
 | ||||||
|  | // The caller is expected to return the returned ctrl.Result and error to postpone the current reconcilation loop and trigger a scheduled retry.
 | ||||||
|  | func tickRunnerGracefulStop(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) { | ||||||
|  | 	if pod != nil { | ||||||
|  | 		if _, ok := getAnnotation(pod, unregistrationStartTimestamp); !ok { | ||||||
|  | 			updated := pod.DeepCopy() | ||||||
|  | 			setAnnotation(updated, unregistrationStartTimestamp, time.Now().Format(time.RFC3339)) | ||||||
|  | 			if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil { | ||||||
|  | 				log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", unregistrationStartTimestamp)) | ||||||
|  | 				return nil, &ctrl.Result{}, err | ||||||
|  | 			} | ||||||
|  | 			pod = updated | ||||||
|  | 
 | ||||||
|  | 			log.Info("Runner has started unregistration") | ||||||
|  | 		} else { | ||||||
|  | 			log.Info("Runner has already started unregistration") | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if res, err := ensureRunnerUnregistration(ctx, unregistrationTimeout, retryDelay, log, ghClient, enterprise, organization, repository, runner, pod); res != nil { | ||||||
|  | 		return nil, res, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if pod != nil { | ||||||
|  | 		if _, ok := getAnnotation(pod, unregistrationCompleteTimestamp); !ok { | ||||||
|  | 			updated := pod.DeepCopy() | ||||||
|  | 			setAnnotation(updated, unregistrationCompleteTimestamp, time.Now().Format(time.RFC3339)) | ||||||
|  | 			if err := c.Patch(ctx, updated, client.MergeFrom(pod)); err != nil { | ||||||
|  | 				log.Error(err, fmt.Sprintf("Failed to patch pod to have %s annotation", unregistrationCompleteTimestamp)) | ||||||
|  | 				return nil, &ctrl.Result{}, err | ||||||
|  | 			} | ||||||
|  | 			pod = updated | ||||||
|  | 
 | ||||||
|  | 			log.Info("Runner has completed unregistration") | ||||||
|  | 		} else { | ||||||
|  | 			log.Info("Runner has already completed unregistration") | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return pod, nil, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // If the first return value is nil, it's safe to delete the runner pod.
 | ||||||
|  | func ensureRunnerUnregistration(ctx context.Context, unregistrationTimeout time.Duration, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*ctrl.Result, error) { | ||||||
|  | 	ok, err := unregisterRunner(ctx, ghClient, enterprise, organization, repository, runner) | ||||||
|  | 	if err != nil { | ||||||
|  | 		if errors.Is(err, &gogithub.RateLimitError{}) { | ||||||
|  | 			// We log the underlying error when we failed calling GitHub API to list or unregisters,
 | ||||||
|  | 			// or the runner is still busy.
 | ||||||
|  | 			log.Error( | ||||||
|  | 				err, | ||||||
|  | 				fmt.Sprintf( | ||||||
|  | 					"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls", | ||||||
|  | 					retryDelayOnGitHubAPIRateLimitError, | ||||||
|  | 				), | ||||||
|  | 			) | ||||||
|  | 
 | ||||||
|  | 			return &ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		log.Error(err, "Failed to unregister runner before deleting the pod.") | ||||||
|  | 
 | ||||||
|  | 		return &ctrl.Result{}, err | ||||||
|  | 	} else if ok { | ||||||
|  | 		log.Info("Runner has just been unregistered. Removing the runner pod.") | ||||||
|  | 	} else if pod == nil { | ||||||
|  | 		// `r.unregisterRunner()` will returns `false, nil` if the runner is not found on GitHub.
 | ||||||
|  | 		// However, that doesn't always mean the pod can be safely removed.
 | ||||||
|  | 		//
 | ||||||
|  | 		// If the pod does not exist for the runner,
 | ||||||
|  | 		// it may be due to that the runner pod has never been created.
 | ||||||
|  | 		// In that case we can safely assume that the runner will never be registered.
 | ||||||
|  | 
 | ||||||
|  | 		log.Info("Runner was not found on GitHub and the runner pod was not found on Kuberntes.") | ||||||
|  | 	} else if pod.Annotations[unregistrationCompleteTimestamp] != "" { | ||||||
|  | 		// If it's already unregistered in the previous reconcilation loop,
 | ||||||
|  | 		// you can safely assume that it won't get registered again so it's safe to delete the runner pod.
 | ||||||
|  | 		log.Info("Runner pod is marked as already unregistered.") | ||||||
|  | 	} else if runnerPodOrContainerIsStopped(pod) { | ||||||
|  | 		// If it's an ephemeral runner with the actions/runner container exited with 0,
 | ||||||
|  | 		// we can safely assume that it has unregistered itself from GitHub Actions
 | ||||||
|  | 		// so it's natural that RemoveRunner fails due to 404.
 | ||||||
|  | 
 | ||||||
|  | 		// If pod has ended up succeeded we need to restart it
 | ||||||
|  | 		// Happens e.g. when dind is in runner and run completes
 | ||||||
|  | 		log.Info("Runner pod has been stopped with a successful status.") | ||||||
|  | 	} else if ts := pod.Annotations[unregistrationStartTimestamp]; ts != "" { | ||||||
|  | 		t, err := time.Parse(time.RFC3339, ts) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return &ctrl.Result{RequeueAfter: retryDelay}, err | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if r := time.Until(t.Add(unregistrationTimeout)); r > 0 { | ||||||
|  | 			log.Info("Runner unregistration is in-progress.", "timeout", unregistrationTimeout, "remaining", r) | ||||||
|  | 			return &ctrl.Result{RequeueAfter: retryDelay}, err | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		log.Info("Runner unregistration has been timed out. The runner pod will be deleted soon.", "timeout", unregistrationTimeout) | ||||||
|  | 	} else { | ||||||
|  | 		// A runner and a runner pod that is created by this version of ARC should match
 | ||||||
|  | 		// any of the above branches.
 | ||||||
|  | 		//
 | ||||||
|  | 		// But we leave this match all branch for potential backward-compatibility.
 | ||||||
|  | 		// The caller is expected to take appropriate actions, like annotating the pod as started the unregistration process,
 | ||||||
|  | 		// and retry later.
 | ||||||
|  | 		log.V(1).Info("Runner unregistration is being retried later.") | ||||||
|  | 
 | ||||||
|  | 		return &ctrl.Result{RequeueAfter: retryDelay}, nil | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return nil, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func getAnnotation(pod *corev1.Pod, key string) (string, bool) { | ||||||
|  | 	if pod.Annotations == nil { | ||||||
|  | 		return "", false | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	v, ok := pod.Annotations[key] | ||||||
|  | 
 | ||||||
|  | 	return v, ok | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func setAnnotation(pod *corev1.Pod, key, value string) { | ||||||
|  | 	if pod.Annotations == nil { | ||||||
|  | 		pod.Annotations = map[string]string{} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	pod.Annotations[key] = value | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // unregisterRunner unregisters the runner from GitHub Actions by name.
 | ||||||
|  | //
 | ||||||
|  | // This function returns:
 | ||||||
|  | //
 | ||||||
|  | // Case 1. (true, nil) when it has successfully unregistered the runner.
 | ||||||
|  | // Case 2. (false, nil) when (2-1.) the runner has been already unregistered OR (2-2.) the runner will never be created OR (2-3.) the runner is not created yet and it is about to be registered(hence we couldn't see it's existence from GitHub Actions API yet)
 | ||||||
|  | // Case 3. (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
 | ||||||
|  | //   an error returned by GitHub API.
 | ||||||
|  | //
 | ||||||
|  | // When the returned values is "Case 2. (false, nil)", the caller must handle the three possible sub-cases appropriately.
 | ||||||
|  | // In other words, all those three sub-cases cannot be distinguished by this function alone.
 | ||||||
|  | //
 | ||||||
|  | // - Case "2-1." can happen when e.g. ARC has successfully unregistered in a previous reconcilation loop or it was an ephemeral runner that finished it's job run(an ephemeral runner is designed to stop after a job run).
 | ||||||
|  | //   You'd need to maintain the runner state(i.e. if it's already unregistered or not) somewhere,
 | ||||||
|  | //   so that you can either not call this function at all if the runner state says it's already unregistered, or determine that it's case "2-1." when you got (false, nil).
 | ||||||
|  | //
 | ||||||
|  | // - Case "2-2." can happen when e.g. the runner registration token was somehow broken so that `config.sh` within the runner container was never meant to succeed.
 | ||||||
|  | //   Waiting and retrying forever on this case is not a solution, because `config.sh` won't succeed with a wrong token hence the runner gets stuck in this state forever.
 | ||||||
|  | //   There isn't a perfect solution to this, but a practical workaround would be implement a "grace period" in the caller side.
 | ||||||
|  | //
 | ||||||
|  | // - Case "2-3." can happen when e.g. ARC recreated an ephemral runner pod in a previous reconcilation loop and then it was requested to delete the runner before the runner comes up.
 | ||||||
|  | //   If handled inappropriately, this can cause a race condition betweeen a deletion of the runner pod and GitHub scheduling a workflow job onto the runner.
 | ||||||
|  | //
 | ||||||
|  | // Once you have successfully detected case "2-1." or "2-2.", you can safely delete the runner pod because you know that the runner won't come back
 | ||||||
|  | // unless you recreate the runner pod.
 | ||||||
|  | //
 | ||||||
|  | // If it was "2-3.", you need a workaround to avoid the race condition.
 | ||||||
|  | //
 | ||||||
|  | // You should introduce a "grace period" mechanism, similar or equal to the one required for "Case 2-2.", so that you
 | ||||||
|  | // only start the runner pod deletion once it has become sufficiently unlikely that the runner pod will ever come up.
 | ||||||
|  | //
 | ||||||
|  | // Beware though, you need extra care to set an appropriate grace period depending on your environment.
 | ||||||
|  | // There isn't a single right grace period that works for everyone.
 | ||||||
|  | // The longer the grace period is, the earlier a cluster resource shortage can occur due to throttled runner pod deletions,
 | ||||||
|  | // while the shorter the grace period is, the more likely you are to encounter the race issue.
 | ||||||
|  | func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (bool, error) { | ||||||
|  | 	runners, err := client.ListRunners(ctx, enterprise, org, repo) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return false, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	id := int64(0) | ||||||
|  | 	for _, runner := range runners { | ||||||
|  | 		if runner.GetName() == name { | ||||||
|  | 			id = runner.GetID() | ||||||
|  | 			break | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if id == int64(0) { | ||||||
|  | 		return false, nil | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// For the record, historically ARC did not try to call RemoveRunner on a busy runner, but that's no longer true.
 | ||||||
|  | 	// The reason ARC did so was to avoid prematurely stopping a runner that was still running a job.
 | ||||||
|  | 	//
 | ||||||
|  | 	// However, we learned that RemoveRunner already has the ability to prevent a busy runner from being stopped,
 | ||||||
|  | 	// so ARC doesn't need to do anything special for a graceful runner stop.
 | ||||||
|  | 	// It can just call RemoveRunner, and if it returns 200 you're guaranteed that the runner will not automatically come back and
 | ||||||
|  | 	// the runner pod is safe for deletion.
 | ||||||
|  | 	//
 | ||||||
|  | 	// Trying to remove a busy runner can result in errors like the following:
 | ||||||
|  | 	//    failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
 | ||||||
|  | 	//
 | ||||||
|  | 	// # NOTES
 | ||||||
|  | 	//
 | ||||||
|  | 	// - It can be "status=offline" at the same time but that's another story.
 | ||||||
|  | 	// - After https://github.com/actions-runner-controller/actions-runner-controller/pull/1127, ListRunners responses that are used to
 | ||||||
|  | 	//   determine if the runner is busy can be more outdated than before, as those responses are now cached for 60 seconds.
 | ||||||
|  | 	// - Note that the 60 seconds is controlled by the Cache-Control response header provided by GitHub, so we don't have strict control over it, but we assume it won't
 | ||||||
|  | 	//   change from 60 seconds.
 | ||||||
|  | 	//
 | ||||||
|  | 	// TODO: Probably we can just remove the runner by ID without checking if the runner is busy, treating it as busy when a remove-runner call fails with 422?
 | ||||||
|  | 	if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { | ||||||
|  | 		return false, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return true, nil | ||||||
|  | } | ||||||
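|  |  | ||||||
|  | // Minimal sketch of a caller that distinguishes the cases documented above. The names | ||||||
|  | // unregistrationStartedAt and gracePeriod are hypothetical; the caller would have to track | ||||||
|  | // them itself (e.g. via a pod annotation): | ||||||
|  | // | ||||||
|  | //	ok, err := unregisterRunner(ctx, ghClient, enterprise, org, repo, name) | ||||||
|  | //	if err != nil { | ||||||
|  | //		// Case 3: the runner is busy or the API call failed; requeue and retry later. | ||||||
|  | //	} else if ok { | ||||||
|  | //		// Case 1: unregistered just now; the runner pod is safe to delete. | ||||||
|  | //	} else if time.Since(unregistrationStartedAt) > gracePeriod { | ||||||
|  | //		// Case 2-1 or 2-2: the runner is most likely gone or never coming up; delete the pod. | ||||||
|  | //	} else { | ||||||
|  | //		// Possibly Case 2-3: too early to tell; requeue and re-check later. | ||||||
|  | //	} | ||||||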
|  | @ -47,6 +47,9 @@ type RunnerPodReconciler struct { | ||||||
| 	Name                        string | 	Name                        string | ||||||
| 	RegistrationRecheckInterval time.Duration | 	RegistrationRecheckInterval time.Duration | ||||||
| 	RegistrationRecheckJitter   time.Duration | 	RegistrationRecheckJitter   time.Duration | ||||||
|  | 
 | ||||||
|  | 	UnregistrationTimeout    time.Duration | ||||||
|  | 	UnregistrationRetryDelay time.Duration | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const ( | const ( | ||||||
|  | @ -105,33 +108,16 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( | ||||||
| 		finalizers, removed := removeFinalizer(runnerPod.ObjectMeta.Finalizers, runnerPodFinalizerName) | 		finalizers, removed := removeFinalizer(runnerPod.ObjectMeta.Finalizers, runnerPodFinalizerName) | ||||||
| 
 | 
 | ||||||
| 		if removed { | 		if removed { | ||||||
| 			ok, err := r.unregisterRunner(ctx, enterprise, org, repo, runnerPod.Name) | 			updatedPod, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod) | ||||||
| 			if err != nil { | 			if res != nil { | ||||||
| 				if errors.Is(err, &gogithub.RateLimitError{}) { | 				return *res, err | ||||||
| 					// We log the underlying error when we failed calling GitHub API to list or unregisters,
 |  | ||||||
| 					// or the runner is still busy.
 |  | ||||||
| 					log.Error( |  | ||||||
| 						err, |  | ||||||
| 						fmt.Sprintf( |  | ||||||
| 							"Failed to unregister runner due to GitHub API rate limits. Delaying retry for %s to avoid excessive GitHub API calls", |  | ||||||
| 							retryDelayOnGitHubAPIRateLimitError, |  | ||||||
| 						), |  | ||||||
| 					) |  | ||||||
| 
 |  | ||||||
| 					return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err |  | ||||||
| 				} |  | ||||||
| 
 |  | ||||||
| 				return ctrl.Result{}, err |  | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			if !ok { | 			patchedPod := updatedPod.DeepCopy() | ||||||
| 				log.V(1).Info("Runner no longer exists on GitHub") | 			patchedPod.ObjectMeta.Finalizers = finalizers | ||||||
| 			} |  | ||||||
| 
 | 
 | ||||||
| 			newRunner := runnerPod.DeepCopy() | 			// We commit the removal of the finalizer so that Kubernetes notices it and deletes the pod resource from the cluster.
 | ||||||
| 			newRunner.ObjectMeta.Finalizers = finalizers | 			if err := r.Patch(ctx, patchedPod, client.MergeFrom(&runnerPod)); err != nil { | ||||||
| 
 |  | ||||||
| 			if err := r.Patch(ctx, newRunner, client.MergeFrom(&runnerPod)); err != nil { |  | ||||||
| 				log.Error(err, "Failed to update runner for finalizer removal") | 				log.Error(err, "Failed to update runner for finalizer removal") | ||||||
| 				return ctrl.Result{}, err | 				return ctrl.Result{}, err | ||||||
| 			} | 			} | ||||||
|  | @ -365,8 +351,13 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( | ||||||
| 		return ctrl.Result{}, nil | 		return ctrl.Result{}, nil | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	updated, res, err := tickRunnerGracefulStop(ctx, r.unregistrationTimeout(), r.unregistrationRetryDelay(), log, r.GitHubClient, r.Client, enterprise, org, repo, runnerPod.Name, &runnerPod) | ||||||
|  | 	if res != nil { | ||||||
|  | 		return *res, err | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	// Delete current pod if recreation is needed
 | 	// Delete current pod if recreation is needed
 | ||||||
| 	if err := r.Delete(ctx, &runnerPod); err != nil { | 	if err := r.Delete(ctx, updated); err != nil { | ||||||
| 		log.Error(err, "Failed to delete pod resource") | 		log.Error(err, "Failed to delete pod resource") | ||||||
| 		return ctrl.Result{}, err | 		return ctrl.Result{}, err | ||||||
| 	} | 	} | ||||||
|  | @ -377,8 +368,22 @@ func (r *RunnerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( | ||||||
| 	return ctrl.Result{}, nil | 	return ctrl.Result{}, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (r *RunnerPodReconciler) unregisterRunner(ctx context.Context, enterprise, org, repo, name string) (bool, error) { | func (r *RunnerPodReconciler) unregistrationTimeout() time.Duration { | ||||||
| 	return unregisterRunner(ctx, r.GitHubClient, enterprise, org, repo, name) | 	unregistrationTimeout := DefaultUnregistrationTimeout | ||||||
|  | 
 | ||||||
|  | 	if r.UnregistrationTimeout > 0 { | ||||||
|  | 		unregistrationTimeout = r.UnregistrationTimeout | ||||||
|  | 	} | ||||||
|  | 	return unregistrationTimeout | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (r *RunnerPodReconciler) unregistrationRetryDelay() time.Duration { | ||||||
|  | 	retryDelay := DefaultUnregistrationRetryDelay | ||||||
|  | 
 | ||||||
|  | 	if r.UnregistrationRetryDelay > 0 { | ||||||
|  | 		retryDelay = r.UnregistrationRetryDelay | ||||||
|  | 	} | ||||||
|  | 	return retryDelay | ||||||
| } | } | ||||||
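|  |  | ||||||
|  | // Wiring sketch (field names are from this change; the surrounding setup is assumed): a zero | ||||||
|  | // value falls back to DefaultUnregistrationTimeout/DefaultUnregistrationRetryDelay via the | ||||||
|  | // accessors above, so only non-default values need to be set explicitly: | ||||||
|  | // | ||||||
|  | //	r := &RunnerPodReconciler{ | ||||||
|  | //		UnregistrationTimeout:    time.Minute, | ||||||
|  | //		UnregistrationRetryDelay: 30 * time.Second, | ||||||
|  | //		// ...other fields... | ||||||
|  | //	} | ||||||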
| 
 | 
 | ||||||
| func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error { | func (r *RunnerPodReconciler) SetupWithManager(mgr ctrl.Manager) error { | ||||||
|  |  | ||||||
|  | @ -177,6 +177,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req | ||||||
| 	// Please add more conditions under which we can in-place update the newest runnerreplicaset without disruption
 | 	// Please add more conditions under which we can in-place update the newest runnerreplicaset without disruption
 | ||||||
| 	if currentDesiredReplicas != newDesiredReplicas { | 	if currentDesiredReplicas != newDesiredReplicas { | ||||||
| 		newestSet.Spec.Replicas = &newDesiredReplicas | 		newestSet.Spec.Replicas = &newDesiredReplicas | ||||||
|  | 		newestSet.Spec.EffectiveTime = rd.Spec.EffectiveTime | ||||||
| 
 | 
 | ||||||
| 		if err := r.Client.Update(ctx, newestSet); err != nil { | 		if err := r.Client.Update(ctx, newestSet); err != nil { | ||||||
| 			log.Error(err, "Failed to update runnerreplicaset resource") | 			log.Error(err, "Failed to update runnerreplicaset resource") | ||||||
|  | @ -417,9 +418,10 @@ func newRunnerReplicaSet(rd *v1alpha1.RunnerDeployment, commonRunnerLabels []str | ||||||
| 			Labels:       newRSTemplate.ObjectMeta.Labels, | 			Labels:       newRSTemplate.ObjectMeta.Labels, | ||||||
| 		}, | 		}, | ||||||
| 		Spec: v1alpha1.RunnerReplicaSetSpec{ | 		Spec: v1alpha1.RunnerReplicaSetSpec{ | ||||||
| 			Replicas: rd.Spec.Replicas, | 			Replicas:      rd.Spec.Replicas, | ||||||
| 			Selector: newRSSelector, | 			Selector:      newRSSelector, | ||||||
| 			Template: newRSTemplate, | 			Template:      newRSTemplate, | ||||||
|  | 			EffectiveTime: rd.Spec.EffectiveTime, | ||||||
| 		}, | 		}, | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -49,6 +49,10 @@ type RunnerReplicaSetReconciler struct { | ||||||
| 	Name         string | 	Name         string | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | const ( | ||||||
|  | 	SyncTimeAnnotationKey = "sync-time" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
 | // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
 | ||||||
| // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
 | // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
 | ||||||
| // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
 | // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
 | ||||||
|  | @ -85,19 +89,36 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	var myRunners []v1alpha1.Runner |  | ||||||
| 
 |  | ||||||
| 	var ( | 	var ( | ||||||
| 		current   int | 		current   int | ||||||
| 		ready     int | 		ready     int | ||||||
| 		available int | 		available int | ||||||
|  | 
 | ||||||
|  | 		lastSyncTime *time.Time | ||||||
| 	) | 	) | ||||||
| 
 | 
 | ||||||
| 	for _, r := range allRunners.Items { | 	for _, r := range allRunners.Items { | ||||||
| 		// This guard is required to prevent the RunnerReplicaSet created by the controller v0.17.0 or before
 | 		// This guard is required to prevent the RunnerReplicaSet created by the controller v0.17.0 or before
 | ||||||
| 		// from treating all the runners in the namespace as its children.
 | 		// from treating all the runners in the namespace as its children.
 | ||||||
| 		if metav1.IsControlledBy(&r, &rs) && !metav1.HasAnnotation(r.ObjectMeta, annotationKeyRegistrationOnly) { | 		if metav1.IsControlledBy(&r, &rs) && !metav1.HasAnnotation(r.ObjectMeta, annotationKeyRegistrationOnly) { | ||||||
| 			myRunners = append(myRunners, r) | 			// If the runner is already marked for deletion (i.e. has a non-zero deletion timestamp) by the runner controller (which can happen on ephemeral runner completion)
 | ||||||
|  | 			// or by the runnerreplicaset controller (in case it was deleted in a previous reconciliation loop),
 | ||||||
|  | 			// we don't need to bother calling the GitHub API to re-mark the runner for deletion.
 | ||||||
|  | 			// Just hold on, and runners will disappear as long as the runner controller is up and running.
 | ||||||
|  | 			if !r.DeletionTimestamp.IsZero() { | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			if r.Annotations != nil { | ||||||
|  | 				if a, ok := r.Annotations[SyncTimeAnnotationKey]; ok { | ||||||
|  | 					t, err := time.Parse(time.RFC3339, a) | ||||||
|  | 					if err == nil { | ||||||
|  | 						if lastSyncTime == nil || lastSyncTime.Before(t) { | ||||||
|  | 							lastSyncTime = &t | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
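|  | 			// For example (illustrative timestamp), a runner created by newRunner below carries | ||||||
|  | 			// an annotation like sync-time: "2022-02-14T09:30:00Z"; parsing it back with | ||||||
|  | 			// time.Parse(time.RFC3339, ...) lets us track the latest sync time across all | ||||||
|  | 			// runners owned by this RunnerReplicaSet. | ||||||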
| 
 | 
 | ||||||
| 			current += 1 | 			current += 1 | ||||||
| 
 | 
 | ||||||
|  | @ -152,7 +173,30 @@ func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Req | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if current > desired { | 	effectiveTime := rs.Spec.EffectiveTime | ||||||
|  | 	ephemeral := rs.Spec.Template.Spec.Ephemeral == nil || *rs.Spec.Template.Spec.Ephemeral | ||||||
|  | 
 | ||||||
|  | 	if current < desired && ephemeral && lastSyncTime != nil && effectiveTime != nil && lastSyncTime.After(effectiveTime.Time) { | ||||||
|  | 		log.V(1).Info("Detected that some ephemeral runners have disappeared. Usually this is due to ephemeral runner completions, so ARC does not create new runners until EffectiveTime is updated.", "lastSyncTime", metav1.Time{Time: *lastSyncTime}, "effectiveTime", *effectiveTime, "desired", desired, "available", current, "ready", ready) | ||||||
|  | 	} else if current > desired { | ||||||
|  | 		// If you use ephemeral runners with the webhook-based autoscaler and the runner controller is working normally,
 | ||||||
|  | 		// you're unlikely to fall into this branch.
 | ||||||
|  | 		//
 | ||||||
|  | 		// That's because all the stakeholders work like this:
 | ||||||
|  | 		//
 | ||||||
|  | 		// 1. A runner pod completes with the runner container exiting with code 0
 | ||||||
|  | 		// 2. ARC runner controller detects the pod completion, marks the runner resource on k8s for deletion (=Runner.DeletionTimestamp becomes non-zero)
 | ||||||
|  | 		// 3. GitHub triggers a corresponding workflow_job "complete" webhook event
 | ||||||
|  | 		// 4. ARC github-webhook-server (webhook-based autoscaler) receives the webhook event and updates the HRA by removing the oldest capacity reservation
 | ||||||
|  | 		// 5. ARC horizontalrunnerautoscaler updates RunnerDeployment's desired replicas based on capacity reservations
 | ||||||
|  | 		// 6. ARC runnerdeployment controller updates RunnerReplicaSet's desired replicas
 | ||||||
|  | 		// 7. (We're here) ARC runnerreplicaset controller (this controller) starts reconciling the RunnerReplicaSet
 | ||||||
|  | 		//
 | ||||||
|  | 		// In a normally working ARC installation, the runner that was used to run the workflow job should already have been
 | ||||||
|  | 		// marked for deletion by the runner controller.
 | ||||||
|  | 		// This runnerreplicaset controller doesn't count runners marked for deletion into the `current` value, hence you're unlikely to
 | ||||||
|  | 		// fall into this branch when you're using ephemeral runners with the webhook-based autoscaler.
 | ||||||
|  | 
 | ||||||
| 		n := current - desired | 		n := current - desired | ||||||
| 
 | 
 | ||||||
| 		log.V(0).Info(fmt.Sprintf("Deleting %d runners from RunnerReplicaSet %s", n, req.NamespacedName), "desired", desired, "current", current, "ready", ready) | 		log.V(0).Info(fmt.Sprintf("Deleting %d runners from RunnerReplicaSet %s", n, req.NamespacedName), "desired", desired, "current", current, "ready", ready) | ||||||
|  | @ -282,6 +326,10 @@ func (r *RunnerReplicaSetReconciler) newRunner(rs v1alpha1.RunnerReplicaSet) (v1 | ||||||
| 
 | 
 | ||||||
| 	objectMeta.GenerateName = rs.ObjectMeta.Name + "-" | 	objectMeta.GenerateName = rs.ObjectMeta.Name + "-" | ||||||
| 	objectMeta.Namespace = rs.ObjectMeta.Namespace | 	objectMeta.Namespace = rs.ObjectMeta.Namespace | ||||||
|  | 	if objectMeta.Annotations == nil { | ||||||
|  | 		objectMeta.Annotations = map[string]string{} | ||||||
|  | 	} | ||||||
|  | 	objectMeta.Annotations[SyncTimeAnnotationKey] = time.Now().Format(time.RFC3339) | ||||||
| 
 | 
 | ||||||
| 	runner := v1alpha1.Runner{ | 	runner := v1alpha1.Runner{ | ||||||
| 		TypeMeta:   metav1.TypeMeta{}, | 		TypeMeta:   metav1.TypeMeta{}, | ||||||
|  |  | ||||||
|  | @ -1,49 +0,0 @@ | ||||||
| package controllers |  | ||||||
| 
 |  | ||||||
| import ( |  | ||||||
| 	"context" |  | ||||||
| 	"fmt" |  | ||||||
| 
 |  | ||||||
| 	"github.com/actions-runner-controller/actions-runner-controller/github" |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| // unregisterRunner unregisters the runner from GitHub Actions by name.
 |  | ||||||
| //
 |  | ||||||
| // This function returns:
 |  | ||||||
| // - (true, nil) when it has successfully unregistered the runner.
 |  | ||||||
| // - (false, nil) when the runner has been already unregistered.
 |  | ||||||
| // - (false, err) when it postponed unregistration due to the runner being busy, or it tried to unregister the runner but failed due to
 |  | ||||||
| //   an error returned by GitHub API.
 |  | ||||||
| func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo, name string) (bool, error) { |  | ||||||
| 	runners, err := client.ListRunners(ctx, enterprise, org, repo) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return false, err |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	id := int64(0) |  | ||||||
| 	for _, runner := range runners { |  | ||||||
| 		if runner.GetName() == name { |  | ||||||
| 			// Note that sometimes a runner can be stuck "busy" even though it is already "offline".
 |  | ||||||
| 			// But we assume that it's not actually offline and still running a job.
 |  | ||||||
| 			if runner.GetBusy() { |  | ||||||
| 				return false, fmt.Errorf("runner is busy") |  | ||||||
| 			} |  | ||||||
| 			id = runner.GetID() |  | ||||||
| 			break |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	if id == int64(0) { |  | ||||||
| 		return false, nil |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	// Trying to remove a busy runner can result in errors like the following:
 |  | ||||||
| 	//    failed to remove runner: DELETE https://api.github.com/repos/actions-runner-controller/mumoshu-actions-test/actions/runners/47: 422 Bad request - Runner \"example-runnerset-0\" is still running a job\" []
 |  | ||||||
| 	//
 |  | ||||||
| 	// TODO: Probably we can just remove the runner by ID without seeing if the runner is busy, by treating it as busy when a remove-runner call failed with 422?
 |  | ||||||
| 	if err := client.RemoveRunner(ctx, enterprise, org, repo, id); err != nil { |  | ||||||
| 		return false, err |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	return true, nil |  | ||||||
| } |  | ||||||
|  | @ -11,8 +11,11 @@ import ( | ||||||
| 	"time" | 	"time" | ||||||
| 
 | 
 | ||||||
| 	"github.com/actions-runner-controller/actions-runner-controller/github/metrics" | 	"github.com/actions-runner-controller/actions-runner-controller/github/metrics" | ||||||
|  | 	"github.com/actions-runner-controller/actions-runner-controller/logging" | ||||||
| 	"github.com/bradleyfalzon/ghinstallation" | 	"github.com/bradleyfalzon/ghinstallation" | ||||||
|  | 	"github.com/go-logr/logr" | ||||||
| 	"github.com/google/go-github/v39/github" | 	"github.com/google/go-github/v39/github" | ||||||
|  | 	"github.com/gregjones/httpcache" | ||||||
| 	"golang.org/x/oauth2" | 	"golang.org/x/oauth2" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | @ -28,6 +31,8 @@ type Config struct { | ||||||
| 	BasicauthUsername string `split_words:"true"` | 	BasicauthUsername string `split_words:"true"` | ||||||
| 	BasicauthPassword string `split_words:"true"` | 	BasicauthPassword string `split_words:"true"` | ||||||
| 	RunnerGitHubURL   string `split_words:"true"` | 	RunnerGitHubURL   string `split_words:"true"` | ||||||
|  | 
 | ||||||
|  | 	Log *logr.Logger | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Client wraps GitHub client with some additional
 | // Client wraps GitHub client with some additional
 | ||||||
|  | @ -46,7 +51,6 @@ type BasicAuthTransport struct { | ||||||
| 
 | 
 | ||||||
| func (p BasicAuthTransport) RoundTrip(req *http.Request) (*http.Response, error) { | func (p BasicAuthTransport) RoundTrip(req *http.Request) (*http.Response, error) { | ||||||
| 	req.SetBasicAuth(p.Username, p.Password) | 	req.SetBasicAuth(p.Username, p.Password) | ||||||
| 	req.Header.Set("User-Agent", "actions-runner-controller") |  | ||||||
| 	return http.DefaultTransport.RoundTrip(req) | 	return http.DefaultTransport.RoundTrip(req) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -82,8 +86,11 @@ func (c *Config) NewClient() (*Client, error) { | ||||||
| 		transport = tr | 		transport = tr | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	transport = metrics.Transport{Transport: transport} | 	cached := httpcache.NewTransport(httpcache.NewMemoryCache()) | ||||||
| 	httpClient := &http.Client{Transport: transport} | 	cached.Transport = transport | ||||||
|  | 	loggingTransport := logging.Transport{Transport: cached, Log: c.Log} | ||||||
|  | 	metricsTransport := metrics.Transport{Transport: loggingTransport} | ||||||
|  | 	httpClient := &http.Client{Transport: metricsTransport} | ||||||
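|  | 	// The resulting RoundTripper chain, outermost first, is metrics -> logging -> cache -> | ||||||
|  | 	// base transport, so the logging transport can observe the httpcache.XFromCache header | ||||||
|  | 	// set by the caching layer before metrics are recorded. | ||||||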
| 
 | 
 | ||||||
| 	var client *github.Client | 	var client *github.Client | ||||||
| 	var githubBaseURL string | 	var githubBaseURL string | ||||||
|  | @ -128,6 +135,8 @@ func (c *Config) NewClient() (*Client, error) { | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	client.UserAgent = "actions-runner-controller" | ||||||
|  | 
 | ||||||
| 	return &Client{ | 	return &Client{ | ||||||
| 		Client:        client, | 		Client:        client, | ||||||
| 		regTokens:     map[string]*github.RegistrationToken{}, | 		regTokens:     map[string]*github.RegistrationToken{}, | ||||||
|  |  | ||||||
|  | @ -152,3 +152,10 @@ func TestCleanup(t *testing.T) { | ||||||
| 		t.Errorf("expired token still exists") | 		t.Errorf("expired token still exists") | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | func TestUserAgent(t *testing.T) { | ||||||
|  | 	client := newTestClient() | ||||||
|  | 	if client.UserAgent != "actions-runner-controller" { | ||||||
|  | 		t.Errorf("UserAgent should be set to actions-runner-controller") | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
							
								
								
									
go.mod
|  | @ -41,6 +41,7 @@ require ( | ||||||
| 	github.com/google/gofuzz v1.1.0 // indirect | 	github.com/google/gofuzz v1.1.0 // indirect | ||||||
| 	github.com/google/uuid v1.1.2 // indirect | 	github.com/google/uuid v1.1.2 // indirect | ||||||
| 	github.com/googleapis/gnostic v0.5.5 // indirect | 	github.com/googleapis/gnostic v0.5.5 // indirect | ||||||
|  | 	github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect | ||||||
| 	github.com/imdario/mergo v0.3.12 // indirect | 	github.com/imdario/mergo v0.3.12 // indirect | ||||||
| 	github.com/json-iterator/go v1.1.12 // indirect | 	github.com/json-iterator/go v1.1.12 // indirect | ||||||
| 	github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect | 	github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect | ||||||
|  |  | ||||||
							
								
								
									
go.sum
|  | @ -259,6 +259,8 @@ github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= | ||||||
| github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= | github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= | ||||||
| github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= | github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= | ||||||
| github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= | github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= | ||||||
|  | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= | ||||||
|  | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= | ||||||
| github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= | github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= | ||||||
| github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= | github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= | ||||||
| github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= | ||||||
|  |  | ||||||
|  | @ -0,0 +1,54 @@ | ||||||
|  | package logging | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  | 	"os" | ||||||
|  | 	"strconv" | ||||||
|  | 
 | ||||||
|  | 	"github.com/go-logr/logr" | ||||||
|  | 	zaplib "go.uber.org/zap" | ||||||
|  | 	"go.uber.org/zap/zapcore" | ||||||
|  | 	"sigs.k8s.io/controller-runtime/pkg/log/zap" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | const ( | ||||||
|  | 	LogLevelDebug = "debug" | ||||||
|  | 	LogLevelInfo  = "info" | ||||||
|  | 	LogLevelWarn  = "warn" | ||||||
|  | 	LogLevelError = "error" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | func NewLogger(logLevel string) logr.Logger { | ||||||
|  | 	log := zap.New(func(o *zap.Options) { | ||||||
|  | 		switch logLevel { | ||||||
|  | 		case LogLevelDebug: | ||||||
|  | 			o.Development = true | ||||||
|  | 			lvl := zaplib.NewAtomicLevelAt(zaplib.DebugLevel) // maps to logr's V(1)
 | ||||||
|  | 			o.Level = &lvl | ||||||
|  | 		case LogLevelInfo: | ||||||
|  | 			lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel) | ||||||
|  | 			o.Level = &lvl | ||||||
|  | 		case LogLevelWarn: | ||||||
|  | 			lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel) | ||||||
|  | 			o.Level = &lvl | ||||||
|  | 		case LogLevelError: | ||||||
|  | 			lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel) | ||||||
|  | 			o.Level = &lvl | ||||||
|  | 		default: | ||||||
|  | 			// We use a bit size of 8 as zapcore.Level is defined as an int8
 | ||||||
|  | 			levelInt, err := strconv.ParseInt(logLevel, 10, 8) | ||||||
|  | 			if err != nil { | ||||||
|  | 				fmt.Fprintf(os.Stderr, "Failed to parse --log-level=%s: %v\n", logLevel, err) | ||||||
|  | 				os.Exit(1) | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			// For example, --log-level=debug, a.k.a. --log-level=-1, maps to zaplib.DebugLevel, which is associated with logr's V(1),
 | ||||||
|  | 			// while --log-level=-2 maps to the custom log level that is associated with logr's V(2).
 | ||||||
|  | 			level := zapcore.Level(levelInt) | ||||||
|  | 			atomicLevel := zaplib.NewAtomicLevelAt(level) | ||||||
|  | 			o.Level = &atomicLevel | ||||||
|  | 		} | ||||||
|  | 	}) | ||||||
|  | 
 | ||||||
|  | 	return log | ||||||
|  | } | ||||||
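|  |  | ||||||
|  | // Usage sketch (illustrative): named levels map onto the zap levels above, while a | ||||||
|  | // numeric value selects a custom verbosity, e.g. enabling logr's V(2) and below: | ||||||
|  | // | ||||||
|  | //	log := logging.NewLogger("-2") | ||||||
|  | //	log.V(2).Info("fairly verbose message") // emitted at this level | ||||||
|  | //	log.V(3).Info("even more verbose")      // suppressed at this level | ||||||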
|  | @ -0,0 +1,51 @@ | ||||||
|  | // Package logging provides various logging helpers for ARC
 | ||||||
|  | package logging | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"net/http" | ||||||
|  | 
 | ||||||
|  | 	"github.com/go-logr/logr" | ||||||
|  | 	"github.com/gregjones/httpcache" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | const ( | ||||||
|  | 	// https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting
 | ||||||
|  | 	headerRateLimitRemaining = "X-RateLimit-Remaining" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | // Transport wraps a transport with logging of HTTP requests and responses
 | ||||||
|  | type Transport struct { | ||||||
|  | 	Transport http.RoundTripper | ||||||
|  | 
 | ||||||
|  | 	Log *logr.Logger | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (t Transport) RoundTrip(req *http.Request) (*http.Response, error) { | ||||||
|  | 	resp, err := t.Transport.RoundTrip(req) | ||||||
|  | 	if resp != nil { | ||||||
|  | 		t.log(req, resp) | ||||||
|  | 	} | ||||||
|  | 	return resp, err | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (t Transport) log(req *http.Request, resp *http.Response) { | ||||||
|  | 	if t.Log == nil { | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	var args []interface{} | ||||||
|  | 
 | ||||||
|  | 	marked := resp.Header.Get(httpcache.XFromCache) == "1" | ||||||
|  | 
 | ||||||
|  | 	args = append(args, "from_cache", marked, "method", req.Method, "url", req.URL.String()) | ||||||
|  | 
 | ||||||
|  | 	if !marked { | ||||||
|  | 		// Do not log outdated rate limit remaining value
 | ||||||
|  | 
 | ||||||
|  | 		remaining := resp.Header.Get(headerRateLimitRemaining) | ||||||
|  | 
 | ||||||
|  | 		args = append(args, "ratelimit_remaining", remaining) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	t.Log.V(3).Info("Seen HTTP response", args...) | ||||||
|  | } | ||||||
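|  |  | ||||||
|  | // Wiring sketch (illustrative; mirrors how github.Config.NewClient composes transports). | ||||||
|  | // Note that responses are logged at V(3), so the logger must be configured verbosely | ||||||
|  | // enough (e.g. --log-level=-3) for these lines to show up: | ||||||
|  | // | ||||||
|  | //	log := logging.NewLogger("-3") | ||||||
|  | //	httpClient := &http.Client{ | ||||||
|  | //		Transport: logging.Transport{Transport: http.DefaultTransport, Log: &log}, | ||||||
|  | //	} | ||||||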
							
								
								
									
main.go
|  | @ -26,24 +26,18 @@ import ( | ||||||
| 	actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1" | 	actionsv1alpha1 "github.com/actions-runner-controller/actions-runner-controller/api/v1alpha1" | ||||||
| 	"github.com/actions-runner-controller/actions-runner-controller/controllers" | 	"github.com/actions-runner-controller/actions-runner-controller/controllers" | ||||||
| 	"github.com/actions-runner-controller/actions-runner-controller/github" | 	"github.com/actions-runner-controller/actions-runner-controller/github" | ||||||
|  | 	"github.com/actions-runner-controller/actions-runner-controller/logging" | ||||||
| 	"github.com/kelseyhightower/envconfig" | 	"github.com/kelseyhightower/envconfig" | ||||||
| 	zaplib "go.uber.org/zap" |  | ||||||
| 	"k8s.io/apimachinery/pkg/runtime" | 	"k8s.io/apimachinery/pkg/runtime" | ||||||
| 	clientgoscheme "k8s.io/client-go/kubernetes/scheme" | 	clientgoscheme "k8s.io/client-go/kubernetes/scheme" | ||||||
| 	_ "k8s.io/client-go/plugin/pkg/client/auth/gcp" | 	_ "k8s.io/client-go/plugin/pkg/client/auth/gcp" | ||||||
| 	ctrl "sigs.k8s.io/controller-runtime" | 	ctrl "sigs.k8s.io/controller-runtime" | ||||||
| 	"sigs.k8s.io/controller-runtime/pkg/log/zap" |  | ||||||
| 	// +kubebuilder:scaffold:imports
 | 	// +kubebuilder:scaffold:imports
 | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| const ( | const ( | ||||||
| 	defaultRunnerImage = "summerwind/actions-runner:latest" | 	defaultRunnerImage = "summerwind/actions-runner:latest" | ||||||
| 	defaultDockerImage = "docker:dind" | 	defaultDockerImage = "docker:dind" | ||||||
| 
 |  | ||||||
| 	logLevelDebug = "debug" |  | ||||||
| 	logLevelInfo  = "info" |  | ||||||
| 	logLevelWarn  = "warn" |  | ||||||
| 	logLevelError = "error" |  | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| var ( | var ( | ||||||
|  | @ -120,24 +114,12 @@ func main() { | ||||||
| 	flag.DurationVar(&syncPeriod, "sync-period", 10*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled. When you use autoscaling, set this to a lower value like 10 minutes, because it corresponds to the minimum time to react to demand changes. If you're tweaking this in order to make autoscaling more responsive, you'll probably want to tweak github-api-cache-duration, too") | 	flag.DurationVar(&syncPeriod, "sync-period", 10*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled. When you use autoscaling, set this to a lower value like 10 minutes, because it corresponds to the minimum time to react to demand changes. If you're tweaking this in order to make autoscaling more responsive, you'll probably want to tweak github-api-cache-duration, too") | ||||||
| 	flag.Var(&commonRunnerLabels, "common-runner-labels", "Runner labels in the K1=V1,K2=V2,... format that are inherited by all the runners created by the controller. See https://github.com/actions-runner-controller/actions-runner-controller/issues/321 for more information") | 	flag.Var(&commonRunnerLabels, "common-runner-labels", "Runner labels in the K1=V1,K2=V2,... format that are inherited by all the runners created by the controller. See https://github.com/actions-runner-controller/actions-runner-controller/issues/321 for more information") | ||||||
| 	flag.StringVar(&namespace, "watch-namespace", "", "The namespace to watch for custom resources. Set to empty for letting it watch for all namespaces.") | 	flag.StringVar(&namespace, "watch-namespace", "", "The namespace to watch for custom resources. Set to empty for letting it watch for all namespaces.") | ||||||
| 	flag.StringVar(&logLevel, "log-level", logLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`) | 	flag.StringVar(&logLevel, "log-level", logging.LogLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`) | ||||||
| 	flag.Parse() | 	flag.Parse() | ||||||
| 
 | 
 | ||||||
| 	logger := zap.New(func(o *zap.Options) { | 	logger := logging.NewLogger(logLevel) | ||||||
| 		switch logLevel { | 
 | ||||||
| 		case logLevelDebug: | 	c.Log = &logger | ||||||
| 			o.Development = true |  | ||||||
| 		case logLevelInfo: |  | ||||||
| 			lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel) |  | ||||||
| 			o.Level = &lvl |  | ||||||
| 		case logLevelWarn: |  | ||||||
| 			lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel) |  | ||||||
| 			o.Level = &lvl |  | ||||||
| 		case logLevelError: |  | ||||||
| 			lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel) |  | ||||||
| 			o.Level = &lvl |  | ||||||
| 		} |  | ||||||
| 	}) |  | ||||||
| 
 | 
 | ||||||
| 	ghClient, err = c.NewClient() | 	ghClient, err = c.NewClient() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
|  |  | ||||||
|  | @ -17,12 +17,15 @@ var ( | ||||||
| 	controllerImageTag  = "e2e" | 	controllerImageTag  = "e2e" | ||||||
| 	controllerImage     = testing.Img(controllerImageRepo, controllerImageTag) | 	controllerImage     = testing.Img(controllerImageRepo, controllerImageTag) | ||||||
| 	runnerImageRepo     = "actionsrunnercontrollere2e/actions-runner" | 	runnerImageRepo     = "actionsrunnercontrollere2e/actions-runner" | ||||||
|  | 	runnerDindImageRepo = "actionsrunnercontrollere2e/actions-runner-dind" | ||||||
| 	runnerImageTag      = "e2e" | 	runnerImageTag      = "e2e" | ||||||
| 	runnerImage         = testing.Img(runnerImageRepo, runnerImageTag) | 	runnerImage         = testing.Img(runnerImageRepo, runnerImageTag) | ||||||
|  | 	runnerDindImage     = testing.Img(runnerDindImageRepo, runnerImageTag) | ||||||
| 
 | 
 | ||||||
| 	prebuildImages = []testing.ContainerImage{ | 	prebuildImages = []testing.ContainerImage{ | ||||||
| 		controllerImage, | 		controllerImage, | ||||||
| 		runnerImage, | 		runnerImage, | ||||||
|  | 		runnerDindImage, | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	builds = []testing.DockerBuild{ | 	builds = []testing.DockerBuild{ | ||||||
|  | @ -36,6 +39,11 @@ var ( | ||||||
| 			Args:       []testing.BuildArg{}, | 			Args:       []testing.BuildArg{}, | ||||||
| 			Image:      runnerImage, | 			Image:      runnerImage, | ||||||
| 		}, | 		}, | ||||||
|  | 		{ | ||||||
|  | 			Dockerfile: "../../runner/Dockerfile.dindrunner", | ||||||
|  | 			Args:       []testing.BuildArg{}, | ||||||
|  | 			Image:      runnerDindImage, | ||||||
|  | 		}, | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	certManagerVersion = "v1.1.1" | 	certManagerVersion = "v1.1.1" | ||||||
|  | @ -52,7 +60,6 @@ var ( | ||||||
| 		"SYNC_PERIOD=" + "10s", | 		"SYNC_PERIOD=" + "10s", | ||||||
| 		"NAME=" + controllerImageRepo, | 		"NAME=" + controllerImageRepo, | ||||||
| 		"VERSION=" + controllerImageTag, | 		"VERSION=" + controllerImageTag, | ||||||
| 		"RUNNER_NAME=" + runnerImageRepo, |  | ||||||
| 		"RUNNER_TAG=" + runnerImageTag, | 		"RUNNER_TAG=" + runnerImageTag, | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -167,11 +174,15 @@ type env struct { | ||||||
| 	useRunnerSet bool | 	useRunnerSet bool | ||||||
| 
 | 
 | ||||||
| 	testID                                                   string | 	testID                                                   string | ||||||
|  | 	testName                                                 string | ||||||
| 	repoToCommit                                             string | 	repoToCommit                                             string | ||||||
| 	runnerLabel, githubToken, testRepo, testOrg, testOrgRepo string | 	runnerLabel, githubToken, testRepo, testOrg, testOrgRepo string | ||||||
| 	githubTokenWebhook                                       string | 	githubTokenWebhook                                       string | ||||||
| 	testEnterprise                                           string | 	testEnterprise                                           string | ||||||
| 	featureFlagEphemeral                                     bool | 	featureFlagEphemeral                                     bool | ||||||
|  | 	scaleDownDelaySecondsAfterScaleOut                       int64 | ||||||
|  | 	minReplicas                                              int64 | ||||||
|  | 	dockerdWithinRunnerContainer                             bool | ||||||
| 	testJobs                                                 []job | 	testJobs                                                 []job | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -184,11 +195,12 @@ func initTestEnv(t *testing.T) *env { | ||||||
| 
 | 
 | ||||||
| 	id := e.ID() | 	id := e.ID() | ||||||
| 
 | 
 | ||||||
| 	testID := t.Name() + " " + id | 	testName := t.Name() + " " + id | ||||||
| 
 | 
 | ||||||
| 	t.Logf("Using test id %s", testID) | 	t.Logf("Initializing test with name %s", testName) | ||||||
| 
 | 
 | ||||||
| 	e.testID = testID | 	e.testID = id | ||||||
|  | 	e.testName = testName | ||||||
| 	e.runnerLabel = "test-" + id | 	e.runnerLabel = "test-" + id | ||||||
| 	e.githubToken = testing.Getenv(t, "GITHUB_TOKEN") | 	e.githubToken = testing.Getenv(t, "GITHUB_TOKEN") | ||||||
| 	e.githubTokenWebhook = testing.Getenv(t, "WEBHOOK_GITHUB_TOKEN") | 	e.githubTokenWebhook = testing.Getenv(t, "WEBHOOK_GITHUB_TOKEN") | ||||||
|  | @ -197,9 +209,17 @@ func initTestEnv(t *testing.T) *env { | ||||||
| 	e.testOrg = testing.Getenv(t, "TEST_ORG", "") | 	e.testOrg = testing.Getenv(t, "TEST_ORG", "") | ||||||
| 	e.testOrgRepo = testing.Getenv(t, "TEST_ORG_REPO", "") | 	e.testOrgRepo = testing.Getenv(t, "TEST_ORG_REPO", "") | ||||||
| 	e.testEnterprise = testing.Getenv(t, "TEST_ENTERPRISE") | 	e.testEnterprise = testing.Getenv(t, "TEST_ENTERPRISE") | ||||||
| 	e.testJobs = createTestJobs(id, testResultCMNamePrefix, 10) | 	e.testJobs = createTestJobs(id, testResultCMNamePrefix, 100) | ||||||
| 	ephemeral, _ := strconv.ParseBool(testing.Getenv(t, "TEST_FEATURE_FLAG_EPHEMERAL")) | 	ephemeral, _ := strconv.ParseBool(testing.Getenv(t, "TEST_FEATURE_FLAG_EPHEMERAL")) | ||||||
| 	e.featureFlagEphemeral = ephemeral | 	e.featureFlagEphemeral = ephemeral | ||||||
|  | 	e.scaleDownDelaySecondsAfterScaleOut, _ = strconv.ParseInt(testing.Getenv(t, "TEST_RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT", "10"), 10, 32) | ||||||
|  | 	e.minReplicas, _ = strconv.ParseInt(testing.Getenv(t, "TEST_RUNNER_MIN_REPLICAS", "1"), 10, 32) | ||||||
|  | 
 | ||||||
|  | 	var err error | ||||||
|  | 	e.dockerdWithinRunnerContainer, err = strconv.ParseBool(testing.Getenv(t, "TEST_RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER", "false")) | ||||||
|  | 	if err != nil { | ||||||
|  | 		panic(fmt.Sprintf("unable to parse bool from TEST_RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER: %v", err)) | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	return e | 	return e | ||||||
| } | } | ||||||
|  | @ -254,7 +274,22 @@ func (e *env) installActionsRunnerController(t *testing.T) { | ||||||
| 		"GITHUB_TOKEN=" + e.githubToken, | 		"GITHUB_TOKEN=" + e.githubToken, | ||||||
| 		"WEBHOOK_GITHUB_TOKEN=" + e.githubTokenWebhook, | 		"WEBHOOK_GITHUB_TOKEN=" + e.githubTokenWebhook, | ||||||
| 		"RUNNER_LABEL=" + e.runnerLabel, | 		"RUNNER_LABEL=" + e.runnerLabel, | ||||||
|  | 		"TEST_ID=" + e.testID, | ||||||
| 		fmt.Sprintf("RUNNER_FEATURE_FLAG_EPHEMERAL=%v", e.featureFlagEphemeral), | 		fmt.Sprintf("RUNNER_FEATURE_FLAG_EPHEMERAL=%v", e.featureFlagEphemeral), | ||||||
|  | 		fmt.Sprintf("RUNNER_SCALE_DOWN_DELAY_SECONDS_AFTER_SCALE_OUT=%d", e.scaleDownDelaySecondsAfterScaleOut), | ||||||
|  | 		fmt.Sprintf("ORG_RUNNER_MIN_REPLICAS=%d", e.minReplicas), | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if e.dockerdWithinRunnerContainer { | ||||||
|  | 		varEnv = append(varEnv, | ||||||
|  | 			"RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER=true", | ||||||
|  | 			"RUNNER_NAME="+runnerDindImageRepo, | ||||||
|  | 		) | ||||||
|  | 	} else { | ||||||
|  | 		varEnv = append(varEnv, | ||||||
|  | 			"RUNNER_DOCKERD_WITHIN_RUNNER_CONTAINER=false", | ||||||
|  | 			"RUNNER_NAME="+runnerImageRepo, | ||||||
|  | 		) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	scriptEnv = append(scriptEnv, varEnv...) | 	scriptEnv = append(scriptEnv, varEnv...) | ||||||
|  | @ -273,7 +308,7 @@ func (e *env) createControllerNamespaceAndServiceAccount(t *testing.T) { | ||||||
| func (e *env) installActionsWorkflow(t *testing.T) { | func (e *env) installActionsWorkflow(t *testing.T) { | ||||||
| 	t.Helper() | 	t.Helper() | ||||||
| 
 | 
 | ||||||
| 	installActionsWorkflow(t, e.testID, e.runnerLabel, testResultCMNamePrefix, e.repoToCommit, e.testJobs) | 	installActionsWorkflow(t, e.testName, e.runnerLabel, testResultCMNamePrefix, e.repoToCommit, e.testJobs) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (e *env) verifyActionsWorkflowRun(t *testing.T) { | func (e *env) verifyActionsWorkflowRun(t *testing.T) { | ||||||
|  | @ -302,13 +337,13 @@ func createTestJobs(id, testResultCMNamePrefix string, numJobs int) []job { | ||||||
| 
 | 
 | ||||||
| const Branch = "main" | const Branch = "main" | ||||||
| 
 | 
 | ||||||
| func installActionsWorkflow(t *testing.T, testID, runnerLabel, testResultCMNamePrefix, testRepo string, testJobs []job) { | func installActionsWorkflow(t *testing.T, testName, runnerLabel, testResultCMNamePrefix, testRepo string, testJobs []job) { | ||||||
| 	t.Helper() | 	t.Helper() | ||||||
| 
 | 
 | ||||||
| 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) | 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) | ||||||
| 	defer cancel() | 	defer cancel() | ||||||
| 
 | 
 | ||||||
| 	wfName := "E2E " + testID | 	wfName := "E2E " + testName | ||||||
| 	wf := testing.Workflow{ | 	wf := testing.Workflow{ | ||||||
| 		Name: wfName, | 		Name: wfName, | ||||||
| 		On: testing.On{ | 		On: testing.On{ | ||||||
|  |  | ||||||