diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 2a069bdf..dc0472ab 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -55,22 +55,37 @@ context deadline exceeded **Solution**
-To fix this, you need to set up a firewall rule to allow the master node to connect to the webhook port. -The exact way to do this may wary, but the following script should point you in the right direction: +To fix this, you may either: -``` -# 1) Retrieve the network tag automatically given to the worker nodes -# NOTE: this only works if you have only one cluster in your GCP project. You will have to manually inspect the result of this command to find the tag for the cluster you want to target -WORKER_NODES_TAG=$(gcloud compute instances list --format='text(tags.items[0])' --filter='metadata.kubelet-config:*' | grep tags | awk '{print $2}' | sort | uniq) +1. Configure the webhook to use another port, such as 443 or 10250, [each of + which allows traffic by default](https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#add_firewall_rules). -# 2) Take note of the VPC network in which you deployed your cluster -# NOTE this only works if you have only one network in which you deploy your clusters -NETWORK=$(gcloud compute instances list --format='text(networkInterfaces[0].network)' --filter='metadata.kubelet-config:*' | grep networks | awk -F'/' '{print $NF}' | sort | uniq) + ```sh + # With helm, you'd set `webhookPort` to the port number of your choice + # See https://github.com/actions-runner-controller/actions-runner-controller/pull/1410/files for more information + helm upgrade --install --namespace actions-runner-system --create-namespace \ + --wait actions-runner-controller actions-runner-controller/actions-runner-controller \ + --set webhookPort=10250 + ``` -# 3) Get the master source ip block -SOURCE=$(gcloud container clusters describe <cluster-name> --region <region> | grep masterIpv4CidrBlock| cut -d ':' -f 2 | tr -d ' ') -gcloud compute firewall-rules create k8s-cert-manager --source-ranges $SOURCE --target-tags $WORKER_NODES_TAG --allow TCP:9443 --network $NETWORK -``` +2. Set up a firewall rule to allow the master node to connect to the default + webhook port. 
The exact way to do this may vary, but the following script + should point you in the right direction: + + ```sh + # 1) Retrieve the network tag automatically given to the worker nodes + # NOTE: this only works if you have only one cluster in your GCP project. You will have to manually inspect the result of this command to find the tag for the cluster you want to target + WORKER_NODES_TAG=$(gcloud compute instances list --format='text(tags.items[0])' --filter='metadata.kubelet-config:*' | grep tags | awk '{print $2}' | sort | uniq) + + # 2) Take note of the VPC network in which you deployed your cluster + # NOTE this only works if you have only one network in which you deploy your clusters + NETWORK=$(gcloud compute instances list --format='text(networkInterfaces[0].network)' --filter='metadata.kubelet-config:*' | grep networks | awk -F'/' '{print $NF}' | sort | uniq) + + # 3) Get the master source ip block + SOURCE=$(gcloud container clusters describe <cluster-name> --region <region> | grep masterIpv4CidrBlock| cut -d ':' -f 2 | tr -d ' ') + + gcloud compute firewall-rules create k8s-cert-manager --source-ranges $SOURCE --target-tags $WORKER_NODES_TAG --allow TCP:9443 --network $NETWORK + ``` ## Operations diff --git a/charts/actions-runner-controller/README.md b/charts/actions-runner-controller/README.md index ad90d854..15483f2a 100644 --- a/charts/actions-runner-controller/README.md +++ b/charts/actions-runner-controller/README.md @@ -12,6 +12,7 @@ All additional docs are kept in the `docs/` folder, this README is solely for do |----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------| | `labels` | Set labels to apply to all resources in the chart | | | `replicaCount` | Set the number of controller pods | 1 | +| `webhookPort` | Set the containerPort for the webhook Pod | 9443 | | 
`syncPeriod` | Set the period in which the controller reconciles the desired runners count | 10m | | `enableLeaderElection` | Enable election configuration | true | | `leaderElectionId` | Set the election ID for the controller group | | diff --git a/charts/actions-runner-controller/templates/deployment.yaml b/charts/actions-runner-controller/templates/deployment.yaml index 691c07c2..f35d6ff3 100644 --- a/charts/actions-runner-controller/templates/deployment.yaml +++ b/charts/actions-runner-controller/templates/deployment.yaml @@ -44,6 +44,7 @@ spec: {{- if .Values.leaderElectionId }} - "--leader-election-id={{ .Values.leaderElectionId }}" {{- end }} + - "--port={{ .Values.webhookPort }}" - "--sync-period={{ .Values.syncPeriod }}" - "--default-scale-down-delay={{ .Values.defaultScaleDownDelay }}" - "--docker-image={{ .Values.image.dindSidecarRepositoryAndTag }}" @@ -125,7 +126,7 @@ spec: name: manager imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - - containerPort: 9443 + - containerPort: {{ .Values.webhookPort }} name: webhook-server protocol: TCP {{- if not .Values.metrics.proxy.enabled }} diff --git a/charts/actions-runner-controller/templates/webhook_service.yaml b/charts/actions-runner-controller/templates/webhook_service.yaml index c7014a56..41425f42 100644 --- a/charts/actions-runner-controller/templates/webhook_service.yaml +++ b/charts/actions-runner-controller/templates/webhook_service.yaml @@ -13,7 +13,7 @@ spec: type: {{ .Values.service.type }} ports: - port: 443 - targetPort: 9443 + targetPort: {{ .Values.webhookPort }} protocol: TCP name: https selector: diff --git a/charts/actions-runner-controller/values.yaml b/charts/actions-runner-controller/values.yaml index 41c01162..633e619e 100644 --- a/charts/actions-runner-controller/values.yaml +++ b/charts/actions-runner-controller/values.yaml @@ -6,6 +6,7 @@ labels: {} replicaCount: 1 +webhookPort: 9443 syncPeriod: 1m defaultScaleDownDelay: 10m diff --git a/main.go b/main.go index 
9bd31304..8204824b 100644 --- a/main.go +++ b/main.go @@ -71,6 +71,7 @@ func main() { metricsAddr string enableLeaderElection bool leaderElectionId string + port int syncPeriod time.Duration gitHubAPICacheDuration time.Duration @@ -113,6 +114,7 @@ func main() { flag.StringVar(&c.RunnerGitHubURL, "runner-github-url", c.RunnerGitHubURL, "GitHub URL to be used by runners during registration") flag.DurationVar(&gitHubAPICacheDuration, "github-api-cache-duration", 0, "DEPRECATED: The duration until the GitHub API cache expires. Setting this to e.g. 10m results in the controller tries its best not to make the same API call within 10m to reduce the chance of being rate-limited. Defaults to mostly the same value as sync-period. If you're tweaking this in order to make autoscaling more responsive, you'll probably want to tweak sync-period, too") flag.DurationVar(&defaultScaleDownDelay, "default-scale-down-delay", controllers.DefaultScaleDownDelay, "The approximate delay for a scale down followed by a scale up, used to prevent flapping (down->up->down->... loop)") + flag.IntVar(&port, "port", 9443, "The port to which the admission webhook endpoint should bind") flag.DurationVar(&syncPeriod, "sync-period", 1*time.Minute, "Determines the minimum frequency at which K8s resources managed by this controller are reconciled.") flag.Var(&commonRunnerLabels, "common-runner-labels", "Runner labels in the K1=V1,K2=V2,... format that are inherited all the runners created by the controller. See https://github.com/actions-runner-controller/actions-runner-controller/issues/321 for more information") flag.StringVar(&namespace, "watch-namespace", "", "The namespace to watch for custom resources. Set to empty for letting it watch for all namespaces.") @@ -136,7 +138,7 @@ func main() { MetricsBindAddress: metricsAddr, LeaderElection: enableLeaderElection, LeaderElectionID: leaderElectionId, - Port: 9443, + Port: port, SyncPeriod: &syncPeriod, Namespace: namespace, })