fix typos
This commit is contained in:
parent
a1a8dc5606
commit
66880820ed
|
|
@ -21,14 +21,14 @@ RUN go mod download
|
|||
# Usage:
|
||||
# docker buildx build --tag repo/img:tag -f ./Dockerfile . --platform linux/amd64,linux/arm64,linux/arm/v7
|
||||
#
|
||||
# With the above commmand,
|
||||
# With the above command,
|
||||
# TARGETOS can be "linux", TARGETARCH can be "amd64", "arm64", and "arm", TARGETVARIANT can be "v7".
|
||||
|
||||
ARG TARGETPLATFORM TARGETOS TARGETARCH TARGETVARIANT VERSION=dev COMMIT_SHA=dev
|
||||
|
||||
# We intentionally avoid `--mount=type=cache,mode=0777,target=/go/pkg/mod` in the `go mod download` and the `go build` runs
|
||||
# to avoid https://github.com/moby/buildkit/issues/2334
|
||||
# We can use docker layer cache so the build is fast enogh anyway
|
||||
# We can use docker layer cache so the build is fast enough anyway
|
||||
# We also use per-platform GOCACHE for the same reason.
|
||||
ENV GOCACHE /build/${TARGETPLATFORM}/root/.cache/go-build
|
||||
|
||||
|
|
|
|||
|
|
@ -49,10 +49,10 @@ type RunnerReplicaSetStatus struct {
|
|||
// +optional
|
||||
Replicas *int `json:"replicas"`
|
||||
|
||||
// ReadyReplicas is the number of runners that are created and Runnning.
|
||||
// ReadyReplicas is the number of runners that are created and Running.
|
||||
ReadyReplicas *int `json:"readyReplicas"`
|
||||
|
||||
// AvailableReplicas is the number of runners that are created and Runnning.
|
||||
// AvailableReplicas is the number of runners that are created and Running.
|
||||
// This is currently the same as ReadyReplicas but preserved for future use.
|
||||
AvailableReplicas *int `json:"availableReplicas"`
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9304,11 +9304,11 @@ spec:
|
|||
properties:
|
||||
availableReplicas:
|
||||
description: |-
|
||||
AvailableReplicas is the number of runners that are created and Runnning.
|
||||
AvailableReplicas is the number of runners that are created and Running.
|
||||
This is currently the same as ReadyReplicas but preserved for future use.
|
||||
type: integer
|
||||
readyReplicas:
|
||||
description: ReadyReplicas is the number of runners that are created and Runnning.
|
||||
description: ReadyReplicas is the number of runners that are created and Running.
|
||||
type: integer
|
||||
replicas:
|
||||
description: Replicas is the number of runners that are created and still being managed by this runner replica set.
|
||||
|
|
|
|||
|
|
@ -221,7 +221,7 @@ func (w *Worker) HandleDesiredRunnerCount(ctx context.Context, count, jobsComple
|
|||
return w.lastPatch, nil
|
||||
}
|
||||
|
||||
// calculateDesiredState calculates the desired state of the worker based on the desired count and the the number of jobs completed.
|
||||
// setDesiredWorkerState calculates the desired state of the worker based on the desired count and the number of jobs completed.
|
||||
func (w *Worker) setDesiredWorkerState(count, jobsCompleted int) int {
|
||||
// Max runners should always be set by the resource builder either to the configured value,
|
||||
// or the maximum int32 (resourcebuilder.newAutoScalingListener()).
|
||||
|
|
|
|||
|
|
@ -20,12 +20,12 @@ import (
|
|||
// we can delete the runner pod without disrupting a workflow job.
|
||||
//
|
||||
// This function returns a non-nil pointer to corev1.Pod as the first return value
|
||||
// if the runner is considered to have gracefully stopped, hence it's pod is safe for deletion.
|
||||
// if the runner is considered to have gracefully stopped, hence its pod is safe for deletion.
|
||||
//
|
||||
// It's a "tick" operation so a graceful stop can take multiple calls to complete.
|
||||
// This function is designed to complete a lengthy graceful stop process in a unblocking way.
|
||||
// This function is designed to complete a lengthy graceful stop process in an unblocking way.
|
||||
// When it wants to be retried later, the function returns a non-nil *ctrl.Result as the second return value, and may or may not populate the error in the third return value.
|
||||
// The caller is expected to return the returned ctrl.Result and error to postpone the current reconcilation loop and trigger a scheduled retry.
|
||||
// The caller is expected to return the returned ctrl.Result and error to postpone the current reconciliation loop and trigger a scheduled retry.
|
||||
func tickRunnerGracefulStop(ctx context.Context, retryDelay time.Duration, log logr.Logger, ghClient *github.Client, c client.Client, enterprise, organization, repository, runner string, pod *corev1.Pod) (*corev1.Pod, *ctrl.Result, error) {
|
||||
pod, err := annotatePodOnce(ctx, c, log, pod, AnnotationKeyUnregistrationStartTimestamp, time.Now().Format(time.RFC3339))
|
||||
if err != nil {
|
||||
|
|
@ -95,7 +95,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
code := runnerContainerExitCode(pod)
|
||||
|
||||
if pod != nil && pod.Annotations[AnnotationKeyUnregistrationCompleteTimestamp] != "" {
|
||||
// If it's already unregistered in the previous reconcilation loop,
|
||||
// If it's already unregistered in the previous reconciliation loop,
|
||||
// you can safely assume that it won't get registered again so it's safe to delete the runner pod.
|
||||
log.Info("Runner pod is marked as already unregistered.")
|
||||
} else if runnerID == nil && !runnerPodOrContainerIsStopped(pod) && !podConditionTransitionTimeAfter(pod, corev1.PodReady, registrationTimeout) &&
|
||||
|
|
@ -114,7 +114,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
// If we didn't handle this case here, ARC would end up waiting forever until the
|
||||
// PV provider(s) provision PVs for the pod, which seems to never happen.
|
||||
//
|
||||
// For reference, the below is an eaxmple of pod.status that you might see when it happened:
|
||||
// For reference, the below is an example of pod.status that you might see when it happened:
|
||||
// status:
|
||||
// conditions:
|
||||
// - lastProbeTime: null
|
||||
|
|
@ -146,7 +146,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
"Unregistration started before runner ID is assigned and the runner was unable to obtain ID within registration timeout. "+
|
||||
"Perhaps the runner has communication issue, or a firewall egress rule is dropping traffic to GitHub API, or GitHub API is unavailable? "+
|
||||
"Marking unregistration as completed anyway because there's nothing ARC can do. "+
|
||||
"This may result in in cancelling the job depending on your terminationGracePeriodSeconds and RUNNER_GRACEFUL_STOP_TIMEOUT settings.",
|
||||
"This may result in cancelling the job depending on your terminationGracePeriodSeconds and RUNNER_GRACEFUL_STOP_TIMEOUT settings.",
|
||||
"registrationTimeout", registrationTimeout,
|
||||
)
|
||||
} else if pod != nil && runnerPodOrContainerIsStopped(pod) {
|
||||
|
|
@ -242,7 +242,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
}
|
||||
|
||||
// We want to prevent spamming the deletion attempts but returning ctrl.Result with RequeueAfter doesn't
|
||||
// work as the reconcilation can happen earlier due to pod status update.
|
||||
// work as the reconciliation can happen earlier due to pod status update.
|
||||
// For ephemeral runners, we can expect it to stop and unregister itself on completion.
|
||||
// So we can just wait for the completion without actively retrying unregistration.
|
||||
ephemeral := getRunnerEnv(pod, EnvVarEphemeral)
|
||||
|
|
@ -266,7 +266,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
} else if ok {
|
||||
log.Info("Runner has just been unregistered.")
|
||||
} else if pod == nil {
|
||||
// `r.unregisterRunner()` will returns `false, nil` if the runner is not found on GitHub.
|
||||
// `r.unregisterRunner()` will return `false, nil` if the runner is not found on GitHub.
|
||||
// However, that doesn't always mean the pod can be safely removed.
|
||||
//
|
||||
// If the pod does not exist for the runner,
|
||||
|
|
@ -275,7 +275,7 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l
|
|||
|
||||
log.Info("Runner was not found on GitHub and the runner pod was not found on Kuberntes.")
|
||||
} else if ts := pod.Annotations[AnnotationKeyUnregistrationStartTimestamp]; ts != "" {
|
||||
log.Info("Runner unregistration is in-progress. It can take forever to complete if if it's a static runner constantly running jobs."+
|
||||
log.Info("Runner unregistration is in-progress. It can take forever to complete if it's a static runner constantly running jobs."+
|
||||
" It can also take very long time if it's an ephemeral runner that is running a log-running job.", "error", err)
|
||||
|
||||
return &ctrl.Result{RequeueAfter: retryDelay}, nil
|
||||
|
|
@ -414,8 +414,8 @@ func setRunnerEnv(pod *corev1.Pod, key, value string) {
|
|||
// Waiting and retrying forever on this case is not a solution, because `config.sh` won't succeed with a wrong token hence the runner gets stuck in this state forever.
|
||||
// There isn't a perfect solution to this, but a practical workaround would be to implement a "grace period" in the caller side.
|
||||
//
|
||||
// - Case "2-3." can happen when e.g. ARC recreated an ephemral runner pod in a previous reconcilation loop and then it was requested to delete the runner before the runner comes up.
|
||||
// If handled inappropriately, this can cause a race condition betweeen a deletion of the runner pod and GitHub scheduling a workflow job onto the runner.
|
||||
// - Case "2-3." can happen when e.g. ARC recreated an ephemeral runner pod in a previous reconciliation loop and then it was requested to delete the runner before the runner comes up.
|
||||
// If handled inappropriately, this can cause a race condition between a deletion of the runner pod and GitHub scheduling a workflow job onto the runner.
|
||||
//
|
||||
// Once successfully detected case "2-1." or "2-2.", you can safely delete the runner pod because you know that the runner won't come back
|
||||
// as long as you recreate the runner pod.
|
||||
|
|
@ -427,7 +427,7 @@ func setRunnerEnv(pod *corev1.Pod, key, value string) {
|
|||
//
|
||||
// Beware though, you need extra care to set an appropriate grace period depending on your environment.
|
||||
// There isn't a single right grace period that works for everyone.
|
||||
// The longer the grace period is, the earlier a cluster resource shortage can occur due to throttoled runner pod deletions,
|
||||
// The longer the grace period is, the earlier a cluster resource shortage can occur due to throttled runner pod deletions,
|
||||
// while the shorter the grace period is, the more likely you may encounter the race issue.
|
||||
func unregisterRunner(ctx context.Context, client *github.Client, enterprise, org, repo string, id int64) (bool, error) {
|
||||
// For the record, historically ARC did not try to call RemoveRunner on a busy runner, but it's no longer true.
|
||||
|
|
@ -445,7 +445,7 @@ func unregisterRunner(ctx context.Context, client *github.Client, enterprise, or
|
|||
//
|
||||
// - It can be "status=offline" at the same time but that's another story.
|
||||
// - After https://github.com/actions/actions-runner-controller/pull/1127, ListRunners responses that are used to
|
||||
// determine if the runner is busy can be more outdated than before, as those responeses are now cached for 60 seconds.
|
||||
// determine if the runner is busy can be more outdated than before, as those responses are now cached for 60 seconds.
|
||||
// - Note that 60 seconds is controlled by the Cache-Control response header provided by GitHub so we don't have a strict control on it but we assume it won't
|
||||
// change from 60 seconds.
|
||||
//
|
||||
|
|
|
|||
|
|
@ -245,7 +245,7 @@ type result struct {
|
|||
// can't have .Revision.
|
||||
// Now, imagine that you are to add 2 runner replicas on scale up.
|
||||
// We create one resource object per replica that ends up calling 2 client.Create calls.
|
||||
// If we were reusing client.Object to be passed to client.Create calls, only the first call suceeeds.
|
||||
// If we were reusing client.Object to be passed to client.Create calls, only the first call succeeds.
|
||||
// The second call fails because the first call mutated the client.Object to have .Revision.
|
||||
// Passing a factory function of client.Object and creating a brand-new client.Object per a client.Create call resolves this issue,
|
||||
// allowing us to create two or more replicas in one reconciliation loop without being rejected by K8s.
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ spec:
|
|||
repository: mumoshu/actions-runner-controller-ci
|
||||
```
|
||||
|
||||
- `kind: RunnerDeployment`: indicates its a kind of custom resource RunnerDeployment.
|
||||
- `kind: RunnerDeployment`: indicates it's a kind of custom resource RunnerDeployment.
|
||||
- `replicas: 1` : will deploy one replica. Multiple replicas can also be deployed (more on that later).
|
||||
- `repository: mumoshu/actions-runner-controller-ci` : is the repository to link to when the pod comes up with the Actions runner (Note, this can be configured to link at the Enterprise or Organization level also).
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ ARC also allows for scaling the runners dynamically. There are two mechanisms fo
|
|||
|
||||
You can enable scaling with 3 steps
|
||||
1) Enable `HorizontalRunnerAutoscaler` - Create a `deployment.yaml` file of type `HorizontalRunnerAutoscaler`. The schema for this file is defined below.
|
||||
2) Scaling parameters - `minReplicas` and `maxReplicas` indicates the min and max number of replicas to scale to.
|
||||
2) Scaling parameters - `minReplicas` and `maxReplicas` indicate the min and max number of replicas to scale to.
|
||||
3) Scaling metrics - ARC currently supports `PercentageRunnersBusy` as a metric type. The `PercentageRunnersBusy` will poll GitHub for the number of runners in the `busy` state in the RunnerDeployment's namespace, it will then scale depending on how you have configured the scale factors.
|
||||
|
||||
### Pull Driven Scaling Schema
|
||||
|
|
|
|||
|
|
@ -20,9 +20,9 @@ If you're building a custom runner image on your own and it still requires the u
|
|||
|
||||
Relevant PR(s): #1384, #1385
|
||||
|
||||
## FIX : Prevent runner form stucking in Terminating when the container disappeared
|
||||
## FIX : Prevent runner from getting stuck in Terminating when the container disappeared
|
||||
|
||||
We occasionally heard about runnner pods stuck in Terminating after the node and containers running on it disappeared due to, for example, the machine terminated prematurely.
|
||||
We occasionally heard about runner pods stuck in Terminating after the node and containers running on it disappeared due to, for example, the machine terminated prematurely.
|
||||
|
||||
We now set runner pods' restartPolicy to `Never` and remove runner pods stuck in `Waiting` after restarting, so that the pods are more likely to NOT get stuck forever.
|
||||
|
||||
|
|
@ -30,15 +30,15 @@ Relevant PR(s): #1395, #1420
|
|||
|
||||
## ENHANCEMENT : Support arbitrarily setting `privileged: true` for runner container
|
||||
|
||||
This is a frequently asked feature that alows you to force `privileged: true` in case you don't need docker but still need privileged tasks to be run in a job step.
|
||||
This is a frequently asked feature that allows you to force `privileged: true` in case you don't need docker but still need privileged tasks to be run in a job step.
|
||||
|
||||
In combination with a container runtime like `sysbox` this should enable you to run docker builds within the dind sidecar, all without privileges. See [the discussion related to Sysbox](https://github.com/actions/actions-runner-controller/discussions/977) for more information.
|
||||
|
||||
Note that we ARC maintainers still have no bandwidth to provide a complete description on how to make ARC work with `sysbox` yet, but almost certainly we'd welcome contributions to the documentation if you managed to make it work.
|
||||
Note that we ARC maintainers still have no bandwidth to provide a complete description of how to make ARC work with `sysbox` yet, but almost certainly we'd welcome contributions to the documentation if you managed to make it work.
|
||||
|
||||
Relevant PR(s): #1383
|
||||
|
||||
## ENHANCEMENT : RunnerSet can now retain PVs accross restarts
|
||||
## ENHANCEMENT : RunnerSet can now retain PVs across restarts
|
||||
|
||||
This enhancement makes it more practical to use RunnerSet in combination with `volumeClaimTemplates` to make your workflow jobs faster.
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
> This feature requires controller version => [v0.26.0](https://github.com/actions/actions-runner-controller/releases/tag/v0.26.0)
|
||||
|
||||
In a large enterprise, there might be many GitHub organizations that requires self-hosted runners. Previously, the only way to provide ARC-managed self-hosted runners in such environment was [Deploying Multiple Controllers](deploying-arc-runners.md#deploying-multiple-controllers), which incurs overhead due to it requires one ARC installation per GitHub organization.
|
||||
In a large enterprise, there might be many GitHub organizations that require self-hosted runners. Previously, the only way to provide ARC-managed self-hosted runners in such an environment was [Deploying Multiple Controllers](deploying-arc-runners.md#deploying-multiple-controllers), which incurs overhead because it requires one ARC installation per GitHub organization.
|
||||
|
||||
With multitenancy, you can let ARC manage self-hosted runners across organizations. It's enabled by default and the only thing you need to start using it is to set the `spec.githubAPICredentialsFrom.secretRef.name` fields for the following resources:
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import (
|
|||
)
|
||||
|
||||
// newActionsServer returns a new httptest.Server that handles the
|
||||
// authentication requests neeeded to create a new client. Any requests not
|
||||
// authentication requests needed to create a new client. Any requests not
|
||||
// made to the /actions/runners/registration-token or
|
||||
// /actions/runner-registration endpoints will be handled by the provided
|
||||
// handler. The returned server is started and will be automatically closed
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import (
|
|||
)
|
||||
|
||||
// New returns a new httptest.Server that handles the
|
||||
// authentication requests neeeded to create a new client. Any requests not
|
||||
// authentication requests needed to create a new client. Any requests not
|
||||
// made to the /actions/runners/registration-token or
|
||||
// /actions/runner-registration endpoints will be handled by the provided
|
||||
// handler. The returned server is started and will be automatically closed
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ cat .runner
|
|||
# }
|
||||
#
|
||||
# Especially `agentId` is important, as other than listing all the runners in the repo,
|
||||
# this is the only change we could get the exact runnner ID which can be useful for further
|
||||
# this is the only chance we could get the exact runner ID which can be useful for further
|
||||
# GitHub API call like the below. Note that 171 is the agentId seen above.
|
||||
# curl \
|
||||
# -H "Accept: application/vnd.github.v3+json" \
|
||||
|
|
|
|||
Loading…
Reference in New Issue