diff --git a/acceptance/deploy.sh b/acceptance/deploy.sh index 729e9256..f9924e52 100755 --- a/acceptance/deploy.sh +++ b/acceptance/deploy.sh @@ -53,6 +53,10 @@ if [ "${tool}" == "helm" ]; then if [ "${CHART_VERSION}" != "" ]; then flags+=( --version ${CHART_VERSION}) fi + if [ "${LOG_FORMAT}" != "" ]; then + flags+=( --set logFormat=${LOG_FORMAT}) + flags+=( --set githubWebhookServer.logFormat=${LOG_FORMAT}) + fi set -vx diff --git a/charts/actions-runner-controller/README.md b/charts/actions-runner-controller/README.md index ddcab5de..678ac867 100644 --- a/charts/actions-runner-controller/README.md +++ b/charts/actions-runner-controller/README.md @@ -21,6 +21,7 @@ All additional docs are kept in the `docs/` folder, this README is solely for do | `githubUploadURL` | Override GitHub Upload URL to be used for GitHub API calls | | | `runnerGithubURL` | Override GitHub URL to be used by runners during registration | | | `logLevel` | Set the log level of the controller container | | +| `logFormat` | Set the log format of the controller. Valid options are "text" and "json" | text | | `additionalVolumes` | Set additional volumes to add to the manager container | | | `additionalVolumeMounts` | Set additional volume mounts to add to the manager container | | | `authSecret.create` | Deploy the controller auth secret | false | @@ -76,6 +77,7 @@ All additional docs are kept in the `docs/` folder, this README is solely for do | `runner.statusUpdateHook.enabled` | Use custom RBAC for runners (role, role binding and service account), this will enable reporting runner statuses | false | | `admissionWebHooks.caBundle` | Base64-encoded PEM bundle containing the CA that signed the webhook's serving certificate | | | `githubWebhookServer.logLevel` | Set the log level of the githubWebhookServer container | | +| `githubWebhookServer.logFormat` | Set the log format of the githubWebhookServer controller. 
Valid options are "text" and "json" | text | | `githubWebhookServer.replicaCount` | Set the number of webhook server pods | 1 | | `githubWebhookServer.useRunnerGroupsVisibility` | Enable supporting runner groups with custom visibility. This will incur in extra API calls and may blow up your budget. Currently, you also need to set `githubWebhookServer.secret.enabled` to enable this feature. | false | | `githubWebhookServer.enabled` | Deploy the webhook server pod | false | diff --git a/charts/actions-runner-controller/templates/deployment.yaml b/charts/actions-runner-controller/templates/deployment.yaml index 522d835b..020e9662 100644 --- a/charts/actions-runner-controller/templates/deployment.yaml +++ b/charts/actions-runner-controller/templates/deployment.yaml @@ -67,6 +67,9 @@ spec: {{- if .Values.runner.statusUpdateHook.enabled }} - "--runner-status-update-hook" {{- end }} + {{- if .Values.logFormat }} + - "--log-format={{ .Values.logFormat }}" + {{- end }} command: - "/manager" env: diff --git a/charts/actions-runner-controller/templates/githubwebhook.deployment.yaml b/charts/actions-runner-controller/templates/githubwebhook.deployment.yaml index 6d9d5738..373ddb86 100644 --- a/charts/actions-runner-controller/templates/githubwebhook.deployment.yaml +++ b/charts/actions-runner-controller/templates/githubwebhook.deployment.yaml @@ -51,6 +51,9 @@ spec: {{- if .Values.githubWebhookServer.queueLimit }} - "--queue-limit={{ .Values.githubWebhookServer.queueLimit }}" {{- end }} + {{- if .Values.githubWebhookServer.logFormat }} + - "--log-format={{ .Values.githubWebhookServer.logFormat }}" + {{- end }} command: - "/github-webhook-server" env: diff --git a/charts/actions-runner-controller/values.yaml b/charts/actions-runner-controller/values.yaml index b5706130..9cd4e37f 100644 --- a/charts/actions-runner-controller/values.yaml +++ b/charts/actions-runner-controller/values.yaml @@ -188,10 +188,16 @@ admissionWebHooks: # 
https://github.com/actions-runner-controller/actions-runner-controller/issues/1005#issuecomment-993097155 #hostNetwork: true +## specify log format for actions runner controller. Valid options are "text" and "json" +logFormat: text + + githubWebhookServer: enabled: false replicaCount: 1 useRunnerGroupsVisibility: false + ## specify log format for github webhook controller. Valid options are "text" and "json" + logFormat: text secret: enabled: false create: false diff --git a/cmd/githubwebhookserver/main.go b/cmd/githubwebhookserver/main.go index 794f9361..13503f8c 100644 --- a/cmd/githubwebhookserver/main.go +++ b/cmd/githubwebhookserver/main.go @@ -31,6 +31,7 @@ import ( "github.com/actions-runner-controller/actions-runner-controller/github" "github.com/actions-runner-controller/actions-runner-controller/logging" "github.com/kelseyhightower/envconfig" + "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" _ "k8s.io/client-go/plugin/pkg/client/auth/exec" @@ -41,8 +42,7 @@ import ( ) var ( - scheme = runtime.NewScheme() - setupLog = ctrl.Log.WithName("setup") + scheme = runtime.NewScheme() ) const ( @@ -71,6 +71,7 @@ func main() { logLevel string queueLimit int + logFormat string ghClient *github.Client ) @@ -99,26 +100,32 @@ func main() { flag.StringVar(&c.BasicauthUsername, "github-basicauth-username", c.BasicauthUsername, "Username for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API") flag.StringVar(&c.BasicauthPassword, "github-basicauth-password", c.BasicauthPassword, "Password for GitHub basic auth to use instead of PAT or GitHub APP in case it's running behind a proxy API") flag.StringVar(&c.RunnerGitHubURL, "runner-github-url", c.RunnerGitHubURL, "GitHub URL to be used by runners during registration") + flag.StringVar(&logFormat, "log-format", "text", `The log format. Valid options are "text" and "json". 
Defaults to "text"`) flag.Parse() + logger, err := logging.NewLogger(logLevel, logFormat) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: creating logger: %v\n", err) + os.Exit(1) + } + logger = logger.WithName("setup") + if webhookSecretToken == "" && webhookSecretTokenEnv != "" { - setupLog.Info(fmt.Sprintf("Using the value from %s for -github-webhook-secret-token", webhookSecretTokenEnvName)) + logger.Info(fmt.Sprintf("Using the value from %s for -github-webhook-secret-token", webhookSecretTokenEnvName)) webhookSecretToken = webhookSecretTokenEnv } if webhookSecretToken == "" { - setupLog.Info(fmt.Sprintf("-github-webhook-secret-token and %s are missing or empty. Create one following https://docs.github.com/en/developers/webhooks-and-events/securing-your-webhooks and specify it via the flag or the envvar", webhookSecretTokenEnvName)) + logger.Info(fmt.Sprintf("-github-webhook-secret-token and %s are missing or empty. Create one following https://docs.github.com/en/developers/webhooks-and-events/securing-your-webhooks and specify it via the flag or the envvar", webhookSecretTokenEnvName)) } if watchNamespace == "" { - setupLog.Info("-watch-namespace is empty. HorizontalRunnerAutoscalers in all the namespaces are watched, cached, and considered as scale targets.") + logger.Info("-watch-namespace is empty. HorizontalRunnerAutoscalers in all the namespaces are watched, cached, and considered as scale targets.") } else { - setupLog.Info("-watch-namespace is %q. Only HorizontalRunnerAutoscalers in %q are watched, cached, and considered as scale targets.") + logger.Info(fmt.Sprintf("-watch-namespace is %q. Only HorizontalRunnerAutoscalers in %q are watched, cached, and considered as scale targets.", watchNamespace, watchNamespace)) } - logger := logging.NewLogger(logLevel) - ctrl.SetLogger(logger) // In order to support runner groups with custom visibility (selected repositories), we need to perform some GitHub API calls. 
@@ -132,11 +139,11 @@ func main() { ghClient, err = c.NewClient() if err != nil { fmt.Fprintln(os.Stderr, "Error: Client creation failed.", err) - setupLog.Error(err, "unable to create controller", "controller", "Runner") + logger.Error(err, "unable to create controller", "controller", "Runner") os.Exit(1) } } else { - setupLog.Info("GitHub client is not initialized. Runner groups with custom visibility are not supported. If needed, please provide GitHub authentication. This will incur in extra GitHub API calls") + logger.Info("GitHub client is not initialized. Runner groups with custom visibility are not supported. If needed, please provide GitHub authentication. This will incur in extra GitHub API calls") } syncPeriod := 10 * time.Minute @@ -148,7 +155,7 @@ func main() { Port: 9443, }) if err != nil { - setupLog.Error(err, "unable to start manager") + logger.Error(err, "unable to start manager") os.Exit(1) } @@ -165,7 +172,7 @@ func main() { } if err = hraGitHubWebhook.SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "webhookbasedautoscaler") + logger.Error(err, "unable to create controller", "controller", "webhookbasedautoscaler") os.Exit(1) } @@ -178,9 +185,9 @@ func main() { defer cancel() defer wg.Done() - setupLog.Info("starting webhook server") + logger.Info("starting webhook server") if err := mgr.Start(ctx); err != nil { - setupLog.Error(err, "problem running manager") + logger.Error(err, "problem running manager") os.Exit(1) } }() @@ -206,7 +213,7 @@ func main() { if err := srv.ListenAndServe(); err != nil { if !errors.Is(err, http.ErrServerClosed) { - setupLog.Error(err, "problem running http server") + logger.Error(err, "problem running http server") } } }() diff --git a/controllers/horizontal_runner_autoscaler_webhook.go b/controllers/horizontal_runner_autoscaler_webhook.go index c500860d..0a969390 100644 --- a/controllers/horizontal_runner_autoscaler_webhook.go +++ 
b/controllers/horizontal_runner_autoscaler_webhook.go @@ -235,6 +235,8 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) Handle(w http.Respons "repository.owner.type", e.Repo.Owner.GetType(), "enterprise.slug", enterpriseSlug, "action", e.GetAction(), + "workflowJob.runID", e.WorkflowJob.GetRunID(), + "workflowJob.ID", e.WorkflowJob.GetID(), ) } @@ -343,7 +345,7 @@ func (autoscaler *HorizontalRunnerAutoscalerGitHubWebhook) Handle(w http.Respons msg := fmt.Sprintf("scaled %s by %d", target.Name, target.Amount) - autoscaler.Log.Info(msg) + log.Info(msg) if written, err := w.Write([]byte(msg)); err != nil { log.Error(err, "failed writing http response", "msg", msg, "written", written) diff --git a/controllers/integration_test.go b/controllers/integration_test.go index 3cb4ba14..a0d7d22c 100644 --- a/controllers/integration_test.go +++ b/controllers/integration_test.go @@ -616,14 +616,14 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() { // Scale-up to 2 replicas on first workflow_job.queued webhook event { - env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}) + env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}, int64(1234), int64(4321)) ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event") env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners") } // Scale-up to 3 replicas on second workflow_job.queued webhook event { - env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}) + env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}, int64(1234), int64(4321)) ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after second webhook event") env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners") } @@ -631,7 +631,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() { // Do not scale-up on third 
workflow_job.queued webhook event // repo "example" doesn't match our Spec { - env.SendWorkflowJobEvent("test", "example", "queued", []string{"self-hosted"}) + env.SendWorkflowJobEvent("test", "example", "queued", []string{"self-hosted"}, int64(1234), int64(4321)) ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 3, "runners after third webhook event") env.ExpectRegisteredNumberCountEventuallyEquals(3, "count of fake list runners") } @@ -1250,7 +1250,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() { // Scale-up to 2 replicas on first workflow_job webhook event { - env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}) + env.SendWorkflowJobEvent("test", "valid", "queued", []string{"self-hosted"}, int64(1234), int64(4321)) ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook") ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event") env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners") @@ -1334,7 +1334,7 @@ var _ = Context("INTEGRATION: Inside of a new namespace", func() { // Scale-up to 2 replicas on first workflow_job webhook event { - env.SendWorkflowJobEvent("test", "valid", "queued", []string{"custom-label"}) + env.SendWorkflowJobEvent("test", "valid", "queued", []string{"custom-label"}, int64(1234), int64(4321)) ExpectRunnerSetsCountEventuallyEquals(ctx, ns.Name, 1, "runner sets after webhook") ExpectRunnerSetsManagedReplicasCountEventuallyEquals(ctx, ns.Name, 2, "runners after first webhook event") env.ExpectRegisteredNumberCountEventuallyEquals(2, "count of fake list runners") @@ -1415,9 +1415,11 @@ func (env *testEnvironment) SendOrgCheckRunEvent(org, repo, status, action strin ExpectWithOffset(1, resp.StatusCode).To(Equal(200)) } -func (env *testEnvironment) SendWorkflowJobEvent(org, repo, statusAndAction string, labels []string) { +func (env *testEnvironment) SendWorkflowJobEvent(org, repo, 
statusAndAction string, labels []string, runID int64, ID int64) { resp, err := sendWebhook(env.webhookServer, "workflow_job", &github.WorkflowJobEvent{ WorkflowJob: &github.WorkflowJob{ + ID: &ID, + RunID: &runID, Status: &statusAndAction, Labels: labels, }, diff --git a/controllers/runner_graceful_stop.go b/controllers/runner_graceful_stop.go index 176cd6e5..0391edf1 100644 --- a/controllers/runner_graceful_stop.go +++ b/controllers/runner_graceful_stop.go @@ -110,9 +110,23 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l // Note: This logic is here to prevent a dead-lock between ARC and the PV provider. // // The author of this logic thinks that some (or all?) of CSI plugins and PV providers - // do not supported to provision dynamic PVs for a pod that is already marked for deletion. + // do not support provisioning dynamic PVs for a pod that is already marked for deletion. // If we didn't handle this case here, ARC would end up with waiting forever until the - // PV provider to provision PVs for the pod, which seems to never happen. + // PV provider(s) provision PVs for the pod, which seems to never happen. + // + // For reference, the below is an example of pod.status that you might see when it happened: + // status: + // conditions: + // - lastProbeTime: null + // lastTransitionTime: "2022-11-04T00:04:05Z" + // message: 'binding rejected: running Bind plugin "DefaultBinder": Operation cannot + // be fulfilled on pods/binding "org-runnerdeploy-xv2lg-pm6t2": pod org-runnerdeploy-xv2lg-pm6t2 + // is being deleted, cannot be assigned to a host' + // reason: SchedulerError + // status: "False" + // type: PodScheduled + // phase: Pending + // qosClass: BestEffort log.Info( "Unregistration started before runner pod gets scheduled onto a node. "+ "Perhaps the runner is taking a long time due to e.g. slow CSI slugin not giving us a PV in a timely manner, or your Kubernetes cluster is overloaded? 
"+ diff --git a/logging/logger.go b/logging/logger.go index 97005610..f7b2a5d7 100644 --- a/logging/logger.go +++ b/logging/logger.go @@ -1,6 +1,7 @@ package logging import ( + "errors" "fmt" "os" "strconv" @@ -19,38 +20,69 @@ const ( LogLevelError = "error" ) -func NewLogger(logLevel string) logr.Logger { - log := zap.New(func(o *zap.Options) { - switch logLevel { - case LogLevelDebug: - o.Development = true - lvl := zaplib.NewAtomicLevelAt(zaplib.DebugLevel) // maps to logr's V(1) - o.Level = &lvl - case LogLevelInfo: - lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel) - o.Level = &lvl - case LogLevelWarn: - lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel) - o.Level = &lvl - case LogLevelError: - lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel) - o.Level = &lvl - default: - // We use bitsize of 8 as zapcore.Level is a type alias to int8 - levelInt, err := strconv.ParseInt(logLevel, 10, 8) - if err != nil { - fmt.Fprintf(os.Stderr, "Failed to parse --log-level=%s: %v", logLevel, err) - os.Exit(1) - } +var ( + LogOpts = zap.Options{ + TimeEncoder: zapcore.TimeEncoderOfLayout(time.RFC3339), + Development: true, + EncoderConfigOptions: []zap.EncoderConfigOption{ + func(ec *zapcore.EncoderConfig) { + ec.LevelKey = "severity" + ec.MessageKey = "message" + }, + }, + } +) - // For example, --log-level=debug a.k.a --log-level=-1 maps to zaplib.DebugLevel, which is associated to logr's V(1) - // --log-level=-2 maps the specific custom log level that is associated to logr's V(2). 
- level := zapcore.Level(levelInt) - atomicLevel := zaplib.NewAtomicLevelAt(level) - o.Level = &atomicLevel +func NewLogger(logLevel string, logFormat string) (logr.Logger, error) { + + if !validLogFormat(logFormat) { + return logr.Logger{}, errors.New("invalid log format specified") + } + + o := LogOpts + o.EncoderConfigOptions = make([]zap.EncoderConfigOption, len(LogOpts.EncoderConfigOptions)) + copy(o.EncoderConfigOptions, LogOpts.EncoderConfigOptions) + + if logFormat == "json" { + o.Development = false + o.TimeEncoder = nil + } + + switch logLevel { + case LogLevelDebug: + lvl := zaplib.NewAtomicLevelAt(zaplib.DebugLevel) // maps to logr's V(1) + o.Level = &lvl + case LogLevelInfo: + lvl := zaplib.NewAtomicLevelAt(zaplib.InfoLevel) + o.Level = &lvl + case LogLevelWarn: + lvl := zaplib.NewAtomicLevelAt(zaplib.WarnLevel) + o.Level = &lvl + case LogLevelError: + lvl := zaplib.NewAtomicLevelAt(zaplib.ErrorLevel) + o.Level = &lvl + default: + // We use bitsize of 8 as zapcore.Level is a type alias to int8 + levelInt, err := strconv.ParseInt(logLevel, 10, 8) + if err != nil { + // Return the parse failure instead of exiting so callers can decide how to handle it. + return logr.Logger{}, fmt.Errorf("failed to parse --log-level=%s: %v", logLevel, err) } - // For example, --log-level=debug a.k.a --log-level=-1 maps to zaplib.DebugLevel, which is associated to logr's V(1) - // --log-level=-2 maps the specific custom log level that is associated to logr's V(2). 
+ level := zapcore.Level(levelInt) + atomicLevel := zaplib.NewAtomicLevelAt(level) + o.Level = &atomicLevel + } + return zap.New(zap.UseFlagOptions(&o)), nil +} + +func validLogFormat(logFormat string) bool { + validFormat := []string{"text", "json"} + for _, v := range validFormat { + if v == logFormat { + return true + } + } + return false } diff --git a/main.go b/main.go index 8ae04e0c..45394edf 100644 --- a/main.go +++ b/main.go @@ -30,6 +30,7 @@ import ( "github.com/actions-runner-controller/actions-runner-controller/logging" "github.com/kelseyhightower/envconfig" "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" ctrl "sigs.k8s.io/controller-runtime" @@ -43,7 +44,6 @@ const ( var ( scheme = runtime.NewScheme() - log = ctrl.Log.WithName("actions-runner-controller") ) func init() { @@ -84,6 +84,7 @@ func main() { dockerRegistryMirror string namespace string logLevel string + logFormat string commonRunnerLabels commaSeparatedStringSlice ) @@ -119,10 +120,16 @@ func main() { flag.Var(&commonRunnerLabels, "common-runner-labels", "Runner labels in the K1=V1,K2=V2,... format that are inherited all the runners created by the controller. See https://github.com/actions-runner-controller/actions-runner-controller/issues/321 for more information") flag.StringVar(&namespace, "watch-namespace", "", "The namespace to watch for custom resources. Set to empty for letting it watch for all namespaces.") flag.StringVar(&logLevel, "log-level", logging.LogLevelDebug, `The verbosity of the logging. Valid values are "debug", "info", "warn", "error". Defaults to "debug".`) + flag.StringVar(&logFormat, "log-format", "text", `The log format. Valid options are "text" and "json". 
Defaults to "text"`) + flag.Parse() - logger := logging.NewLogger(logLevel) - c.Log = &logger + log, err := logging.NewLogger(logLevel, logFormat) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: creating logger: %v\n", err) + os.Exit(1) + } + c.Log = &log ghClient, err = c.NewClient() if err != nil { @@ -130,7 +137,7 @@ func main() { os.Exit(1) } - ctrl.SetLogger(logger) + ctrl.SetLogger(log) mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ Scheme: scheme, diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ce361278..95e8db74 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -379,6 +379,7 @@ type env struct { runnerGracefulStopTimeout string runnerTerminationGracePeriodSeconds string runnerNamespace string + logFormat string remoteKubeconfig string imagePullSecretName string imagePullPolicy string @@ -511,6 +512,7 @@ func initTestEnv(t *testing.T, k8sMinorVer string, vars vars) *env { e.runnerTerminationGracePeriodSeconds = testing.Getenv(t, "TEST_RUNNER_TERMINATION_GRACE_PERIOD_SECONDS", "30") e.runnerGracefulStopTimeout = testing.Getenv(t, "TEST_RUNNER_GRACEFUL_STOP_TIMEOUT", "15") e.runnerNamespace = testing.Getenv(t, "TEST_RUNNER_NAMESPACE", "default") + e.logFormat = testing.Getenv(t, "ARC_E2E_LOG_FORMAT", "") e.remoteKubeconfig = testing.Getenv(t, "ARC_E2E_REMOTE_KUBECONFIG", "") e.imagePullSecretName = testing.Getenv(t, "ARC_E2E_IMAGE_PULL_SECRET_NAME", "") e.vars = vars @@ -706,6 +708,12 @@ func (e *env) installActionsRunnerController(t *testing.T, repo, tag, testID, ch ) } + if e.logFormat != "" { + varEnv = append(varEnv, + "LOG_FORMAT="+e.logFormat, + ) + } + scriptEnv = append(scriptEnv, varEnv...) scriptEnv = append(scriptEnv, e.vars.commonScriptEnv...)