Make logging more concise (#410)

This makes logging more concise by renaming loggers from the likes of `controllers.Runner` to `actions-runner-controller.runner`, following the naming style of the standard `controller-runtime.controller` logger, and by reducing redundant logs through removing unnecessary requeues. I have also tweaked log messages so that their style is more consistent, which should help readability. The runnerreplicaset controller also lacked useful logs, so I have enhanced it.
Yusuke Kuoka 2021-03-20 07:34:25 +09:00 committed by GitHub
parent 07f822bb08
commit bc6e499e4f
4 changed files with 60 additions and 43 deletions
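
A note on the renaming: logr composes logger names segment by segment, so a single project root logger plus a lower-cased per-controller suffix renders as `actions-runner-controller.runner`, analogous to the built-in `controller-runtime.controller` name. A minimal, standalone sketch of that composition (not code from this commit; the zap development-mode setup is only for demonstration):

```go
package main

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

func main() {
	// controller-runtime's global logger has to be set before it is used.
	ctrl.SetLogger(zap.New(zap.UseDevMode(true)))

	// Old naming: renders with the logger name "controllers.Runner".
	oldLog := ctrl.Log.WithName("controllers").WithName("Runner")
	oldLog.Info("Reconciling runner")

	// New naming: one project-wide root plus lower-cased per-controller
	// suffixes, rendering as "actions-runner-controller.runner".
	root := ctrl.Log.WithName("actions-runner-controller")
	newLog := root.WithName("runner")
	newLog.Info("Reconciling runner")
}
```

A single root name also makes it easy to filter every log line emitted by this project's controllers with one prefix.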


@@ -167,9 +167,11 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 		// Gracefully handle pod-already-exists errors due to informer cache delay.
 		// Without this we got a few errors like the below on new runner pod:
 		// 2021-03-16T00:23:10.116Z ERROR controller-runtime.controller Reconciler error {"controller": "runner-controller", "request": "default/example-runnerdeploy-b2g2g-j4mcp", "error": "pods \"example-runnerdeploy-b2g2g-j4mcp\" already exists"}
-		log.Info("Runner pod already exists. Probably this pod has been already created in previous reconcilation but the new pod is not yet cached.")
+		log.Info(
+			"Failed to create pod due to AlreadyExists error. Probably this pod has been already created in previous reconcilation but is still not in the informer cache. Will retry on pod created. If it doesn't repeat, there's no problem",
+		)
-		return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
+		return ctrl.Result{}, nil
 	}
 	log.Error(err, "Failed to create pod resource")
@@ -187,7 +189,7 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 	if deletionDidTimeout {
 		log.Info(
-			"Pod failed to delete itself in a timely manner. "+
+			fmt.Sprintf("Failed to delete pod within %s. ", deletionTimeout)+
 				"This is typically the case when a Kubernetes node became unreachable "+
 				"and the kube controller started evicting nodes. Forcefully deleting the pod to not get stuck.",
 			"podDeletionTimestamp", pod.DeletionTimestamp,
@@ -262,9 +264,12 @@ func (r *RunnerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 	if lastCheckTime := runner.Status.LastRegistrationCheckTime; lastCheckTime != nil {
 		nextCheckTime := lastCheckTime.Add(registrationCheckInterval)
 		now := time.Now()
-		if nextCheckTime.After(now) {
-			requeueAfter := nextCheckTime.Sub(now)
+		// Requeue scheduled by RequeueAfter can happen a bit earlier (like dozens of milliseconds)
+		// so to avoid excessive, in-effective retry, we heuristically ignore the remaining delay in case it is
+		// shorter than 1s
+		requeueAfter := nextCheckTime.Sub(now) - time.Second
+		if requeueAfter > 0 {
 			log.Info(
 				fmt.Sprintf("Skipped registration check because it's deferred until %s. Retrying in %s at latest", nextCheckTime, requeueAfter),
 				"lastRegistrationCheckTime", lastCheckTime,
@@ -795,8 +800,6 @@ func (r *RunnerReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.Recorder = mgr.GetEventRecorderFor(name)
r.Recorder = mgr.GetEventRecorderFor(name)
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.Runner{}).
Owns(&corev1.Pod{}).


@@ -192,17 +192,28 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 	if len(oldSets) > 0 {
 		readyReplicas := newestSet.Status.ReadyReplicas
-		if readyReplicas < currentDesiredReplicas {
-			log.WithValues("runnerreplicaset", types.NamespacedName{
+		oldSetsCount := len(oldSets)
+		logWithDebugInfo := log.WithValues(
+			"newest_runnerreplicaset", types.NamespacedName{
 				Namespace: newestSet.Namespace,
 				Name:      newestSet.Name,
-			}).
-				Info("Waiting until the newest runner replica set to be 100% available",
-					"ready", readyReplicas,
-					"desired", currentDesiredReplicas,
-				)
+			},
+			"newest_runnerreplicaset_replicas_ready", readyReplicas,
+			"newest_runnerreplicaset_replicas_desired", currentDesiredReplicas,
+			"old_runnerreplicasets_count", oldSetsCount,
+		)
-			return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
+		if readyReplicas < currentDesiredReplicas {
+			logWithDebugInfo.
+				Info("Waiting until the newest runnerreplicaset to be 100% available")
+			return ctrl.Result{}, nil
 		}
+		if oldSetsCount > 0 {
+			logWithDebugInfo.
+				Info("The newest runnerreplicaset is 100% available. Deleting old runnerreplicasets")
+		}
 		for i := range oldSets {
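
Building `logWithDebugInfo` once with `WithValues` attaches the replica-set debug fields to every message in this branch, so the individual `Info` calls stay short while the output keeps the full context. A standalone illustration (the counts and logger setup are made up for the example):

```go
package main

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

func main() {
	ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
	log := ctrl.Log.WithName("actions-runner-controller").WithName("runnerdeployment")

	// Attach the debug context once...
	logWithDebugInfo := log.WithValues(
		"newest_runnerreplicaset_replicas_ready", 2,
		"newest_runnerreplicaset_replicas_desired", 3,
		"old_runnerreplicasets_count", 1,
	)

	// ...and every message emitted through it carries those key-value pairs.
	logWithDebugInfo.Info("Waiting until the newest runnerreplicaset to be 100% available")
}
```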


@@ -114,13 +114,14 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 		desired = 1
 	}
-	log.V(0).Info("debug", "desired", desired, "available", available)
 	if available > desired {
 		n := available - desired
-		// get runners that are currently not busy
-		var notBusy []v1alpha1.Runner
+		log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "available", available, "ready", ready)
+		// get runners that are currently offline/not busy/timed-out to register
+		var deletionCandidates []v1alpha1.Runner
 		for _, runner := range allRunners.Items {
 			busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
 			if err != nil {
@@ -168,35 +169,37 @@ func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
 						"configuredRegistrationTimeout", registrationTimeout,
 					)
-					notBusy = append(notBusy, runner)
+					deletionCandidates = append(deletionCandidates, runner)
 				}
 				// offline runners should always be a great target for scale down
 				if offline {
-					notBusy = append(notBusy, runner)
+					deletionCandidates = append(deletionCandidates, runner)
 				}
 			} else if !busy {
-				notBusy = append(notBusy, runner)
+				deletionCandidates = append(deletionCandidates, runner)
 			}
 		}
-		if len(notBusy) < n {
-			n = len(notBusy)
+		if len(deletionCandidates) < n {
+			n = len(deletionCandidates)
 		}
 		for i := 0; i < n; i++ {
-			if err := r.Client.Delete(ctx, &notBusy[i]); client.IgnoreNotFound(err) != nil {
+			if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
 				log.Error(err, "Failed to delete runner resource")
 				return ctrl.Result{}, err
 			}
-			r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", notBusy[i].Name))
-			log.Info("Deleted runner", "runnerreplicaset", rs.ObjectMeta.Name)
+			r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
+			log.Info("Deleted runner")
 		}
 	} else if desired > available {
 		n := desired - available
+		log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", available, "ready", ready)
 		for i := 0; i < n; i++ {
 			newRunner, err := r.newRunner(rs)
 			if err != nil {
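
With the bare `"debug"` line gone, the replica-set controller now logs the scale decision itself together with the desired/available/ready counts; `V(0)` is logr's default verbosity, so these lines always appear. A small self-contained sketch of the same decision logging (counts are made up):

```go
package main

import (
	"fmt"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

func main() {
	ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
	log := ctrl.Log.WithName("actions-runner-controller").WithName("runnerreplicaset")

	desired, available, ready := 2, 5, 5

	// V(0) is the default level, so the scale decision is always logged.
	if available > desired {
		n := available - desired
		log.V(0).Info(fmt.Sprintf("Deleting %d runners", n),
			"desired", desired, "available", available, "ready", ready)
	} else if desired > available {
		n := desired - available
		log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n),
			"desired", desired, "available", available, "ready", ready)
	}
}
```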

main.go

@@ -41,8 +41,8 @@ const (
 )
 var (
-	scheme   = runtime.NewScheme()
-	setupLog = ctrl.Log.WithName("setup")
+	scheme = runtime.NewScheme()
+	log    = ctrl.Log.WithName("actions-runner-controller")
 )
 func init() {
@@ -109,13 +109,13 @@ func main() {
 		Namespace: namespace,
 	})
 	if err != nil {
-		setupLog.Error(err, "unable to start manager")
+		log.Error(err, "unable to start manager")
 		os.Exit(1)
 	}
 	runnerReconciler := &controllers.RunnerReconciler{
 		Client: mgr.GetClient(),
-		Log: ctrl.Log.WithName("controllers").WithName("Runner"),
+		Log: log.WithName("runner"),
 		Scheme: mgr.GetScheme(),
 		GitHubClient: ghClient,
 		RunnerImage: runnerImage,
@@ -123,64 +123,64 @@ func main() {
 	}
 	if err = runnerReconciler.SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "Runner")
+		log.Error(err, "unable to create controller", "controller", "Runner")
 		os.Exit(1)
 	}
 	runnerSetReconciler := &controllers.RunnerReplicaSetReconciler{
 		Client: mgr.GetClient(),
-		Log: ctrl.Log.WithName("controllers").WithName("RunnerReplicaSet"),
+		Log: log.WithName("runnerreplicaset"),
 		Scheme: mgr.GetScheme(),
 		GitHubClient: ghClient,
 	}
 	if err = runnerSetReconciler.SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
+		log.Error(err, "unable to create controller", "controller", "RunnerReplicaSet")
 		os.Exit(1)
 	}
 	runnerDeploymentReconciler := &controllers.RunnerDeploymentReconciler{
 		Client: mgr.GetClient(),
-		Log: ctrl.Log.WithName("controllers").WithName("RunnerDeployment"),
+		Log: log.WithName("runnerdeployment"),
 		Scheme: mgr.GetScheme(),
 		CommonRunnerLabels: commonRunnerLabels,
 	}
 	if err = runnerDeploymentReconciler.SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "RunnerDeployment")
+		log.Error(err, "unable to create controller", "controller", "RunnerDeployment")
 		os.Exit(1)
 	}
 	horizontalRunnerAutoscaler := &controllers.HorizontalRunnerAutoscalerReconciler{
 		Client: mgr.GetClient(),
-		Log: ctrl.Log.WithName("controllers").WithName("HorizontalRunnerAutoscaler"),
+		Log: log.WithName("horizontalrunnerautoscaler"),
 		Scheme: mgr.GetScheme(),
 		GitHubClient: ghClient,
 		CacheDuration: syncPeriod - 10*time.Second,
 	}
 	if err = horizontalRunnerAutoscaler.SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
+		log.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler")
 		os.Exit(1)
 	}
 	if err = (&actionsv1alpha1.Runner{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "Runner")
+		log.Error(err, "unable to create webhook", "webhook", "Runner")
 		os.Exit(1)
 	}
 	if err = (&actionsv1alpha1.RunnerDeployment{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
+		log.Error(err, "unable to create webhook", "webhook", "RunnerDeployment")
 		os.Exit(1)
 	}
 	if err = (&actionsv1alpha1.RunnerReplicaSet{}).SetupWebhookWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
+		log.Error(err, "unable to create webhook", "webhook", "RunnerReplicaSet")
 		os.Exit(1)
 	}
 	// +kubebuilder:scaffold:builder
-	setupLog.Info("starting manager")
+	log.Info("starting manager")
 	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
-		setupLog.Error(err, "problem running manager")
+		log.Error(err, "problem running manager")
 		os.Exit(1)
 	}
 }
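
For completeness, this is roughly how a reconciler consumes the named logger that main.go injects through its `Log` field, scoping it to the request at the top of `Reconcile`. The `exampleReconciler` below is a stand-in for the real reconcilers, not code from this repository:

```go
package main

import (
	"github.com/go-logr/logr"
	"k8s.io/apimachinery/pkg/types"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

type exampleReconciler struct {
	Log logr.Logger
}

// Reconcile uses the same signature style as in the diff above (no context argument).
func (r *exampleReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
	// Scope the controller's logger to this particular request.
	log := r.Log.WithValues("runner", req.NamespacedName)
	log.Info("Reconciling runner")
	return ctrl.Result{}, nil
}

func main() {
	ctrl.SetLogger(zap.New(zap.UseDevMode(true)))
	log := ctrl.Log.WithName("actions-runner-controller")

	r := &exampleReconciler{Log: log.WithName("runner")}
	_, _ = r.Reconcile(ctrl.Request{
		NamespacedName: types.NamespacedName{Namespace: "default", Name: "example-runner"},
	})
}
```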