347 lines
11 KiB
Go
347 lines
11 KiB
Go
/*
|
|
Copyright 2020 The actions-runner-controller authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package controllers
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"reflect"
|
|
"time"
|
|
|
|
gogithub "github.com/google/go-github/v33/github"
|
|
|
|
"github.com/go-logr/logr"
|
|
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/client-go/tools/record"
|
|
ctrl "sigs.k8s.io/controller-runtime"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
|
|
corev1 "k8s.io/api/core/v1"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
|
|
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
|
|
"github.com/summerwind/actions-runner-controller/github"
|
|
)
|
|
|
|
// RunnerReplicaSetReconciler reconciles a Runner object
|
|
type RunnerReplicaSetReconciler struct {
|
|
client.Client
|
|
Log logr.Logger
|
|
Recorder record.EventRecorder
|
|
Scheme *runtime.Scheme
|
|
GitHubClient *github.Client
|
|
Name string
|
|
}
|
|
|
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
|
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
|
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
|
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
|
|
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners/status,verbs=get;update;patch
|
|
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
|
|
|
|
func (r *RunnerReplicaSetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
|
log := r.Log.WithValues("runnerreplicaset", req.NamespacedName)
|
|
|
|
var rs v1alpha1.RunnerReplicaSet
|
|
if err := r.Get(ctx, req.NamespacedName, &rs); err != nil {
|
|
return ctrl.Result{}, client.IgnoreNotFound(err)
|
|
}
|
|
|
|
if !rs.ObjectMeta.DeletionTimestamp.IsZero() {
|
|
return ctrl.Result{}, nil
|
|
}
|
|
|
|
selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
|
|
if err != nil {
|
|
return ctrl.Result{}, err
|
|
}
|
|
// Get the Runners managed by the target RunnerReplicaSet
|
|
var allRunners v1alpha1.RunnerList
|
|
if err := r.List(
|
|
ctx,
|
|
&allRunners,
|
|
client.InNamespace(req.Namespace),
|
|
client.MatchingLabelsSelector{Selector: selector},
|
|
); err != nil {
|
|
if !kerrors.IsNotFound(err) {
|
|
return ctrl.Result{}, err
|
|
}
|
|
}
|
|
|
|
var myRunners []v1alpha1.Runner
|
|
|
|
var (
|
|
current int
|
|
ready int
|
|
available int
|
|
)
|
|
|
|
for _, r := range allRunners.Items {
|
|
// This guard is required to avoid the RunnerReplicaSet created by the controller v0.17.0 or before
|
|
// to not treat all the runners in the namespace as its children.
|
|
if metav1.IsControlledBy(&r, &rs) && !metav1.HasAnnotation(r.ObjectMeta, annotationKeyRegistrationOnly) {
|
|
myRunners = append(myRunners, r)
|
|
|
|
current += 1
|
|
|
|
if r.Status.Phase == string(corev1.PodRunning) {
|
|
ready += 1
|
|
// available is currently the same as ready, as we don't yet have minReadySeconds for runners
|
|
available += 1
|
|
}
|
|
}
|
|
}
|
|
|
|
var desired int
|
|
|
|
if rs.Spec.Replicas != nil {
|
|
desired = *rs.Spec.Replicas
|
|
} else {
|
|
desired = 1
|
|
}
|
|
|
|
registrationOnlyRunnerNsName := req.NamespacedName
|
|
registrationOnlyRunnerNsName.Name = registrationOnlyRunnerNameFor(rs.Name)
|
|
registrationOnlyRunner := v1alpha1.Runner{}
|
|
registrationOnlyRunnerExists := false
|
|
if err := r.Get(
|
|
ctx,
|
|
registrationOnlyRunnerNsName,
|
|
®istrationOnlyRunner,
|
|
); err != nil {
|
|
if !kerrors.IsNotFound(err) {
|
|
return ctrl.Result{}, err
|
|
}
|
|
} else {
|
|
registrationOnlyRunnerExists = true
|
|
}
|
|
|
|
// On scale to zero, we must have fully registered registration-only runner before we start deleting other runners, hence `desired == 0`
|
|
// On scale from zero, we must retain the registratoin-only runner until one or more other runners get registered, hence `registrationOnlyRunnerExists && available == 0`.
|
|
// On RunnerReplicaSet creation, it have always 0 replics and no registration-only runner.
|
|
// In this case We don't need to bother creating a registration-only runner which gets deleted soon after we have 1 or more available repolicas,
|
|
// hence it's not `available == 0`, but `registrationOnlyRunnerExists && available == 0`.
|
|
// See https://github.com/actions-runner-controller/actions-runner-controller/issues/516
|
|
registrationOnlyRunnerNeeded := desired == 0 || (registrationOnlyRunnerExists && current == 0)
|
|
|
|
if registrationOnlyRunnerNeeded {
|
|
if registrationOnlyRunnerExists {
|
|
if registrationOnlyRunner.Status.Phase == "" {
|
|
log.Info("Still waiting for the registration-only runner to be registered")
|
|
|
|
return ctrl.Result{}, nil
|
|
}
|
|
} else {
|
|
// A registration-only runner does not exist and is needed, hence create it.
|
|
|
|
runnerForScaleFromToZero, err := r.newRunner(rs)
|
|
if err != nil {
|
|
return ctrl.Result{}, fmt.Errorf("failed to create runner for scale from/to zero: %v", err)
|
|
}
|
|
|
|
runnerForScaleFromToZero.ObjectMeta.Name = registrationOnlyRunnerNsName.Name
|
|
runnerForScaleFromToZero.ObjectMeta.GenerateName = ""
|
|
runnerForScaleFromToZero.ObjectMeta.Labels = nil
|
|
metav1.SetMetaDataAnnotation(&runnerForScaleFromToZero.ObjectMeta, annotationKeyRegistrationOnly, "true")
|
|
|
|
if err := r.Client.Create(ctx, &runnerForScaleFromToZero); err != nil {
|
|
log.Error(err, "Failed to create runner for scale from/to zero")
|
|
|
|
return ctrl.Result{}, err
|
|
}
|
|
|
|
// We can continue to deleting runner pods only after the
|
|
// registration-only runner gets registered.
|
|
return ctrl.Result{}, nil
|
|
}
|
|
} else {
|
|
// A registration-only runner exists and is not needed, hence delete it.
|
|
if registrationOnlyRunnerExists {
|
|
if err := r.Client.Delete(ctx, ®istrationOnlyRunner); err != nil {
|
|
log.Error(err, "Retrying soon because we failed to delete registration-only runner")
|
|
|
|
return ctrl.Result{Requeue: true}, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
if current > desired {
|
|
n := current - desired
|
|
|
|
log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "current", current, "ready", ready)
|
|
|
|
// get runners that are currently offline/not busy/timed-out to register
|
|
var deletionCandidates []v1alpha1.Runner
|
|
|
|
for _, runner := range allRunners.Items {
|
|
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
|
|
if err != nil {
|
|
notRegistered := false
|
|
offline := false
|
|
|
|
var notFoundException *github.RunnerNotFound
|
|
var offlineException *github.RunnerOffline
|
|
if errors.As(err, ¬FoundException) {
|
|
log.V(1).Info("Failed to check if runner is busy. Either this runner has never been successfully registered to GitHub or it still needs more time.", "runnerName", runner.Name)
|
|
notRegistered = true
|
|
} else if errors.As(err, &offlineException) {
|
|
offline = true
|
|
} else {
|
|
var e *gogithub.RateLimitError
|
|
if errors.As(err, &e) {
|
|
// We log the underlying error when we failed calling GitHub API to list or unregisters,
|
|
// or the runner is still busy.
|
|
log.Error(
|
|
err,
|
|
fmt.Sprintf(
|
|
"Failed to check if runner is busy due to GitHub API rate limit. Retrying in %s to avoid excessive GitHub API calls",
|
|
retryDelayOnGitHubAPIRateLimitError,
|
|
),
|
|
)
|
|
|
|
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
|
|
}
|
|
|
|
return ctrl.Result{}, err
|
|
}
|
|
|
|
registrationTimeout := 15 * time.Minute
|
|
currentTime := time.Now()
|
|
registrationDidTimeout := currentTime.Sub(runner.CreationTimestamp.Add(registrationTimeout)) > 0
|
|
|
|
if notRegistered && registrationDidTimeout {
|
|
log.Info(
|
|
"Runner failed to register itself to GitHub in timely manner. "+
|
|
"Marking the runner for scale down. "+
|
|
"CAUTION: If you see this a lot, you should investigate the root cause. "+
|
|
"See https://github.com/summerwind/actions-runner-controller/issues/288",
|
|
"runnerCreationTimestamp", runner.CreationTimestamp,
|
|
"currentTime", currentTime,
|
|
"configuredRegistrationTimeout", registrationTimeout,
|
|
)
|
|
|
|
deletionCandidates = append(deletionCandidates, runner)
|
|
}
|
|
|
|
// offline runners should always be a great target for scale down
|
|
if offline {
|
|
deletionCandidates = append(deletionCandidates, runner)
|
|
}
|
|
} else if !busy {
|
|
deletionCandidates = append(deletionCandidates, runner)
|
|
}
|
|
}
|
|
|
|
if len(deletionCandidates) < n {
|
|
n = len(deletionCandidates)
|
|
}
|
|
|
|
log.V(0).Info(fmt.Sprintf("Deleting %d runner(s)", n), "desired", desired, "current", current, "ready", ready)
|
|
|
|
for i := 0; i < n; i++ {
|
|
if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
|
|
log.Error(err, "Failed to delete runner resource")
|
|
|
|
return ctrl.Result{}, err
|
|
}
|
|
|
|
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
|
|
log.Info("Deleted runner")
|
|
}
|
|
} else if desired > current {
|
|
n := desired - current
|
|
|
|
log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", current, "ready", ready)
|
|
|
|
for i := 0; i < n; i++ {
|
|
newRunner, err := r.newRunner(rs)
|
|
if err != nil {
|
|
log.Error(err, "Could not create runner")
|
|
|
|
return ctrl.Result{}, err
|
|
}
|
|
|
|
if err := r.Client.Create(ctx, &newRunner); err != nil {
|
|
log.Error(err, "Failed to create runner resource")
|
|
|
|
return ctrl.Result{}, err
|
|
}
|
|
}
|
|
}
|
|
|
|
var status v1alpha1.RunnerReplicaSetStatus
|
|
|
|
status.Replicas = ¤t
|
|
status.AvailableReplicas = &available
|
|
status.ReadyReplicas = &ready
|
|
|
|
if !reflect.DeepEqual(rs.Status, status) {
|
|
updated := rs.DeepCopy()
|
|
updated.Status = status
|
|
|
|
if err := r.Status().Patch(ctx, updated, client.MergeFrom(&rs)); err != nil {
|
|
log.Info("Failed to update runnerreplicaset status. Retrying immediately", "error", err.Error())
|
|
return ctrl.Result{
|
|
Requeue: true,
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
return ctrl.Result{}, nil
|
|
}
|
|
|
|
func (r *RunnerReplicaSetReconciler) newRunner(rs v1alpha1.RunnerReplicaSet) (v1alpha1.Runner, error) {
|
|
objectMeta := rs.Spec.Template.ObjectMeta.DeepCopy()
|
|
|
|
objectMeta.GenerateName = rs.ObjectMeta.Name + "-"
|
|
objectMeta.Namespace = rs.ObjectMeta.Namespace
|
|
|
|
runner := v1alpha1.Runner{
|
|
TypeMeta: metav1.TypeMeta{},
|
|
ObjectMeta: *objectMeta,
|
|
Spec: rs.Spec.Template.Spec,
|
|
}
|
|
|
|
if err := ctrl.SetControllerReference(&rs, &runner, r.Scheme); err != nil {
|
|
return runner, err
|
|
}
|
|
|
|
return runner, nil
|
|
}
|
|
|
|
func (r *RunnerReplicaSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
|
name := "runnerreplicaset-controller"
|
|
if r.Name != "" {
|
|
name = r.Name
|
|
}
|
|
|
|
r.Recorder = mgr.GetEventRecorderFor(name)
|
|
|
|
return ctrl.NewControllerManagedBy(mgr).
|
|
For(&v1alpha1.RunnerReplicaSet{}).
|
|
Owns(&v1alpha1.Runner{}).
|
|
Named(name).
|
|
Complete(r)
|
|
}
|
|
|
|
// registrationOnlyRunnerNameFor returns the name of the registration-only
// runner that belongs to the RunnerReplicaSet named rsName.
func registrationOnlyRunnerNameFor(rsName string) string {
	const suffix = "-registration-only"

	return rsName + suffix
}
|