fix: RunnerDeployment should clean up old RunnerReplicaSets ASAP

Since the initial implementation of RunnerDeployment and until this change, any update to a runner deployment has been leaving old runner replicasets until the next resync interval. This fixes that by continuously retrying the reconciliation 10 seconds later to see if there are any old runner replicasets that can be removed.

In addition to that, the cleanup of old runner replicasets has been improved to be deferred until all the runners of the newest replica set are available. This hopefully gives you zero, or at least less, downtime when updating runner deployments.

Fixes #24
This commit is contained in:
Yusuke Kuoka 2020-04-02 09:51:40 +09:00 committed by KUOKA Yusuke
parent a19cd373db
commit b411d37f2b
1 changed files with 45 additions and 12 deletions

View File

@ -20,7 +20,9 @@ import (
"context" "context"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
"k8s.io/apimachinery/pkg/types"
"sort" "sort"
"time"
"github.com/davecgh/go-spew/spew" "github.com/davecgh/go-spew/spew"
"github.com/go-logr/logr" "github.com/go-logr/logr"
@ -58,7 +60,7 @@ type RunnerDeploymentReconciler struct {
func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
ctx := context.Background() ctx := context.Background()
log := r.Log.WithValues("runnerreplicaset", req.NamespacedName) log := r.Log.WithValues("runnerdeployment", req.NamespacedName)
var rd v1alpha1.RunnerDeployment var rd v1alpha1.RunnerDeployment
if err := r.Get(ctx, req.NamespacedName, &rd); err != nil { if err := r.Get(ctx, req.NamespacedName, &rd); err != nil {
@ -130,12 +132,19 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
return ctrl.Result{}, err return ctrl.Result{}, err
} }
return ctrl.Result{}, nil // We requeue in order to clean up old runner replica sets later.
// Otherwise, they aren't cleaned up until the next re-sync interval.
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
} }
const defaultReplicas = 1
currentDesiredReplicas := getIntOrDefault(newestSet.Spec.Replicas, defaultReplicas)
newDesiredReplicas := getIntOrDefault(desiredRS.Spec.Replicas, defaultReplicas)
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption // Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
if newestSet.Spec.Replicas != desiredRS.Spec.Replicas { if currentDesiredReplicas != newDesiredReplicas {
newestSet.Spec.Replicas = desiredRS.Spec.Replicas newestSet.Spec.Replicas = &newDesiredReplicas
if err := r.Client.Update(ctx, newestSet); err != nil { if err := r.Client.Update(ctx, newestSet); err != nil {
log.Error(err, "Failed to update runnerreplicaset resource") log.Error(err, "Failed to update runnerreplicaset resource")
@ -143,25 +152,49 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
return ctrl.Result{}, err return ctrl.Result{}, err
} }
return ctrl.Result{}, nil return ctrl.Result{}, err
} }
for i := range oldSets { // Do we old runner replica sets that should eventually deleted?
rs := oldSets[i] if len(oldSets) > 0 {
readyReplicas := newestSet.Status.ReadyReplicas
if err := r.Client.Delete(ctx, &rs); err != nil { if readyReplicas < currentDesiredReplicas {
log.Error(err, "Failed to delete runner resource") log.WithValues("runnerreplicaset", types.NamespacedName{
Namespace: newestSet.Namespace,
Name: newestSet.Name,
}).
Info("Waiting until the newest runner replica set to be 100% available")
return ctrl.Result{}, err return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
} }
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name)) for i := range oldSets {
log.Info("Deleted runnerreplicaset", "runnerdeployment", rd.ObjectMeta.Name, "runnerreplicaset", rs.Name) rs := oldSets[i]
if err := r.Client.Delete(ctx, &rs); err != nil {
log.Error(err, "Failed to delete runner resource")
return ctrl.Result{}, err
}
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name))
log.Info("Deleted runnerreplicaset", "runnerdeployment", rd.ObjectMeta.Name, "runnerreplicaset", rs.Name)
}
} }
return ctrl.Result{}, nil return ctrl.Result{}, nil
} }
// getIntOrDefault returns the value p points to, or d when p is nil.
func getIntOrDefault(p *int, d int) int {
	if p != nil {
		return *p
	}

	// No value was provided, so fall back to the supplied default.
	return d
}
func getTemplateHash(rs *v1alpha1.RunnerReplicaSet) (string, bool) { func getTemplateHash(rs *v1alpha1.RunnerReplicaSet) (string, bool) {
hash, ok := rs.Labels[LabelKeyRunnerTemplateHash] hash, ok := rs.Labels[LabelKeyRunnerTemplateHash]