From 1ea8b3bbe6a43fa668b244e0aeb97f6be45f13a6 Mon Sep 17 00:00:00 2001
From: Oleksii Kliukin
Date: Thu, 24 May 2018 11:05:19 +0200
Subject: [PATCH 1/2] Fix a crash on node migration.

After an unsuccessful initial cluster sync it may happen that the cached
cluster statefulset is nil. This has become more likely since 88d6a7be3,
which introduced syncing volumes before statefulsets; the volume sync may
fail for different reasons (e.g. the volume has been shrunk, or too many
calls to Amazon).
---
 pkg/cluster/pod.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go
index 432597f7f..ea3e02dcd 100644
--- a/pkg/cluster/pod.go
+++ b/pkg/cluster/pod.go
@@ -203,6 +203,15 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
 		c.logger.Warningf("pod %q is not a master", podName)
 		return nil
 	}
+	// we must have a statefulset in the cluster for the migration to work
+	if c.Statefulset == nil {
+		sset, err := c.KubeClient.StatefulSets(c.Namespace).Get(c.statefulSetName(), metav1.GetOptions{})
+		if err != nil {
+			return fmt.Errorf("could not retrieve cluster statefulset: %v", err)
+		}
+		c.Statefulset = sset
+	}
+	// We may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case.
 	if *c.Statefulset.Spec.Replicas == 1 {
 		c.logger.Warningf("single master pod for cluster %q, migration will cause longer downtime of the master instance", c.clusterName())
 	} else {

From 76ea754fc3e00fea81803ee740412b58f18f3ef0 Mon Sep 17 00:00:00 2001
From: Oleksii Kliukin
Date: Thu, 24 May 2018 11:17:42 +0200
Subject: [PATCH 2/2] Be lenient when asked to shrink a persistent volume.

Do not return a hard error; emit a warning instead. The cluster is not
going to be broken because of our refusal to shrink a volume.
---
 pkg/cluster/volumes.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/cluster/volumes.go b/pkg/cluster/volumes.go
index 42b5858fc..6f539f4ee 100644
--- a/pkg/cluster/volumes.go
+++ b/pkg/cluster/volumes.go
@@ -103,10 +103,10 @@ func (c *Cluster) resizeVolumes(newVolume spec.Volume, resizers []volumes.Volume
 
 	for _, pv := range pvs {
 		volumeSize := quantityToGigabyte(pv.Spec.Capacity[v1.ResourceStorage])
-		if volumeSize > newSize {
-			return fmt.Errorf("cannot shrink persistent volume")
-		}
-		if volumeSize == newSize {
+		if volumeSize >= newSize {
+			if volumeSize > newSize {
+				c.logger.Warningf("cannot shrink persistent volume")
+			}
 			continue
 		}
 		for _, resizer := range resizers {
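
The first patch boils down to a lazy "fetch when the cache is nil" guard before dereferencing c.Statefulset. The sketch below reproduces that pattern in isolation; the Cluster and StatefulSet types and the fetch callback are simplified stand-ins for the operator's real structs and Kubernetes client, so treat it as an illustration of the approach under those assumptions, not code from the repository.

// Illustrative sketch (not part of the patch): a lazy "fetch on nil cache"
// guard like the one added to MigrateMasterPod. Types and the fetch callback
// are simplified stand-ins for the operator's real Cluster and API client.
package main

import "fmt"

// StatefulSet is a minimal stand-in for the Kubernetes object the operator caches.
type StatefulSet struct {
	Replicas int32
}

// Cluster caches the statefulset it manages; the cache stays nil if the
// initial cluster sync aborted before the statefulset could be read.
type Cluster struct {
	Statefulset *StatefulSet
	fetch       func() (*StatefulSet, error) // stand-in for the Kubernetes API call
}

// ensureStatefulset mirrors the added guard: if nothing is cached, fetch the
// object from the API instead of crashing on a nil pointer dereference.
func (c *Cluster) ensureStatefulset() error {
	if c.Statefulset != nil {
		return nil
	}
	sset, err := c.fetch()
	if err != nil {
		return fmt.Errorf("could not retrieve cluster statefulset: %v", err)
	}
	c.Statefulset = sset
	return nil
}

func main() {
	// Simulate a cluster whose initial sync aborted: nothing is cached yet.
	c := &Cluster{
		fetch: func() (*StatefulSet, error) { return &StatefulSet{Replicas: 1}, nil },
	}
	if err := c.ensureStatefulset(); err != nil {
		fmt.Println("migration aborted:", err)
		return
	}
	if c.Statefulset.Replicas == 1 {
		fmt.Println("single master pod: migration will cause longer master downtime")
	}
}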