fix migration of single-node clusters (#2134)

2022-12-09 12:42:10 +01:00 · 2022-12-09 12:42:10 +01:00 · 0bef3b325f
parent 4d585250db
commit 0bef3b325f
1 changed files with 10 additions and 13 deletions
--- a/pkg/cluster/pod.go
+++ b/pkg/cluster/pod.go
@ -214,19 +214,16 @@ func (c *Cluster) movePodFromEndOfLifeNode(pod *v1.Pod) (*v1.Pod, error) {
 // MigrateMasterPod migrates master pod via failover to a replica
 func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
 	var (
-		masterCandidateName spec.NamespacedName
-		err                 error
-		eol                 bool
+		err error
+		eol bool
 	)

 	oldMaster, err := c.KubeClient.Pods(podName.Namespace).Get(context.TODO(), podName.Name, metav1.GetOptions{})
-
 	if err != nil {
-		return fmt.Errorf("could not get pod: %v", err)
+		return fmt.Errorf("could not get master pod: %v", err)
 	}

 	c.logger.Infof("starting process to migrate master pod %q", podName)
-
 	if eol, err = c.podIsEndOfLife(oldMaster); err != nil {
 		return fmt.Errorf("could not get node %q: %v", oldMaster.Spec.NodeName, err)
 	}
@ -250,21 +247,21 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
 		}
 		c.Statefulset = sset
 	}
-	// We may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case.
+	// we may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case
+	masterCandidateName := podName
+	masterCandidatePod := oldMaster
 	if *c.Statefulset.Spec.Replicas > 1 {
 		if masterCandidateName, err = c.getSwitchoverCandidate(oldMaster); err != nil {
 			return fmt.Errorf("could not find suitable replica pod as candidate for failover: %v", err)
 		}
+		masterCandidatePod, err = c.KubeClient.Pods(masterCandidateName.Namespace).Get(context.TODO(), masterCandidateName.Name, metav1.GetOptions{})
+		if err != nil {
+			return fmt.Errorf("could not get master candidate pod: %v", err)
+		}
 	} else {
 		c.logger.Warningf("migrating single pod cluster %q, this will cause downtime of the Postgres cluster until pod is back", c.clusterName())
 	}

-	masterCandidatePod, err := c.KubeClient.Pods(masterCandidateName.Namespace).Get(context.TODO(), masterCandidateName.Name, metav1.GetOptions{})
-
-	if err != nil {
-		return fmt.Errorf("could not get master candidate pod: %v", err)
-	}
-
 	// there are two cases for each postgres cluster that has its master pod on the node to migrate from:
 	// - the cluster has some replicas - migrate one of those if necessary and failover to it
 	// - there are no replicas - just terminate the master and wait until it respawns