From 7f2c68a82ddf1f56dfdcd6c4fda582376f0ed542 Mon Sep 17 00:00:00 2001
From: Felix Kunde <felix-kunde@gmx.de>
Date: Thu, 25 Feb 2021 06:43:35 +0100
Subject: [PATCH] reflect code review feedback

---
 e2e/tests/test_e2e.py | 20 +++++---------------
 pkg/cluster/pod.go    | 19 ++++++++++++++-----
 pkg/cluster/sync.go   |  8 ++++++++
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 506882150..1b330c828 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -750,10 +750,6 @@ class EndToEndTestCase(unittest.TestCase):
 
         self.eventuallyTrue(verify_pod_limits, "Pod limits where not adjusted")
 
-    @classmethod
-    def setUp(cls):
-        pass
-
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_multi_namespace_support(self):
         '''
@@ -812,8 +808,8 @@ class EndToEndTestCase(unittest.TestCase):
                 if pod.metadata.labels.get('spilo-role') == 'master':
                     old_creation_timestamp = pod.metadata.creation_timestamp
                     k8s.patch_pod(flag, pod.metadata.name, pod.metadata.namespace)
-                # operator will perform a switchover to an existing replica before recreating the master pod
                 else:
+                    # remember replica name to check if operator does a switchover
                     switchover_target = pod.metadata.name
 
             # do not wait until the next sync
@@ -852,7 +848,7 @@ class EndToEndTestCase(unittest.TestCase):
         _, replica_nodes = k8s.get_pg_nodes(cluster_label)
 
         # rolling update annotation
-        rolling_update_flag = {
+        rolling_update_patch = {
             "metadata": {
                 "annotations": {
                     flag: "true",
@@ -861,7 +857,8 @@ class EndToEndTestCase(unittest.TestCase):
         }
 
         # make pod_label_wait_timeout so short that rolling update fails on first try
-        # temporarily lower resync interval to simulate that pods get healthy in between SYNCs
+        # temporarily lower resync interval to reduce waiting for further tests
+        # pods should get healthy in the meantime
         patch_resync_config = {
             "data": {
                 "pod_label_wait_timeout": "2s",
@@ -873,7 +870,7 @@ class EndToEndTestCase(unittest.TestCase):
             # patch both pods for rolling update
             podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label)
             for pod in podList.items:
-                k8s.patch_pod(rolling_update_flag, pod.metadata.name, pod.metadata.namespace)
+                k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace)
                 if pod.metadata.labels.get('spilo-role') == 'replica':
                     switchover_target = pod.metadata.name
 
@@ -888,7 +885,6 @@ class EndToEndTestCase(unittest.TestCase):
             self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail")
 
             # wait for next sync, replica should be running normally by now and be ready for switchover
-            time.sleep(10)
             k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label)
 
             # check if the former replica is now the new master
@@ -902,12 +898,6 @@ class EndToEndTestCase(unittest.TestCase):
             time.sleep(10)
             self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs")
 
-            # rolling update should be gone now
-            podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label)
-            for pod in podList.items:
-                self.assertTrue(flag not in pod.metadata.annotations, 
-                    "Rolling update flag still present on pod {}".format(pod.metadata.name))
-
             # revert config changes
             patch_resync_config = {
                 "data": {
diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go
index 377e5deab..87de71466 100644
--- a/pkg/cluster/pod.go
+++ b/pkg/cluster/pod.go
@@ -50,6 +50,11 @@ func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) {
 // markRollingUpdateFlagForPod sets the indicator for the rolling update requirement
 // in the Pod annotation.
 func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error {
+	// no need to patch pod if annotation is already there
+	if c.getRollingUpdateFlagFromPod(pod) {
+		return nil
+	}
+
 	c.logger.Debugf("mark rolling update annotation for %s: reason %s", pod.Name, msg)
 	flag := make(map[string]string)
 	flag[rollingUpdatePodAnnotationKey] = strconv.FormatBool(true)
@@ -80,7 +85,7 @@ func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error {
 	return nil
 }
 
-// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the passed
+// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the given pod
 func (c *Cluster) getRollingUpdateFlagFromPod(pod *v1.Pod) (flag bool) {
 	anno := pod.GetAnnotations()
 	flag = false
@@ -332,8 +337,6 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st
 }
 
 func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) {
-	// TODO due to delays in sync, should double check if recreate annotation is still present
-
 	ch := c.registerPodSubscriber(podName)
 	defer c.unregisterPodSubscriber(podName)
 	stopChan := make(chan struct{})
@@ -420,6 +423,11 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
 			continue
 		}
 
+		// double check one more time if the rolling update flag is still there
+		if !c.getRollingUpdateFlagFromPod(&pod) {
+			continue
+		}
+
 		podName := util.NameFromMeta(pod.ObjectMeta)
 		newPod, err := c.recreatePod(podName)
 		if err != nil {
@@ -435,12 +443,13 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
 	}
 
 	if masterPod != nil {
-		// switchover if we have observed a master pod and replicas
+		// switchover if
+		// 1. we have not observed a new master pod when re-creating former replicas
+		// 2. we know possible switchover targets even when no replicas were recreated
 		if newMasterPod == nil && len(replicas) > 0 {
 			if err := c.Switchover(masterPod, masterCandidate(replicas)); err != nil {
 				c.logger.Warningf("could not perform switch over: %v", err)
 			}
-			// TODO if only the master pod came for recreate, we do not do switchover currently
 		} else if newMasterPod == nil && len(replicas) == 0 {
 			c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas")
 		}
diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index a65f0cf8b..9bce4e0a7 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -322,6 +322,10 @@ func (c *Cluster) syncStatefulSet() error {
 			if c.getRollingUpdateFlagFromPod(&pod) {
 				podsToRecreate = append(podsToRecreate, pod)
 			} else {
+				role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
+				if role == Master {
+					continue
+				}
 				switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta))
 			}
 		}
@@ -382,6 +386,10 @@ func (c *Cluster) syncStatefulSet() error {
 					}
 					podsToRecreate = append(podsToRecreate, pod)
 				} else {
+					role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
+					if role == Master {
+						continue
+					}
 					switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta))
 				}
 			}