reflect code review feedback

Felix Kunde 2021-02-25 06:43:35 +01:00
parent 75e91c0dec
commit 7f2c68a82d
3 changed files with 27 additions and 20 deletions

View File

@@ -750,10 +750,6 @@ class EndToEndTestCase(unittest.TestCase):
self.eventuallyTrue(verify_pod_limits, "Pod limits were not adjusted")
@classmethod
def setUp(cls):
pass
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_multi_namespace_support(self):
'''
@@ -812,8 +808,8 @@ class EndToEndTestCase(unittest.TestCase):
if pod.metadata.labels.get('spilo-role') == 'master':
old_creation_timestamp = pod.metadata.creation_timestamp
k8s.patch_pod(flag, pod.metadata.name, pod.metadata.namespace)
# operator will perform a switchover to an existing replica before recreating the master pod
else:
# remember replica name to check if operator does a switchover
switchover_target = pod.metadata.name
# do not wait until the next sync
@@ -852,7 +848,7 @@ class EndToEndTestCase(unittest.TestCase):
_, replica_nodes = k8s.get_pg_nodes(cluster_label)
# rolling update annotation
rolling_update_flag = {
rolling_update_patch = {
"metadata": {
"annotations": {
flag: "true",
@@ -861,7 +857,8 @@ class EndToEndTestCase(unittest.TestCase):
}
# make pod_label_wait_timeout so short that rolling update fails on first try
# temporarily lower resync interval to simulate that pods get healthy in between SYNCs
# temporarily lower resync interval to reduce waiting for further tests
# pods should get healthy in the meantime
patch_resync_config = {
"data": {
"pod_label_wait_timeout": "2s",
@@ -873,7 +870,7 @@ class EndToEndTestCase(unittest.TestCase):
# patch both pods for rolling update
podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label)
for pod in podList.items:
k8s.patch_pod(rolling_update_flag, pod.metadata.name, pod.metadata.namespace)
k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace)
if pod.metadata.labels.get('spilo-role') == 'replica':
switchover_target = pod.metadata.name
@@ -888,7 +885,6 @@ class EndToEndTestCase(unittest.TestCase):
self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail")
# wait for next sync, replica should be running normally by now and be ready for switchover
time.sleep(10)
k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label)
# check if the former replica is now the new master
@@ -902,12 +898,6 @@ class EndToEndTestCase(unittest.TestCase):
time.sleep(10)
self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs")
# rolling update should be gone now
podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label)
for pod in podList.items:
self.assertTrue(flag not in pod.metadata.annotations,
"Rolling update flag still present on pod {}".format(pod.metadata.name))
# revert config changes
patch_resync_config = {
"data": {

View File

@@ -50,6 +50,11 @@ func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) {
// markRollingUpdateFlagForPod sets the indicator for the rolling update requirement
// in the Pod annotation.
func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error {
// no need to patch pod if annotation is already there
if c.getRollingUpdateFlagFromPod(pod) {
return nil
}
c.logger.Debugf("mark rolling update annotation for %s: reason %s", pod.Name, msg)
flag := make(map[string]string)
flag[rollingUpdatePodAnnotationKey] = strconv.FormatBool(true)
@@ -80,7 +85,7 @@ func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error {
return nil
}
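The body of markRollingUpdateFlagForPod between building the flag map and the final return is not shown in this diff. The following is a minimal sketch of how such a flag could be written and read back with client-go; the JSON merge patch type, the helper names, and the literal annotation key are assumptions standing in for the operator's rollingUpdatePodAnnotationKey and its own Kubernetes client.

// Sketch only: how a rolling-update flag could be written and read back.
// The annotation key and the merge patch type are assumptions, not the operator's code.
package podflag

import (
	"context"
	"fmt"
	"strconv"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// rollingUpdateKey is a placeholder for rollingUpdatePodAnnotationKey from the diff above.
const rollingUpdateKey = "rolling-update-required"

// setRollingUpdateFlag patches only metadata.annotations, mirroring the
// {"metadata": {"annotations": {flag: "true"}}} body used in the e2e test.
func setRollingUpdateFlag(ctx context.Context, client kubernetes.Interface, pod *v1.Pod) error {
	patch := []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q:%q}}}`, rollingUpdateKey, strconv.FormatBool(true)))
	_, err := client.CoreV1().Pods(pod.Namespace).Patch(ctx, pod.Name, types.MergePatchType, patch, metav1.PatchOptions{})
	return err
}

// getRollingUpdateFlag defaults to false when the annotation is missing or not
// parsable, like getRollingUpdateFlagFromPod in the next hunk.
func getRollingUpdateFlag(pod *v1.Pod) bool {
	val, ok := pod.GetAnnotations()[rollingUpdateKey]
	if !ok {
		return false
	}
	flag, err := strconv.ParseBool(val)
	if err != nil {
		return false
	}
	return flag
}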
// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the passed
// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the given pod
func (c *Cluster) getRollingUpdateFlagFromPod(pod *v1.Pod) (flag bool) {
anno := pod.GetAnnotations()
flag = false
@@ -332,8 +337,6 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st
}
func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) {
// TODO due to delays in sync, should double check if recreate annotation is still present
ch := c.registerPodSubscriber(podName)
defer c.unregisterPodSubscriber(podName)
stopChan := make(chan struct{})
@@ -420,6 +423,11 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
continue
}
// double check one more time if the rolling update flag is still there
if !c.getRollingUpdateFlagFromPod(&pod) {
continue
}
podName := util.NameFromMeta(pod.ObjectMeta)
newPod, err := c.recreatePod(podName)
if err != nil {
@@ -435,12 +443,13 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
}
if masterPod != nil {
// switchover if we have observed a master pod and replicas
// switchover if
// 1. we have not observed a new master pod when re-creating former replicas
// 2. we know possible switchover targets even when no replicas were recreated
if newMasterPod == nil && len(replicas) > 0 {
if err := c.Switchover(masterPod, masterCandidate(replicas)); err != nil {
c.logger.Warningf("could not perform switch over: %v", err)
}
// TODO if only the master pod came for recreate, we do not do switchover currently
} else if newMasterPod == nil && len(replicas) == 0 {
c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas")
}
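To make the conditions in the comment above easier to scan, here is a small stand-alone distillation. The function, its parameters, and the wording of the first and last reason strings are made up for illustration; only the quoted "no replicas" warning comes from the code above.

// Sketch only: distills when recreatePods attempts a switchover before
// re-creating the master pod. Names and signature are illustrative.
package main

import "fmt"

// shouldSwitchover returns true only if no new master pod was observed while
// re-creating former replicas and at least one switchover target is known.
func shouldSwitchover(newMasterObserved bool, switchoverTargets int) (bool, string) {
	switch {
	case newMasterObserved:
		return false, "a new master pod was already observed while re-creating replicas"
	case switchoverTargets == 0:
		// corresponds to the TODO/warning branch above
		return false, "cannot perform switch over before re-creating the pod: no replicas"
	default:
		return true, "switch over to one of the known targets, then re-create the master"
	}
}

func main() {
	ok, reason := shouldSwitchover(false, 2)
	fmt.Println(ok, reason)
}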

View File

@@ -322,6 +322,10 @@ func (c *Cluster) syncStatefulSet() error {
if c.getRollingUpdateFlagFromPod(&pod) {
podsToRecreate = append(podsToRecreate, pod)
} else {
role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
if role == Master {
continue
}
switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta))
}
}
@@ -382,6 +386,10 @@ func (c *Cluster) syncStatefulSet() error {
}
podsToRecreate = append(podsToRecreate, pod)
} else {
role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
if role == Master {
continue
}
switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta))
}
}
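Both syncStatefulSet hunks apply the same classification: pods that carry the rolling-update flag are queued for re-creation, and every other pod except the current master becomes a switchover candidate. A self-contained sketch of that filter follows; the helper, the plain string candidate list, the hard-coded "spilo-role"/"master" values, and the example pod names are simplified stand-ins for c.OpConfig.PodRoleLabel and the operator's own types.

// Sketch only: classifies pods into "re-create" vs "switchover candidate",
// never offering the current master as a candidate. Simplified stand-in types.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const roleLabel = "spilo-role" // placeholder for c.OpConfig.PodRoleLabel

func classifyPods(pods []v1.Pod, needsRecreate func(*v1.Pod) bool) (toRecreate []v1.Pod, candidates []string) {
	for i := range pods {
		pod := pods[i]
		if needsRecreate(&pod) {
			toRecreate = append(toRecreate, pod)
			continue
		}
		// the master is skipped: it must not end up in the switchover candidate list
		if pod.Labels[roleLabel] == "master" {
			continue
		}
		candidates = append(candidates, pod.Name)
	}
	return toRecreate, candidates
}

func main() {
	pods := []v1.Pod{
		{ObjectMeta: metav1.ObjectMeta{Name: "acid-test-0", Labels: map[string]string{roleLabel: "master"}}},
		{ObjectMeta: metav1.ObjectMeta{Name: "acid-test-1", Labels: map[string]string{roleLabel: "replica"}}},
	}
	recreate, candidates := classifyPods(pods, func(*v1.Pod) bool { return false })
	fmt.Println(len(recreate), candidates)
}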