From 7f2c68a82ddf1f56dfdcd6c4fda582376f0ed542 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Thu, 25 Feb 2021 06:43:35 +0100 Subject: [PATCH] reflect code review feedback --- e2e/tests/test_e2e.py | 20 +++++--------------- pkg/cluster/pod.go | 19 ++++++++++++++----- pkg/cluster/sync.go | 8 ++++++++ 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 506882150..1b330c828 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -750,10 +750,6 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyTrue(verify_pod_limits, "Pod limits where not adjusted") - @classmethod - def setUp(cls): - pass - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_multi_namespace_support(self): ''' @@ -812,8 +808,8 @@ class EndToEndTestCase(unittest.TestCase): if pod.metadata.labels.get('spilo-role') == 'master': old_creation_timestamp = pod.metadata.creation_timestamp k8s.patch_pod(flag, pod.metadata.name, pod.metadata.namespace) - # operator will perform a switchover to an existing replica before recreating the master pod else: + # remember replica name to check if operator does a switchover switchover_target = pod.metadata.name # do not wait until the next sync @@ -852,7 +848,7 @@ class EndToEndTestCase(unittest.TestCase): _, replica_nodes = k8s.get_pg_nodes(cluster_label) # rolling update annotation - rolling_update_flag = { + rolling_update_patch = { "metadata": { "annotations": { flag: "true", @@ -861,7 +857,8 @@ class EndToEndTestCase(unittest.TestCase): } # make pod_label_wait_timeout so short that rolling update fails on first try - # temporarily lower resync interval to simulate that pods get healthy in between SYNCs + # temporarily lower resync interval to reduce waiting for further tests + # pods should get healthy in the meantime patch_resync_config = { "data": { "pod_label_wait_timeout": "2s", @@ -873,7 +870,7 @@ class EndToEndTestCase(unittest.TestCase): # patch both pods for rolling update podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) for pod in podList.items: - k8s.patch_pod(rolling_update_flag, pod.metadata.name, pod.metadata.namespace) + k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace) if pod.metadata.labels.get('spilo-role') == 'replica': switchover_target = pod.metadata.name @@ -888,7 +885,6 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail") # wait for next sync, replica should be running normally by now and be ready for switchover - time.sleep(10) k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) # check if the former replica is now the new master @@ -902,12 +898,6 @@ class EndToEndTestCase(unittest.TestCase): time.sleep(10) self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs") - # rolling update should be gone now - podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) - for pod in podList.items: - self.assertTrue(flag not in pod.metadata.annotations, - "Rolling update flag still present on pod {}".format(pod.metadata.name)) - # revert config changes patch_resync_config = { "data": { diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 377e5deab..87de71466 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -50,6 +50,11 @@ func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) { // markRollingUpdateFlagForPod sets the indicator for the rolling update requirement // in the Pod annotation. func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error { + // no need to patch pod if annotation is already there + if c.getRollingUpdateFlagFromPod(pod) { + return nil + } + c.logger.Debugf("mark rolling update annotation for %s: reason %s", pod.Name, msg) flag := make(map[string]string) flag[rollingUpdatePodAnnotationKey] = strconv.FormatBool(true) @@ -80,7 +85,7 @@ func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error { return nil } -// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the passed +// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the given pod func (c *Cluster) getRollingUpdateFlagFromPod(pod *v1.Pod) (flag bool) { anno := pod.GetAnnotations() flag = false @@ -332,8 +337,6 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st } func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { - // TODO due to delays in sync, should double check if recreate annotation is still present - ch := c.registerPodSubscriber(podName) defer c.unregisterPodSubscriber(podName) stopChan := make(chan struct{}) @@ -420,6 +423,11 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp continue } + // double check one more time if the rolling update flag is still there + if !c.getRollingUpdateFlagFromPod(&pod) { + continue + } + podName := util.NameFromMeta(pod.ObjectMeta) newPod, err := c.recreatePod(podName) if err != nil { @@ -435,12 +443,13 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp } if masterPod != nil { - // switchover if we have observed a master pod and replicas + // switchover if + // 1. we have not observed a new master pod when re-creating former replicas + // 2. we know possible switchover targets even when no replicas were recreated if newMasterPod == nil && len(replicas) > 0 { if err := c.Switchover(masterPod, masterCandidate(replicas)); err != nil { c.logger.Warningf("could not perform switch over: %v", err) } - // TODO if only the master pod came for recreate, we do not do switchover currently } else if newMasterPod == nil && len(replicas) == 0 { c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas") } diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index a65f0cf8b..9bce4e0a7 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -322,6 +322,10 @@ func (c *Cluster) syncStatefulSet() error { if c.getRollingUpdateFlagFromPod(&pod) { podsToRecreate = append(podsToRecreate, pod) } else { + role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) + if role == Master { + continue + } switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta)) } } @@ -382,6 +386,10 @@ func (c *Cluster) syncStatefulSet() error { } podsToRecreate = append(podsToRecreate, pod) } else { + role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) + if role == Master { + continue + } switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta)) } }