reflect code review feedback

Felix Kunde 2021-02-25 06:43:35 +01:00
parent 75e91c0dec
commit 7f2c68a82d
3 changed files with 27 additions and 20 deletions

View File

@@ -750,10 +750,6 @@ class EndToEndTestCase(unittest.TestCase):
self.eventuallyTrue(verify_pod_limits, "Pod limits were not adjusted")
@classmethod
def setUp(cls):
pass
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_multi_namespace_support(self):
'''
@@ -812,8 +808,8 @@ class EndToEndTestCase(unittest.TestCase):
if pod.metadata.labels.get('spilo-role') == 'master':
old_creation_timestamp = pod.metadata.creation_timestamp
k8s.patch_pod(flag, pod.metadata.name, pod.metadata.namespace)
# operator will perform a switchover to an existing replica before recreating the master pod
else:
# remember replica name to check if operator does a switchover
switchover_target = pod.metadata.name
# do not wait until the next sync
@@ -852,7 +848,7 @@ class EndToEndTestCase(unittest.TestCase):
_, replica_nodes = k8s.get_pg_nodes(cluster_label)
# rolling update annotation
rolling_update_flag = {
rolling_update_patch = {
"metadata": {
"annotations": {
flag: "true",
@@ -861,7 +857,8 @@ class EndToEndTestCase(unittest.TestCase):
}
# make pod_label_wait_timeout so short that rolling update fails on first try
# temporarily lower resync interval to simulate that pods get healthy in between SYNCs
# temporarily lower resync interval to reduce waiting for further tests
# pods should get healthy in the meantime
patch_resync_config = {
"data": {
"pod_label_wait_timeout": "2s",
@@ -873,7 +870,7 @@ class EndToEndTestCase(unittest.TestCase):
# patch both pods for rolling update
podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label)
for pod in podList.items:
k8s.patch_pod(rolling_update_flag, pod.metadata.name, pod.metadata.namespace)
k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace)
if pod.metadata.labels.get('spilo-role') == 'replica':
switchover_target = pod.metadata.name
@@ -888,7 +885,6 @@ class EndToEndTestCase(unittest.TestCase):
self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail")
# wait for next sync, replica should be running normally by now and be ready for switchover
time.sleep(10)
k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label)
# check if the former replica is now the new master
@@ -902,12 +898,6 @@ class EndToEndTestCase(unittest.TestCase):
time.sleep(10)
self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs")
# rolling update should be gone now
podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label)
for pod in podList.items:
self.assertTrue(flag not in pod.metadata.annotations,
"Rolling update flag still present on pod {}".format(pod.metadata.name))
# revert config changes
patch_resync_config = {
"data": {

View File

@@ -50,6 +50,11 @@ func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) {
// markRollingUpdateFlagForPod sets the indicator for the rolling update requirement
// in the Pod annotation.
func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error {
// no need to patch pod if annotation is already there
if c.getRollingUpdateFlagFromPod(pod) {
return nil
}
c.logger.Debugf("mark rolling update annotation for %s: reason %s", pod.Name, msg)
flag := make(map[string]string)
flag[rollingUpdatePodAnnotationKey] = strconv.FormatBool(true)
@@ -80,7 +85,7 @@ func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error {
return nil
}
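The body of markRollingUpdateFlagForPod between building the flag map and the final return is not shown in this diff. The following is a minimal sketch of how such a flag could be written and read back with client-go; the JSON merge patch type, the helper names, and the literal annotation key are assumptions standing in for the operator's rollingUpdatePodAnnotationKey and its own Kubernetes client.

// Sketch only: how a rolling-update flag could be written and read back.
// The annotation key and the merge patch type are assumptions, not the operator's code.
package podflag

import (
	"context"
	"fmt"
	"strconv"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

// rollingUpdateKey is a placeholder for rollingUpdatePodAnnotationKey from the diff above.
const rollingUpdateKey = "rolling-update-required"

// setRollingUpdateFlag patches only metadata.annotations, mirroring the
// {"metadata": {"annotations": {flag: "true"}}} body used in the e2e test.
func setRollingUpdateFlag(ctx context.Context, client kubernetes.Interface, pod *v1.Pod) error {
	patch := []byte(fmt.Sprintf(`{"metadata":{"annotations":{%q:%q}}}`, rollingUpdateKey, strconv.FormatBool(true)))
	_, err := client.CoreV1().Pods(pod.Namespace).Patch(ctx, pod.Name, types.MergePatchType, patch, metav1.PatchOptions{})
	return err
}

// getRollingUpdateFlag defaults to false when the annotation is missing or not
// parsable, like getRollingUpdateFlagFromPod in the next hunk.
func getRollingUpdateFlag(pod *v1.Pod) bool {
	val, ok := pod.GetAnnotations()[rollingUpdateKey]
	if !ok {
		return false
	}
	flag, err := strconv.ParseBool(val)
	if err != nil {
		return false
	}
	return flag
}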
// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the passed
// getRollingUpdateFlagFromPod returns the value of the rollingUpdate flag from the given pod
func (c *Cluster) getRollingUpdateFlagFromPod(pod *v1.Pod) (flag bool) {
anno := pod.GetAnnotations()
flag = false
@@ -332,8 +337,6 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st
}
func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) {
// TODO due to delays in sync, should double check if recreate annotation is still present
ch := c.registerPodSubscriber(podName)
defer c.unregisterPodSubscriber(podName)
stopChan := make(chan struct{})
@@ -420,6 +423,11 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
continue
}
// double check one more time if the rolling update flag is still there
if !c.getRollingUpdateFlagFromPod(&pod) {
continue
}
podName := util.NameFromMeta(pod.ObjectMeta)
newPod, err := c.recreatePod(podName)
if err != nil {
@@ -435,12 +443,13 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp
}
if masterPod != nil {
// switchover if we have observed a master pod and replicas
// switchover if
// 1. we have not observed a new master pod when re-creating former replicas
// 2. we know possible switchover targets even when no replicas were recreated
if newMasterPod == nil && len(replicas) > 0 {
if err := c.Switchover(masterPod, masterCandidate(replicas)); err != nil {
c.logger.Warningf("could not perform switch over: %v", err)
}
// TODO if only the master pod came for recreate, we do not do switchover currently
} else if newMasterPod == nil && len(replicas) == 0 {
c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas")
}
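To make the conditions in the comment above easier to scan, here is a small stand-alone distillation. The function, its parameters, and the wording of the first and last reason strings are made up for illustration; only the quoted "no replicas" warning comes from the code above.

// Sketch only: distills when recreatePods attempts a switchover before
// re-creating the master pod. Names and signature are illustrative.
package main

import "fmt"

// shouldSwitchover returns true only if no new master pod was observed while
// re-creating former replicas and at least one switchover target is known.
func shouldSwitchover(newMasterObserved bool, switchoverTargets int) (bool, string) {
	switch {
	case newMasterObserved:
		return false, "a new master pod was already observed while re-creating replicas"
	case switchoverTargets == 0:
		// corresponds to the TODO/warning branch above
		return false, "cannot perform switch over before re-creating the pod: no replicas"
	default:
		return true, "switch over to one of the known targets, then re-create the master"
	}
}

func main() {
	ok, reason := shouldSwitchover(false, 2)
	fmt.Println(ok, reason)
}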

View File

@@ -322,6 +322,10 @@ func (c *Cluster) syncStatefulSet() error {
if c.getRollingUpdateFlagFromPod(&pod) {
podsToRecreate = append(podsToRecreate, pod)
} else {
role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
if role == Master {
continue
}
switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta))
}
}
@@ -382,6 +386,10 @@ func (c *Cluster) syncStatefulSet() error {
}
podsToRecreate = append(podsToRecreate, pod)
} else {
role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
if role == Master {
continue
}
switchoverCandidates = append(switchoverCandidates, util.NameFromMeta(pod.ObjectMeta))
}
}
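Both syncStatefulSet hunks apply the same classification: pods that carry the rolling-update flag are queued for re-creation, and every other pod except the current master becomes a switchover candidate. A self-contained sketch of that filter follows; the helper, the plain string candidate list, the hard-coded "spilo-role"/"master" values, and the example pod names are simplified stand-ins for c.OpConfig.PodRoleLabel and the operator's own types.

// Sketch only: classifies pods into "re-create" vs "switchover candidate",
// never offering the current master as a candidate. Simplified stand-in types.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const roleLabel = "spilo-role" // placeholder for c.OpConfig.PodRoleLabel

func classifyPods(pods []v1.Pod, needsRecreate func(*v1.Pod) bool) (toRecreate []v1.Pod, candidates []string) {
	for i := range pods {
		pod := pods[i]
		if needsRecreate(&pod) {
			toRecreate = append(toRecreate, pod)
			continue
		}
		// the master is skipped: it must not end up in the switchover candidate list
		if pod.Labels[roleLabel] == "master" {
			continue
		}
		candidates = append(candidates, pod.Name)
	}
	return toRecreate, candidates
}

func main() {
	pods := []v1.Pod{
		{ObjectMeta: metav1.ObjectMeta{Name: "acid-test-0", Labels: map[string]string{roleLabel: "master"}}},
		{ObjectMeta: metav1.ObjectMeta{Name: "acid-test-1", Labels: map[string]string{roleLabel: "replica"}}},
	}
	recreate, candidates := classifyPods(pods, func(*v1.Pod) bool { return false })
	fmt.Println(len(recreate), candidates)
}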