merge commit dd10127e5d
|  | @@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase): | |||
| 
 | ||||
|         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml") | ||||
|         k8s.wait_for_pod_start('spilo-role=master') | ||||
|         k8s.wait_for_pod_start('spilo-role=replica') | ||||
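|         # the minimal cluster presumably runs one master and one replica; waiting for both gives the following tests a stable starting state | ||||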
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_enable_load_balancer(self): | ||||
|  | @@ -108,139 +109,59 @@ class EndToEndTestCase(unittest.TestCase): | |||
|                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type)) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_min_resource_limits(self): | ||||
|     def test_lazy_image_update(self): | ||||
|         ''' | ||||
|         Lower resource limits below configured minimum and let operator fix it | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         cluster_label = 'cluster-name=acid-minimal-cluster' | ||||
|         _, failover_targets = k8s.get_pg_nodes(cluster_label) | ||||
|         Test lazy update for the Spilo image: the operator changes a stateful set but lets pods run with the old image | ||||
|         until they are recreated for reasons other than the operator's activity. That works because the operator uses | ||||
|         the "onDelete" pod update policy for stateful sets. | ||||
| 
 | ||||
|         # configure minimum boundaries for CPU and memory limits | ||||
|         minCPULimit = '500m' | ||||
|         minMemoryLimit = '500Mi' | ||||
|         patch_min_resource_limits = { | ||||
|         The test covers: | ||||
|         1) enabling lazy upgrade in existing operator deployment | ||||
|         2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod | ||||
|         ''' | ||||
| 
 | ||||
|         k8s = self.k8s | ||||
|         pod0 = "acid-minimal-cluster-0" | ||||
|         pod1 = "acid-minimal-cluster-1" | ||||
| 
 | ||||
|         # enable lazy update | ||||
|         patch_lazy_image_upgrade = { | ||||
|             "data": { | ||||
|                 "min_cpu_limit": minCPULimit, | ||||
|                 "min_memory_limit": minMemoryLimit | ||||
|                 "enable_lazy_image_upgrade": "true", | ||||
|                 "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16" | ||||
|             } | ||||
|         } | ||||
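|         # update_config presumably patches the operator ConfigMap and restarts its pod; with lazy upgrade enabled, | ||||
|         # the new docker_image reaches the stateful set spec while running pods keep the old image | ||||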
|         k8s.update_config(patch_min_resource_limits) | ||||
|         k8s.update_config(patch_lazy_image_upgrade) | ||||
| 
 | ||||
|         # lower resource limits below minimum | ||||
|         pg_patch_resources = { | ||||
|             "spec": { | ||||
|                 "resources": { | ||||
|                     "requests": { | ||||
|                         "cpu": "10m", | ||||
|                         "memory": "50Mi" | ||||
|                     }, | ||||
|                     "limits": { | ||||
|                         "cpu": "200m", | ||||
|                         "memory": "200Mi" | ||||
|                     } | ||||
|                 } | ||||
|         # wait for sts update | ||||
|         time.sleep(60) | ||||
| 
 | ||||
|         # restart the pod to get a container with the new image  | ||||
|         k8s.api.core_v1.delete_namespaced_pod(pod0, "default") | ||||
|         time.sleep(60) | ||||
| 
 | ||||
|         # lazy update works if the restarted pod and older pods have different Spilo versions | ||||
|         # i.e. the update did not immediately affect all pods | ||||
|         new_image = k8s.get_effective_pod_image(pod0) | ||||
|         old_image = k8s.get_effective_pod_image(pod1) | ||||
|         self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image)) | ||||
| 
 | ||||
|         # clean up | ||||
|         unpatch_lazy_image_upgrade = { | ||||
|             "data": { | ||||
|                 "enable_lazy_image_upgrade": "false", | ||||
|             } | ||||
|         } | ||||
|         k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) | ||||
|         k8s.wait_for_master_failover(failover_targets) | ||||
|         k8s.update_config(unpatch_lazy_image_upgrade) | ||||
| 
 | ||||
|         pods = k8s.api.core_v1.list_namespaced_pod( | ||||
|             'default', label_selector='spilo-role=master,' + cluster_label).items | ||||
|         self.assert_master_is_unique() | ||||
|         masterPod = pods[0] | ||||
|         # at this point the operator will complete the normal rolling update | ||||
|         # so we additionally test if disabling the lazy update (forcing the normal rolling update) works | ||||
|         time.sleep(60) | ||||
| 
 | ||||
|         self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit, | ||||
|                          "Expected CPU limit {}, found {}" | ||||
|                          .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu'])) | ||||
|         self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit, | ||||
|                          "Expected memory limit {}, found {}" | ||||
|                          .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory'])) | ||||
|         image0 = k8s.get_effective_pod_image(pod0) | ||||
|         image1 = k8s.get_effective_pod_image(pod1) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_multi_namespace_support(self): | ||||
|         ''' | ||||
|         Create a customized Postgres cluster in a non-default namespace. | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
| 
 | ||||
|         with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: | ||||
|             pg_manifest = yaml.safe_load(f) | ||||
|             pg_manifest["metadata"]["namespace"] = self.namespace | ||||
|             yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) | ||||
| 
 | ||||
|         k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") | ||||
|         k8s.wait_for_pod_start("spilo-role=master", self.namespace) | ||||
|         self.assert_master_is_unique(self.namespace, "acid-test-cluster") | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_scaling(self): | ||||
|         ''' | ||||
|            Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         labels = "cluster-name=acid-minimal-cluster" | ||||
| 
 | ||||
|         k8s.wait_for_pg_to_scale(3) | ||||
|         self.assertEqual(3, k8s.count_pods_with_label(labels)) | ||||
|         self.assert_master_is_unique() | ||||
| 
 | ||||
|         k8s.wait_for_pg_to_scale(2) | ||||
|         self.assertEqual(2, k8s.count_pods_with_label(labels)) | ||||
|         self.assert_master_is_unique() | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_taint_based_eviction(self): | ||||
|         ''' | ||||
|            Add taint "postgres=:NoExecute" to node with master. This must cause a failover. | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         cluster_label = 'cluster-name=acid-minimal-cluster' | ||||
| 
 | ||||
|         # get nodes of master and replica(s) (expected target of new master) | ||||
|         current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label) | ||||
|         num_replicas = len(failover_targets) | ||||
| 
 | ||||
|         # if all pods live on the same node, failover will happen to other worker(s) | ||||
|         failover_targets = [x for x in failover_targets if x != current_master_node] | ||||
|         if len(failover_targets) == 0: | ||||
|             nodes = k8s.api.core_v1.list_node() | ||||
|             for n in nodes.items: | ||||
|                 if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node: | ||||
|                     failover_targets.append(n.metadata.name) | ||||
| 
 | ||||
|         # taint node with postgres=:NoExecute to force failover | ||||
|         body = { | ||||
|             "spec": { | ||||
|                 "taints": [ | ||||
|                     { | ||||
|                         "effect": "NoExecute", | ||||
|                         "key": "postgres" | ||||
|                     } | ||||
|                 ] | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         # patch node and test if master is failing over to one of the expected nodes | ||||
|         k8s.api.core_v1.patch_node(current_master_node, body) | ||||
|         k8s.wait_for_master_failover(failover_targets) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
| 
 | ||||
|         new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label) | ||||
|         self.assertNotEqual(current_master_node, new_master_node, | ||||
|                             "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets)) | ||||
|         self.assertEqual(num_replicas, len(new_replica_nodes), | ||||
|                          "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes))) | ||||
|         self.assert_master_is_unique() | ||||
| 
 | ||||
|         # undo the tainting | ||||
|         body = { | ||||
|             "spec": { | ||||
|                 "taints": [] | ||||
|             } | ||||
|         } | ||||
|         k8s.api.core_v1.patch_node(new_master_node, body) | ||||
|         self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1)) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_logical_backup_cron_job(self): | ||||
|  | @@ -306,6 +227,133 @@ class EndToEndTestCase(unittest.TestCase): | |||
|         self.assertEqual(0, len(jobs), | ||||
|                          "Expected 0 logical backup jobs, found {}".format(len(jobs))) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_min_resource_limits(self): | ||||
|         ''' | ||||
|         Lower resource limits below configured minimum and let operator fix it | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         cluster_label = 'cluster-name=acid-minimal-cluster' | ||||
|         labels = 'spilo-role=master,' + cluster_label | ||||
|         _, failover_targets = k8s.get_pg_nodes(cluster_label) | ||||
| 
 | ||||
|         # configure minimum boundaries for CPU and memory limits | ||||
|         minCPULimit = '500m' | ||||
|         minMemoryLimit = '500Mi' | ||||
|         patch_min_resource_limits = { | ||||
|             "data": { | ||||
|                 "min_cpu_limit": minCPULimit, | ||||
|                 "min_memory_limit": minMemoryLimit | ||||
|             } | ||||
|         } | ||||
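|         # min_cpu_limit and min_memory_limit act as the floor the operator enforces on the cluster's resource limits | ||||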
|         k8s.update_config(patch_min_resource_limits) | ||||
| 
 | ||||
|         # lower resource limits below minimum | ||||
|         pg_patch_resources = { | ||||
|             "spec": { | ||||
|                 "resources": { | ||||
|                     "requests": { | ||||
|                         "cpu": "10m", | ||||
|                         "memory": "50Mi" | ||||
|                     }, | ||||
|                     "limits": { | ||||
|                         "cpu": "200m", | ||||
|                         "memory": "200Mi" | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) | ||||
|         k8s.wait_for_pod_failover(failover_targets, labels) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica') | ||||
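|         # raising the limits to the configured minimum presumably triggers a rolling update, so wait for the master to move and the replica to return | ||||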
| 
 | ||||
|         pods = k8s.api.core_v1.list_namespaced_pod( | ||||
|             'default', label_selector=labels).items | ||||
|         self.assert_master_is_unique() | ||||
|         masterPod = pods[0] | ||||
| 
 | ||||
|         self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit, | ||||
|                          "Expected CPU limit {}, found {}" | ||||
|                          .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu'])) | ||||
|         self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit, | ||||
|                          "Expected memory limit {}, found {}" | ||||
|                          .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory'])) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_multi_namespace_support(self): | ||||
|         ''' | ||||
|         Create a customized Postgres cluster in a non-default namespace. | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
| 
 | ||||
|         with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: | ||||
|             pg_manifest = yaml.safe_load(f) | ||||
|             pg_manifest["metadata"]["namespace"] = self.namespace | ||||
|             yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) | ||||
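|             # the manifest is rewritten in place with the test namespace before kubectl applies it below | ||||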
| 
 | ||||
|         k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") | ||||
|         k8s.wait_for_pod_start("spilo-role=master", self.namespace) | ||||
|         self.assert_master_is_unique(self.namespace, "acid-test-cluster") | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_node_readiness_label(self): | ||||
|         ''' | ||||
|            Remove node readiness label from master node. This must cause a failover. | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         cluster_label = 'cluster-name=acid-minimal-cluster' | ||||
|         labels = 'spilo-role=master,' + cluster_label | ||||
|         readiness_label = 'lifecycle-status' | ||||
|         readiness_value = 'ready' | ||||
| 
 | ||||
|         # get nodes of master and replica(s) (expected target of new master) | ||||
|         current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) | ||||
|         num_replicas = len(current_replica_nodes) | ||||
|         failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) | ||||
| 
 | ||||
|         # add node_readiness_label to potential failover nodes | ||||
|         patch_readiness_label = { | ||||
|             "metadata": { | ||||
|                 "labels": { | ||||
|                     readiness_label: readiness_value | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         for failover_target in failover_targets: | ||||
|             k8s.api.core_v1.patch_node(failover_target, patch_readiness_label) | ||||
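|         # only the failover targets carry the label so far; the current master node stays unlabeled and presumably becomes ineligible once the label is enforced below | ||||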
| 
 | ||||
|         # define node_readiness_label in config map which should trigger a failover of the master | ||||
|         patch_readiness_label_config = { | ||||
|             "data": { | ||||
|                 "node_readiness_label": readiness_label + ':' + readiness_value, | ||||
|             } | ||||
|         } | ||||
|         k8s.update_config(patch_readiness_label_config) | ||||
|         new_master_node, new_replica_nodes = self.assert_failover( | ||||
|             current_master_node, num_replicas, failover_targets, cluster_label) | ||||
| 
 | ||||
|         # patch also node where master ran before | ||||
|         k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label) | ||||
|         # toggle pod anti-affinity to move the replica away from the master node | ||||
|         self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_scaling(self): | ||||
|         ''' | ||||
|            Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         labels = "cluster-name=acid-minimal-cluster" | ||||
| 
 | ||||
|         k8s.wait_for_pg_to_scale(3) | ||||
|         self.assertEqual(3, k8s.count_pods_with_label(labels)) | ||||
|         self.assert_master_is_unique() | ||||
| 
 | ||||
|         k8s.wait_for_pg_to_scale(2) | ||||
|         self.assertEqual(2, k8s.count_pods_with_label(labels)) | ||||
|         self.assert_master_is_unique() | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_service_annotations(self): | ||||
|         ''' | ||||
|  | @@ -347,72 +395,115 @@ class EndToEndTestCase(unittest.TestCase): | |||
|         k8s.update_config(unpatch_custom_service_annotations) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_lazy_image_update(self): | ||||
|     def test_taint_based_eviction(self): | ||||
|         ''' | ||||
|         Test lazy update for the Spilo image: the operator changes a stateful set but lets pods run with the old image | ||||
|         until they are recreated for reasons other than the operator's activity. That works because the operator uses | ||||
|         the "onDelete" pod update policy for stateful sets. | ||||
| 
 | ||||
|         The test covers: | ||||
|         1) enabling lazy upgrade in existing operator deployment | ||||
|         2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod | ||||
|            Add taint "postgres=:NoExecute" to node with master. This must cause a failover. | ||||
|         ''' | ||||
| 
 | ||||
|         k8s = self.k8s | ||||
|         pod0 = "acid-minimal-cluster-0" | ||||
|         pod1 = "acid-minimal-cluster-1" | ||||
|         cluster_label = 'cluster-name=acid-minimal-cluster' | ||||
| 
 | ||||
|         # enable lazy update | ||||
|         patch_lazy_image_upgrade = { | ||||
|             "data": { | ||||
|                 "enable_lazy_image_upgrade": "true", | ||||
|                 "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16" | ||||
|         # get nodes of master and replica(s) (expected target of new master) | ||||
|         current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) | ||||
|         num_replicas = len(current_replica_nodes) | ||||
|         failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) | ||||
| 
 | ||||
|         # taint node with postgres=:NoExecute to force failover | ||||
|         body = { | ||||
|             "spec": { | ||||
|                 "taints": [ | ||||
|                     { | ||||
|                         "effect": "NoExecute", | ||||
|                         "key": "postgres" | ||||
|                     } | ||||
|                 ] | ||||
|             } | ||||
|         } | ||||
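|         # a NoExecute taint evicts pods without a matching toleration, which forces the master pod off this node | ||||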
|         k8s.update_config(patch_lazy_image_upgrade) | ||||
| 
 | ||||
|         # wait for sts update | ||||
|         time.sleep(60) | ||||
|         # patch node and test if master is failing over to one of the expected nodes | ||||
|         k8s.api.core_v1.patch_node(current_master_node, body) | ||||
|         new_master_node, new_replica_nodes = self.assert_failover( | ||||
|             current_master_node, num_replicas, failover_targets, cluster_label) | ||||
| 
 | ||||
|         # restart the pod to get a container with the new image  | ||||
|         k8s.api.core_v1.delete_namespaced_pod(pod0, "default") | ||||
|         time.sleep(60) | ||||
| 
 | ||||
|         # lazy update works if the restarted pod and older pods have different Spilo versions | ||||
|         # i.e. the update did not immediately affect all pods | ||||
|         new_image = k8s.get_effective_pod_image(pod0) | ||||
|         old_image = k8s.get_effective_pod_image(pod1) | ||||
|         self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image)) | ||||
| 
 | ||||
|         # clean up | ||||
|         unpatch_lazy_image_upgrade = { | ||||
|         # add toleration to pods | ||||
|         patch_toleration_config = { | ||||
|             "data": { | ||||
|                 "enable_lazy_image_upgrade": "false", | ||||
|                 "toleration": "key:postgres,operator:Exists,effect:NoExecute" | ||||
|             } | ||||
|         } | ||||
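|         # the toleration option presumably adds a matching toleration to the Postgres pods so they are no longer evicted from the tainted node | ||||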
|         k8s.update_config(unpatch_lazy_image_upgrade) | ||||
|         k8s.update_config(patch_toleration_config) | ||||
| 
 | ||||
|         # at this point the operator will complete the normal rolling update | ||||
|         # so we additionally test if disabling the lazy update (forcing the normal rolling update) works | ||||
|         time.sleep(60) | ||||
|         # toggle pod anti-affinity to move the replica away from the master node | ||||
|         self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) | ||||
| 
 | ||||
|         image0 = k8s.get_effective_pod_image(pod0) | ||||
|         image1 = k8s.get_effective_pod_image(pod1) | ||||
|     def get_failover_targets(self, master_node, replica_nodes): | ||||
|         ''' | ||||
|            If all pods live on the same node, failover will happen to other worker(s) | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
| 
 | ||||
|         self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1)) | ||||
|         failover_targets = [x for x in replica_nodes if x != master_node] | ||||
|         if len(failover_targets) == 0: | ||||
|             nodes = k8s.api.core_v1.list_node() | ||||
|             for n in nodes.items: | ||||
|                 if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node: | ||||
|                     failover_targets.append(n.metadata.name) | ||||
| 
 | ||||
|         return failover_targets | ||||
| 
 | ||||
|     def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label): | ||||
|         ''' | ||||
|            Check if master is failing over. The replica should move first to be the switchover target | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
| 
 | ||||
|         new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label) | ||||
|         self.assertNotEqual(current_master_node, new_master_node, | ||||
|                             "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets)) | ||||
|         self.assertEqual(num_replicas, len(new_replica_nodes), | ||||
|                          "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes))) | ||||
|         self.assert_master_is_unique() | ||||
| 
 | ||||
|         return new_master_node, new_replica_nodes | ||||
| 
 | ||||
|     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"): | ||||
|         ''' | ||||
|            Check that there is a single pod in the k8s cluster with the label "spilo-role=master" | ||||
|            To be called manually after operations that affect pods | ||||
|         ''' | ||||
| 
 | ||||
|         k8s = self.k8s | ||||
|         labels = 'spilo-role=master,cluster-name=' + clusterName | ||||
| 
 | ||||
|         num_of_master_pods = k8s.count_pods_with_label(labels, namespace) | ||||
|         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods)) | ||||
| 
 | ||||
|     def assert_distributed_pods(self, master_node, replica_nodes, cluster_label): | ||||
|         ''' | ||||
|            Other tests can lead to a situation where master and replica run on the same node. | ||||
|            Toggle pod anti-affinity to distribute pods across nodes (the replica in particular). | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         failover_targets = self.get_failover_targets(master_node, replica_nodes) | ||||
| 
 | ||||
|         # enable pod anti-affinity in the config map, which should trigger movement of the replica | ||||
|         patch_enable_antiaffinity = { | ||||
|             "data": { | ||||
|                 "enable_pod_antiaffinity": "true" | ||||
|             } | ||||
|         } | ||||
|         k8s.update_config(patch_enable_antiaffinity) | ||||
|         self.assert_failover( | ||||
|             master_node, len(replica_nodes), failover_targets, cluster_label) | ||||
| 
 | ||||
|         # disable pod anti-affinity again | ||||
|         patch_disable_antiaffinity = { | ||||
|             "data": { | ||||
|                 "enable_pod_antiaffinity": "false" | ||||
|             } | ||||
|         } | ||||
|         k8s.update_config(patch_disable_antiaffinity) | ||||
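|         # disabling it again presumably leaves the remaining tests with the default scheduling behaviour | ||||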
| 
 | ||||
| 
 | ||||
| class K8sApi: | ||||
| 
 | ||||
|  | @@ -500,15 +591,14 @@ class K8s: | |||
|     def count_pods_with_label(self, labels, namespace='default'): | ||||
|         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items) | ||||
| 
 | ||||
|     def wait_for_master_failover(self, expected_master_nodes, namespace='default'): | ||||
|     def wait_for_pod_failover(self, failover_targets, labels, namespace='default'): | ||||
|         pod_phase = 'Failing over' | ||||
|         new_master_node = '' | ||||
|         labels = 'spilo-role=master,cluster-name=acid-minimal-cluster' | ||||
|         new_pod_node = '' | ||||
| 
 | ||||
|         while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes): | ||||
|         while (pod_phase != 'Running') or (new_pod_node not in failover_targets): | ||||
|             pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items | ||||
|             if pods: | ||||
|                 new_master_node = pods[0].spec.node_name | ||||
|                 new_pod_node = pods[0].spec.node_name | ||||
|                 pod_phase = pods[0].status.phase | ||||
|             time.sleep(self.RETRY_TIMEOUT_SEC) | ||||
| 
 | ||||
|  |  | |||
|  | @@ -5,7 +5,7 @@ import ( | |||
| 	"time" | ||||
| 
 | ||||
| 	"github.com/zalando/postgres-operator/pkg/util/retryutil" | ||||
| 	"k8s.io/api/core/v1" | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/labels" | ||||
| 	"k8s.io/apimachinery/pkg/runtime" | ||||
|  | @@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) { | |||
| } | ||||
| 
 | ||||
| func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { | ||||
| 
 | ||||
| 	// retry to move the master until the configured timeout is reached
 | ||||
| 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout, | ||||
| 		func() (bool, error) { | ||||
| 			err := c.attemptToMoveMasterPodsOffNode(node) | ||||
| 			if err != nil { | ||||
| 				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute") | ||||
| 				return false, err | ||||
| 			} | ||||
| 			return true, nil | ||||
| 		}, | ||||
| 	) | ||||
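| 	// the callback now returns the underlying error instead of a fixed message, so the warning below can report the real cause | ||||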
| 
 | ||||
| 	if err != nil { | ||||
| 		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout) | ||||
| 		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err) | ||||
| 	} | ||||
| 
 | ||||
| } | ||||
|  |  | |||
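For reference, a minimal sketch of the pod-image helper the lazy-update test relies on (not part of this diff; the real helper in the K8s wrapper class may differ). It assumes the kubernetes Python client already used above and reads the image the pod's container is actually running:

    def get_effective_pod_image(self, pod_name, namespace='default'):
        '''
           Return the image of the running container of the given pod.
        '''
        pod = self.api.core_v1.read_namespaced_pod(pod_name, namespace)
        return pod.status.container_statuses[0].image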