add e2e test for node readiness label (#846)
* add e2e test for node readiness label
* refactor and order tests alphabetically
* always wait for replica after failover
parent 51909204fd
commit ae2a38d62a
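
The new test exercises the operator's node_readiness_label setting: it names key:value labels a node must carry to be considered ready for Postgres pods, so labelling only the failover target nodes (the value used in the test is lifecycle-status:ready) forces the master off its unlabelled node. Below is a minimal standalone sketch of that matching rule, for illustration only; the helper names are made up here and are not operator code.

# Illustration only (not operator code): how a "key:value,..." readiness setting
# can be matched against a node's labels. Helper names are hypothetical.
def parse_readiness_labels(config_value):
    """Parse 'key1:val1,key2:val2' into a dict of required labels."""
    required = {}
    for pair in config_value.split(','):
        if not pair.strip():
            continue
        key, _, value = pair.partition(':')
        required[key.strip()] = value.strip()
    return required

def node_is_ready(node_labels, required_labels):
    """A node qualifies only if it carries every required label with the matching value."""
    return all(node_labels.get(k) == v for k, v in required_labels.items())

if __name__ == '__main__':
    required = parse_readiness_labels('lifecycle-status:ready')
    print(node_is_ready({'lifecycle-status': 'ready'}, required))        # True
    print(node_is_ready({'kubernetes.io/hostname': 'node-1'}, required)) # False

The diff below reorders the existing tests alphabetically, adds test_node_readiness_label, and extracts shared get_failover_targets, assert_failover and assert_distributed_pods helpers.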
					
@@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase):
 
         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         k8s.wait_for_pod_start('spilo-role=master')
+        k8s.wait_for_pod_start('spilo-role=replica')
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_enable_load_balancer(self):
@@ -107,141 +108,6 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(repl_svc_type, 'ClusterIP',
                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type))
 
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
-        '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-        _, failover_targets = k8s.get_pg_nodes(cluster_label)
-
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '500m'
-        minMemoryLimit = '500Mi'
-        patch_min_resource_limits = {
-            "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
-            }
-        }
-        k8s.update_config(patch_min_resource_limits)
-
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        k8s.wait_for_master_failover(failover_targets)
-
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector='spilo-role=master,' + cluster_label).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
-
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
-                         "Expected CPU limit {}, found {}"
-                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
-                         "Expected memory limit {}, found {}"
-                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
-        '''
-        k8s = self.k8s
-
-        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
-            pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
-            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
-
-        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_scaling(self):
-        '''
-           Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
-        '''
-        k8s = self.k8s
-        labels = "cluster-name=acid-minimal-cluster"
-
-        k8s.wait_for_pg_to_scale(3)
-        self.assertEqual(3, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-        k8s.wait_for_pg_to_scale(2)
-        self.assertEqual(2, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_taint_based_eviction(self):
-        '''
-           Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
-
-        # taint node with postgres=:NoExecute to force failover
-        body = {
-            "spec": {
-                "taints": [
-                    {
-                        "effect": "NoExecute",
-                        "key": "postgres"
-                    }
-                ]
-            }
-        }
-
-        # patch node and test if master is failing over to one of the expected nodes
-        k8s.api.core_v1.patch_node(current_master_node, body)
-        k8s.wait_for_master_failover(failover_targets)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-
-        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(current_master_node, new_master_node,
-                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
-        self.assertEqual(num_replicas, len(new_replica_nodes),
-                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
-        self.assert_master_is_unique()
-
-        # undo the tainting
-        body = {
-            "spec": {
-                "taints": []
-            }
-        }
-        k8s.api.core_v1.patch_node(new_master_node, body)
-
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_logical_backup_cron_job(self):
         '''
@@ -306,6 +172,133 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(0, len(jobs),
                          "Expected 0 logical backup jobs, found {}".format(len(jobs)))
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_min_resource_limits(self):
+        '''
+        Lower resource limits below configured minimum and let operator fix it
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+
+        # configure minimum boundaries for CPU and memory limits
+        minCPULimit = '500m'
+        minMemoryLimit = '500Mi'
+        patch_min_resource_limits = {
+            "data": {
+                "min_cpu_limit": minCPULimit,
+                "min_memory_limit": minMemoryLimit
+            }
+        }
+        k8s.update_config(patch_min_resource_limits)
+
+        # lower resource limits below minimum
+        pg_patch_resources = {
+            "spec": {
+                "resources": {
+                    "requests": {
+                        "cpu": "10m",
+                        "memory": "50Mi"
+                    },
+                    "limits": {
+                        "cpu": "200m",
+                        "memory": "200Mi"
+                    }
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
+        k8s.wait_for_pod_failover(failover_targets, labels)
+        k8s.wait_for_pod_start('spilo-role=replica')
+
+        pods = k8s.api.core_v1.list_namespaced_pod(
+            'default', label_selector=labels).items
+        self.assert_master_is_unique()
+        masterPod = pods[0]
+
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
+                         "Expected CPU limit {}, found {}"
+                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
+                         "Expected memory limit {}, found {}"
+                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_multi_namespace_support(self):
+        '''
+        Create a customized Postgres cluster in a non-default namespace.
+        '''
+        k8s = self.k8s
+
+        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
+            pg_manifest = yaml.safe_load(f)
+            pg_manifest["metadata"]["namespace"] = self.namespace
+            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
+
+        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
+        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
+        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+           Remove node readiness label from master node. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in config map which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # patch also node where master ran before
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_scaling(self):
+        '''
+           Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
+        '''
+        k8s = self.k8s
+        labels = "cluster-name=acid-minimal-cluster"
+
+        k8s.wait_for_pg_to_scale(3)
+        self.assertEqual(3, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
+        k8s.wait_for_pg_to_scale(2)
+        self.assertEqual(2, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_service_annotations(self):
         '''
@@ -346,18 +339,116 @@ class EndToEndTestCase(unittest.TestCase):
         }
         k8s.update_config(unpatch_custom_service_annotations)
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_taint_based_eviction(self):
+        '''
+           Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # taint node with postgres=:NoExecute to force failover
+        body = {
+            "spec": {
+                "taints": [
+                    {
+                        "effect": "NoExecute",
+                        "key": "postgres"
+                    }
+                ]
+            }
+        }
+
+        # patch node and test if master is failing over to one of the expected nodes
+        k8s.api.core_v1.patch_node(current_master_node, body)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # add toleration to pods
+        patch_toleration_config = {
+            "data": {
+                "toleration": "key:postgres,operator:Exists,effect:NoExecute"
+            }
+        }
+        k8s.update_config(patch_toleration_config)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+           If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
+    def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label):
+        '''
+           Check if master is failing over. The replica should move first to be the switchover target
+        '''
+        k8s = self.k8s
+        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        return new_master_node, new_replica_nodes
+
     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
            Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
            To be called manually after operations that affect pods
         '''
-
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName
 
         num_of_master_pods = k8s.count_pods_with_label(labels, namespace)
         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))
 
+    def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
+        '''
+           Other tests can lead to the situation that master and replica are on the same node.
+           Toggle pod anti affinity to distribute pods across nodes (replica in particular).
+        '''
+        k8s = self.k8s
+        failover_targets = self.get_failover_targets(master_node, replica_nodes)
+
+        # enable pod anti affinity in config map which should trigger movement of replica
+        patch_enable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "true"
+            }
+        }
+        k8s.update_config(patch_enable_antiaffinity)
+        self.assert_failover(
+            master_node, len(replica_nodes), failover_targets, cluster_label)
+
+        # disable pod anti affinity again
+        patch_disable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "false"
+            }
+        }
+        k8s.update_config(patch_disable_antiaffinity)
+
 
 class K8sApi:
 
@@ -445,15 +536,14 @@ class K8s:
     def count_pods_with_label(self, labels, namespace='default'):
         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items)
 
-    def wait_for_master_failover(self, expected_master_nodes, namespace='default'):
+    def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
-        new_master_node = ''
-        labels = 'spilo-role=master,cluster-name=acid-minimal-cluster'
+        new_pod_node = ''
 
-        while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
+        while (pod_phase != 'Running') or (new_pod_node not in failover_targets):
            pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
             if pods:
-                new_master_node = pods[0].spec.node_name
+                new_pod_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
             time.sleep(self.RETRY_TIMEOUT_SEC)
 
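The hunk above is the generalisation mentioned in the commit message: the old wait_for_master_failover hard-coded the master label selector, while wait_for_pod_failover takes the selector and the expected target nodes as arguments, so the same polling loop can watch master or replica pods. A usage fragment mirroring assert_failover above (not a self-contained script; it assumes a k8s fixture and a failover_targets list as computed by get_failover_targets):

# Fragment from a test body: wait until the master pod is Running on one of the
# expected nodes, then wait for a replica pod to come back up.
cluster_label = 'cluster-name=acid-minimal-cluster'
k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)

The remaining hunks below touch the operator's Go node controller, where the retry loop now propagates the underlying error instead of a generic timeout message.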
@@ -5,7 +5,7 @@ import (
 	"time"
 
 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }
 
 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
-
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)
 
 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 
 }