add e2e test for node readiness label
commit 5774fce104 (parent b24da3201c)
@@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase):
 
         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         k8s.wait_for_pod_start('spilo-role=master')
+        k8s.wait_for_pod_start('spilo-role=replica')
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_enable_load_balancer(self):
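The new wait_for_pod_start('spilo-role=replica') call makes the suite setup block until a replica pod is also running, so tests that exercise failover start from a cluster with both roles present. The helper itself lives in the e2e Kubernetes wrapper and is not part of this diff; a minimal sketch of such a poll loop, assuming the official kubernetes Python client and treating pod phase "Running" as started, might look like:

import time

from kubernetes import client, config


def wait_for_pod_start(pod_labels, namespace='default', timeout_sec=300, interval_sec=5):
    # Illustrative stand-in for the harness helper of the same name:
    # poll until every pod matching the label selector reports phase "Running".
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        pods = core_v1.list_namespaced_pod(namespace, label_selector=pod_labels).items
        if pods and all(p.status.phase == 'Running' for p in pods):
            return
        time.sleep(interval_sec)
    raise TimeoutError("pods with labels {} did not start within {}s".format(pod_labels, timeout_sec))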
@@ -190,6 +191,53 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(2, k8s.count_pods_with_label(labels))
         self.assert_master_is_unique()
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+           Define a node readiness label that the master node lacks. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in the config map, which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+
+        k8s.wait_for_master_failover(failover_targets)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        # also add the label to the former master node
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_taint_based_eviction(self):
         '''
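The test relies on two harness helpers that are not shown in this diff: k8s.get_pg_nodes, which maps the master and replica pods to node names, and k8s.wait_for_master_failover, which blocks until the master has moved to one of the expected nodes. Purely as an illustration (assuming the official kubernetes Python client and the spilo-role pod label), the latter could be a poll loop like this:

import time

from kubernetes import client, config


def wait_for_master_failover(failover_targets, namespace='default',
                             labels='spilo-role=master,cluster-name=acid-minimal-cluster',
                             timeout_sec=300, interval_sec=5):
    # Illustrative only: wait until the pod labelled spilo-role=master is
    # scheduled on one of the expected failover target nodes.
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        pods = core_v1.list_namespaced_pod(namespace, label_selector=labels).items
        if pods and pods[0].spec.node_name in failover_targets:
            return pods[0].spec.node_name
        time.sleep(interval_sec)
    raise TimeoutError("master did not move to any of {} within {}s".format(failover_targets, timeout_sec))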
@@ -199,16 +247,9 @@ class EndToEndTestCase(unittest.TestCase):
         cluster_label = 'cluster-name=acid-minimal-cluster'
 
         # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
 
         # taint node with postgres=:NoExecute to force failover
         body = {
@@ -346,12 +387,26 @@ class EndToEndTestCase(unittest.TestCase):
         }
         k8s.update_config(unpatch_custom_service_annotations)
 
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+           If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
            Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
           To be called manually after operations that affect pods
         '''
-
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName
 
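Both tests and the new get_failover_targets helper also depend on k8s.get_pg_nodes, which is defined elsewhere in the harness. A plausible sketch, again assuming the official kubernetes Python client, that returns the master node name and the list of replica node names for a cluster label selector:

from kubernetes import client, config


def get_pg_nodes(cluster_labels, namespace='default'):
    # Illustrative only: resolve which nodes host the master pod and the
    # replica pods, based on the spilo-role label carried by each pod.
    config.load_kube_config()
    core_v1 = client.CoreV1Api()

    master_node = None
    replica_nodes = []
    for pod in core_v1.list_namespaced_pod(namespace, label_selector=cluster_labels).items:
        role = (pod.metadata.labels or {}).get('spilo-role')
        if role == 'master':
            master_node = pod.spec.node_name
        elif role == 'replica':
            replica_nodes.append(pod.spec.node_name)

    return master_node, replica_nodes

The remaining two hunks below appear to come from the operator's Go node-handling code rather than the e2e tests, judging by the imports and the Controller methods they touch.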
@@ -5,7 +5,7 @@ import (
 	"time"
 
 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }
 
 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
-
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)
 
 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 
 }
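On the controller side, moveMasterPodsOffNode keeps attempting to move master pods off the node, once per minute, until the configured MasterPodMoveTimeout elapses; the callback now propagates the underlying error and the final warning logs it instead of a canned timeout message. The exact behavior is defined by retryutil.Retry, which is not part of this diff; purely as a generic illustration of the retry-until-deadline pattern (in Python, for consistency with the test code above), the shape is:

import time


def retry_until_deadline(attempt, interval_sec=60, timeout_sec=20 * 60):
    # Generic illustration, not a transcription of retryutil.Retry: call
    # attempt() until it succeeds or the deadline passes, then surface the
    # last failure alongside the timeout.
    deadline = time.monotonic() + timeout_sec
    while True:
        try:
            return attempt()
        except Exception as err:
            if time.monotonic() + interval_sec > deadline:
                raise TimeoutError("gave up after {}s: {}".format(timeout_sec, err))
            time.sleep(interval_sec)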