add e2e test for node readiness label
parent b24da3201c
commit 5774fce104
@@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase):
         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         k8s.wait_for_pod_start('spilo-role=master')
+        k8s.wait_for_pod_start('spilo-role=replica')
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_enable_load_balancer(self):
@@ -190,6 +191,53 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(2, k8s.count_pods_with_label(labels))
         self.assert_master_is_unique()
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+           Add the node readiness label to the failover target nodes and enable it in the
+           operator config. The current master node lacks the label, so this must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in the config map, which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+
+        k8s.wait_for_master_failover(failover_targets)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        # also patch the former master node so every node carries the readiness label
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_taint_based_eviction(self):
         '''
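The readiness-label test above leans on the e2e wrapper's wait_for_master_failover helper, whose implementation is not part of this diff. A minimal sketch of the behaviour the test assumes, written directly against the Kubernetes Python client (function name, timeout, and polling interval are illustrative, not the project's actual helper):

import time

from kubernetes import client, config


def wait_for_master_failover(failover_targets, namespace='default',
                             cluster_label='cluster-name=acid-minimal-cluster',
                             timeout=300, interval=5):
    # Sketch only: poll until the pod labelled spilo-role=master runs on one
    # of the expected failover target nodes, or give up after the timeout.
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    selector = 'spilo-role=master,' + cluster_label

    deadline = time.time() + timeout
    while time.time() < deadline:
        pods = core_v1.list_namespaced_pod(namespace, label_selector=selector).items
        if pods and pods[0].status.phase == 'Running' and pods[0].spec.node_name in failover_targets:
            return pods[0].spec.node_name
        time.sleep(interval)
    raise TimeoutError("master did not move to one of {}".format(failover_targets))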
@@ -199,16 +247,9 @@ class EndToEndTestCase(unittest.TestCase):
         cluster_label = 'cluster-name=acid-minimal-cluster'
 
         # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
 
         # taint node with postgres=:NoExecute to force failover
         body = {
@@ -346,12 +387,26 @@ class EndToEndTestCase(unittest.TestCase):
         }
         k8s.update_config(unpatch_custom_service_annotations)
 
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+           If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
            Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
           To be called manually after operations that affect pods
         '''
 
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName
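Several of these tests also call k8s.get_pg_nodes to learn which nodes currently host the master and the replica pods; that helper is likewise outside this diff. A rough sketch of the behaviour the tests assume, again using the Kubernetes Python client (names and defaults are illustrative):

from kubernetes import client, config


def get_pg_nodes(cluster_label, namespace='default'):
    # Sketch only: return the node running the master pod and the list of nodes
    # running replica pods, selected via the cluster label.
    config.load_kube_config()
    core_v1 = client.CoreV1Api()

    master_node = None
    replica_nodes = []
    for pod in core_v1.list_namespaced_pod(namespace, label_selector=cluster_label).items:
        role = (pod.metadata.labels or {}).get('spilo-role')
        if role == 'master':
            master_node = pod.spec.node_name
        elif role == 'replica':
            replica_nodes.append(pod.spec.node_name)

    return master_node, replica_nodes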
@@ -5,7 +5,7 @@ import (
 	"time"
 
 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }
 
 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
 
 	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)
 
 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 
 }