merge commit

Sergey Dudoladov 2020-03-10 12:17:13 +01:00
commit dd10127e5d
2 changed files with 261 additions and 171 deletions


@@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase):
         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         k8s.wait_for_pod_start('spilo-role=master')
+        k8s.wait_for_pod_start('spilo-role=replica')

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_enable_load_balancer(self):
@@ -108,139 +109,59 @@ class EndToEndTestCase(unittest.TestCase):
                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type))

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
-        '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-        _, failover_targets = k8s.get_pg_nodes(cluster_label)
-
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '500m'
-        minMemoryLimit = '500Mi'
-        patch_min_resource_limits = {
-            "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
-            }
-        }
-        k8s.update_config(patch_min_resource_limits)
-
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        k8s.wait_for_master_failover(failover_targets)
-
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector='spilo-role=master,' + cluster_label).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
-
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
-                         "Expected CPU limit {}, found {}"
-                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
-                         "Expected memory limit {}, found {}"
-                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
-        '''
-        k8s = self.k8s
-
-        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
-            pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
-            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
-
-        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_scaling(self):
-        '''
-        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
-        '''
-        k8s = self.k8s
-        labels = "cluster-name=acid-minimal-cluster"
-
-        k8s.wait_for_pg_to_scale(3)
-        self.assertEqual(3, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-        k8s.wait_for_pg_to_scale(2)
-        self.assertEqual(2, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_taint_based_eviction(self):
-        '''
-        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
-
-        # taint node with postgres=:NoExecute to force failover
-        body = {
-            "spec": {
-                "taints": [
-                    {
-                        "effect": "NoExecute",
-                        "key": "postgres"
-                    }
-                ]
-            }
-        }
-
-        # patch node and test if master is failing over to one of the expected nodes
-        k8s.api.core_v1.patch_node(current_master_node, body)
-        k8s.wait_for_master_failover(failover_targets)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-
-        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(current_master_node, new_master_node,
-                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
-        self.assertEqual(num_replicas, len(new_replica_nodes),
-                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
-        self.assert_master_is_unique()
-
-        # undo the tainting
-        body = {
-            "spec": {
-                "taints": []
-            }
-        }
-        k8s.api.core_v1.patch_node(new_master_node, body)
+    def test_lazy_image_update(self):
+        '''
+        Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
+        until they are recreated for reasons other than operator's activity. That works because the operator uses
+        "onDelete" pod update policy for stateful sets.
+
+        The test covers:
+        1) enabling lazy upgrade in existing operator deployment
+        2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
+        '''
+        k8s = self.k8s
+        pod0 = "acid-minimal-cluster-0"
+        pod1 = "acid-minimal-cluster-1"
+
+        # enable lazy update
+        patch_lazy_image_upgrade = {
+            "data": {
+                "enable_lazy_image_upgrade": "true",
+                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
+            }
+        }
+        k8s.update_config(patch_lazy_image_upgrade)
+
+        # wait for sts update
+        time.sleep(60)
+
+        # restart the pod to get a container with the new image
+        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
+        time.sleep(60)
+
+        # lazy update works if the restarted pod and older pods have different Spilo versions
+        # i.e. the update did not immediately affect all pods
+        new_image = k8s.get_effective_pod_image(pod0)
+        old_image = k8s.get_effective_pod_image(pod1)
+        self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
+
+        # clean up
+        unpatch_lazy_image_upgrade = {
+            "data": {
+                "enable_lazy_image_upgrade": "false",
+            }
+        }
+        k8s.update_config(unpatch_lazy_image_upgrade)
+
+        # at this point operator will complete the normal rolling update
+        # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
+        time.sleep(60)
+
+        image0 = k8s.get_effective_pod_image(pod0)
+        image1 = k8s.get_effective_pod_image(pod1)
+        self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_logical_backup_cron_job(self):
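
The lazy-update test above relies on a get_effective_pod_image helper of the K8s wrapper class that this excerpt of the diff does not show. A minimal sketch of what such a helper could look like, assuming only that self.api.core_v1 is an initialized CoreV1Api client (the body below is an assumption, not the committed implementation):

    # Hypothetical sketch, not part of this diff: resolve the image a pod is actually running.
    def get_effective_pod_image(self, pod_name, namespace='default'):
        '''
        Return the image of the first (Spilo) container of the named pod,
        as reported by the Kubernetes API server.
        '''
        pod = self.api.core_v1.read_namespaced_pod(pod_name, namespace)
        return pod.spec.containers[0].image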
@@ -306,6 +227,133 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(0, len(jobs),
                          "Expected 0 logical backup jobs, found {}".format(len(jobs)))

+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_min_resource_limits(self):
+        '''
+        Lower resource limits below configured minimum and let operator fix it
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+
+        # configure minimum boundaries for CPU and memory limits
+        minCPULimit = '500m'
+        minMemoryLimit = '500Mi'
+        patch_min_resource_limits = {
+            "data": {
+                "min_cpu_limit": minCPULimit,
+                "min_memory_limit": minMemoryLimit
+            }
+        }
+        k8s.update_config(patch_min_resource_limits)
+
+        # lower resource limits below minimum
+        pg_patch_resources = {
+            "spec": {
+                "resources": {
+                    "requests": {
+                        "cpu": "10m",
+                        "memory": "50Mi"
+                    },
+                    "limits": {
+                        "cpu": "200m",
+                        "memory": "200Mi"
+                    }
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
+        k8s.wait_for_pod_failover(failover_targets, labels)
+        k8s.wait_for_pod_start('spilo-role=replica')
+
+        pods = k8s.api.core_v1.list_namespaced_pod(
+            'default', label_selector=labels).items
+        self.assert_master_is_unique()
+        masterPod = pods[0]
+
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
+                         "Expected CPU limit {}, found {}"
+                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
+                         "Expected memory limit {}, found {}"
+                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_multi_namespace_support(self):
+        '''
+        Create a customized Postgres cluster in a non-default namespace.
+        '''
+        k8s = self.k8s
+
+        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
+            pg_manifest = yaml.safe_load(f)
+            pg_manifest["metadata"]["namespace"] = self.namespace
+            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
+
+        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
+        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
+        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+        Remove node readiness label from master node. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in config map which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # patch also node where master ran before
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_scaling(self):
+        '''
+        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
+        '''
+        k8s = self.k8s
+        labels = "cluster-name=acid-minimal-cluster"
+
+        k8s.wait_for_pg_to_scale(3)
+        self.assertEqual(3, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
+        k8s.wait_for_pg_to_scale(2)
+        self.assertEqual(2, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_service_annotations(self):
         '''
@@ -347,72 +395,115 @@ class EndToEndTestCase(unittest.TestCase):
         k8s.update_config(unpatch_custom_service_annotations)

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_lazy_image_update(self):
-        '''
-        Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
-        until they are recreated for reasons other than operator's activity. That works because the operator uses
-        "onDelete" pod update policy for stateful sets.
-
-        The test covers:
-        1) enabling lazy upgrade in existing operator deployment
-        2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
-        '''
-        k8s = self.k8s
-        pod0 = "acid-minimal-cluster-0"
-        pod1 = "acid-minimal-cluster-1"
-
-        # enable lazy update
-        patch_lazy_image_upgrade = {
-            "data": {
-                "enable_lazy_image_upgrade": "true",
-                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
-            }
-        }
-        k8s.update_config(patch_lazy_image_upgrade)
-
-        # wait for sts update
-        time.sleep(60)
-
-        # restart the pod to get a container with the new image
-        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
-        time.sleep(60)
-
-        # lazy update works if the restarted pod and older pods have different Spilo versions
-        # i.e. the update did not immediately affect all pods
-        new_image = k8s.get_effective_pod_image(pod0)
-        old_image = k8s.get_effective_pod_image(pod1)
-        self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
-
-        # clean up
-        unpatch_lazy_image_upgrade = {
-            "data": {
-                "enable_lazy_image_upgrade": "false",
-            }
-        }
-        k8s.update_config(unpatch_lazy_image_upgrade)
-
-        # at this point operator will complete the normal rolling update
-        # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
-        time.sleep(60)
-
-        image0 = k8s.get_effective_pod_image(pod0)
-        image1 = k8s.get_effective_pod_image(pod1)
-        self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))
+    def test_taint_based_eviction(self):
+        '''
+        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # taint node with postgres=:NoExecute to force failover
+        body = {
+            "spec": {
+                "taints": [
+                    {
+                        "effect": "NoExecute",
+                        "key": "postgres"
+                    }
+                ]
+            }
+        }
+
+        # patch node and test if master is failing over to one of the expected nodes
+        k8s.api.core_v1.patch_node(current_master_node, body)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # add toleration to pods
+        patch_toleration_config = {
+            "data": {
+                "toleration": "key:postgres,operator:Exists,effect:NoExecute"
+            }
+        }
+        k8s.update_config(patch_toleration_config)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+        If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
+    def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label):
+        '''
+        Check if master is failing over. The replica should move first to be the switchover target
+        '''
+        k8s = self.k8s
+        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        return new_master_node, new_replica_nodes

     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
         Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
         To be called manually after operations that affect pods
         '''
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName

         num_of_master_pods = k8s.count_pods_with_label(labels, namespace)
         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))

+    def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
+        '''
+        Other tests can lead to the situation that master and replica are on the same node.
+        Toggle pod anti affinity to distribute pods across nodes (replica in particular).
+        '''
+        k8s = self.k8s
+        failover_targets = self.get_failover_targets(master_node, replica_nodes)
+
+        # enable pod anti affinity in config map which should trigger movement of replica
+        patch_enable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "true"
+            }
+        }
+        k8s.update_config(patch_enable_antiaffinity)
+        self.assert_failover(
+            master_node, len(replica_nodes), failover_targets, cluster_label)
+
+        # disable pod anti affinity again
+        patch_disable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "false"
+            }
+        }
+        k8s.update_config(patch_disable_antiaffinity)
+

 class K8sApi:
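
Taken together, the new helpers factor the failover plumbing out of the individual tests. A hypothetical test body (not part of this commit) would compose them roughly as follows, with the disruptive change itself left as a placeholder; the signatures match those shown in the hunk above:

        # Hypothetical composition of the new helpers; the disruptive action is elided.
        k8s = self.k8s
        cluster_label = 'cluster-name=acid-minimal-cluster'

        master_node, replica_nodes = k8s.get_pg_nodes(cluster_label)
        failover_targets = self.get_failover_targets(master_node, replica_nodes)

        # ... apply a change that drives the master off its current node ...

        new_master_node, new_replica_nodes = self.assert_failover(
            master_node, len(replica_nodes), failover_targets, cluster_label)
        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)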
@@ -500,15 +591,14 @@ class K8s:
     def count_pods_with_label(self, labels, namespace='default'):
         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items)

-    def wait_for_master_failover(self, expected_master_nodes, namespace='default'):
+    def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
-        new_master_node = ''
-        labels = 'spilo-role=master,cluster-name=acid-minimal-cluster'
+        new_pod_node = ''

-        while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
+        while (pod_phase != 'Running') or (new_pod_node not in failover_targets):
             pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
             if pods:
-                new_master_node = pods[0].spec.node_name
+                new_pod_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
             time.sleep(self.RETRY_TIMEOUT_SEC)
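
Because the label selector is now an argument instead of a hardcoded master selector, the renamed helper is no longer tied to master pods. Hypothetical calls illustrating this; only the master variant actually appears in this commit (in assert_failover):

        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=replica,' + cluster_label)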


@@ -5,7 +5,7 @@ import (
 	"time"

 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }

 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)

 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 }