diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 4efb4c43d..f21106ff1 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase):
 
         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         k8s.wait_for_pod_start('spilo-role=master')
+        k8s.wait_for_pod_start('spilo-role=replica')
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_enable_load_balancer(self):
@@ -108,139 +109,59 @@ class EndToEndTestCase(unittest.TestCase):
                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type))
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
+    def test_lazy_image_update(self):
         '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+        Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
+        until they are recreated for reasons other than operator's activity. That works because the operator uses
+        "onDelete" pod update policy for stateful sets.
 
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '500m'
-        minMemoryLimit = '500Mi'
-        patch_min_resource_limits = {
+        The test covers:
+            1) enabling lazy upgrade in existing operator deployment
+            2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
+        '''
+
+        k8s = self.k8s
+        pod0 = "acid-minimal-cluster-0"
+        pod1 = "acid-minimal-cluster-1"
+
+        # enable lazy update
+        patch_lazy_image_upgrade = {
             "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
+                "enable_lazy_image_upgrade": "true",
+                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
             }
         }
-        k8s.update_config(patch_min_resource_limits)
+        k8s.update_config(patch_lazy_image_upgrade)
 
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
+        # wait for sts update
+        time.sleep(60)
+
+        # restart the pod to get a container with the new image
+        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
+        time.sleep(60)
+
+        # lazy update works if the restarted pod and older pods have different Spilo versions
+        # i.e. the update did not immediately affect all pods
+        new_image = k8s.get_effective_pod_image(pod0)
+        old_image = k8s.get_effective_pod_image(pod1)
+        self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
+
+        # clean up
+        unpatch_lazy_image_upgrade = {
+            "data": {
+                "enable_lazy_image_upgrade": "false",
             }
         }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        k8s.wait_for_master_failover(failover_targets)
+        k8s.update_config(unpatch_lazy_image_upgrade)
 
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector='spilo-role=master,' + cluster_label).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
+        # at this point operator will complete the normal rolling update
+        # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
+        time.sleep(60)
 
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
-                         "Expected CPU limit {}, found {}"
-                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
-                         "Expected memory limit {}, found {}"
-                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
+        image0 = k8s.get_effective_pod_image(pod0)
+        image1 = k8s.get_effective_pod_image(pod1)
 
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
-        '''
-        k8s = self.k8s
-
-        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
-            pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
-            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
-
-        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_scaling(self):
-        '''
-        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
-        '''
-        k8s = self.k8s
-        labels = "cluster-name=acid-minimal-cluster"
-
-        k8s.wait_for_pg_to_scale(3)
-        self.assertEqual(3, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-        k8s.wait_for_pg_to_scale(2)
-        self.assertEqual(2, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_taint_based_eviction(self):
-        '''
-        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
-
-        # taint node with postgres=:NoExecute to force failover
-        body = {
-            "spec": {
-                "taints": [
-                    {
-                        "effect": "NoExecute",
-                        "key": "postgres"
-                    }
-                ]
-            }
-        }
-
-        # patch node and test if master is failing over to one of the expected nodes
-        k8s.api.core_v1.patch_node(current_master_node, body)
-        k8s.wait_for_master_failover(failover_targets)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-
-        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(current_master_node, new_master_node,
-                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
-        self.assertEqual(num_replicas, len(new_replica_nodes),
-                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
-        self.assert_master_is_unique()
-
-        # undo the tainting
-        body = {
-            "spec": {
-                "taints": []
-            }
-        }
-        k8s.api.core_v1.patch_node(new_master_node, body)
+        self.assertEqual(image0, image1, "Disabling the lazy update failed: pods still have different images {} and {}".format(image0, image1))
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_logical_backup_cron_job(self):
@@ -306,6 +227,133 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(0, len(jobs),
                          "Expected 0 logical backup jobs, found {}".format(len(jobs)))
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_min_resource_limits(self):
+        '''
+        Lower resource limits below configured minimum and let operator fix it
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+
+        # configure minimum boundaries for CPU and memory limits
+        minCPULimit = '500m'
+        minMemoryLimit = '500Mi'
+        patch_min_resource_limits = {
+            "data": {
+                "min_cpu_limit": minCPULimit,
+                "min_memory_limit": minMemoryLimit
+            }
+        }
+        k8s.update_config(patch_min_resource_limits)
+
+        # lower resource limits below minimum
+        pg_patch_resources = {
+            "spec": {
+                "resources": {
+                    "requests": {
+                        "cpu": "10m",
+                        "memory": "50Mi"
+                    },
+                    "limits": {
+                        "cpu": "200m",
+                        "memory": "200Mi"
+                    }
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
+        k8s.wait_for_pod_failover(failover_targets, labels)
+        k8s.wait_for_pod_start('spilo-role=replica')
+
+        pods = k8s.api.core_v1.list_namespaced_pod(
+            'default', label_selector=labels).items
+        self.assert_master_is_unique()
+        masterPod = pods[0]
+
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
+                         "Expected CPU limit {}, found {}"
+                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
+                         "Expected memory limit {}, found {}"
{}" + .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory'])) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_multi_namespace_support(self): + ''' + Create a customized Postgres cluster in a non-default namespace. + ''' + k8s = self.k8s + + with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: + pg_manifest = yaml.safe_load(f) + pg_manifest["metadata"]["namespace"] = self.namespace + yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) + + k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") + k8s.wait_for_pod_start("spilo-role=master", self.namespace) + self.assert_master_is_unique(self.namespace, "acid-test-cluster") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_node_readiness_label(self): + ''' + Remove node readiness label from master node. This must cause a failover. + ''' + k8s = self.k8s + cluster_label = 'cluster-name=acid-minimal-cluster' + labels = 'spilo-role=master,' + cluster_label + readiness_label = 'lifecycle-status' + readiness_value = 'ready' + + # get nodes of master and replica(s) (expected target of new master) + current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) + num_replicas = len(current_replica_nodes) + failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) + + # add node_readiness_label to potential failover nodes + patch_readiness_label = { + "metadata": { + "labels": { + readiness_label: readiness_value + } + } + } + for failover_target in failover_targets: + k8s.api.core_v1.patch_node(failover_target, patch_readiness_label) + + # define node_readiness_label in config map which should trigger a failover of the master + patch_readiness_label_config = { + "data": { + "node_readiness_label": readiness_label + ':' + readiness_value, + } + } + k8s.update_config(patch_readiness_label_config) + new_master_node, new_replica_nodes = self.assert_failover( + current_master_node, num_replicas, failover_targets, cluster_label) + + # patch also node where master ran before + k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label) + # toggle pod anti affinity to move replica away from master node + self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_scaling(self): + ''' + Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. + ''' + k8s = self.k8s + labels = "cluster-name=acid-minimal-cluster" + + k8s.wait_for_pg_to_scale(3) + self.assertEqual(3, k8s.count_pods_with_label(labels)) + self.assert_master_is_unique() + + k8s.wait_for_pg_to_scale(2) + self.assertEqual(2, k8s.count_pods_with_label(labels)) + self.assert_master_is_unique() + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_service_annotations(self): ''' @@ -347,72 +395,115 @@ class EndToEndTestCase(unittest.TestCase): k8s.update_config(unpatch_custom_service_annotations) @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_lazy_image_update(self): + def test_taint_based_eviction(self): ''' - Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image - until they are recreated for reasons other than operator's activity. That works because the operator uses - "onDelete" pod update policy for stateful sets. 
-
-        The test covers:
-            1) enabling lazy upgrade in existing operator deployment
-            2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
+        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
         '''
-
         k8s = self.k8s
-        pod0 = "acid-minimal-cluster-0"
-        pod1 = "acid-minimal-cluster-1"
+        cluster_label = 'cluster-name=acid-minimal-cluster'
 
-        # enable lazy update
-        patch_lazy_image_upgrade = {
-            "data": {
-                "enable_lazy_image_upgrade": "true",
-                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # taint node with postgres=:NoExecute to force failover
+        body = {
+            "spec": {
+                "taints": [
+                    {
+                        "effect": "NoExecute",
+                        "key": "postgres"
+                    }
+                ]
             }
         }
-        k8s.update_config(patch_lazy_image_upgrade)
 
-        # wait for sts update
-        time.sleep(60)
+        # patch node and test if master is failing over to one of the expected nodes
+        k8s.api.core_v1.patch_node(current_master_node, body)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
 
-        # restart the pod to get a container with the new image
-        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
-        time.sleep(60)
-
-        # lazy update works if the restarted pod and older pods have different Spilo versions
-        # i.e. the update did not immediately affect all pods
-        new_image = k8s.get_effective_pod_image(pod0)
-        old_image = k8s.get_effective_pod_image(pod1)
-        self.assertNotEqual(old_image, new_image, "Lazy updated failed: pods have the same image {}".format(new_image))
-
-        # clean up
-        unpatch_lazy_image_upgrade = {
+        # add toleration to pods
+        patch_toleration_config = {
             "data": {
-                "enable_lazy_image_upgrade": "false",
+                "toleration": "key:postgres,operator:Exists,effect:NoExecute"
             }
         }
-        k8s.update_config(unpatch_lazy_image_upgrade)
+        k8s.update_config(patch_toleration_config)
 
-        # at this point operator will complete the normal rolling update
-        # so we additonally test if disabling the lazy update (forcing the normal rolling update) works
-        time.sleep(60)
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
 
-        image0 = k8s.get_effective_pod_image(pod0)
-        image1 = k8s.get_effective_pod_image(pod1)
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+        If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
 
-        self.assertEqual(image0, image1, "Disabling lazy updated failed: pods still have different images {} and {}".format(image0, image1))
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
+    def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label):
+        '''
+        Check if master is failing over. The replica should move first to be the switchover target
+        '''
+        k8s = self.k8s
+        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        return new_master_node, new_replica_nodes
 
     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
         Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
         To be called manually after operations that affect pods
         '''
-
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName
         num_of_master_pods = k8s.count_pods_with_label(labels, namespace)
         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))
 
+    def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
+        '''
+        Other tests can lead to the situation that master and replica are on the same node.
+        Toggle pod anti affinity to distribute pods across nodes (replica in particular).
+        '''
+        k8s = self.k8s
+        failover_targets = self.get_failover_targets(master_node, replica_nodes)
+
+        # enable pod anti affinity in config map which should trigger movement of replica
+        patch_enable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "true"
+            }
+        }
+        k8s.update_config(patch_enable_antiaffinity)
+        self.assert_failover(
+            master_node, len(replica_nodes), failover_targets, cluster_label)
+
+        # disable pod anti affinity again
+        patch_disable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "false"
+            }
+        }
+        k8s.update_config(patch_disable_antiaffinity)
+
 
 class K8sApi:
 
@@ -500,15 +591,14 @@ class K8s:
     def count_pods_with_label(self, labels, namespace='default'):
         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items)
 
-    def wait_for_master_failover(self, expected_master_nodes, namespace='default'):
+    def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
-        new_master_node = ''
-        labels = 'spilo-role=master,cluster-name=acid-minimal-cluster'
+        new_pod_node = ''
 
-        while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
+        while (pod_phase != 'Running') or (new_pod_node not in failover_targets):
             pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
             if pods:
-                new_master_node = pods[0].spec.node_name
+                new_pod_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
 
             time.sleep(self.RETRY_TIMEOUT_SEC)
diff --git a/pkg/controller/node.go b/pkg/controller/node.go
index 6f7befa27..8052458c3 100644
--- a/pkg/controller/node.go
+++ b/pkg/controller/node.go
@@ -5,7 +5,7 @@ import (
 	"time"
 
 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }
 
 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
-
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)
 
 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 }
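
A quick illustration of the polling idea behind the new wait_for_pod_failover helper introduced above: the sketch below re-implements the same loop against the official kubernetes Python client directly, instead of the test suite's K8s wrapper. It is a minimal sketch, not code from this repository; the retry_sec parameter, the kubeconfig loading and the example arguments in the trailing comment are assumptions made for illustration.

import time
from kubernetes import client, config

def wait_for_pod_failover(failover_targets, labels, namespace="default", retry_sec=5):
    """Poll until a pod matching `labels` is Running on one of the expected target nodes."""
    config.load_kube_config()          # assumption: running outside the cluster with a local kubeconfig
    core_v1 = client.CoreV1Api()

    pod_phase, pod_node = None, None
    while pod_phase != "Running" or pod_node not in failover_targets:
        pods = core_v1.list_namespaced_pod(namespace, label_selector=labels).items
        if pods:
            pod_node = pods[0].spec.node_name    # node the failed-over pod landed on
            pod_phase = pods[0].status.phase
        time.sleep(retry_sec)

# hypothetical usage, mirroring the e2e tests above:
# wait_for_pod_failover(["kind-worker2"], "spilo-role=master,cluster-name=acid-minimal-cluster")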