From 064f6cce7eb22f282e2fc33a922c7ffcac4b2376 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Mon, 14 Mar 2022 19:22:07 +0100 Subject: [PATCH] shorten e2e test for test deployment --- e2e/tests/test_e2e.py | 1033 ----------------------------------------- 1 file changed, 1033 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index a425354a5..93311f099 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -676,1039 +676,6 @@ class EndToEndTestCase(unittest.TestCase): print('Operator log: {}'.format(k8s.get_operator_log())) raise - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_infrastructure_roles(self): - ''' - Test using external secrets for infrastructure roles - ''' - k8s = self.k8s - # update infrastructure roles description - secret_name = "postgresql-infrastructure-roles" - roles = "secretname: postgresql-infrastructure-roles-new, userkey: user,"\ - "rolekey: memberof, passwordkey: password, defaultrolevalue: robot_zmon" - patch_infrastructure_roles = { - "data": { - "infrastructure_roles_secret_name": secret_name, - "infrastructure_roles_secrets": roles, - }, - } - k8s.update_config(patch_infrastructure_roles) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - try: - # check that new roles are represented in the config by requesting the - # operator configuration via API - - def verify_role(): - try: - operator_pod = k8s.get_operator_pod() - get_config_cmd = "wget --quiet -O - localhost:8080/config" - result = k8s.exec_with_kubectl(operator_pod.metadata.name, - get_config_cmd) - try: - roles_dict = (json.loads(result.stdout) - .get("controller", {}) - .get("InfrastructureRoles")) - except: - return False - - if "robot_zmon_acid_monitoring_new" in roles_dict: - role = roles_dict["robot_zmon_acid_monitoring_new"] - role.pop("Password", None) - self.assertDictEqual(role, { - "Name": "robot_zmon_acid_monitoring_new", - "Namespace":"", - "Flags": None, - "MemberOf": ["robot_zmon"], - "Parameters": None, - "AdminRole": "", - "Origin": 2, - "IsDbOwner": False, - "Deleted": False - }) - return True - except: - pass - - return False - - self.eventuallyTrue(verify_role, "infrastructure role setup is not loaded") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_lazy_spilo_upgrade(self): - ''' - Test lazy upgrade for the Spilo image: operator changes a stateful set - but lets pods run with the old image until they are recreated for - reasons other than operator's activity. That works because the operator - configures stateful sets to use "onDelete" pod update policy. 
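For reference, the "onDelete" policy mentioned above is the StatefulSet update strategy; a minimal sketch of the field this implies (illustrative only, not taken from this patch):

    # Illustrative sketch -- with type OnDelete, Kubernetes applies a new pod
    # template only when a pod is deleted, which is what enables the "lazy"
    # image rollout described in this docstring.
    statefulset_update_strategy = {
        "spec": {
            "updateStrategy": {
                "type": "OnDelete"
            }
        }
    }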
- The test covers: - 1) enabling lazy upgrade in existing operator deployment - 2) forcing the normal rolling upgrade by changing the operator - configmap and restarting its pod - ''' - - k8s = self.k8s - - pod0 = 'acid-minimal-cluster-0' - pod1 = 'acid-minimal-cluster-1' - - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - - patch_lazy_spilo_upgrade = { - "data": { - "docker_image": SPILO_CURRENT, - "enable_lazy_spilo_upgrade": "false" - } - } - k8s.update_config(patch_lazy_spilo_upgrade, - step="Init baseline image version") - - self.eventuallyEqual(lambda: k8s.get_statefulset_image(), SPILO_CURRENT, - "Statefulset not updated initially") - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), - SPILO_CURRENT, "Rolling upgrade was not executed") - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), - SPILO_CURRENT, "Rolling upgrade was not executed") - - # update docker image in config and enable the lazy upgrade - conf_image = SPILO_LAZY - patch_lazy_spilo_upgrade = { - "data": { - "docker_image": conf_image, - "enable_lazy_spilo_upgrade": "true" - } - } - k8s.update_config(patch_lazy_spilo_upgrade, - step="patch image and lazy upgrade") - self.eventuallyEqual(lambda: k8s.get_statefulset_image(), conf_image, - "Statefulset not updated to next Docker image") - - try: - # restart the pod to get a container with the new image - k8s.api.core_v1.delete_namespaced_pod(pod0, 'default') - - # verify only pod-0 which was deleted got new image from statefulset - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), - conf_image, "Delete pod-0 did not get new spilo image") - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No two pods running after lazy rolling upgrade") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - self.assertNotEqual(lambda: k8s.get_effective_pod_image(pod1), - SPILO_CURRENT, - "pod-1 should not have change Docker image to {}".format(SPILO_CURRENT)) - - # clean up - unpatch_lazy_spilo_upgrade = { - "data": { - "enable_lazy_spilo_upgrade": "false", - } - } - k8s.update_config(unpatch_lazy_spilo_upgrade, step="patch lazy upgrade") - - # at this point operator will complete the normal rolling upgrade - # so we additionally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), - conf_image, "Rolling upgrade was not executed", - 50, 3) - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), - conf_image, "Rolling upgrade was not executed", - 50, 3) - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_logical_backup_cron_job(self): - ''' - Ensure we can (a) create the cron job at user request for a specific PG cluster - (b) update the cluster-wide image for the logical backup pod - (c) delete the job at user request - Limitations: - (a) Does not run the actual batch job because 
there is no S3 mock to upload backups to
-           (b) Assumes 'acid-minimal-cluster' exists as defined in setUp
-        '''
-
-        k8s = self.k8s
-
-        # create the cron job
-        schedule = "7 7 7 7 *"
-        pg_patch_enable_backup = {
-            "spec": {
-                "enableLogicalBackup": True,
-                "logicalBackupSchedule": schedule
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_backup)
-
-        try:
-            self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 1, "failed to create logical backup job")
-
-            job = k8s.get_logical_backup_job().items[0]
-            self.assertEqual(job.metadata.name, "logical-backup-acid-minimal-cluster",
-                             "Expected job name {}, found {}"
-                             .format("logical-backup-acid-minimal-cluster", job.metadata.name))
-            self.assertEqual(job.spec.schedule, schedule,
-                             "Expected {} schedule, found {}"
-                             .format(schedule, job.spec.schedule))
-
-            # update the cluster-wide image of the logical backup pod
-            image = "test-image-name"
-            patch_logical_backup_image = {
-                "data": {
-                    "logical_backup_docker_image": image,
-                }
-            }
-            k8s.update_config(patch_logical_backup_image, step="patch logical backup image")
-
-            def get_docker_image():
-                jobs = k8s.get_logical_backup_job().items
-                return jobs[0].spec.job_template.spec.template.spec.containers[0].image
-
-            self.eventuallyEqual(get_docker_image, image,
-                                 "Expected job image {}, found {}".format(image, "{}"))
-
-            # delete the logical backup cron job
-            pg_patch_disable_backup = {
-                "spec": {
-                    "enableLogicalBackup": False,
-                }
-            }
-            k8s.api.custom_objects_api.patch_namespaced_custom_object(
-                "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_backup)
-
-            self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 0, "failed to delete logical backup job")
-
-        except timeout_decorator.TimeoutError:
-            print('Operator log: {}'.format(k8s.get_operator_log()))
-            raise
-
-        # ensure cluster is healthy after tests
-        self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    @unittest.skip("Skipping this test until fixed")
-    def test_major_version_upgrade(self):
-        k8s = self.k8s
-        result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest-12.yaml")
-        self.eventuallyEqual(lambda: k8s.count_running_pods(labels="application=spilo,cluster-name=acid-upgrade-test"), 2, "No 2 pods running")
-        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
-
-        pg_patch_version = {
-            "spec": {
-                "postgres": {
-                    "version": "14"
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version)
-
-        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
-
-        def check_version_14():
-            p = k8s.get_patroni_state("acid-upgrade-test-0")
-            version = p["server_version"][0:2]
-            return version
-
-        self.eventuallyEqual(check_version_14, "14", "Version was not upgraded to 14")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
-        '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        _, replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(replica_nodes, [])
-
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '503m'
-        minMemoryLimit = '502Mi'
-
-        patch_min_resource_limits = {
-            "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
-            }
-        }
-        k8s.update_config(patch_min_resource_limits, "Minimum resource test")
-
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
-
-        # wait for the switchover
-        k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-        self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members()), 2, "Postgres status did not enter running")
-
-        def verify_pod_limits():
-            pods = k8s.api.core_v1.list_namespaced_pod('default', label_selector="cluster-name=acid-minimal-cluster,application=spilo").items
-            if len(pods) < 2:
-                return False
-
-            r = pods[0].spec.containers[0].resources.limits['memory'] == minMemoryLimit
-            r = r and pods[0].spec.containers[0].resources.limits['cpu'] == minCPULimit
-            r = r and pods[1].spec.containers[0].resources.limits['memory'] == minMemoryLimit
-            r = r and pods[1].spec.containers[0].resources.limits['cpu'] == minCPULimit
-            return r
-
-        self.eventuallyTrue(verify_pod_limits, "Pod limits were not adjusted")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
- ''' - k8s = self.k8s - - with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: - pg_manifest = yaml.safe_load(f) - pg_manifest["metadata"]["namespace"] = self.test_namespace - yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) - - try: - k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") - k8s.wait_for_pod_start("spilo-role=master", self.test_namespace) - k8s.wait_for_pod_start("spilo-role=replica", self.test_namespace) - self.assert_master_is_unique(self.test_namespace, "acid-test-cluster") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - finally: - # delete the new cluster so that the k8s_api.get_operator_state works correctly in subsequent tests - # ideally we should delete the 'test' namespace here but - # the pods inside the namespace stuck in the Terminating state making the test time out - k8s.api.custom_objects_api.delete_namespaced_custom_object( - "acid.zalan.do", "v1", self.test_namespace, "postgresqls", "acid-test-cluster") - time.sleep(5) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_node_affinity(self): - ''' - Add label to a node and update postgres cluster spec to deploy only on a node with that label - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - - # get nodes of master and replica(s) - master_nodes, replica_nodes = k8s.get_cluster_nodes() - self.assertNotEqual(master_nodes, []) - self.assertNotEqual(replica_nodes, []) - - # label node with environment=postgres - node_label_body = { - "metadata": { - "labels": { - "node-affinity-test": "postgres" - } - } - } - - try: - # patch master node with the label - k8s.api.core_v1.patch_node(master_nodes[0], node_label_body) - - # add node affinity to cluster - patch_node_affinity_config = { - "spec": { - "nodeAffinity" : { - "requiredDuringSchedulingIgnoredDuringExecution": { - "nodeSelectorTerms": [ - { - "matchExpressions": [ - { - "key": "node-affinity-test", - "operator": "In", - "values": [ - "postgres" - ] - } - ] - } - ] - } - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - group="acid.zalan.do", - version="v1", - namespace="default", - plural="postgresqls", - name="acid-minimal-cluster", - body=patch_node_affinity_config) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # node affinity change should cause replica to relocate from replica node to master node due to node affinity requirement - k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label) - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - # next master will be switched over and pod needs to be replaced as well to finish the rolling update - k8s.wait_for_pod_failover(master_nodes, 'spilo-role=master,' + cluster_label) - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - podsList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) - for pod in podsList.items: - if pod.metadata.labels.get('spilo-role') == 'replica': - self.assertEqual(master_nodes[0], pod.spec.node_name, - "Sanity check: expected replica to relocate to master node {}, but found on {}".format(master_nodes[0], pod.spec.node_name)) - - # check that pod has correct node affinity - key = 
pod.spec.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms[0].match_expressions[0].key - value = pod.spec.affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms[0].match_expressions[0].values[0] - self.assertEqual("node-affinity-test", key, - "Sanity check: expect node selector key to be equal to 'node-affinity-test' but got {}".format(key)) - self.assertEqual("postgres", value, - "Sanity check: expect node selector value to be equal to 'postgres' but got {}".format(value)) - - patch_node_remove_affinity_config = { - "spec": { - "nodeAffinity" : None - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - group="acid.zalan.do", - version="v1", - namespace="default", - plural="postgresqls", - name="acid-minimal-cluster", - body=patch_node_remove_affinity_config) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # node affinity change should cause another rolling update and relocation of replica - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - k8s.wait_for_pod_start('spilo-role=master,' + cluster_label) - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - # toggle pod anti affinity to make sure replica and master run on separate nodes - self.assert_distributed_pods(master_nodes) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_node_readiness_label(self): - ''' - Remove node readiness label from master node. This must cause a failover. - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - readiness_label = 'lifecycle-status' - readiness_value = 'ready' - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - - # get nodes of master and replica(s) (expected target of new master) - master_nodes, replica_nodes = k8s.get_cluster_nodes() - self.assertNotEqual(master_nodes, []) - self.assertNotEqual(replica_nodes, []) - - try: - # add node_readiness_label to potential failover nodes - patch_readiness_label = { - "metadata": { - "labels": { - readiness_label: readiness_value - } - } - } - for replica_node in replica_nodes: - k8s.api.core_v1.patch_node(replica_node, patch_readiness_label) - - # define node_readiness_label in config map which should trigger a rolling update - patch_readiness_label_config = { - "data": { - "node_readiness_label": readiness_label + ':' + readiness_value, - "node_readiness_label_merge": "AND", - } - } - k8s.update_config(patch_readiness_label_config, "setting readiness label") - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # first replica will be replaced and get the new affinity - # however, it might not start due to a volume node affinity conflict - # in this case only if the pvc and pod are deleted it can be scheduled - replica = k8s.get_cluster_replica_pod() - if replica.status.phase == 'Pending': - k8s.api.core_v1.delete_namespaced_persistent_volume_claim('pgdata-' + replica.metadata.name, 'default') - k8s.api.core_v1.delete_namespaced_pod(replica.metadata.name, 'default') - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - # next master will be switched over and pod needs to be replaced as well to finish the rolling update - k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) - 
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - # patch also node where master ran before - k8s.api.core_v1.patch_node(master_nodes[0], patch_readiness_label) - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - # toggle pod anti affinity to move replica away from master node - self.assert_distributed_pods(master_nodes) - - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_overwrite_pooler_deployment(self): - pooler_name = 'acid-minimal-cluster-pooler' - k8s = self.k8s - k8s.create_with_kubectl("manifests/minimal-fake-pooler-deployment.yaml") - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(name=pooler_name), 1, - "Initial broken deployment not rolled out") - - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': True - } - }) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(name=pooler_name), 2, - "Operator did not succeed in overwriting labels") - - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': False - } - }) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.count_running_pods("connection-pooler="+pooler_name), - 0, "Pooler pods not scaled down") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_password_rotation(self): - ''' - Test password rotation and removal of users due to retention policy - ''' - k8s = self.k8s - leader = k8s.get_cluster_leader_pod() - today = date.today() - - # enable password rotation for owner of foo database - pg_patch_inplace_rotation_for_owner = { - "spec": { - "usersWithInPlaceSecretRotation": [ - "zalando" - ] - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_inplace_rotation_for_owner) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # check if next rotation date was set in secret - secret_data = k8s.get_secret_data("zalando") - next_rotation_timestamp = datetime.strptime(str(base64.b64decode(secret_data["nextRotation"]), 'utf-8'), "%Y-%m-%dT%H:%M:%SZ") - today90days = today+timedelta(days=90) - self.assertEqual(today90days, next_rotation_timestamp.date(), - "Unexpected rotation date in secret of zalando user: expected {}, got {}".format(today90days, next_rotation_timestamp.date())) - - # create fake rotation users that should be removed by operator - # but have one that would still fit into the retention period - create_fake_rotation_user = """ - CREATE ROLE foo_user201031 IN ROLE foo_user; - CREATE ROLE foo_user211031 IN ROLE foo_user; - CREATE ROLE foo_user"""+(today-timedelta(days=40)).strftime("%y%m%d")+""" IN ROLE foo_user; - """ - self.query_database(leader.metadata.name, "postgres", create_fake_rotation_user) - - # patch foo_user secret with outdated rotation date - fake_rotation_date = today.isoformat() + 'T00:00:00Z' - fake_rotation_date_encoded = base64.b64encode(fake_rotation_date.encode('utf-8')) - 
secret_fake_rotation = { - "data": { - "nextRotation": str(fake_rotation_date_encoded, 'utf-8'), - }, - } - k8s.api.core_v1.patch_namespaced_secret( - name="foo-user.acid-minimal-cluster.credentials.postgresql.acid.zalan.do", - namespace="default", - body=secret_fake_rotation) - - # enable password rotation for all other users (foo_user) - # this will force a sync of secrets for further assertions - enable_password_rotation = { - "data": { - "enable_password_rotation": "true", - "password_rotation_interval": "30", - "password_rotation_user_retention": "30", # should be set to 60 - }, - } - k8s.update_config(enable_password_rotation) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - # check if next rotation date and username have been replaced - secret_data = k8s.get_secret_data("foo_user") - secret_username = str(base64.b64decode(secret_data["username"]), 'utf-8') - next_rotation_timestamp = datetime.strptime(str(base64.b64decode(secret_data["nextRotation"]), 'utf-8'), "%Y-%m-%dT%H:%M:%SZ") - rotation_user = "foo_user"+today.strftime("%y%m%d") - today30days = today+timedelta(days=30) - - self.assertEqual(rotation_user, secret_username, - "Unexpected username in secret of foo_user: expected {}, got {}".format(rotation_user, secret_username)) - self.assertEqual(today30days, next_rotation_timestamp.date(), - "Unexpected rotation date in secret of foo_user: expected {}, got {}".format(today30days, next_rotation_timestamp.date())) - - # check if oldest fake rotation users were deleted - # there should only be foo_user, foo_user+today and foo_user+today-40days - user_query = """ - SELECT rolname - FROM pg_catalog.pg_roles - WHERE rolname LIKE 'foo_user%'; - """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 3, - "Found incorrect number of rotation users", 10, 5) - - # disable password rotation for all other users (foo_user) - # and pick smaller intervals to see if the third fake rotation user is dropped - enable_password_rotation = { - "data": { - "enable_password_rotation": "false", - "password_rotation_interval": "15", - "password_rotation_user_retention": "30", # 2 * rotation interval - }, - } - k8s.update_config(enable_password_rotation) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - # check if username in foo_user secret is reset - secret_data = k8s.get_secret_data("foo_user") - secret_username = str(base64.b64decode(secret_data["username"]), 'utf-8') - next_rotation_timestamp = str(base64.b64decode(secret_data["nextRotation"]), 'utf-8') - self.assertEqual("foo_user", secret_username, - "Unexpected username in secret of foo_user: expected {}, got {}".format("foo_user", secret_username)) - self.assertEqual('', next_rotation_timestamp, - "Unexpected rotation date in secret of foo_user: expected empty string, got {}".format(next_rotation_timestamp)) - - # check roles again, there should only be foo_user and foo_user+today - user_query = """ - SELECT rolname - FROM pg_catalog.pg_roles - WHERE rolname LIKE 'foo_user%'; - """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, - "Found incorrect number of rotation users", 10, 5) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_rolling_update_flag(self): - ''' - Add rolling update flag to only the master and see it failing over - ''' - k8s = self.k8s - cluster_label = 
'application=spilo,cluster-name=acid-minimal-cluster' - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - - # get node and replica (expected target of new master) - _, replica_nodes = k8s.get_pg_nodes(cluster_label) - - # rolling update annotation - flag = { - "metadata": { - "annotations": { - "zalando-postgres-operator-rolling-update-required": "true", - } - } - } - - try: - podsList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) - for pod in podsList.items: - # add flag only to the master to make it appear to the operator as a leftover from a rolling update - if pod.metadata.labels.get('spilo-role') == 'master': - old_creation_timestamp = pod.metadata.creation_timestamp - k8s.patch_pod(flag, pod.metadata.name, pod.metadata.namespace) - else: - # remember replica name to check if operator does a switchover - switchover_target = pod.metadata.name - - # do not wait until the next sync - k8s.delete_operator_pod() - - # operator should now recreate the master pod and do a switchover before - k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - # check if the former replica is now the new master - leader = k8s.get_cluster_leader_pod() - self.eventuallyEqual(lambda: leader.metadata.name, switchover_target, "Rolling update flag did not trigger switchover") - - # check that the old master has been recreated - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - replica = k8s.get_cluster_replica_pod() - self.assertTrue(replica.metadata.creation_timestamp > old_creation_timestamp, "Old master pod was not recreated") - - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_rolling_update_label_timeout(self): - ''' - Simulate case when replica does not receive label in time and rolling update does not finish - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - flag = "zalando-postgres-operator-rolling-update-required" - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - - # get node and replica (expected target of new master) - _, replica_nodes = k8s.get_pg_nodes(cluster_label) - - # rolling update annotation - rolling_update_patch = { - "metadata": { - "annotations": { - flag: "true", - } - } - } - - # make pod_label_wait_timeout so short that rolling update fails on first try - # temporarily lower resync interval to reduce waiting for further tests - # pods should get healthy in the meantime - patch_resync_config = { - "data": { - "pod_label_wait_timeout": "2s", - "resync_period": "30s", - "repair_period": "30s", - } - } - - try: - # patch both pods for rolling update - podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) - for pod in podList.items: - k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace) - if pod.metadata.labels.get('spilo-role') == 'replica': - switchover_target = pod.metadata.name - - # update config and restart operator - k8s.update_config(patch_resync_config, "update resync interval and pod_label_wait_timeout") - - # operator should now recreate the replica pod first and do a switchover after - k8s.wait_for_pod_start('spilo-role=replica,' + 
cluster_label) - - # pod_label_wait_timeout should have been exceeded hence the rolling update is continued on next sync - # check if the cluster state is "SyncFailed" - self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail") - - # wait for next sync, replica should be running normally by now and be ready for switchover - k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - # check if the former replica is now the new master - leader = k8s.get_cluster_leader_pod() - self.eventuallyEqual(lambda: leader.metadata.name, switchover_target, "Rolling update flag did not trigger switchover") - - # wait for the old master to get restarted - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - # status should again be "SyncFailed" but turn into "Running" on the next sync - time.sleep(30) - self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs") - - # revert config changes - patch_resync_config = { - "data": { - "pod_label_wait_timeout": "10m", - "resync_period": "4m", - "repair_period": "2m", - } - } - k8s.update_config(patch_resync_config, "revert resync interval and pod_label_wait_timeout") - - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_scaling(self): - ''' - Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. - ''' - k8s = self.k8s - pod = "acid-minimal-cluster-0" - - k8s.scale_cluster(3) - self.eventuallyEqual(lambda: k8s.count_running_pods(), 3, "Scale up to 3 failed") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod)), 3, "Not all 3 nodes healthy") - - k8s.scale_cluster(2) - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "Scale down to 2 failed") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod)), 2, "Not all members 2 healthy") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_service_annotations(self): - ''' - Create a Postgres cluster with service annotations and check them. 
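The expectation here is that the operator-wide custom_service_annotations and the cluster manifest's serviceAnnotations end up merged onto the services. A rough sketch of that merge (illustrative only; the operator itself implements this in Go):

    # Config-level and cluster-level annotations are expected to be combined.
    config_annotations = {"foo": "bar"}                                # from custom_service_annotations
    cluster_annotations = {"annotation.key": "value", "alice": "bob"}  # from serviceAnnotations
    expected_annotations = {**config_annotations, **cluster_annotations}
    # expected_annotations == {"foo": "bar", "annotation.key": "value", "alice": "bob"}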
- ''' - k8s = self.k8s - patch_custom_service_annotations = { - "data": { - "custom_service_annotations": "foo:bar", - } - } - k8s.update_config(patch_custom_service_annotations) - - pg_patch_custom_annotations = { - "spec": { - "serviceAnnotations": { - "annotation.key": "value", - "alice": "bob", - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_custom_annotations) - - annotations = { - "annotation.key": "value", - "foo": "bar", - "alice": "bob" - } - - self.eventuallyTrue(lambda: k8s.check_service_annotations("cluster-name=acid-minimal-cluster,spilo-role=master", annotations), "Wrong annotations") - self.eventuallyTrue(lambda: k8s.check_service_annotations("cluster-name=acid-minimal-cluster,spilo-role=replica", annotations), "Wrong annotations") - - # clean up - unpatch_custom_service_annotations = { - "data": { - "custom_service_annotations": "", - } - } - k8s.update_config(unpatch_custom_service_annotations) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_statefulset_annotation_propagation(self): - ''' - Inject annotation to Postgresql CRD and check it's propagation to stateful set - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - - patch_sset_propagate_annotations = { - "data": { - "downscaler_annotations": "deployment-time,downscaler/*", - "inherited_annotations": "owned-by", - } - } - k8s.update_config(patch_sset_propagate_annotations) - - pg_crd_annotations = { - "metadata": { - "annotations": { - "deployment-time": "2020-04-30 12:00:00", - "downscaler/downtime_replicas": "0", - "owned-by": "acid", - }, - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_crd_annotations) - - annotations = { - "deployment-time": "2020-04-30 12:00:00", - "downscaler/downtime_replicas": "0", - "owned-by": "acid", - } - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyTrue(lambda: k8s.check_statefulset_annotations(cluster_label, annotations), "Annotations missing") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_taint_based_eviction(self): - ''' - Add taint "postgres=:NoExecute" to node with master. This must cause a failover. 
- ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") - - # get nodes of master and replica(s) (expected target of new master) - master_nodes, replica_nodes = k8s.get_cluster_nodes() - self.assertNotEqual(master_nodes, []) - self.assertNotEqual(replica_nodes, []) - - # taint node with postgres=:NoExecute to force failover - body = { - "spec": { - "taints": [ - { - "effect": "NoExecute", - "key": "postgres" - } - ] - } - } - k8s.api.core_v1.patch_node(master_nodes[0], body) - - # add toleration to pods - patch_toleration_config = { - "data": { - "toleration": "key:postgres,operator:Exists,effect:NoExecute" - } - } - - try: - k8s.update_config(patch_toleration_config, step="allow tainted nodes") - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - # toggle pod anti affinity to move replica away from master node - self.assert_distributed_pods(master_nodes) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_zz_cluster_deletion(self): - ''' - Test deletion with configured protection - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - - # configure delete protection - patch_delete_annotations = { - "data": { - "delete_annotation_date_key": "delete-date", - "delete_annotation_name_key": "delete-clustername" - } - } - k8s.update_config(patch_delete_annotations) - time.sleep(25) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - try: - # this delete attempt should be omitted because of missing annotations - k8s.api.custom_objects_api.delete_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster") - time.sleep(15) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # check that pods and services are still there - k8s.wait_for_running_pods(cluster_label, 2) - k8s.wait_for_service(cluster_label) - - # recreate Postgres cluster resource - k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml") - - # wait a little before proceeding - time.sleep(10) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # add annotations to manifest - delete_date = datetime.today().strftime('%Y-%m-%d') - pg_patch_delete_annotations = { - "metadata": { - "annotations": { - "delete-date": delete_date, - "delete-clustername": "acid-minimal-cluster", - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_delete_annotations) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # wait a little before proceeding - time.sleep(20) - k8s.wait_for_running_pods(cluster_label, 2) - 
k8s.wait_for_service(cluster_label) - - # now delete process should be triggered - k8s.api.custom_objects_api.delete_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster") - - self.eventuallyEqual(lambda: len(k8s.api.custom_objects_api.list_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", label_selector="cluster-name=acid-minimal-cluster")["items"]), 0, "Manifest not deleted") - - # check if everything has been deleted - self.eventuallyEqual(lambda: k8s.count_pods_with_label(cluster_label), 0, "Pods not deleted") - self.eventuallyEqual(lambda: k8s.count_services_with_label(cluster_label), 0, "Service not deleted") - self.eventuallyEqual(lambda: k8s.count_endpoints_with_label(cluster_label), 0, "Endpoints not deleted") - self.eventuallyEqual(lambda: k8s.count_statefulsets_with_label(cluster_label), 0, "Statefulset not deleted") - self.eventuallyEqual(lambda: k8s.count_deployments_with_label(cluster_label), 0, "Deployments not deleted") - self.eventuallyEqual(lambda: k8s.count_pdbs_with_label(cluster_label), 0, "Pod disruption budget not deleted") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label(cluster_label), 0, "Secrets not deleted") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - # reset configmap - patch_delete_annotations = { - "data": { - "delete_annotation_date_key": "", - "delete_annotation_name_key": "" - } - } - k8s.update_config(patch_delete_annotations) - def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"): ''' Check that there is a single pod in the k8s cluster with the label "spilo-role=master"