From e1f5173dafb1a1e8e357a9bcf6852e65f05126d0 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Sun, 20 Mar 2022 11:52:43 +0100 Subject: [PATCH] bring back all e2e tests --- e2e/tests/test_e2e.py | 800 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 800 insertions(+) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index a346e09ad..44c72cb44 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -158,6 +158,806 @@ class EndToEndTestCase(unittest.TestCase): print('Operator log: {}'.format(k8s.get_operator_log())) raise + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_additional_owner_roles(self): + ''' + Test adding additional member roles to existing database owner roles + ''' + k8s = self.k8s + + # enable PostgresTeam CRD and lower resync + owner_roles = { + "data": { + "additional_owner_roles": "cron_admin", + }, + } + k8s.update_config(owner_roles) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + + leader = k8s.get_cluster_leader_pod() + owner_query = """ + SELECT a2.rolname + FROM pg_catalog.pg_authid a + JOIN pg_catalog.pg_auth_members am + ON a.oid = am.member + AND a.rolname = 'cron_admin' + JOIN pg_catalog.pg_authid a2 + ON a2.oid = am.roleid + WHERE a2.rolname IN ('zalando', 'bar_owner', 'bar_data_owner'); + """ + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", owner_query)), 3, + "Not all additional users found in database", 10, 5) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_additional_pod_capabilities(self): + ''' + Extend postgres container capabilities + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + capabilities = ["SYS_NICE","CHOWN"] + patch_capabilities = { + "data": { + "additional_pod_capabilities": ','.join(capabilities), + }, + } + + # get node and replica (expected target of new master) + _, replica_nodes = k8s.get_pg_nodes(cluster_label) + + try: + k8s.update_config(patch_capabilities) + + # changed security context of postgres container should trigger a rolling update + k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_pods_with_container_capabilities(capabilities, cluster_label), + 2, "Container capabilities not updated") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_additional_teams_and_members(self): + ''' + Test PostgresTeam CRD with extra teams and members + ''' + k8s = self.k8s + + # enable PostgresTeam CRD and lower resync + enable_postgres_team_crd = { + "data": { + "enable_postgres_team_crd": "true", + "enable_team_member_deprecation": "true", + "role_deletion_suffix": "_delete_me", + "resync_period": "15s", + "repair_period": "15s", + }, + } + k8s.update_config(enable_postgres_team_crd) + + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresteams', 'custom-team-membership', + { + 'spec': { + 'additionalTeams': { + 'acid': [ + 'e2e' + ] + }, + 'additionalMembers': { + 'e2e': [ + 'kind' + ] + } + } + }) + + leader = k8s.get_cluster_leader_pod() + user_query = """ + SELECT rolname + FROM pg_catalog.pg_roles + WHERE rolname IN ('elephant', 'kind'); + 
""" + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + "Not all additional users found in database", 10, 5) + + # replace additional member and check if the removed member's role is renamed + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresteams', 'custom-team-membership', + { + 'spec': { + 'additionalMembers': { + 'e2e': [ + 'tester' + ] + }, + } + }) + + user_query = """ + SELECT rolname + FROM pg_catalog.pg_roles + WHERE (rolname = 'tester' AND rolcanlogin) + OR (rolname = 'kind_delete_me' AND NOT rolcanlogin); + """ + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + "Database role of replaced member in PostgresTeam not renamed", 10, 5) + + # re-add additional member and check if the role is renamed back + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresteams', 'custom-team-membership', + { + 'spec': { + 'additionalMembers': { + 'e2e': [ + 'kind' + ] + }, + } + }) + + user_query = """ + SELECT rolname + FROM pg_catalog.pg_roles + WHERE (rolname = 'kind' AND rolcanlogin) + OR (rolname = 'tester_delete_me' AND NOT rolcanlogin); + """ + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + "Database role of recreated member in PostgresTeam not renamed back to original name", 10, 5) + + # revert config change + revert_resync = { + "data": { + "resync_period": "4m", + "repair_period": "1m", + }, + } + k8s.update_config(revert_resync) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_config_update(self): + ''' + Change Postgres config under Spec.Postgresql.Parameters and Spec.Patroni + and query Patroni config endpoint to check if manifest changes got applied + via restarting cluster through Patroni's rest api + ''' + k8s = self.k8s + leader = k8s.get_cluster_leader_pod() + replica = k8s.get_cluster_replica_pod() + masterCreationTimestamp = leader.metadata.creation_timestamp + replicaCreationTimestamp = replica.metadata.creation_timestamp + new_max_connections_value = "50" + + # adjust Postgres config + pg_patch_config = { + "spec": { + "postgresql": { + "parameters": { + "max_connections": new_max_connections_value + } + }, + "patroni": { + "slots": { + "test_slot": { + "type": "physical" + } + }, + "ttl": 29, + "loop_wait": 9, + "retry_timeout": 9, + "synchronous_mode": True + } + } + } + + try: + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + def compare_config(): + effective_config = k8s.patroni_rest(leader.metadata.name, "config") + desired_config = pg_patch_config["spec"]["patroni"] + desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"] + effective_parameters = effective_config["postgresql"]["parameters"] + self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"], + "max_connections not updated") + self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added") + self.assertEqual(desired_config["ttl"], effective_config["ttl"], + "ttl not updated") + self.assertEqual(desired_config["loop_wait"], 
effective_config["loop_wait"], + "loop_wait not updated") + self.assertEqual(desired_config["retry_timeout"], effective_config["retry_timeout"], + "retry_timeout not updated") + self.assertEqual(desired_config["synchronous_mode"], effective_config["synchronous_mode"], + "synchronous_mode not updated") + return True + + # check if Patroni config has been updated + self.eventuallyTrue(compare_config, "Postgres config not applied") + + # make sure that pods were not recreated + leader = k8s.get_cluster_leader_pod() + replica = k8s.get_cluster_replica_pod() + self.assertEqual(masterCreationTimestamp, leader.metadata.creation_timestamp, + "Master pod creation timestamp is updated") + self.assertEqual(replicaCreationTimestamp, replica.metadata.creation_timestamp, + "Master pod creation timestamp is updated") + + # query max_connections setting + setting_query = """ + SELECT setting + FROM pg_settings + WHERE name = 'max_connections'; + """ + self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], new_max_connections_value, + "New max_connections setting not applied on master", 10, 5) + self.eventuallyNotEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, + "Expected max_connections not to be updated on replica since Postgres was restarted there first", 10, 5) + + # the next sync should restart the replica because it has pending_restart flag set + # force next sync by deleting the operator pod + k8s.delete_operator_pod() + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, + "New max_connections setting not applied on replica", 10, 5) + + # decrease max_connections again + # this time restart will be correct and new value should appear on both instances + lower_max_connections_value = "30" + pg_patch_max_connections = { + "spec": { + "postgresql": { + "parameters": { + "max_connections": lower_max_connections_value + } + } + } + } + + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + # check Patroni config again + pg_patch_config["spec"]["postgresql"]["parameters"]["max_connections"] = lower_max_connections_value + self.eventuallyTrue(compare_config, "Postgres config not applied") + + # and query max_connections setting again + self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, + "Previous max_connections setting not applied on master", 10, 5) + self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, + "Previous max_connections setting not applied on replica", 10, 5) + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + # make sure cluster is in a good state for further tests + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, + "No 2 pods running") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_cross_namespace_secrets(self): + ''' + Test secrets in 
different namespace + ''' + k8s = self.k8s + + # enable secret creation in separate namespace + patch_cross_namespace_secret = { + "data": { + "enable_cross_namespace_secret": "true" + } + } + k8s.update_config(patch_cross_namespace_secret, + step="cross namespace secrets enabled") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + + # create secret in test namespace + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'users':{ + 'test.db_user': [], + } + } + }) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), + 1, "Secret not created for user in namespace") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_enable_disable_connection_pooler(self): + ''' + For a database without connection pooler, then turns it on, scale up, + turn off and on again. Test with different ways of doing this (via + enableConnectionPooler or connectionPooler configuration section). At + the end turn connection pooler off to not interfere with other tests. + ''' + k8s = self.k8s + pooler_label = 'application=db-connection-pooler,cluster-name=acid-minimal-cluster' + master_pooler_label = 'connection-pooler=acid-minimal-cluster-pooler' + replica_pooler_label = master_pooler_label + '-repl' + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': True, + 'enableReplicaConnectionPooler': True, + } + }) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 2, "Deployment replicas is 2 default") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), 2, "No pooler pods found") + self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), 2, "No pooler replica pods found") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), 2, "No pooler service found") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), 1, "Pooler secret not created") + + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableMasterPoolerLoadBalancer': True, + 'enableReplicaPoolerLoadBalancer': True, + } + }) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.get_service_type(master_pooler_label+","+pooler_label), + 'LoadBalancer', + "Expected LoadBalancer service type for master pooler pod, found {}") + self.eventuallyEqual(lambda: k8s.get_service_type(replica_pooler_label+","+pooler_label), + 'LoadBalancer', + "Expected LoadBalancer service type for replica pooler pod, found {}") + + # Turn off only master connection pooler + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': False, + 'enableReplicaConnectionPooler': True, + } + }) + + 
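+ # Only the master pooler was disabled above, so the replica pooler deployment, one pooler service and the shared pooler secret should remain.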
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(name="acid-minimal-cluster-pooler-repl"), 2, + "Deployment replicas is 2 default") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 0, "Master pooler pods not deleted") + self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), + 2, "Pooler replica pods not found") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), + 1, "No pooler service found") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), + 1, "Secret not created") + + # Turn off only replica connection pooler + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': True, + 'enableReplicaConnectionPooler': False, + 'enableMasterPoolerLoadBalancer': False, + } + }) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 2, + "Deployment replicas is 2 default") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 2, "Master pooler pods not found") + self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), + 0, "Pooler replica pods not deleted") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), + 1, "No pooler service found") + self.eventuallyEqual(lambda: k8s.get_service_type(master_pooler_label+","+pooler_label), + 'ClusterIP', + "Expected LoadBalancer service type for master, found {}") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), + 1, "Secret not created") + + # scale up connection pooler deployment + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'connectionPooler': { + 'numberOfInstances': 3, + }, + } + }) + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 3, + "Deployment replicas is scaled to 3") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 3, "Scale up of pooler pods does not work") + + # turn it off, keeping config should be overwritten by false + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': False, + 'enableReplicaConnectionPooler': False, + 'enableReplicaPoolerLoadBalancer': False, + } + }) + + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 0, "Pooler pods not scaled down") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), + 0, "Pooler service not removed") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label('application=spilo,cluster-name=acid-minimal-cluster'), + 4, "Secrets not deleted") + + # Verify that all the databases have pooler schema installed. + # Do this via psql, since otherwise we need to deal with + # credentials. 
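+ # list_databases() and query_database() are helpers on this test class that run psql inside the given pod, so no extra credentials are needed.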
+ db_list = [] + + leader = k8s.get_cluster_leader_pod() + schemas_query = """ + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name = 'pooler' + """ + + db_list = self.list_databases(leader.metadata.name) + for db in db_list: + self.eventuallyNotEqual(lambda: len(self.query_database(leader.metadata.name, db, schemas_query)), 0, + "Pooler schema not found in database {}".format(db)) + + # remove config section to make test work next time + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'connectionPooler': None, + 'EnableReplicaConnectionPooler': False, + } + }) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_enable_load_balancer(self): + ''' + Test if services are updated when enabling/disabling load balancers in Postgres manifest + ''' + + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role={}' + + self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")), + 'ClusterIP', + "Expected ClusterIP type initially, found {}") + + try: + # enable load balancer services + pg_patch_enable_lbs = { + "spec": { + "enableMasterLoadBalancer": True, + "enableReplicaLoadBalancer": True + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_lbs) + + self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")), + 'LoadBalancer', + "Expected LoadBalancer service type for master, found {}") + + self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("replica")), + 'LoadBalancer', + "Expected LoadBalancer service type for master, found {}") + + # disable load balancer services again + pg_patch_disable_lbs = { + "spec": { + "enableMasterLoadBalancer": False, + "enableReplicaLoadBalancer": False + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_lbs) + + self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")), + 'ClusterIP', + "Expected LoadBalancer service type for master, found {}") + + self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("replica")), + 'ClusterIP', + "Expected LoadBalancer service type for master, found {}") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_infrastructure_roles(self): + ''' + Test using external secrets for infrastructure roles + ''' + k8s = self.k8s + # update infrastructure roles description + secret_name = "postgresql-infrastructure-roles" + roles = "secretname: postgresql-infrastructure-roles-new, userkey: user,"\ + "rolekey: memberof, passwordkey: password, defaultrolevalue: robot_zmon" + patch_infrastructure_roles = { + "data": { + "infrastructure_roles_secret_name": secret_name, + "infrastructure_roles_secrets": roles, + }, + } + k8s.update_config(patch_infrastructure_roles) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + + try: + # check that new roles are represented in the config by requesting the + # operator configuration via API + + def verify_role(): + try: + operator_pod = k8s.get_operator_pod() + get_config_cmd = "wget --quiet -O - localhost:8080/config" + result = 
k8s.exec_with_kubectl(operator_pod.metadata.name, + get_config_cmd) + try: + roles_dict = (json.loads(result.stdout) + .get("controller", {}) + .get("InfrastructureRoles")) + except: + return False + + if "robot_zmon_acid_monitoring_new" in roles_dict: + role = roles_dict["robot_zmon_acid_monitoring_new"] + role.pop("Password", None) + self.assertDictEqual(role, { + "Name": "robot_zmon_acid_monitoring_new", + "Namespace":"", + "Flags": None, + "MemberOf": ["robot_zmon"], + "Parameters": None, + "AdminRole": "", + "Origin": 2, + "IsDbOwner": False, + "Deleted": False + }) + return True + except: + pass + + return False + + self.eventuallyTrue(verify_role, "infrastructure role setup is not loaded") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_lazy_spilo_upgrade(self): + ''' + Test lazy upgrade for the Spilo image: operator changes a stateful set + but lets pods run with the old image until they are recreated for + reasons other than operator's activity. That works because the operator + configures stateful sets to use "onDelete" pod update policy. + The test covers: + 1) enabling lazy upgrade in existing operator deployment + 2) forcing the normal rolling upgrade by changing the operator + configmap and restarting its pod + ''' + + k8s = self.k8s + + pod0 = 'acid-minimal-cluster-0' + pod1 = 'acid-minimal-cluster-1' + + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, + "No 2 pods running") + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), + 2, "Postgres status did not enter running") + + patch_lazy_spilo_upgrade = { + "data": { + "docker_image": SPILO_CURRENT, + "enable_lazy_spilo_upgrade": "false" + } + } + k8s.update_config(patch_lazy_spilo_upgrade, + step="Init baseline image version") + + self.eventuallyEqual(lambda: k8s.get_statefulset_image(), SPILO_CURRENT, + "Statefulset not updated initially") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, + "No 2 pods running") + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), + 2, "Postgres status did not enter running") + + self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), + SPILO_CURRENT, "Rolling upgrade was not executed") + self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), + SPILO_CURRENT, "Rolling upgrade was not executed") + + # update docker image in config and enable the lazy upgrade + conf_image = SPILO_LAZY + patch_lazy_spilo_upgrade = { + "data": { + "docker_image": conf_image, + "enable_lazy_spilo_upgrade": "true" + } + } + k8s.update_config(patch_lazy_spilo_upgrade, + step="patch image and lazy upgrade") + self.eventuallyEqual(lambda: k8s.get_statefulset_image(), conf_image, + "Statefulset not updated to next Docker image") + + try: + # restart the pod to get a container with the new image + k8s.api.core_v1.delete_namespaced_pod(pod0, 'default') + + # verify only pod-0 which was deleted got new image from statefulset + self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), + conf_image, "Delete pod-0 did not get new spilo image") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, + "No two pods running after lazy rolling upgrade") + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), + 2, "Postgres status did not enter running") + self.assertNotEqual(lambda: k8s.get_effective_pod_image(pod1), + SPILO_CURRENT, + "pod-1 should not have change Docker image to 
{}".format(SPILO_CURRENT)) + + # clean up + unpatch_lazy_spilo_upgrade = { + "data": { + "enable_lazy_spilo_upgrade": "false", + } + } + k8s.update_config(unpatch_lazy_spilo_upgrade, step="patch lazy upgrade") + + # at this point operator will complete the normal rolling upgrade + # so we additionally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works + self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), + conf_image, "Rolling upgrade was not executed", + 50, 3) + self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), + conf_image, "Rolling upgrade was not executed", + 50, 3) + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), + 2, "Postgres status did not enter running") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_logical_backup_cron_job(self): + ''' + Ensure we can (a) create the cron job at user request for a specific PG cluster + (b) update the cluster-wide image for the logical backup pod + (c) delete the job at user request + Limitations: + (a) Does not run the actual batch job because there is no S3 mock to upload backups to + (b) Assumes 'acid-minimal-cluster' exists as defined in setUp + ''' + + k8s = self.k8s + + # create the cron job + schedule = "7 7 7 7 *" + pg_patch_enable_backup = { + "spec": { + "enableLogicalBackup": True, + "logicalBackupSchedule": schedule + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_backup) + + try: + self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 1, "failed to create logical backup job") + + job = k8s.get_logical_backup_job().items[0] + self.assertEqual(job.metadata.name, "logical-backup-acid-minimal-cluster", + "Expected job name {}, found {}" + .format("logical-backup-acid-minimal-cluster", job.metadata.name)) + self.assertEqual(job.spec.schedule, schedule, + "Expected {} schedule, found {}" + .format(schedule, job.spec.schedule)) + + # update the cluster-wide image of the logical backup pod + image = "test-image-name" + patch_logical_backup_image = { + "data": { + "logical_backup_docker_image": image, + } + } + k8s.update_config(patch_logical_backup_image, step="patch logical backup image") + + def get_docker_image(): + jobs = k8s.get_logical_backup_job().items + return jobs[0].spec.job_template.spec.template.spec.containers[0].image + + self.eventuallyEqual(get_docker_image, image, + "Expected job image {}, found {}".format(image, "{}")) + + # delete the logical backup cron job + pg_patch_disable_backup = { + "spec": { + "enableLogicalBackup": False, + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_backup) + + self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 0, "failed to create logical backup job") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + # ensure cluster is healthy after tests + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + @unittest.skip("Skipping this test until fixed") + def test_major_version_upgrade(self): + k8s = self.k8s + result = 
k8s.create_with_kubectl("manifests/minimal-postgres-manifest-12.yaml") + self.eventuallyEqual(lambda: k8s.count_running_pods(labels="application=spilo,cluster-name=acid-upgrade-test"), 2, "No 2 pods running") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + pg_patch_version = { + "spec": { + "postgresql": { + "version": "14" + } + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + def check_version_14(): + p = k8s.get_patroni_state("acid-upgrade-test-0") + version = p["server_version"][0:2] + return version + + self.eventuallyEqual(check_version_14, "14", "Version was not upgraded to 14") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_min_resource_limits(self): '''