resolve conflicts
commit 22d3c72eb3
				|  | @ -343,6 +343,8 @@ spec: | ||||||
|                     type: boolean |                     type: boolean | ||||||
|                   synchronous_mode_strict: |                   synchronous_mode_strict: | ||||||
|                     type: boolean |                     type: boolean | ||||||
|  |                   synchronous_node_count: | ||||||
|  |                     type: integer | ||||||
|                   ttl: |                   ttl: | ||||||
|                     type: integer |                     type: integer | ||||||
|               podAnnotations: |               podAnnotations: | ||||||
|  |  | ||||||
|  | @ -306,10 +306,10 @@ The interval of days can be set with `password_rotation_interval` (default | ||||||
| are replaced in the K8s secret. They belong to a newly created user named after | are replaced in the K8s secret. They belong to a newly created user named after | ||||||
| the original role plus rotation date in YYMMDD format. All privileges are | the original role plus rotation date in YYMMDD format. All privileges are | ||||||
| inherited meaning that migration scripts should still grant and revoke rights | inherited meaning that migration scripts should still grant and revoke rights | ||||||
| against the original role. The timestamp of the next rotation is written to the | against the original role. The timestamp of the next rotation (in RFC 3339 | ||||||
| secret as well. Note, if the rotation interval is decreased it is reflected in | format, UTC timezone) is written to the secret as well. Note, if the rotation | ||||||
| the secrets only if the next rotation date is more days away than the new | interval is decreased it is reflected in the secrets only if the next rotation | ||||||
| length of the interval. | date is more days away than the new length of the interval. | ||||||
| 
 | 
 | ||||||
| Pods still using the previous secret values which they keep in memory continue | Pods still using the previous secret values which they keep in memory continue | ||||||
| to connect to the database since the password of the corresponding user is not | to connect to the database since the password of the corresponding user is not | ||||||
|  |  | ||||||
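To make the rotation scheme described in the documentation change above concrete, here is a minimal Go sketch of deriving the rotation username (original role plus date in YYMMDD format) and the next-rotation timestamp (RFC 3339, UTC). The role name and the 90-day interval are hypothetical illustration values, not taken from the commit.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// hypothetical illustration values
	role := "foo_user"
	rotationIntervalDays := 90

	now := time.Now().UTC()

	// rotated credentials belong to a user named after the original role
	// plus the rotation date in YYMMDD format
	rotationUser := role + now.Format("060102")

	// the timestamp of the next rotation is written to the secret
	// in RFC 3339 format, UTC timezone
	nextRotation := now.AddDate(0, 0, rotationIntervalDays).Format(time.RFC3339)

	fmt.Println(rotationUser)  // e.g. foo_user240101
	fmt.Println(nextRotation)  // e.g. 2024-03-31T12:00:00Z
}
```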
|  | @ -303,6 +303,9 @@ explanation of `ttl` and `loop_wait` parameters. | ||||||
| * **synchronous_mode_strict** | * **synchronous_mode_strict** | ||||||
|   Patroni `synchronous_mode_strict` parameter value. Can be used in addition to `synchronous_mode`. The default is set to `false`. Optional. |   Patroni `synchronous_mode_strict` parameter value. Can be used in addition to `synchronous_mode`. The default is set to `false`. Optional. | ||||||
| 
 | 
 | ||||||
|  | * **synchronous_node_count** | ||||||
|  |   Patroni `synchronous_node_count` parameter value. Note, this option is only available for Spilo images with Patroni 2.0+. The default is set to `1`. Optional. | ||||||
|  |    | ||||||
| ## Postgres container resources | ## Postgres container resources | ||||||
| 
 | 
 | ||||||
| Those parameters define [CPU and memory requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | Those parameters define [CPU and memory requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) | ||||||
|  |  | ||||||
|  | @ -205,7 +205,7 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|                 "enable_team_member_deprecation": "true", |                 "enable_team_member_deprecation": "true", | ||||||
|                 "role_deletion_suffix": "_delete_me", |                 "role_deletion_suffix": "_delete_me", | ||||||
|                 "resync_period": "15s", |                 "resync_period": "15s", | ||||||
|                 "repair_period": "10s", |                 "repair_period": "15s", | ||||||
|             }, |             }, | ||||||
|         } |         } | ||||||
|         k8s.update_config(enable_postgres_team_crd) |         k8s.update_config(enable_postgres_team_crd) | ||||||
|  | @ -296,6 +296,133 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, |         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, | ||||||
|                              "Operator does not get in sync") |                              "Operator does not get in sync") | ||||||
| 
 | 
 | ||||||
|  |     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||||
|  |     def test_config_update(self): | ||||||
|  |         ''' | ||||||
|  |             Change Postgres config under Spec.Postgresql.Parameters and Spec.Patroni | ||||||
|  |             and query Patroni config endpoint to check if manifest changes got applied | ||||||
|  |             via restarting cluster through Patroni's rest api | ||||||
|  |         ''' | ||||||
|  |         k8s = self.k8s | ||||||
|  |         leader = k8s.get_cluster_leader_pod() | ||||||
|  |         replica = k8s.get_cluster_replica_pod() | ||||||
|  |         masterCreationTimestamp = leader.metadata.creation_timestamp | ||||||
|  |         replicaCreationTimestamp = replica.metadata.creation_timestamp | ||||||
|  |         new_max_connections_value = "50" | ||||||
|  | 
 | ||||||
|  |         # adjust Postgres config | ||||||
|  |         pg_patch_config = { | ||||||
|  |             "spec": { | ||||||
|  |                 "postgresql": { | ||||||
|  |                     "parameters": { | ||||||
|  |                         "max_connections": new_max_connections_value | ||||||
|  |                      } | ||||||
|  |                  }, | ||||||
|  |                  "patroni": { | ||||||
|  |                     "slots": { | ||||||
|  |                         "test_slot": { | ||||||
|  |                             "type": "physical" | ||||||
|  |                         } | ||||||
|  |                     }, | ||||||
|  |                     "ttl": 29, | ||||||
|  |                     "loop_wait": 9, | ||||||
|  |                     "retry_timeout": 9, | ||||||
|  |                     "synchronous_mode": True | ||||||
|  |                  } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         try: | ||||||
|  |             k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||||
|  |                 "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config) | ||||||
|  |              | ||||||
|  |             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||||
|  | 
 | ||||||
|  |             def compare_config(): | ||||||
|  |                 effective_config = k8s.patroni_rest(leader.metadata.name, "config") | ||||||
|  |                 desired_config = pg_patch_config["spec"]["patroni"] | ||||||
|  |                 desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"] | ||||||
|  |                 effective_parameters = effective_config["postgresql"]["parameters"] | ||||||
|  |                 self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"], | ||||||
|  |                             "max_connections not updated") | ||||||
|  |                 self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added") | ||||||
|  |                 self.assertEqual(desired_config["ttl"], effective_config["ttl"], | ||||||
|  |                             "ttl not updated") | ||||||
|  |                 self.assertEqual(desired_config["loop_wait"], effective_config["loop_wait"], | ||||||
|  |                             "loop_wait not updated") | ||||||
|  |                 self.assertEqual(desired_config["retry_timeout"], effective_config["retry_timeout"], | ||||||
|  |                             "retry_timeout not updated") | ||||||
|  |                 self.assertEqual(desired_config["synchronous_mode"], effective_config["synchronous_mode"], | ||||||
|  |                             "synchronous_mode not updated") | ||||||
|  |                 return True | ||||||
|  | 
 | ||||||
|  |             # check if Patroni config has been updated | ||||||
|  |             self.eventuallyTrue(compare_config, "Postgres config not applied") | ||||||
|  | 
 | ||||||
|  |             # make sure that pods were not recreated | ||||||
|  |             leader = k8s.get_cluster_leader_pod() | ||||||
|  |             replica = k8s.get_cluster_replica_pod() | ||||||
|  |             self.assertEqual(masterCreationTimestamp, leader.metadata.creation_timestamp, | ||||||
|  |                             "Master pod creation timestamp is updated") | ||||||
|  |             self.assertEqual(replicaCreationTimestamp, replica.metadata.creation_timestamp, | ||||||
|  |                             "Replica pod creation timestamp is updated") | ||||||
|  | 
 | ||||||
|  |             # query max_connections setting | ||||||
|  |             setting_query = """ | ||||||
|  |                SELECT setting | ||||||
|  |                  FROM pg_settings | ||||||
|  |                 WHERE name = 'max_connections'; | ||||||
|  |             """ | ||||||
|  |             self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], new_max_connections_value, | ||||||
|  |                 "New max_connections setting not applied on master", 10, 5) | ||||||
|  |             self.eventuallyNotEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, | ||||||
|  |                 "Expected max_connections not to be updated on replica since Postgres was restarted there first", 10, 5) | ||||||
|  | 
 | ||||||
|  |             # the next sync should restart the replica because it has pending_restart flag set | ||||||
|  |             # force next sync by deleting the operator pod | ||||||
|  |             k8s.delete_operator_pod() | ||||||
|  |             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||||
|  | 
 | ||||||
|  |             self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, | ||||||
|  |                 "New max_connections setting not applied on replica", 10, 5) | ||||||
|  | 
 | ||||||
|  |             # decrease max_connections again | ||||||
|  |             # this time restart will be correct and new value should appear on both instances | ||||||
|  |             lower_max_connections_value = "30" | ||||||
|  |             pg_patch_max_connections = { | ||||||
|  |                 "spec": { | ||||||
|  |                     "postgresql": { | ||||||
|  |                         "parameters": { | ||||||
|  |                             "max_connections": lower_max_connections_value | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||||
|  |                 "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) | ||||||
|  | 
 | ||||||
|  |             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||||
|  | 
 | ||||||
|  |             # check Patroni config again | ||||||
|  |             pg_patch_config["spec"]["postgresql"]["parameters"]["max_connections"] = lower_max_connections_value | ||||||
|  |             self.eventuallyTrue(compare_config, "Postgres config not applied") | ||||||
|  | 
 | ||||||
|  |             # and query max_connections setting again | ||||||
|  |             self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, | ||||||
|  |                 "Previous max_connections setting not applied on master", 10, 5) | ||||||
|  |             self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, | ||||||
|  |                 "Previous max_connections setting not applied on replica", 10, 5) | ||||||
|  | 
 | ||||||
|  |         except timeout_decorator.TimeoutError: | ||||||
|  |             print('Operator log: {}'.format(k8s.get_operator_log())) | ||||||
|  |             raise | ||||||
|  | 
 | ||||||
|  |         # make sure cluster is in a good state for further tests | ||||||
|  |         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||||
|  |         self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, | ||||||
|  |                              "No 2 pods running") | ||||||
|  | 
 | ||||||
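For readers who want to check the same thing by hand, here is a small standalone Go sketch that queries Patroni's config endpoint, similar to what `k8s.patroni_rest(leader.metadata.name, "config")` does in the test above. The address is an assumption about the environment (8008 is Patroni's default REST API port, reachable e.g. via `kubectl port-forward` to the leader pod):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// assumption: the leader pod's Patroni REST API is forwarded to localhost:8008
	resp, err := http.Get("http://localhost:8008/config")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var cfg map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&cfg); err != nil {
		panic(err)
	}

	// the fields the e2e test compares against the manifest
	fmt.Println("ttl:", cfg["ttl"])
	fmt.Println("loop_wait:", cfg["loop_wait"])
	fmt.Println("retry_timeout:", cfg["retry_timeout"])
	fmt.Println("synchronous_mode:", cfg["synchronous_mode"])
}
```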
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) |     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||||
|     def test_cross_namespace_secrets(self): |     def test_cross_namespace_secrets(self): | ||||||
|         ''' |         ''' | ||||||
|  | @ -794,7 +921,11 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|         Lower resource limits below configured minimum and let operator fix it |         Lower resource limits below configured minimum and let operator fix it | ||||||
|         ''' |         ''' | ||||||
|         k8s = self.k8s |         k8s = self.k8s | ||||||
|         # self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Cluster not healthy at start") |         cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' | ||||||
|  | 
 | ||||||
|  |         # get nodes of master and replica(s) (expected target of new master) | ||||||
|  |         _, replica_nodes = k8s.get_pg_nodes(cluster_label) | ||||||
|  |         self.assertNotEqual(replica_nodes, []) | ||||||
| 
 | 
 | ||||||
|         # configure minimum boundaries for CPU and memory limits |         # configure minimum boundaries for CPU and memory limits | ||||||
|         minCPULimit = '503m' |         minCPULimit = '503m' | ||||||
|  | @ -827,7 +958,9 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) |             "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) | ||||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") |         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||||
| 
 | 
 | ||||||
|         self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No two pods running after lazy rolling upgrade") |         # wait for the switchover | ||||||
|  |         k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) | ||||||
|  |         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||||
|         self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members()), 2, "Postgres status did not enter running") |         self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members()), 2, "Postgres status did not enter running") | ||||||
| 
 | 
 | ||||||
|         def verify_pod_limits(): |         def verify_pod_limits(): | ||||||
|  | @ -968,15 +1101,15 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") |             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||||
| 
 | 
 | ||||||
|             # node affinity change should cause another rolling update and relocation of replica |             # node affinity change should cause another rolling update and relocation of replica | ||||||
|             k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=replica,' + cluster_label) |  | ||||||
|             k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) |             k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||||
|  |             k8s.wait_for_pod_start('spilo-role=master,' + cluster_label) | ||||||
| 
 | 
 | ||||||
|         except timeout_decorator.TimeoutError: |         except timeout_decorator.TimeoutError: | ||||||
|             print('Operator log: {}'.format(k8s.get_operator_log())) |             print('Operator log: {}'.format(k8s.get_operator_log())) | ||||||
|             raise |             raise | ||||||
| 
 | 
 | ||||||
|         # toggle pod anti affinity to make sure replica and master run on separate nodes |         # toggle pod anti affinity to make sure replica and master run on separate nodes | ||||||
|         self.assert_distributed_pods(replica_nodes) |         self.assert_distributed_pods(master_nodes) | ||||||
| 
 | 
 | ||||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) |     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||||
|     def test_node_readiness_label(self): |     def test_node_readiness_label(self): | ||||||
|  | @ -1099,7 +1232,7 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
| 
 | 
 | ||||||
|         # check if next rotation date was set in secret |         # check if next rotation date was set in secret | ||||||
|         secret_data = k8s.get_secret_data("zalando") |         secret_data = k8s.get_secret_data("zalando") | ||||||
|         next_rotation_timestamp = datetime.fromisoformat(str(base64.b64decode(secret_data["nextRotation"]), 'utf-8')) |         next_rotation_timestamp = datetime.strptime(str(base64.b64decode(secret_data["nextRotation"]), 'utf-8'), "%Y-%m-%dT%H:%M:%SZ") | ||||||
|         today90days = today+timedelta(days=90) |         today90days = today+timedelta(days=90) | ||||||
|         self.assertEqual(today90days, next_rotation_timestamp.date(), |         self.assertEqual(today90days, next_rotation_timestamp.date(), | ||||||
|                         "Unexpected rotation date in secret of zalando user: expected {}, got {}".format(today90days, next_rotation_timestamp.date())) |                         "Unexpected rotation date in secret of zalando user: expected {}, got {}".format(today90days, next_rotation_timestamp.date())) | ||||||
|  | @ -1114,7 +1247,7 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|         self.query_database(leader.metadata.name, "postgres", create_fake_rotation_user) |         self.query_database(leader.metadata.name, "postgres", create_fake_rotation_user) | ||||||
| 
 | 
 | ||||||
|         # patch foo_user secret with outdated rotation date |         # patch foo_user secret with outdated rotation date | ||||||
|         fake_rotation_date = today.isoformat() + ' 00:00:00' |         fake_rotation_date = today.isoformat() + 'T00:00:00Z' | ||||||
|         fake_rotation_date_encoded = base64.b64encode(fake_rotation_date.encode('utf-8')) |         fake_rotation_date_encoded = base64.b64encode(fake_rotation_date.encode('utf-8')) | ||||||
|         secret_fake_rotation = { |         secret_fake_rotation = { | ||||||
|             "data": { |             "data": { | ||||||
|  | @ -1142,7 +1275,7 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|         # check if next rotation date and username have been replaced |         # check if next rotation date and username have been replaced | ||||||
|         secret_data = k8s.get_secret_data("foo_user") |         secret_data = k8s.get_secret_data("foo_user") | ||||||
|         secret_username = str(base64.b64decode(secret_data["username"]), 'utf-8') |         secret_username = str(base64.b64decode(secret_data["username"]), 'utf-8') | ||||||
|         next_rotation_timestamp = datetime.fromisoformat(str(base64.b64decode(secret_data["nextRotation"]), 'utf-8')) |         next_rotation_timestamp = datetime.strptime(str(base64.b64decode(secret_data["nextRotation"]), 'utf-8'), "%Y-%m-%dT%H:%M:%SZ") | ||||||
|         rotation_user = "foo_user"+today.strftime("%y%m%d") |         rotation_user = "foo_user"+today.strftime("%y%m%d") | ||||||
|         today30days = today+timedelta(days=30) |         today30days = today+timedelta(days=30) | ||||||
| 
 | 
 | ||||||
|  | @ -1192,133 +1325,6 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|         self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, |         self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, | ||||||
|             "Found incorrect number of rotation users", 10, 5) |             "Found incorrect number of rotation users", 10, 5) | ||||||
| 
 | 
 | ||||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) |  | ||||||
|     def test_patroni_config_update(self): |  | ||||||
|         ''' |  | ||||||
|             Change Postgres config under Spec.Postgresql.Parameters and Spec.Patroni |  | ||||||
|             and query Patroni config endpoint to check if manifest changes got applied |  | ||||||
|             via restarting cluster through Patroni's rest api |  | ||||||
|         ''' |  | ||||||
|         k8s = self.k8s |  | ||||||
|         leader = k8s.get_cluster_leader_pod() |  | ||||||
|         replica = k8s.get_cluster_replica_pod() |  | ||||||
|         masterCreationTimestamp = leader.metadata.creation_timestamp |  | ||||||
|         replicaCreationTimestamp = replica.metadata.creation_timestamp |  | ||||||
|         new_max_connections_value = "50" |  | ||||||
| 
 |  | ||||||
|         # adjust Postgres config |  | ||||||
|         pg_patch_config = { |  | ||||||
|             "spec": { |  | ||||||
|                 "postgresql": { |  | ||||||
|                     "parameters": { |  | ||||||
|                         "max_connections": new_max_connections_value |  | ||||||
|                      } |  | ||||||
|                  }, |  | ||||||
|                  "patroni": { |  | ||||||
|                     "slots": { |  | ||||||
|                         "test_slot": { |  | ||||||
|                             "type": "physical" |  | ||||||
|                         } |  | ||||||
|                     }, |  | ||||||
|                     "ttl": 29, |  | ||||||
|                     "loop_wait": 9, |  | ||||||
|                     "retry_timeout": 9, |  | ||||||
|                     "synchronous_mode": True |  | ||||||
|                  } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         try: |  | ||||||
|             k8s.api.custom_objects_api.patch_namespaced_custom_object( |  | ||||||
|                 "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config) |  | ||||||
|              |  | ||||||
|             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") |  | ||||||
| 
 |  | ||||||
|             def compare_config(): |  | ||||||
|                 effective_config = k8s.patroni_rest(leader.metadata.name, "config") |  | ||||||
|                 desired_config = pg_patch_config["spec"]["patroni"] |  | ||||||
|                 desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"] |  | ||||||
|                 effective_parameters = effective_config["postgresql"]["parameters"] |  | ||||||
|                 self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"], |  | ||||||
|                             "max_connections not updated") |  | ||||||
|                 self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added") |  | ||||||
|                 self.assertEqual(desired_config["ttl"], effective_config["ttl"], |  | ||||||
|                             "ttl not updated") |  | ||||||
|                 self.assertEqual(desired_config["loop_wait"], effective_config["loop_wait"], |  | ||||||
|                             "loop_wait not updated") |  | ||||||
|                 self.assertEqual(desired_config["retry_timeout"], effective_config["retry_timeout"], |  | ||||||
|                             "retry_timeout not updated") |  | ||||||
|                 self.assertEqual(desired_config["synchronous_mode"], effective_config["synchronous_mode"], |  | ||||||
|                             "synchronous_mode not updated") |  | ||||||
|                 return True |  | ||||||
| 
 |  | ||||||
|             # check if Patroni config has been updated |  | ||||||
|             self.eventuallyTrue(compare_config, "Postgres config not applied") |  | ||||||
| 
 |  | ||||||
|             # make sure that pods were not recreated |  | ||||||
|             leader = k8s.get_cluster_leader_pod() |  | ||||||
|             replica = k8s.get_cluster_replica_pod() |  | ||||||
|             self.assertEqual(masterCreationTimestamp, leader.metadata.creation_timestamp, |  | ||||||
|                             "Master pod creation timestamp is updated") |  | ||||||
|             self.assertEqual(replicaCreationTimestamp, replica.metadata.creation_timestamp, |  | ||||||
|                             "Master pod creation timestamp is updated") |  | ||||||
| 
 |  | ||||||
|             # query max_connections setting |  | ||||||
|             setting_query = """ |  | ||||||
|                SELECT setting |  | ||||||
|                  FROM pg_settings |  | ||||||
|                 WHERE name = 'max_connections'; |  | ||||||
|             """ |  | ||||||
|             self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], new_max_connections_value, |  | ||||||
|                 "New max_connections setting not applied on master", 10, 5) |  | ||||||
|             self.eventuallyNotEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, |  | ||||||
|                 "Expected max_connections not to be updated on replica since Postgres was restarted there first", 10, 5) |  | ||||||
| 
 |  | ||||||
|             # the next sync should restart the replica because it has pending_restart flag set |  | ||||||
|             # force next sync by deleting the operator pod |  | ||||||
|             k8s.delete_operator_pod() |  | ||||||
|             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") |  | ||||||
| 
 |  | ||||||
|             self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, |  | ||||||
|                 "New max_connections setting not applied on replica", 10, 5) |  | ||||||
| 
 |  | ||||||
|             # decrease max_connections again |  | ||||||
|             # this time restart will be correct and new value should appear on both instances |  | ||||||
|             lower_max_connections_value = "30" |  | ||||||
|             pg_patch_max_connections = { |  | ||||||
|                 "spec": { |  | ||||||
|                     "postgresql": { |  | ||||||
|                         "parameters": { |  | ||||||
|                             "max_connections": lower_max_connections_value |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             k8s.api.custom_objects_api.patch_namespaced_custom_object( |  | ||||||
|                 "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) |  | ||||||
| 
 |  | ||||||
|             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") |  | ||||||
| 
 |  | ||||||
|             # check Patroni config again |  | ||||||
|             pg_patch_config["spec"]["postgresql"]["parameters"]["max_connections"] = lower_max_connections_value |  | ||||||
|             self.eventuallyTrue(compare_config, "Postgres config not applied") |  | ||||||
| 
 |  | ||||||
|             # and query max_connections setting again |  | ||||||
|             self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, |  | ||||||
|                 "Previous max_connections setting not applied on master", 10, 5) |  | ||||||
|             self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, |  | ||||||
|                 "Previous max_connections setting not applied on replica", 10, 5) |  | ||||||
| 
 |  | ||||||
|         except timeout_decorator.TimeoutError: |  | ||||||
|             print('Operator log: {}'.format(k8s.get_operator_log())) |  | ||||||
|             raise |  | ||||||
| 
 |  | ||||||
|         # make sure cluster is in a good state for further tests |  | ||||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") |  | ||||||
|         self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, |  | ||||||
|                              "No 2 pods running") |  | ||||||
| 
 |  | ||||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) |     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||||
|     def test_rolling_update_flag(self): |     def test_rolling_update_flag(self): | ||||||
|         ''' |         ''' | ||||||
|  | @ -1405,7 +1411,7 @@ class EndToEndTestCase(unittest.TestCase): | ||||||
|             "data": { |             "data": { | ||||||
|                 "pod_label_wait_timeout": "2s", |                 "pod_label_wait_timeout": "2s", | ||||||
|                 "resync_period": "30s", |                 "resync_period": "30s", | ||||||
|                 "repair_period": "10s", |                 "repair_period": "30s", | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -124,6 +124,7 @@ spec: | ||||||
|     retry_timeout: 10 |     retry_timeout: 10 | ||||||
|     synchronous_mode: false |     synchronous_mode: false | ||||||
|     synchronous_mode_strict: false |     synchronous_mode_strict: false | ||||||
|  |     synchronous_node_count: 1 | ||||||
|     maximum_lag_on_failover: 33554432 |     maximum_lag_on_failover: 33554432 | ||||||
| 
 | 
 | ||||||
| # restore a Postgres DB with point-in-time-recovery | # restore a Postgres DB with point-in-time-recovery | ||||||
|  |  | ||||||
|  | @ -341,6 +341,8 @@ spec: | ||||||
|                     type: boolean |                     type: boolean | ||||||
|                   synchronous_mode_strict: |                   synchronous_mode_strict: | ||||||
|                     type: boolean |                     type: boolean | ||||||
|  |                   synchronous_node_count: | ||||||
|  |                     type: integer | ||||||
|                   ttl: |                   ttl: | ||||||
|                     type: integer |                     type: integer | ||||||
|               podAnnotations: |               podAnnotations: | ||||||
|  |  | ||||||
|  | @ -534,6 +534,9 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ | ||||||
| 							"synchronous_mode_strict": { | 							"synchronous_mode_strict": { | ||||||
| 								Type: "boolean", | 								Type: "boolean", | ||||||
| 							}, | 							}, | ||||||
|  | 							"synchronous_node_count": { | ||||||
|  | 								Type: "integer", | ||||||
|  | 							}, | ||||||
| 							"ttl": { | 							"ttl": { | ||||||
| 								Type: "integer", | 								Type: "integer", | ||||||
| 							}, | 							}, | ||||||
|  |  | ||||||
|  | @ -165,6 +165,7 @@ type Patroni struct { | ||||||
| 	Slots                 map[string]map[string]string `json:"slots,omitempty"` | 	Slots                 map[string]map[string]string `json:"slots,omitempty"` | ||||||
| 	SynchronousMode       bool                         `json:"synchronous_mode,omitempty"` | 	SynchronousMode       bool                         `json:"synchronous_mode,omitempty"` | ||||||
| 	SynchronousModeStrict bool                         `json:"synchronous_mode_strict,omitempty"` | 	SynchronousModeStrict bool                         `json:"synchronous_mode_strict,omitempty"` | ||||||
|  | 	SynchronousNodeCount  uint32                       `json:"synchronous_node_count,omitempty" defaults:1` | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // StandbyDescription contains s3 wal path
 | // StandbyDescription contains s3 wal path
 | ||||||
|  |  | ||||||
|  | @ -50,6 +50,7 @@ type patroniDCS struct { | ||||||
| 	MaximumLagOnFailover     float32                      `json:"maximum_lag_on_failover,omitempty"` | 	MaximumLagOnFailover     float32                      `json:"maximum_lag_on_failover,omitempty"` | ||||||
| 	SynchronousMode          bool                         `json:"synchronous_mode,omitempty"` | 	SynchronousMode          bool                         `json:"synchronous_mode,omitempty"` | ||||||
| 	SynchronousModeStrict    bool                         `json:"synchronous_mode_strict,omitempty"` | 	SynchronousModeStrict    bool                         `json:"synchronous_mode_strict,omitempty"` | ||||||
|  | 	SynchronousNodeCount     uint32                       `json:"synchronous_node_count,omitempty"` | ||||||
| 	PGBootstrapConfiguration map[string]interface{}       `json:"postgresql,omitempty"` | 	PGBootstrapConfiguration map[string]interface{}       `json:"postgresql,omitempty"` | ||||||
| 	Slots                    map[string]map[string]string `json:"slots,omitempty"` | 	Slots                    map[string]map[string]string `json:"slots,omitempty"` | ||||||
| } | } | ||||||
|  | @ -262,6 +263,9 @@ PatroniInitDBParams: | ||||||
| 	if patroni.SynchronousModeStrict { | 	if patroni.SynchronousModeStrict { | ||||||
| 		config.Bootstrap.DCS.SynchronousModeStrict = patroni.SynchronousModeStrict | 		config.Bootstrap.DCS.SynchronousModeStrict = patroni.SynchronousModeStrict | ||||||
| 	} | 	} | ||||||
|  | 	if patroni.SynchronousNodeCount >= 1 { | ||||||
|  | 		config.Bootstrap.DCS.SynchronousNodeCount = patroni.SynchronousNodeCount | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	config.PgLocalConfiguration = make(map[string]interface{}) | 	config.PgLocalConfiguration = make(map[string]interface{}) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -91,11 +91,12 @@ func TestGenerateSpiloJSONConfiguration(t *testing.T) { | ||||||
| 				MaximumLagOnFailover:  33554432, | 				MaximumLagOnFailover:  33554432, | ||||||
| 				SynchronousMode:       true, | 				SynchronousMode:       true, | ||||||
| 				SynchronousModeStrict: true, | 				SynchronousModeStrict: true, | ||||||
|  | 				SynchronousNodeCount:  1, | ||||||
| 				Slots:                 map[string]map[string]string{"permanent_logical_1": {"type": "logical", "database": "foo", "plugin": "pgoutput"}}, | 				Slots:                 map[string]map[string]string{"permanent_logical_1": {"type": "logical", "database": "foo", "plugin": "pgoutput"}}, | ||||||
| 			}, | 			}, | ||||||
| 			role:     "zalandos", | 			role:     "zalandos", | ||||||
| 			opConfig: config.Config{}, | 			opConfig: config.Config{}, | ||||||
| 			result:   `{"postgresql":{"bin_dir":"/usr/lib/postgresql/11/bin","pg_hba":["hostssl all all 0.0.0.0/0 md5","host    all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"zalandos":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"synchronous_mode":true,"synchronous_mode_strict":true,"slots":{"permanent_logical_1":{"database":"foo","plugin":"pgoutput","type":"logical"}}}}}`, | 			result:   `{"postgresql":{"bin_dir":"/usr/lib/postgresql/11/bin","pg_hba":["hostssl all all 0.0.0.0/0 md5","host    all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"zalandos":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"synchronous_mode":true,"synchronous_mode_strict":true,"synchronous_node_count":1,"slots":{"permanent_logical_1":{"database":"foo","plugin":"pgoutput","type":"logical"}}}}}`, | ||||||
| 		}, | 		}, | ||||||
| 	} | 	} | ||||||
| 	for _, tt := range tests { | 	for _, tt := range tests { | ||||||
|  |  | ||||||
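As a rough, self-contained sketch of how a field such as `SynchronousNodeCount` ends up in the DCS section of the generated Spilo configuration shown in the expected test result above. The struct here is a simplified stand-in with the same JSON tags, not the operator's actual `patroniDCS` type:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// simplified stand-in for the patroniDCS struct from the diff above
type dcs struct {
	TTL                  uint32 `json:"ttl,omitempty"`
	SynchronousMode      bool   `json:"synchronous_mode,omitempty"`
	SynchronousNodeCount uint32 `json:"synchronous_node_count,omitempty"`
}

func main() {
	cfg := dcs{TTL: 30, SynchronousMode: true}

	// mirrors the guard added in the diff: only propagate the value when it is >= 1,
	// so a zero value keeps the key out of the JSON thanks to omitempty
	manifestValue := uint32(1)
	if manifestValue >= 1 {
		cfg.SynchronousNodeCount = manifestValue
	}

	out, _ := json.Marshal(cfg)
	fmt.Println(string(out))
	// {"ttl":30,"synchronous_mode":true,"synchronous_node_count":1}
}
```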
|  | @ -13,6 +13,7 @@ import ( | ||||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||
| 	"k8s.io/apimachinery/pkg/types" | 	"k8s.io/apimachinery/pkg/types" | ||||||
| 
 | 
 | ||||||
|  | 	acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1" | ||||||
| 	"github.com/zalando/postgres-operator/pkg/spec" | 	"github.com/zalando/postgres-operator/pkg/spec" | ||||||
| 	"github.com/zalando/postgres-operator/pkg/util" | 	"github.com/zalando/postgres-operator/pkg/util" | ||||||
| 	"github.com/zalando/postgres-operator/pkg/util/patroni" | 	"github.com/zalando/postgres-operator/pkg/util/patroni" | ||||||
|  | @ -349,6 +350,54 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func (c *Cluster) getPatroniConfig(pod *v1.Pod) (acidv1.Patroni, map[string]string, error) { | ||||||
|  | 	var ( | ||||||
|  | 		patroniConfig acidv1.Patroni | ||||||
|  | 		pgParameters  map[string]string | ||||||
|  | 	) | ||||||
|  | 	podName := util.NameFromMeta(pod.ObjectMeta) | ||||||
|  | 	err := retryutil.Retry(1*time.Second, 5*time.Second, | ||||||
|  | 		func() (bool, error) { | ||||||
|  | 			var err error | ||||||
|  | 			patroniConfig, pgParameters, err = c.patroni.GetConfig(pod) | ||||||
|  | 
 | ||||||
|  | 			if err != nil { | ||||||
|  | 				return false, err | ||||||
|  | 			} | ||||||
|  | 			return true, nil | ||||||
|  | 		}, | ||||||
|  | 	) | ||||||
|  | 
 | ||||||
|  | 	if err != nil { | ||||||
|  | 		return acidv1.Patroni{}, nil, fmt.Errorf("could not get Postgres config from pod %s: %v", podName, err) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return patroniConfig, pgParameters, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error) { | ||||||
|  | 	var memberData patroni.MemberData | ||||||
|  | 	err := retryutil.Retry(1*time.Second, 5*time.Second, | ||||||
|  | 		func() (bool, error) { | ||||||
|  | 			var err error | ||||||
|  | 			memberData, err = c.patroni.GetMemberData(pod) | ||||||
|  | 
 | ||||||
|  | 			if err != nil { | ||||||
|  | 				return false, err | ||||||
|  | 			} | ||||||
|  | 			return true, nil | ||||||
|  | 		}, | ||||||
|  | 	) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return patroni.MemberData{}, fmt.Errorf("could not get member data: %v", err) | ||||||
|  | 	} | ||||||
|  | 	if memberData.State == "creating replica" { | ||||||
|  | 		return patroni.MemberData{}, fmt.Errorf("replica currently being initialized") | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return memberData, nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { | func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { | ||||||
| 	ch := c.registerPodSubscriber(podName) | 	ch := c.registerPodSubscriber(podName) | ||||||
| 	defer c.unregisterPodSubscriber(podName) | 	defer c.unregisterPodSubscriber(podName) | ||||||
|  | @ -380,54 +429,10 @@ func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { | ||||||
| 	return pod, nil | 	return pod, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Cluster) isSafeToRecreatePods(pods []v1.Pod) bool { |  | ||||||
| 
 |  | ||||||
| 	/* |  | ||||||
| 	 Operator should not re-create pods if there is at least one replica being bootstrapped |  | ||||||
| 	 because Patroni might use other replicas to take basebackup from (see Patroni's "clonefrom" tag). |  | ||||||
| 
 |  | ||||||
| 	 XXX operator cannot forbid replica re-init, so we might still fail if re-init is started |  | ||||||
| 	 after this check succeeds but before a pod is re-created |  | ||||||
| 	*/ |  | ||||||
| 	for _, pod := range pods { |  | ||||||
| 		c.logger.Debugf("name=%s phase=%s ip=%s", pod.Name, pod.Status.Phase, pod.Status.PodIP) |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	for _, pod := range pods { |  | ||||||
| 
 |  | ||||||
| 		var data patroni.MemberData |  | ||||||
| 
 |  | ||||||
| 		err := retryutil.Retry(1*time.Second, 5*time.Second, |  | ||||||
| 			func() (bool, error) { |  | ||||||
| 				var err error |  | ||||||
| 				data, err = c.patroni.GetMemberData(&pod) |  | ||||||
| 
 |  | ||||||
| 				if err != nil { |  | ||||||
| 					return false, err |  | ||||||
| 				} |  | ||||||
| 				return true, nil |  | ||||||
| 			}, |  | ||||||
| 		) |  | ||||||
| 
 |  | ||||||
| 		if err != nil { |  | ||||||
| 			c.logger.Errorf("failed to get Patroni state for pod: %s", err) |  | ||||||
| 			return false |  | ||||||
| 		} else if data.State == "creating replica" { |  | ||||||
| 			c.logger.Warningf("cannot re-create replica %s: it is currently being initialized", pod.Name) |  | ||||||
| 			return false |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return true |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.NamespacedName) error { | func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.NamespacedName) error { | ||||||
| 	c.setProcessName("starting to recreate pods") | 	c.setProcessName("starting to recreate pods") | ||||||
| 	c.logger.Infof("there are %d pods in the cluster to recreate", len(pods)) | 	c.logger.Infof("there are %d pods in the cluster to recreate", len(pods)) | ||||||
| 
 | 
 | ||||||
| 	if !c.isSafeToRecreatePods(pods) { |  | ||||||
| 		return fmt.Errorf("postpone pod recreation until next Sync: recreation is unsafe because pods are being initialized") |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	var ( | 	var ( | ||||||
| 		masterPod, newMasterPod *v1.Pod | 		masterPod, newMasterPod *v1.Pod | ||||||
| 	) | 	) | ||||||
|  |  | ||||||
|  | @ -285,6 +285,7 @@ func (c *Cluster) syncStatefulSet() error { | ||||||
| 		restartMasterFirst bool | 		restartMasterFirst bool | ||||||
| 	) | 	) | ||||||
| 	podsToRecreate := make([]v1.Pod, 0) | 	podsToRecreate := make([]v1.Pod, 0) | ||||||
|  | 	isSafeToRecreatePods := true | ||||||
| 	switchoverCandidates := make([]spec.NamespacedName, 0) | 	switchoverCandidates := make([]spec.NamespacedName, 0) | ||||||
| 
 | 
 | ||||||
| 	pods, err := c.listPods() | 	pods, err := c.listPods() | ||||||
|  | @ -410,23 +411,21 @@ func (c *Cluster) syncStatefulSet() error { | ||||||
| 	// get Postgres config, compare with manifest and update via Patroni PATCH endpoint if it differs
 | 	// get Postgres config, compare with manifest and update via Patroni PATCH endpoint if it differs
 | ||||||
| 	// Patroni's config endpoint is just a "proxy" to DCS. It is enough to patch it only once and it doesn't matter which pod is used
 | 	// Patroni's config endpoint is just a "proxy" to DCS. It is enough to patch it only once and it doesn't matter which pod is used
 | ||||||
| 	for i, pod := range pods { | 	for i, pod := range pods { | ||||||
| 		emptyPatroniConfig := acidv1.Patroni{} | 		patroniConfig, pgParameters, err := c.getPatroniConfig(&pod) | ||||||
| 		podName := util.NameFromMeta(pods[i].ObjectMeta) |  | ||||||
| 		patroniConfig, pgParameters, err := c.patroni.GetConfig(&pod) |  | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			c.logger.Warningf("could not get Postgres config from pod %s: %v", podName, err) | 			c.logger.Warningf("%v", err) | ||||||
|  | 			isSafeToRecreatePods = false | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
| 		restartWait = patroniConfig.LoopWait | 		restartWait = patroniConfig.LoopWait | ||||||
| 
 | 
 | ||||||
| 		// empty config probably means cluster is not fully initialized yet, e.g. restoring from backup
 | 		// empty config probably means cluster is not fully initialized yet, e.g. restoring from backup
 | ||||||
| 		// do not attempt a restart
 | 		// do not attempt a restart
 | ||||||
| 		if !reflect.DeepEqual(patroniConfig, emptyPatroniConfig) || len(pgParameters) > 0 { | 		if !reflect.DeepEqual(patroniConfig, acidv1.Patroni{}) || len(pgParameters) > 0 { | ||||||
| 			// compare config returned from Patroni with what is specified in the manifest
 | 			// compare config returned from Patroni with what is specified in the manifest
 | ||||||
| 			restartMasterFirst, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, patroniConfig, c.Spec.Patroni, pgParameters, c.Spec.Parameters) | 			restartMasterFirst, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, patroniConfig, c.Spec.Patroni, pgParameters, c.Spec.Parameters) | ||||||
| 
 |  | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				c.logger.Warningf("could not set PostgreSQL configuration options for pod %s: %v", podName, err) | 				c.logger.Warningf("could not set PostgreSQL configuration options for pod %s: %v", pods[i].Name, err) | ||||||
| 				continue | 				continue | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
|  | @ -448,50 +447,59 @@ func (c *Cluster) syncStatefulSet() error { | ||||||
| 			remainingPods = append(remainingPods, &pods[i]) | 			remainingPods = append(remainingPods, &pods[i]) | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
| 		c.restartInstance(&pod, restartWait) | 		if err = c.restartInstance(&pod, restartWait); err != nil { | ||||||
|  | 			c.logger.Errorf("%v", err) | ||||||
|  | 			isSafeToRecreatePods = false | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// in most cases only the master should be left to restart
 | 	// in most cases only the master should be left to restart
 | ||||||
| 	if len(remainingPods) > 0 { | 	if len(remainingPods) > 0 { | ||||||
| 		for _, remainingPod := range remainingPods { | 		for _, remainingPod := range remainingPods { | ||||||
| 			c.restartInstance(remainingPod, restartWait) | 			if err = c.restartInstance(remainingPod, restartWait); err != nil { | ||||||
|  | 				c.logger.Errorf("%v", err) | ||||||
|  | 				isSafeToRecreatePods = false | ||||||
|  | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// if we get here we also need to re-create the pods (either leftovers from the old
 | 	// if we get here we also need to re-create the pods (either leftovers from the old
 | ||||||
| 	// statefulset or those that got their configuration from the outdated statefulset)
 | 	// statefulset or those that got their configuration from the outdated statefulset)
 | ||||||
| 	if len(podsToRecreate) > 0 { | 	if len(podsToRecreate) > 0 { | ||||||
|  | 		if isSafeToRecreatePods { | ||||||
| 			c.logger.Debugln("performing rolling update") | 			c.logger.Debugln("performing rolling update") | ||||||
| 			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Performing rolling update") | 			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Performing rolling update") | ||||||
| 			if err := c.recreatePods(podsToRecreate, switchoverCandidates); err != nil { | 			if err := c.recreatePods(podsToRecreate, switchoverCandidates); err != nil { | ||||||
| 				return fmt.Errorf("could not recreate pods: %v", err) | 				return fmt.Errorf("could not recreate pods: %v", err) | ||||||
| 			} | 			} | ||||||
| 			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Rolling update done - pods have been recreated") | 			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Rolling update done - pods have been recreated") | ||||||
|  | 		} else { | ||||||
|  | 			c.logger.Warningf("postpone pod recreation until next sync") | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Cluster) restartInstance(pod *v1.Pod, restartWait uint32) { | func (c *Cluster) restartInstance(pod *v1.Pod, restartWait uint32) error { | ||||||
|  | 	// if the config update requires a restart, call Patroni restart
 | ||||||
| 	podName := util.NameFromMeta(pod.ObjectMeta) | 	podName := util.NameFromMeta(pod.ObjectMeta) | ||||||
| 	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) | 	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) | ||||||
| 
 | 	memberData, err := c.getPatroniMemberData(pod) | ||||||
| 	// if the config update requires a restart, call Patroni restart
 |  | ||||||
| 	memberData, err := c.patroni.GetMemberData(pod) |  | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		c.logger.Debugf("could not get member data of %s pod %s - skipping possible restart attempt: %v", role, podName, err) | 		return fmt.Errorf("could not restart Postgres in %s pod %s: %v", role, podName, err) | ||||||
| 		return |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// do restart only when it is pending
 | 	// do restart only when it is pending
 | ||||||
| 	if memberData.PendingRestart { | 	if memberData.PendingRestart { | ||||||
| 		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name)) | 		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, podName)) | ||||||
| 		if err := c.patroni.Restart(pod); err != nil { | 		if err := c.patroni.Restart(pod); err != nil { | ||||||
| 			c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err) | 			return err | ||||||
| 			return |  | ||||||
| 		} | 		} | ||||||
| 		time.Sleep(time.Duration(restartWait) * time.Second) | 		time.Sleep(time.Duration(restartWait) * time.Second) | ||||||
| 		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, pod.Name)) | 		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, podName)) | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	return nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // AnnotationsToPropagate get the annotations to update if required
 | // AnnotationsToPropagate get the annotations to update if required
 | ||||||
|  | @ -620,11 +628,6 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, effectiv | ||||||
| 	return requiresMasterRestart, nil | 	return requiresMasterRestart, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Cluster) getNextRotationDate(currentDate time.Time) (time.Time, string) { |  | ||||||
| 	nextRotationDate := currentDate.AddDate(0, 0, int(c.OpConfig.PasswordRotationInterval)) |  | ||||||
| 	return nextRotationDate, nextRotationDate.Format("2006-01-02 15:04:05") |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func (c *Cluster) syncSecrets() error { | func (c *Cluster) syncSecrets() error { | ||||||
| 
 | 
 | ||||||
| 	c.logger.Info("syncing secrets") | 	c.logger.Info("syncing secrets") | ||||||
|  | @ -682,6 +685,11 @@ func (c *Cluster) syncSecrets() error { | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func (c *Cluster) getNextRotationDate(currentDate time.Time) (time.Time, string) { | ||||||
|  | 	nextRotationDate := currentDate.AddDate(0, 0, int(c.OpConfig.PasswordRotationInterval)) | ||||||
|  | 	return nextRotationDate, nextRotationDate.Format(time.RFC3339) | ||||||
|  | } | ||||||
|  | 
 | ||||||
| func (c *Cluster) updateSecret( | func (c *Cluster) updateSecret( | ||||||
| 	secretUsername string, | 	secretUsername string, | ||||||
| 	generatedSecret *v1.Secret, | 	generatedSecret *v1.Secret, | ||||||
|  | @ -727,7 +735,7 @@ func (c *Cluster) updateSecret( | ||||||
| 
 | 
 | ||||||
| 		// initialize password rotation setting first rotation date
 | 		// initialize password rotation setting first rotation date
 | ||||||
| 		nextRotationDateStr = string(secret.Data["nextRotation"]) | 		nextRotationDateStr = string(secret.Data["nextRotation"]) | ||||||
| 		if nextRotationDate, err = time.ParseInLocation("2006-01-02 15:04:05", nextRotationDateStr, time.Local); err != nil { | 		if nextRotationDate, err = time.ParseInLocation(time.RFC3339, nextRotationDateStr, currentTime.UTC().Location()); err != nil { | ||||||
| 			nextRotationDate, nextRotationDateStr = c.getNextRotationDate(currentTime) | 			nextRotationDate, nextRotationDateStr = c.getNextRotationDate(currentTime) | ||||||
| 			secret.Data["nextRotation"] = []byte(nextRotationDateStr) | 			secret.Data["nextRotation"] = []byte(nextRotationDateStr) | ||||||
| 			updateSecret = true | 			updateSecret = true | ||||||
|  | @ -736,7 +744,7 @@ func (c *Cluster) updateSecret( | ||||||
| 
 | 
 | ||||||
| 		// check if next rotation can happen sooner
 | 		// check if next rotation can happen sooner
 | ||||||
| 		// if rotation interval has been decreased
 | 		// if rotation interval has been decreased
 | ||||||
| 		currentRotationDate, _ := c.getNextRotationDate(currentTime) | 		currentRotationDate, nextRotationDateStr := c.getNextRotationDate(currentTime) | ||||||
| 		if nextRotationDate.After(currentRotationDate) { | 		if nextRotationDate.After(currentRotationDate) { | ||||||
| 			nextRotationDate = currentRotationDate | 			nextRotationDate = currentRotationDate | ||||||
| 		} | 		} | ||||||
|  | @ -756,8 +764,6 @@ func (c *Cluster) updateSecret( | ||||||
| 				*retentionUsers = append(*retentionUsers, secretUsername) | 				*retentionUsers = append(*retentionUsers, secretUsername) | ||||||
| 			} | 			} | ||||||
| 			secret.Data["password"] = []byte(util.RandomPassword(constants.PasswordLength)) | 			secret.Data["password"] = []byte(util.RandomPassword(constants.PasswordLength)) | ||||||
| 
 |  | ||||||
| 			_, nextRotationDateStr = c.getNextRotationDate(nextRotationDate) |  | ||||||
| 			secret.Data["nextRotation"] = []byte(nextRotationDateStr) | 			secret.Data["nextRotation"] = []byte(nextRotationDateStr) | ||||||
| 
 | 
 | ||||||
| 			updateSecret = true | 			updateSecret = true | ||||||
|  |  | ||||||
|  | @ -270,13 +270,29 @@ func TestUpdateSecret(t *testing.T) { | ||||||
| 
 | 
 | ||||||
| 	clusterName := "acid-test-cluster" | 	clusterName := "acid-test-cluster" | ||||||
| 	namespace := "default" | 	namespace := "default" | ||||||
| 	username := "foo" | 	dbname := "app" | ||||||
|  | 	dbowner := "appowner" | ||||||
| 	secretTemplate := config.StringTemplate("{username}.{cluster}.credentials") | 	secretTemplate := config.StringTemplate("{username}.{cluster}.credentials") | ||||||
| 	rotationUsers := make(spec.PgUserMap) | 	rotationUsers := make(spec.PgUserMap) | ||||||
| 	retentionUsers := make([]string, 0) | 	retentionUsers := make([]string, 0) | ||||||
| 	yesterday := time.Now().AddDate(0, 0, -1) |  | ||||||
| 
 | 
 | ||||||
| 	// new cluster with pvc storage resize mode and configured labels
 | 	// define manifest users and enable rotation for dbowner
 | ||||||
|  | 	pg := acidv1.Postgresql{ | ||||||
|  | 		ObjectMeta: metav1.ObjectMeta{ | ||||||
|  | 			Name:      clusterName, | ||||||
|  | 			Namespace: namespace, | ||||||
|  | 		}, | ||||||
|  | 		Spec: acidv1.PostgresSpec{ | ||||||
|  | 			Databases:                      map[string]string{dbname: dbowner}, | ||||||
|  | 			Users:                          map[string]acidv1.UserFlags{"foo": {}, dbowner: {}}, | ||||||
|  | 			UsersWithInPlaceSecretRotation: []string{dbowner}, | ||||||
|  | 			Volume: acidv1.Volume{ | ||||||
|  | 				Size: "1Gi", | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// new cluster with enabled password rotation
 | ||||||
| 	var cluster = New( | 	var cluster = New( | ||||||
| 		Config{ | 		Config{ | ||||||
| 			OpConfig: config.Config{ | 			OpConfig: config.Config{ | ||||||
|  | @ -291,44 +307,61 @@ func TestUpdateSecret(t *testing.T) { | ||||||
| 					ClusterNameLabel: "cluster-name", | 					ClusterNameLabel: "cluster-name", | ||||||
| 				}, | 				}, | ||||||
| 			}, | 			}, | ||||||
| 		}, client, acidv1.Postgresql{}, logger, eventRecorder) | 		}, client, pg, logger, eventRecorder) | ||||||
| 
 | 
 | ||||||
| 	cluster.Name = clusterName | 	cluster.Name = clusterName | ||||||
| 	cluster.Namespace = namespace | 	cluster.Namespace = namespace | ||||||
| 	cluster.pgUsers = map[string]spec.PgUser{} | 	cluster.pgUsers = map[string]spec.PgUser{} | ||||||
| 	cluster.Spec.Users = map[string]acidv1.UserFlags{username: {}} |  | ||||||
| 	cluster.initRobotUsers() | 	cluster.initRobotUsers() | ||||||
| 
 | 
 | ||||||
| 	// create a secret for user foo
 | 	// create secrets
 | ||||||
|  | 	cluster.syncSecrets() | ||||||
|  | 	// initialize rotation with current time
 | ||||||
| 	cluster.syncSecrets() | 	cluster.syncSecrets() | ||||||
| 
 | 
 | ||||||
|  | 	dayAfterTomorrow := time.Now().AddDate(0, 0, 2) | ||||||
|  | 
 | ||||||
|  | 	for username := range cluster.Spec.Users { | ||||||
|  | 		pgUser := cluster.pgUsers[username] | ||||||
|  | 
 | ||||||
|  | 		// first, get the secret
 | ||||||
| 		secret, err := cluster.KubeClient.Secrets(namespace).Get(context.TODO(), secretTemplate.Format("username", username, "cluster", clusterName), metav1.GetOptions{}) | 		secret, err := cluster.KubeClient.Secrets(namespace).Get(context.TODO(), secretTemplate.Format("username", username, "cluster", clusterName), metav1.GetOptions{}) | ||||||
| 		assert.NoError(t, err) | 		assert.NoError(t, err) | ||||||
| 	generatedSecret := cluster.Secrets[secret.UID] | 		secretPassword := string(secret.Data["password"]) | ||||||
| 
 | 
 | ||||||
| 	// now update the secret setting next rotation date (yesterday + interval)
 | 		// now update the secret setting a next rotation date (tomorrow + interval)
 | ||||||
| 	cluster.updateSecret(username, generatedSecret, &rotationUsers, &retentionUsers, yesterday) | 		cluster.updateSecret(username, secret, &rotationUsers, &retentionUsers, dayAfterTomorrow) | ||||||
| 		updatedSecret, err := cluster.KubeClient.Secrets(namespace).Get(context.TODO(), secretTemplate.Format("username", username, "cluster", clusterName), metav1.GetOptions{}) | 		updatedSecret, err := cluster.KubeClient.Secrets(namespace).Get(context.TODO(), secretTemplate.Format("username", username, "cluster", clusterName), metav1.GetOptions{}) | ||||||
| 		assert.NoError(t, err) | 		assert.NoError(t, err) | ||||||
| 
 | 
 | ||||||
|  | 		// check that passwords are different
 | ||||||
|  | 		rotatedPassword := string(updatedSecret.Data["password"]) | ||||||
|  | 		if secretPassword == rotatedPassword { | ||||||
|  | 			t.Errorf("%s: password unchanged in updated secret for %s", testName, username) | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		// check that next rotation date is tomorrow + interval, not date in secret + interval
 | ||||||
| 		nextRotation := string(updatedSecret.Data["nextRotation"]) | 		nextRotation := string(updatedSecret.Data["nextRotation"]) | ||||||
| 	_, nextRotationDate := cluster.getNextRotationDate(yesterday) | 		_, nextRotationDate := cluster.getNextRotationDate(dayAfterTomorrow) | ||||||
| 		if nextRotation != nextRotationDate { | 		if nextRotation != nextRotationDate { | ||||||
| 		t.Errorf("%s: updated secret does not contain correct rotation date: expected %s, got %s", testName, nextRotationDate, nextRotation) | 			t.Errorf("%s: updated secret of %s does not contain correct rotation date: expected %s, got %s", testName, username, nextRotationDate, nextRotation) | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	// update secret again but use current time to trigger rotation
 |  | ||||||
| 	cluster.updateSecret(username, generatedSecret, &rotationUsers, &retentionUsers, time.Now()) |  | ||||||
| 	updatedSecret, err = cluster.KubeClient.Secrets(namespace).Get(context.TODO(), secretTemplate.Format("username", username, "cluster", clusterName), metav1.GetOptions{}) |  | ||||||
| 	assert.NoError(t, err) |  | ||||||
| 
 |  | ||||||
| 	if len(rotationUsers) != 1 && len(retentionUsers) != 1 { |  | ||||||
| 		t.Errorf("%s: unexpected number of users to rotate - expected only foo, found %d", testName, len(rotationUsers)) |  | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | 		// compare username, when it's dbowner they should be equal because of UsersWithInPlaceSecretRotation
 | ||||||
| 		secretUsername := string(updatedSecret.Data["username"]) | 		secretUsername := string(updatedSecret.Data["username"]) | ||||||
| 	rotatedUsername := username + time.Now().Format("060102") | 		if pgUser.IsDbOwner { | ||||||
|  | 			if secretUsername != username { | ||||||
|  | 				t.Errorf("%s: username differs in updated secret: expected %s, got %s", testName, username, secretUsername) | ||||||
|  | 			} | ||||||
|  | 		} else { | ||||||
|  | 			rotatedUsername := username + dayAfterTomorrow.Format("060102") | ||||||
| 			if secretUsername != rotatedUsername { | 			if secretUsername != rotatedUsername { | ||||||
| 				t.Errorf("%s: updated secret does not contain correct username: expected %s, got %s", testName, rotatedUsername, secretUsername) | 				t.Errorf("%s: updated secret does not contain correct username: expected %s, got %s", testName, rotatedUsername, secretUsername) | ||||||
| 			} | 			} | ||||||
|  | 
 | ||||||
|  | 			if len(rotationUsers) != 1 && len(retentionUsers) != 1 { | ||||||
|  | 				t.Errorf("%s: unexpected number of users to rotate - expected only %s, found %d", testName, username, len(rotationUsers)) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -98,7 +98,7 @@ COST_MEMORY = 30.5 * 24 * float(getenv('COST_MEMORY', 0.014375))  # Memory GB m5 | ||||||
| 
 | 
 | ||||||
| WALE_S3_ENDPOINT = getenv( | WALE_S3_ENDPOINT = getenv( | ||||||
|     'WALE_S3_ENDPOINT', |     'WALE_S3_ENDPOINT', | ||||||
|     'https+path://s3-eu-central-1.amazonaws.com:443', |     'https+path://s3.eu-central-1.amazonaws.com:443', | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| USE_AWS_INSTANCE_PROFILE = ( | USE_AWS_INSTANCE_PROFILE = ( | ||||||
|  |  | ||||||