wait after config patch and restart on sync whenever we see pending_restart
parent d3fe4264d4
commit 49668e95eb
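In short: the operator now waits after PATCHing the Postgres configuration through Patroni, and every subsequent sync restarts any member that still reports pending_restart. A condensed, runnable sketch of that flow with hypothetical names (the real signatures live in the diff below):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// 1. PATCH the desired Postgres parameters via the Patroni API
	//    (checkAndSetGlobalPostgreSQLConfiguration -> SetConfig below).
	fmt.Println("patching Postgres config via Patroni API")

	// 2. Patroni only applies a patched config on its next HA-loop run, so
	//    wait loop_wait plus some slack before trusting the pending_restart
	//    flag; the diff sleeps patroniConfig.LoopWait + 2s for this reason.
	loopWait := 10 * time.Second // Patroni's default loop_wait
	time.Sleep(loopWait + 2*time.Second)

	// 3. Restart only the members that still report pending_restart,
	//    master first or replicas first depending on the parameter change.
	fmt.Println("restarting members with pending_restart set")
}
```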
@@ -1066,8 +1066,8 @@ class EndToEndTestCase(unittest.TestCase):
         k8s = self.k8s
         leader = k8s.get_cluster_leader_pod()
         replica = k8s.get_cluster_replica_pod()
-        labels = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role=master'
-        creationTimestamp = leader.metadata.creation_timestamp
+        masterCreationTimestamp = leader.metadata.creation_timestamp
+        replicaCreationTimestamp = replica.metadata.creation_timestamp
         new_max_connections_value = "50"

         # adjust Postgres config
@@ -1116,8 +1116,18 @@ class EndToEndTestCase(unittest.TestCase):
                             "synchronous_mode not updated")
                 return True

+            # check if Patroni config has been updated
+            self.eventuallyTrue(compare_config, "Postgres config not applied")
+
+            # make sure that pods were not recreated
+            leader = k8s.get_cluster_leader_pod()
+            replica = k8s.get_cluster_replica_pod()
+            self.assertEqual(masterCreationTimestamp, leader.metadata.creation_timestamp,
+                            "Master pod creation timestamp is updated")
+            self.assertEqual(replicaCreationTimestamp, replica.metadata.creation_timestamp,
+                            "Replica pod creation timestamp is updated")

             # query max_connections setting
             setting_query = """
                SELECT setting
                  FROM pg_settings
@@ -1128,11 +1138,16 @@ class EndToEndTestCase(unittest.TestCase):
             self.eventuallyNotEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
                 "Expected max_connections not to be updated on replica since Postgres was restarted on the master first", 10, 5)

-            # make sure that pod wasn't recreated
-            self.assertEqual(creationTimestamp, leader.metadata.creation_timestamp,
-                            "Master pod creation timestamp is updated")
+            # the next sync should restart the replica because it has the pending_restart flag set
+            # force next sync by deleting the operator pod
+            k8s.delete_operator_pod()
+            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+
+            self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
+                "New max_connections setting not applied on replica", 10, 5)

             # decrease max_connections again
+            # this time restart will be correct and new value should appear on both instances
             lower_max_connections_value = "30"
             pg_patch_max_connections = {
                 "spec": {
@@ -1149,12 +1164,14 @@ class EndToEndTestCase(unittest.TestCase):

             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

             # check Patroni config again
+            pg_patch_config["spec"]["postgresql"]["parameters"]["max_connections"] = lower_max_connections_value
             self.eventuallyTrue(compare_config, "Postgres config not applied")

-            self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
+            # and query max_connections setting again
+            self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], lower_max_connections_value,
                 "Lower max_connections setting not applied on master", 10, 5)
-            self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
+            self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], lower_max_connections_value,
                 "Lower max_connections setting not applied on replica", 10, 5)

         except timeout_decorator.TimeoutError:
@@ -273,7 +273,6 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {
 func (c *Cluster) syncStatefulSet() error {
 	var (
 		restartTTL         uint32
-		instanceRestartRequired bool
 		restartMasterFirst bool
 	)

@@ -414,24 +413,24 @@ func (c *Cluster) syncStatefulSet() error {
 		// empty config probably means cluster is not fully initialized yet, e.g. restoring from backup
 		// do not attempt a restart
 		if !reflect.DeepEqual(patroniConfig, emptyPatroniConfig) || len(pgParameters) > 0 {
-			instanceRestartRequired, restartMasterFirst, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, patroniConfig, pgParameters)
+			restartMasterFirst, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, patroniConfig, pgParameters)
 			if err != nil {
 				c.logger.Warningf("could not set PostgreSQL configuration options for pod %s: %v", podName, err)
 				continue
 			}
+			// it could take up to LoopWait to apply the config
+			time.Sleep(time.Duration(patroniConfig.LoopWait)*time.Second + time.Second*2)
 			restartTTL = patroniConfig.TTL
 			break
 		}
 	}

-	// if the config update requires a restart, call Patroni restart
-	if instanceRestartRequired {
+	// restart instances if required
 	remainingPods := make([]*v1.Pod, 0)
 	skipRole := Master
 	if restartMasterFirst {
 		skipRole = Replica
 	}
-		c.logger.Debug("restarting Postgres server within pods")
 	for i, pod := range pods {
 		role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
 		if role == skipRole {
@@ -453,7 +452,6 @@ func (c *Cluster) syncStatefulSet() error {
 			}
 		}
 	}
-	}

 	// if we get here we also need to re-create the pods (either leftovers from the old
 	// statefulset or those that got their configuration from the outdated statefulset)
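A note on the ordering above: the loop skips one role and collects those pods into remainingPods, and a second pass (outside the visible hunk) restarts the deferred role. A pared-down, runnable sketch of that ordering with hypothetical stand-in types:

```go
package main

import "fmt"

// Pod is a hypothetical stand-in for *v1.Pod carrying only the role label.
type Pod struct{ Name, Role string }

func restartInstance(p Pod) { fmt.Println("restarting", p.Name) }

// restartPods mirrors the ordering in syncStatefulSet: by default the master
// is skipped in the first pass (replicas restart first); when
// restartMasterFirst is true, the replicas are deferred instead.
func restartPods(pods []Pod, restartMasterFirst bool) {
	skipRole := "master"
	if restartMasterFirst {
		skipRole = "replica"
	}
	remainingPods := make([]Pod, 0)
	for _, pod := range pods {
		if pod.Role == skipRole {
			remainingPods = append(remainingPods, pod)
			continue
		}
		restartInstance(pod)
	}
	// second pass over the deferred role (in the operator this happens
	// after the loop shown in the hunk above)
	for _, pod := range remainingPods {
		restartInstance(pod)
	}
}

func main() {
	pods := []Pod{
		{"acid-minimal-cluster-0", "master"},
		{"acid-minimal-cluster-1", "replica"},
	}
	restartPods(pods, false) // prints the replica first, then the master
}
```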
@@ -472,16 +470,23 @@ func (c *Cluster) restartInstance(pod *v1.Pod) {
 	podName := util.NameFromMeta(pod.ObjectMeta)
 	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])

-	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name))
 	// if the config update requires a restart, call Patroni restart
+	memberData, err := c.patroni.GetMemberData(pod)
+	if err != nil {
+		c.logger.Debugf("could not get member data of %s pod %s - skipping possible restart attempt: %v", role, podName, err)
+		return
+	}
+
+	// do restart only when it is pending
+	if memberData.PendingRestart {
+		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name))
 		if err := c.patroni.Restart(pod); err != nil {
 			c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err)
 			return
 		}
-
-	c.logger.Debugf("Postgres server successfuly restarted in %s pod %s", role, podName)
 		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, pod.Name))
+	}
 }

 // AnnotationsToPropagate get the annotations to update if required
 // based on the annotations in postgres CRD
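GetMemberData is what surfaces the pending_restart flag that gates the restart above. The same signal can be read straight from Patroni's REST API; a minimal sketch, assuming plain HTTP on Patroni's default port 8008 (the operator goes through its own client and types instead):

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// memberData mirrors the fields of interest from GET /patroni;
// Patroni reports "pending_restart": true after a config change
// that needs a server restart.
type memberData struct {
	State          string `json:"state"`
	PendingRestart bool   `json:"pending_restart"`
}

func pendingRestart(host string) (bool, error) {
	resp, err := http.Get(fmt.Sprintf("http://%s:8008/patroni", host))
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	var md memberData
	if err := json.NewDecoder(resp.Body).Decode(&md); err != nil {
		return false, err
	}
	return md.PendingRestart, nil
}

func main() {
	pending, err := pendingRestart("localhost")
	if err != nil {
		fmt.Println("could not get member data:", err)
		return
	}
	fmt.Println("pending_restart:", pending)
}
```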
@@ -517,7 +522,7 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[string]string {

 // checkAndSetGlobalPostgreSQLConfiguration checks whether cluster-wide API parameters
 // (like max_connections) have changed and if necessary sets them via the Patroni API
-func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig acidv1.Patroni, effectivePgParameters map[string]string) (bool, bool, error) {
+func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig acidv1.Patroni, effectivePgParameters map[string]string) (bool, error) {
 	configToSet := make(map[string]interface{})
 	parametersToSet := make(map[string]string)
 	restartMaster := make([]bool, 0)
@@ -584,7 +589,7 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig acidv1.Patroni, effectivePgParameters map[string]string) (bool, error) {
 	}

 	if len(configToSet) == 0 {
-		return false, false, nil
+		return false, nil
 	}

 	configToSetJson, err := json.Marshal(configToSet)
@@ -598,10 +603,10 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig acidv1.Patroni, effectivePgParameters map[string]string) (bool, error) {
 	c.logger.Debugf("patching Postgres config via Patroni API on pod %s with following options: %s",
 		podName, configToSetJson)
 	if err = c.patroni.SetConfig(pod, configToSet); err != nil {
-		return true, requiresMasterRestart, fmt.Errorf("could not patch postgres parameters with a pod %s: %v", podName, err)
+		return requiresMasterRestart, fmt.Errorf("could not patch postgres parameters with a pod %s: %v", podName, err)
 	}

-	return true, requiresMasterRestart, nil
+	return requiresMasterRestart, nil
 }

 func (c *Cluster) syncSecrets() error {
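For context on SetConfig: it amounts to a PATCH against Patroni's /config endpoint carrying only the parameters that changed. A self-contained sketch of that request, assuming direct, unauthenticated HTTP on port 8008 (the operator wraps this in its own Patroni client):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// setMaxConnections patches a single cluster-wide Postgres parameter
// through Patroni's /config endpoint.
func setMaxConnections(host, value string) error {
	payload := map[string]interface{}{
		"postgresql": map[string]interface{}{
			"parameters": map[string]string{"max_connections": value},
		},
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return err
	}
	req, err := http.NewRequest(http.MethodPatch, fmt.Sprintf("http://%s:8008/config", host), bytes.NewReader(body))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status: %s", resp.Status)
	}
	return nil
}

func main() {
	if err := setMaxConnections("localhost", "50"); err != nil {
		fmt.Println(err)
	}
}
```

After such a PATCH, Patroni picks the change up on its next HA-loop run, which is exactly why the sync code above sleeps LoopWait + 2s before inspecting pending_restart.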
@@ -238,17 +238,13 @@ func (p *Patroni) Restart(server *v1.Pod) error {
 	if err != nil {
 		return err
 	}
-	memberData, err := p.GetMemberData(server)
-	if err != nil {
+	if err := p.httpPostOrPatch(http.MethodPost, apiURLString+restartPath, buf); err != nil {
 		return err
 	}
+	p.logger.Infof("Postgres server successfully restarted in pod %s", server.Name)

-	// do restart only when it is pending
-	if !memberData.PendingRestart {
 	return nil
-	}
-	return p.httpPostOrPatch(http.MethodPost, apiURLString+restartPath, buf)
 }

 // GetMemberData reads member data from patroni API
 func (p *Patroni) GetMemberData(server *v1.Pod) (MemberData, error) {
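With the member-data check moved to the caller, Restart is reduced to the POST /restart call itself. Patroni can also restart conditionally when the request body carries {"restart_pending": true} — which, as an assumption, is what the operator encodes into buf; a standalone sketch of the same call against port 8008:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// restartIfPending asks Patroni to restart Postgres only when a restart is
// pending, via the {"restart_pending": true} body of POST /restart
// (assumption: plain HTTP on Patroni's default port 8008).
func restartIfPending(host string) error {
	body, err := json.Marshal(map[string]interface{}{"restart_pending": true})
	if err != nil {
		return err
	}
	resp, err := http.Post(fmt.Sprintf("http://%s:8008/restart", host), "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// Patroni replies 200 when it restarted and 503 when the pending
	// condition was not met.
	fmt.Println("restart response:", resp.Status)
	return nil
}

func main() {
	if err := restartIfPending("localhost"); err != nil {
		fmt.Println(err)
	}
}
```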