perform manual failover on pg cluster rolling upgrade
This commit is contained in:
		
							parent
							
								
									00194d0130
								
							
						
					
					
						commit
						48ec6b35b9
					
				|  | @ -668,3 +668,37 @@ func (c *Cluster) GetStatus() *spec.ClusterStatus { | |||
| 		Error: c.Error, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // ManualFailover does manual failover to a candidate pod
 | ||||
| func (c *Cluster) ManualFailover(curMaster *v1.Pod, candidate spec.NamespacedName) error { | ||||
| 	c.logger.Debugf("failing over from %q to %q", curMaster.Name, candidate) | ||||
| 	podLabelErr := make(chan error) | ||||
| 	stopCh := make(chan struct{}) | ||||
| 	defer close(podLabelErr) | ||||
| 
 | ||||
| 	go func() { | ||||
| 		ch := c.registerPodSubscriber(candidate) | ||||
| 		defer c.unregisterPodSubscriber(candidate) | ||||
| 
 | ||||
| 		role := Master | ||||
| 
 | ||||
| 		select { | ||||
| 		case <-stopCh: | ||||
| 		case podLabelErr <- c.waitForPodLabel(ch, &role): | ||||
| 		} | ||||
| 	}() | ||||
| 
 | ||||
| 	if err := c.patroni.Failover(curMaster, candidate.Name); err != nil { | ||||
| 		close(stopCh) | ||||
| 		return fmt.Errorf("could not failover: %v", err) | ||||
| 	} | ||||
| 	c.logger.Debugf("successfully failed over from %q to %q", curMaster.Name, candidate) | ||||
| 
 | ||||
| 	defer close(stopCh) | ||||
| 
 | ||||
| 	if err := <-podLabelErr; err != nil { | ||||
| 		return fmt.Errorf("could not get master pod label: %v", err) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
|  |  | |||
|  | @ -104,7 +104,7 @@ func (c *Cluster) recreatePod(pod v1.Pod) error { | |||
| 	if err := c.waitForPodDeletion(ch); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := c.waitForPodLabel(ch); err != nil { | ||||
| 	if err := c.waitForPodLabel(ch, nil); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	c.logger.Infof("pod %q is ready", podName) | ||||
|  | @ -127,6 +127,7 @@ func (c *Cluster) recreatePods() error { | |||
| 	c.logger.Infof("there are %d pods in the cluster to recreate", len(pods.Items)) | ||||
| 
 | ||||
| 	var masterPod v1.Pod | ||||
| 	replicas := make([]spec.NamespacedName, 0) | ||||
| 	for _, pod := range pods.Items { | ||||
| 		role := c.podSpiloRole(&pod) | ||||
| 
 | ||||
|  | @ -138,11 +139,17 @@ func (c *Cluster) recreatePods() error { | |||
| 		if err := c.recreatePod(pod); err != nil { | ||||
| 			return fmt.Errorf("could not recreate replica pod %q: %v", util.NameFromMeta(pod.ObjectMeta), err) | ||||
| 		} | ||||
| 
 | ||||
| 		replicas = append(replicas, util.NameFromMeta(pod.ObjectMeta)) | ||||
| 	} | ||||
| 
 | ||||
| 	if masterPod.Name == "" { | ||||
| 		c.logger.Warningln("no master pod in the cluster") | ||||
| 	} else { | ||||
| 		//TODO: do manual failover
 | ||||
| 		err := c.ManualFailover(&masterPod, masterCandidate(replicas)) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("could not perform manual failover: %v", err) | ||||
| 		} | ||||
| 		//TODO: specify master, leave new master empty
 | ||||
| 		c.logger.Infof("recreating master pod %q", util.NameFromMeta(masterPod.ObjectMeta)) | ||||
| 
 | ||||
|  |  | |||
|  | @ -3,6 +3,7 @@ package cluster | |||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"math/rand" | ||||
| 	"strings" | ||||
| 	"time" | ||||
| 
 | ||||
|  | @ -182,15 +183,17 @@ func (c *Cluster) getTeamMembers() ([]string, error) { | |||
| 	return teamInfo.Members, nil | ||||
| } | ||||
| 
 | ||||
| func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent) error { | ||||
| func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent, role *PostgresRole) error { | ||||
| 	for { | ||||
| 		select { | ||||
| 		case podEvent := <-podEvents: | ||||
| 			role := c.podSpiloRole(podEvent.CurPod) | ||||
| 			// We cannot assume any role of the newly created pod. Normally, for a multi-pod cluster
 | ||||
| 			// we should observe the 'replica' value, but it could be that some pods are not allowed
 | ||||
| 			// to promote, therefore, the new pod could be a master as well.
 | ||||
| 			if role == constants.PodRoleMaster || role == constants.PodRoleReplica { | ||||
| 			podRole := PostgresRole(podEvent.CurPod.Labels[c.OpConfig.PodRoleLabel]) | ||||
| 
 | ||||
| 			if role == nil { | ||||
| 				if podRole == Master || podRole == Replica { | ||||
| 					return nil | ||||
| 				} | ||||
| 			} else if *role == podRole { | ||||
| 				return nil | ||||
| 			} | ||||
| 		case <-time.After(c.OpConfig.PodLabelWaitTimeout): | ||||
|  | @ -342,3 +345,7 @@ func (c *Cluster) credentialSecretNameForCluster(username string, clusterName st | |||
| func (c *Cluster) podSpiloRole(pod *v1.Pod) string { | ||||
| 	return pod.Labels[c.OpConfig.PodRoleLabel] | ||||
| } | ||||
| 
 | ||||
| func masterCandidate(replicas []spec.NamespacedName) spec.NamespacedName { | ||||
| 	return replicas[rand.Intn(len(replicas))] | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue