perform manual failover on pg cluster rolling upgrade
This commit is contained in:
		
							parent
							
								
									00194d0130
								
							
						
					
					
						commit
						48ec6b35b9
					
				|  | @ -668,3 +668,37 @@ func (c *Cluster) GetStatus() *spec.ClusterStatus { | ||||||
| 		Error: c.Error, | 		Error: c.Error, | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | // ManualFailover does manual failover to a candidate pod
 | ||||||
|  | func (c *Cluster) ManualFailover(curMaster *v1.Pod, candidate spec.NamespacedName) error { | ||||||
|  | 	c.logger.Debugf("failing over from %q to %q", curMaster.Name, candidate) | ||||||
|  | 	podLabelErr := make(chan error) | ||||||
|  | 	stopCh := make(chan struct{}) | ||||||
|  | 	defer close(podLabelErr) | ||||||
|  | 
 | ||||||
|  | 	go func() { | ||||||
|  | 		ch := c.registerPodSubscriber(candidate) | ||||||
|  | 		defer c.unregisterPodSubscriber(candidate) | ||||||
|  | 
 | ||||||
|  | 		role := Master | ||||||
|  | 
 | ||||||
|  | 		select { | ||||||
|  | 		case <-stopCh: | ||||||
|  | 		case podLabelErr <- c.waitForPodLabel(ch, &role): | ||||||
|  | 		} | ||||||
|  | 	}() | ||||||
|  | 
 | ||||||
|  | 	if err := c.patroni.Failover(curMaster, candidate.Name); err != nil { | ||||||
|  | 		close(stopCh) | ||||||
|  | 		return fmt.Errorf("could not failover: %v", err) | ||||||
|  | 	} | ||||||
|  | 	c.logger.Debugf("successfully failed over from %q to %q", curMaster.Name, candidate) | ||||||
|  | 
 | ||||||
|  | 	defer close(stopCh) | ||||||
|  | 
 | ||||||
|  | 	if err := <-podLabelErr; err != nil { | ||||||
|  | 		return fmt.Errorf("could not get master pod label: %v", err) | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -104,7 +104,7 @@ func (c *Cluster) recreatePod(pod v1.Pod) error { | ||||||
| 	if err := c.waitForPodDeletion(ch); err != nil { | 	if err := c.waitForPodDeletion(ch); err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	if err := c.waitForPodLabel(ch); err != nil { | 	if err := c.waitForPodLabel(ch, nil); err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	c.logger.Infof("pod %q is ready", podName) | 	c.logger.Infof("pod %q is ready", podName) | ||||||
|  | @ -127,6 +127,7 @@ func (c *Cluster) recreatePods() error { | ||||||
| 	c.logger.Infof("there are %d pods in the cluster to recreate", len(pods.Items)) | 	c.logger.Infof("there are %d pods in the cluster to recreate", len(pods.Items)) | ||||||
| 
 | 
 | ||||||
| 	var masterPod v1.Pod | 	var masterPod v1.Pod | ||||||
|  | 	replicas := make([]spec.NamespacedName, 0) | ||||||
| 	for _, pod := range pods.Items { | 	for _, pod := range pods.Items { | ||||||
| 		role := c.podSpiloRole(&pod) | 		role := c.podSpiloRole(&pod) | ||||||
| 
 | 
 | ||||||
|  | @ -138,11 +139,17 @@ func (c *Cluster) recreatePods() error { | ||||||
| 		if err := c.recreatePod(pod); err != nil { | 		if err := c.recreatePod(pod); err != nil { | ||||||
| 			return fmt.Errorf("could not recreate replica pod %q: %v", util.NameFromMeta(pod.ObjectMeta), err) | 			return fmt.Errorf("could not recreate replica pod %q: %v", util.NameFromMeta(pod.ObjectMeta), err) | ||||||
| 		} | 		} | ||||||
|  | 
 | ||||||
|  | 		replicas = append(replicas, util.NameFromMeta(pod.ObjectMeta)) | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
| 	if masterPod.Name == "" { | 	if masterPod.Name == "" { | ||||||
| 		c.logger.Warningln("no master pod in the cluster") | 		c.logger.Warningln("no master pod in the cluster") | ||||||
| 	} else { | 	} else { | ||||||
| 		//TODO: do manual failover
 | 		err := c.ManualFailover(&masterPod, masterCandidate(replicas)) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return fmt.Errorf("could not perform manual failover: %v", err) | ||||||
|  | 		} | ||||||
| 		//TODO: specify master, leave new master empty
 | 		//TODO: specify master, leave new master empty
 | ||||||
| 		c.logger.Infof("recreating master pod %q", util.NameFromMeta(masterPod.ObjectMeta)) | 		c.logger.Infof("recreating master pod %q", util.NameFromMeta(masterPod.ObjectMeta)) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -3,6 +3,7 @@ package cluster | ||||||
| import ( | import ( | ||||||
| 	"encoding/json" | 	"encoding/json" | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"math/rand" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"time" | 	"time" | ||||||
| 
 | 
 | ||||||
|  | @ -182,15 +183,17 @@ func (c *Cluster) getTeamMembers() ([]string, error) { | ||||||
| 	return teamInfo.Members, nil | 	return teamInfo.Members, nil | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent) error { | func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent, role *PostgresRole) error { | ||||||
| 	for { | 	for { | ||||||
| 		select { | 		select { | ||||||
| 		case podEvent := <-podEvents: | 		case podEvent := <-podEvents: | ||||||
| 			role := c.podSpiloRole(podEvent.CurPod) | 			podRole := PostgresRole(podEvent.CurPod.Labels[c.OpConfig.PodRoleLabel]) | ||||||
| 			// We cannot assume any role of the newly created pod. Normally, for a multi-pod cluster
 | 
 | ||||||
| 			// we should observe the 'replica' value, but it could be that some pods are not allowed
 | 			if role == nil { | ||||||
| 			// to promote, therefore, the new pod could be a master as well.
 | 				if podRole == Master || podRole == Replica { | ||||||
| 			if role == constants.PodRoleMaster || role == constants.PodRoleReplica { | 					return nil | ||||||
|  | 				} | ||||||
|  | 			} else if *role == podRole { | ||||||
| 				return nil | 				return nil | ||||||
| 			} | 			} | ||||||
| 		case <-time.After(c.OpConfig.PodLabelWaitTimeout): | 		case <-time.After(c.OpConfig.PodLabelWaitTimeout): | ||||||
|  | @ -342,3 +345,7 @@ func (c *Cluster) credentialSecretNameForCluster(username string, clusterName st | ||||||
| func (c *Cluster) podSpiloRole(pod *v1.Pod) string { | func (c *Cluster) podSpiloRole(pod *v1.Pod) string { | ||||||
| 	return pod.Labels[c.OpConfig.PodRoleLabel] | 	return pod.Labels[c.OpConfig.PodRoleLabel] | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | func masterCandidate(replicas []spec.NamespacedName) spec.NamespacedName { | ||||||
|  | 	return replicas[rand.Intn(len(replicas))] | ||||||
|  | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue