Migrate only master pods. Migrate single masters. (#199)
Avoid migrating replica pods, since node draining will handle them anyway (the PDB specifies that only masters are to be kept). Allow migration of single-pod clusters.
This commit is contained in:
parent
bb5ce6cbbe
commit
23011bdf9a
|
|
@ -34,10 +34,6 @@ func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) {
|
|||
return nil, fmt.Errorf("could not get list of pods: %v", err)
|
||||
}
|
||||
|
||||
if len(pods.Items) == 0 {
|
||||
return nil, fmt.Errorf("no pods")
|
||||
}
|
||||
|
||||
if role == Master && len(pods.Items) > 1 {
|
||||
return nil, fmt.Errorf("too many masters")
|
||||
}
|
||||
|
|
@ -158,6 +154,11 @@ func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) {
|
|||
return nil, fmt.Errorf("could not get replica pods: %v", err)
|
||||
}
|
||||
|
||||
if len(replicas) == 0 {
|
||||
c.logger.Warningf("single master pod for cluster %q, migration will cause disruption of the service")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
for i, pod := range replicas {
|
||||
// look for replicas running on live nodes. Ignore errors when querying the nodes.
|
||||
if pod.Spec.NodeName != oldNodeName {
|
||||
|
|
@ -198,21 +199,25 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
|
|||
return fmt.Errorf("could not get new master candidate: %v", err)
|
||||
}
|
||||
|
||||
pod, err := c.movePodFromEndOfLifeNode(masterCandidatePod)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not move pod: %v", err)
|
||||
}
|
||||
// there are two cases for each postgres cluster that has its master pod on the node to migrate from:
|
||||
// - the cluster has some replicas - migrate one of those if necessary and failover to it
|
||||
// - there are no replicas - just terminate the master and wait until it respawns
|
||||
// in both cases the result is the new master up and running on a new node.
|
||||
if masterCandidatePod != nil {
|
||||
pod, err := c.movePodFromEndOfLifeNode(masterCandidatePod)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not move pod: %v", err)
|
||||
}
|
||||
|
||||
masterCandidateName := util.NameFromMeta(pod.ObjectMeta)
|
||||
if err := c.ManualFailover(oldMaster, masterCandidateName); err != nil {
|
||||
return fmt.Errorf("could not failover to pod %q: %v", masterCandidateName, err)
|
||||
masterCandidateName := util.NameFromMeta(pod.ObjectMeta)
|
||||
if err := c.ManualFailover(oldMaster, masterCandidateName); err != nil {
|
||||
return fmt.Errorf("could not failover to pod %q: %v", masterCandidateName, err)
|
||||
}
|
||||
} else {
|
||||
if _, err = c.movePodFromEndOfLifeNode(oldMaster); err != nil {
|
||||
return fmt.Errorf("could not move pod: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
_, err = c.movePodFromEndOfLifeNode(oldMaster)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not move pod: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -95,6 +95,9 @@ func (c *Cluster) preScaleDown(newStatefulSet *v1beta1.StatefulSet) error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("could not get master pod: %v", err)
|
||||
}
|
||||
if len(masterPod) == 0 {
|
||||
return fmt.Errorf("no master pod is running in the cluster")
|
||||
}
|
||||
|
||||
podNum, err := getPodIndex(masterPod[0].Name)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ func (c *Controller) nodeAdd(obj interface{}) {
|
|||
c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID)
|
||||
// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
|
||||
if !c.nodeIsReady(node) {
|
||||
c.movePodsOffNode(node)
|
||||
c.moveMasterPodsOffNode(node)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
|
|||
if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) {
|
||||
return
|
||||
}
|
||||
c.movePodsOffNode(nodeCur)
|
||||
c.moveMasterPodsOffNode(nodeCur)
|
||||
}
|
||||
|
||||
func (c *Controller) nodeIsReady(node *v1.Node) bool {
|
||||
|
|
@ -72,7 +72,7 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool {
|
|||
util.MapContains(node.Labels, map[string]string{"master": "true"}))
|
||||
}
|
||||
|
||||
func (c *Controller) movePodsOffNode(node *v1.Node) {
|
||||
func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
|
||||
nodeName := util.NameFromMeta(node.ObjectMeta)
|
||||
c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q",
|
||||
nodeName, c.opConfig.NodeReadinessLabel)
|
||||
|
|
@ -95,14 +95,15 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
|
|||
|
||||
clusters := make(map[*cluster.Cluster]bool)
|
||||
masterPods := make(map[*v1.Pod]*cluster.Cluster)
|
||||
replicaPods := make(map[*v1.Pod]*cluster.Cluster)
|
||||
movedPods := 0
|
||||
for _, pod := range nodePods {
|
||||
podName := util.NameFromMeta(pod.ObjectMeta)
|
||||
|
||||
role, ok := pod.Labels[c.opConfig.PodRoleLabel]
|
||||
if !ok {
|
||||
c.logger.Warningf("could not move pod %q: pod has no role", podName)
|
||||
if !ok || cluster.PostgresRole(role) != cluster.Master {
|
||||
if !ok {
|
||||
c.logger.Warningf("could not move pod %q: pod has no role", podName)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
@ -116,17 +117,11 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
|
|||
continue
|
||||
}
|
||||
|
||||
movedPods++
|
||||
|
||||
if !clusters[cl] {
|
||||
clusters[cl] = true
|
||||
}
|
||||
|
||||
if cluster.PostgresRole(role) == cluster.Master {
|
||||
masterPods[pod] = cl
|
||||
} else {
|
||||
replicaPods[pod] = cl
|
||||
}
|
||||
masterPods[pod] = cl
|
||||
}
|
||||
|
||||
for cl := range clusters {
|
||||
|
|
@ -138,16 +133,8 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
|
|||
|
||||
if err := cl.MigrateMasterPod(podName); err != nil {
|
||||
c.logger.Errorf("could not move master pod %q: %v", podName, err)
|
||||
movedPods--
|
||||
}
|
||||
}
|
||||
|
||||
for pod, cl := range replicaPods {
|
||||
podName := util.NameFromMeta(pod.ObjectMeta)
|
||||
|
||||
if err := cl.MigrateReplicaPod(podName, node.Name); err != nil {
|
||||
c.logger.Errorf("could not move replica pod %q: %v", podName, err)
|
||||
movedPods--
|
||||
} else {
|
||||
movedPods++
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -155,13 +142,13 @@ func (c *Controller) movePodsOffNode(node *v1.Node) {
|
|||
cl.Unlock()
|
||||
}
|
||||
|
||||
totalPods := len(nodePods)
|
||||
totalPods := len(masterPods)
|
||||
|
||||
c.logger.Infof("%d/%d pods have been moved out from the %q node",
|
||||
c.logger.Infof("%d/%d master pods have been moved out from the %q node",
|
||||
movedPods, totalPods, nodeName)
|
||||
|
||||
if leftPods := totalPods - movedPods; leftPods > 0 {
|
||||
c.logger.Warnf("could not move %d/%d pods from the %q node",
|
||||
c.logger.Warnf("could not move master %d/%d pods from the %q node",
|
||||
leftPods, totalPods, nodeName)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue