Improve behavior on node decommissioning (#184)
* Trigger the node migration when the readiness label is absent. * Examine the node's readiness status when a node is added. Make sure we don't miss a not-ready node, especially when the operator is killed during the migration.
This commit is contained in:
		
							parent
							
								
									1109cfa7a1
								
							
						
					
					
						commit
						8e99518eeb
					
				|  | @ -319,6 +319,6 @@ func (c *Cluster) podIsEndOfLife(pod *v1.Pod) (bool, error) { | |||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 	return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.NodeEOLLabel), nil | ||||
| 	return node.Spec.Unschedulable || !util.MapContains(node.Labels, c.OpConfig.NodeReadinessLabel), nil | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -38,6 +38,10 @@ func (c *Controller) nodeAdd(obj interface{}) { | |||
| 	} | ||||
| 
 | ||||
| 	c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) | ||||
| 	// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
 | ||||
| 	if !c.nodeIsReady(node) { | ||||
| 		c.movePodsOffNode(node) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (c *Controller) nodeUpdate(prev, cur interface{}) { | ||||
|  | @ -55,13 +59,23 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { | |||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.NodeEOLLabel) || | ||||
| 		!nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.NodeEOLLabel) { | ||||
| 	// do nothing if the node should have already triggered an update or
 | ||||
| 	// if only one of the label and the unschedulability criteria are met.
 | ||||
| 	if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) { | ||||
| 		return | ||||
| 	} | ||||
| 	c.movePodsOffNode(nodeCur) | ||||
| } | ||||
| 
 | ||||
| 	c.logger.Infof("node %q became unschedulable and has EOL labels: %q", util.NameFromMeta(nodeCur.ObjectMeta), | ||||
| 		c.opConfig.NodeEOLLabel) | ||||
| func (c *Controller) nodeIsReady(node *v1.Node) bool { | ||||
| 	return (!node.Spec.Unschedulable || util.MapContains(node.Labels, c.opConfig.NodeReadinessLabel) || | ||||
| 		util.MapContains(node.Labels, map[string]string{"master": "true"})) | ||||
| } | ||||
| 
 | ||||
| func (c *Controller) movePodsOffNode(node *v1.Node) { | ||||
| 	nodeName := util.NameFromMeta(node.ObjectMeta) | ||||
| 	c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q", | ||||
| 		nodeName, c.opConfig.NodeReadinessLabel) | ||||
| 
 | ||||
| 	opts := metav1.ListOptions{ | ||||
| 		LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(), | ||||
|  | @ -74,7 +88,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { | |||
| 
 | ||||
| 	nodePods := make([]*v1.Pod, 0) | ||||
| 	for i, pod := range podList.Items { | ||||
| 		if pod.Spec.NodeName == nodeCur.Name { | ||||
| 		if pod.Spec.NodeName == node.Name { | ||||
| 			nodePods = append(nodePods, &podList.Items[i]) | ||||
| 		} | ||||
| 	} | ||||
|  | @ -131,7 +145,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { | |||
| 	for pod, cl := range replicaPods { | ||||
| 		podName := util.NameFromMeta(pod.ObjectMeta) | ||||
| 
 | ||||
| 		if err := cl.MigrateReplicaPod(podName, nodeCur.Name); err != nil { | ||||
| 		if err := cl.MigrateReplicaPod(podName, node.Name); err != nil { | ||||
| 			c.logger.Errorf("could not move replica pod %q: %v", podName, err) | ||||
| 			movedPods-- | ||||
| 		} | ||||
|  | @ -144,11 +158,11 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { | |||
| 	totalPods := len(nodePods) | ||||
| 
 | ||||
| 	c.logger.Infof("%d/%d pods have been moved out from the %q node", | ||||
| 		movedPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta)) | ||||
| 		movedPods, totalPods, nodeName) | ||||
| 
 | ||||
| 	if leftPods := totalPods - movedPods; leftPods > 0 { | ||||
| 		c.logger.Warnf("could not move %d/%d pods from the %q node", | ||||
| 			leftPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta)) | ||||
| 			leftPods, totalPods, nodeName) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,65 @@ | |||
| package controller | ||||
| 
 | ||||
| import ( | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/zalando-incubator/postgres-operator/pkg/spec" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/client-go/pkg/api/v1" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	readyLabel = "lifecycle-status" | ||||
| 	readyValue = "ready" | ||||
| ) | ||||
| 
 | ||||
| func initializeController() *Controller { | ||||
| 	var c = NewController(&spec.ControllerConfig{}) | ||||
| 	c.opConfig.NodeReadinessLabel = map[string]string{readyLabel: readyValue} | ||||
| 	return c | ||||
| } | ||||
| 
 | ||||
| func makeNode(labels map[string]string, isSchedulable bool) *v1.Node { | ||||
| 	return &v1.Node{ | ||||
| 		ObjectMeta: metav1.ObjectMeta{ | ||||
| 			Namespace: v1.NamespaceDefault, | ||||
| 			Labels:    labels, | ||||
| 		}, | ||||
| 		Spec: v1.NodeSpec{ | ||||
| 			Unschedulable: !isSchedulable, | ||||
| 		}, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| var c = initializeController() | ||||
| 
 | ||||
| func TestNodeIsReady(t *testing.T) { | ||||
| 	testName := "TestNodeIsReady" | ||||
| 	var testTable = []struct { | ||||
| 		in  *v1.Node | ||||
| 		out bool | ||||
| 	}{ | ||||
| 		{ | ||||
| 			in:  makeNode(map[string]string{"foo": "bar"}, true), | ||||
| 			out: true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			in:  makeNode(map[string]string{"foo": "bar"}, false), | ||||
| 			out: false, | ||||
| 		}, | ||||
| 		{ | ||||
| 			in:  makeNode(map[string]string{readyLabel: readyValue}, false), | ||||
| 			out: true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			in:  makeNode(map[string]string{"foo": "bar", "master": "true"}, false), | ||||
| 			out: true, | ||||
| 		}, | ||||
| 	} | ||||
| 	for _, tt := range testTable { | ||||
| 		if isReady := c.nodeIsReady(tt.in); isReady != tt.out { | ||||
| 			t.Errorf("%s: expected response %t doesn't match the actual %t for the node %#v", | ||||
| 				testName, tt.out, isReady, tt.in) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -32,7 +32,6 @@ type Resources struct { | |||
| 	DefaultCPULimit         string            `name:"default_cpu_limit" default:"3"` | ||||
| 	DefaultMemoryLimit      string            `name:"default_memory_limit" default:"1Gi"` | ||||
| 	PodEnvironmentConfigMap string            `name:"pod_environment_configmap" default:""` | ||||
| 	NodeEOLLabel            map[string]string `name:"node_eol_label" default:"lifecycle-status:pending-decommission"` | ||||
| 	NodeReadinessLabel      map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"` | ||||
| 	MaxInstances            int32             `name:"max_instances" default:"-1"` | ||||
| 	MinInstances            int32             `name:"min_instances" default:"-1"` | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue