diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 44af662cb..e64421a31 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -319,6 +319,6 @@ func (c *Cluster) podIsEndOfLife(pod *v1.Pod) (bool, error) { if err != nil { return false, err } - return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.NodeEOLLabel), nil + return node.Spec.Unschedulable || !util.MapContains(node.Labels, c.OpConfig.NodeReadinessLabel), nil } diff --git a/pkg/controller/node.go b/pkg/controller/node.go index 524b3ce30..f745333aa 100644 --- a/pkg/controller/node.go +++ b/pkg/controller/node.go @@ -38,6 +38,10 @@ func (c *Controller) nodeAdd(obj interface{}) { } c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) + // check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate) + if !c.nodeIsReady(node) { + c.movePodsOffNode(node) + } } func (c *Controller) nodeUpdate(prev, cur interface{}) { @@ -55,13 +59,23 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { return } - if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.NodeEOLLabel) || - !nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.NodeEOLLabel) { + // do nothing if the node should have already triggered an update or + // if only one of the label and the unschedulability criteria are met. + if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) { return } + c.movePodsOffNode(nodeCur) +} - c.logger.Infof("node %q became unschedulable and has EOL labels: %q", util.NameFromMeta(nodeCur.ObjectMeta), - c.opConfig.NodeEOLLabel) +func (c *Controller) nodeIsReady(node *v1.Node) bool { + return (!node.Spec.Unschedulable || util.MapContains(node.Labels, c.opConfig.NodeReadinessLabel) || + util.MapContains(node.Labels, map[string]string{"master": "true"})) +} + +func (c *Controller) movePodsOffNode(node *v1.Node) { + nodeName := util.NameFromMeta(node.ObjectMeta) + c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q", + nodeName, c.opConfig.NodeReadinessLabel) opts := metav1.ListOptions{ LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(), @@ -74,7 +88,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { nodePods := make([]*v1.Pod, 0) for i, pod := range podList.Items { - if pod.Spec.NodeName == nodeCur.Name { + if pod.Spec.NodeName == node.Name { nodePods = append(nodePods, &podList.Items[i]) } } @@ -131,7 +145,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { for pod, cl := range replicaPods { podName := util.NameFromMeta(pod.ObjectMeta) - if err := cl.MigrateReplicaPod(podName, nodeCur.Name); err != nil { + if err := cl.MigrateReplicaPod(podName, node.Name); err != nil { c.logger.Errorf("could not move replica pod %q: %v", podName, err) movedPods-- } @@ -144,11 +158,11 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { totalPods := len(nodePods) c.logger.Infof("%d/%d pods have been moved out from the %q node", - movedPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta)) + movedPods, totalPods, nodeName) if leftPods := totalPods - movedPods; leftPods > 0 { c.logger.Warnf("could not move %d/%d pods from the %q node", - leftPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta)) + leftPods, totalPods, nodeName) } } diff --git a/pkg/controller/node_test.go b/pkg/controller/node_test.go new file mode 100644 index 000000000..0f86628bd --- /dev/null +++ b/pkg/controller/node_test.go @@ -0,0 +1,65 @@ +package controller + +import ( + "testing" + + "github.com/zalando-incubator/postgres-operator/pkg/spec" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/pkg/api/v1" +) + +const ( + readyLabel = "lifecycle-status" + readyValue = "ready" +) + +func initializeController() *Controller { + var c = NewController(&spec.ControllerConfig{}) + c.opConfig.NodeReadinessLabel = map[string]string{readyLabel: readyValue} + return c +} + +func makeNode(labels map[string]string, isSchedulable bool) *v1.Node { + return &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: v1.NamespaceDefault, + Labels: labels, + }, + Spec: v1.NodeSpec{ + Unschedulable: !isSchedulable, + }, + } +} + +var c = initializeController() + +func TestNodeIsReady(t *testing.T) { + testName := "TestNodeIsReady" + var testTable = []struct { + in *v1.Node + out bool + }{ + { + in: makeNode(map[string]string{"foo": "bar"}, true), + out: true, + }, + { + in: makeNode(map[string]string{"foo": "bar"}, false), + out: false, + }, + { + in: makeNode(map[string]string{readyLabel: readyValue}, false), + out: true, + }, + { + in: makeNode(map[string]string{"foo": "bar", "master": "true"}, false), + out: true, + }, + } + for _, tt := range testTable { + if isReady := c.nodeIsReady(tt.in); isReady != tt.out { + t.Errorf("%s: expected response %t doesn't match the actual %t for the node %#v", + testName, tt.out, isReady, tt.in) + } + } +} diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 90590a84b..8cec41417 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -32,7 +32,6 @@ type Resources struct { DefaultCPULimit string `name:"default_cpu_limit" default:"3"` DefaultMemoryLimit string `name:"default_memory_limit" default:"1Gi"` PodEnvironmentConfigMap string `name:"pod_environment_configmap" default:""` - NodeEOLLabel map[string]string `name:"node_eol_label" default:"lifecycle-status:pending-decommission"` NodeReadinessLabel map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"`