Improve behavior on node decomissionining (#184)

* Trigger the node migration on the lack of  the readiness label.

* Examine the node's readiness status on node add.

Make sure we don't miss the not ready node, especially when the
operator is killed during the migration.
This commit is contained in:
Oleksii Kliukin 2018-01-04 11:53:15 +01:00 committed by GitHub
parent 1109cfa7a1
commit 8e99518eeb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 88 additions and 10 deletions

View File

@ -319,6 +319,6 @@ func (c *Cluster) podIsEndOfLife(pod *v1.Pod) (bool, error) {
if err != nil { if err != nil {
return false, err return false, err
} }
return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.NodeEOLLabel), nil return node.Spec.Unschedulable || !util.MapContains(node.Labels, c.OpConfig.NodeReadinessLabel), nil
} }

View File

@ -38,6 +38,10 @@ func (c *Controller) nodeAdd(obj interface{}) {
} }
c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID)
// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
if !c.nodeIsReady(node) {
c.movePodsOffNode(node)
}
} }
func (c *Controller) nodeUpdate(prev, cur interface{}) { func (c *Controller) nodeUpdate(prev, cur interface{}) {
@ -55,13 +59,23 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
return return
} }
if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.NodeEOLLabel) || // do nothing if the node should have already triggered an update or
!nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.NodeEOLLabel) { // if only one of the label and the unschedulability criteria are met.
if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) {
return return
} }
c.movePodsOffNode(nodeCur)
}
c.logger.Infof("node %q became unschedulable and has EOL labels: %q", util.NameFromMeta(nodeCur.ObjectMeta), func (c *Controller) nodeIsReady(node *v1.Node) bool {
c.opConfig.NodeEOLLabel) return (!node.Spec.Unschedulable || util.MapContains(node.Labels, c.opConfig.NodeReadinessLabel) ||
util.MapContains(node.Labels, map[string]string{"master": "true"}))
}
func (c *Controller) movePodsOffNode(node *v1.Node) {
nodeName := util.NameFromMeta(node.ObjectMeta)
c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q",
nodeName, c.opConfig.NodeReadinessLabel)
opts := metav1.ListOptions{ opts := metav1.ListOptions{
LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(), LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(),
@ -74,7 +88,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
nodePods := make([]*v1.Pod, 0) nodePods := make([]*v1.Pod, 0)
for i, pod := range podList.Items { for i, pod := range podList.Items {
if pod.Spec.NodeName == nodeCur.Name { if pod.Spec.NodeName == node.Name {
nodePods = append(nodePods, &podList.Items[i]) nodePods = append(nodePods, &podList.Items[i])
} }
} }
@ -131,7 +145,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
for pod, cl := range replicaPods { for pod, cl := range replicaPods {
podName := util.NameFromMeta(pod.ObjectMeta) podName := util.NameFromMeta(pod.ObjectMeta)
if err := cl.MigrateReplicaPod(podName, nodeCur.Name); err != nil { if err := cl.MigrateReplicaPod(podName, node.Name); err != nil {
c.logger.Errorf("could not move replica pod %q: %v", podName, err) c.logger.Errorf("could not move replica pod %q: %v", podName, err)
movedPods-- movedPods--
} }
@ -144,11 +158,11 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
totalPods := len(nodePods) totalPods := len(nodePods)
c.logger.Infof("%d/%d pods have been moved out from the %q node", c.logger.Infof("%d/%d pods have been moved out from the %q node",
movedPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta)) movedPods, totalPods, nodeName)
if leftPods := totalPods - movedPods; leftPods > 0 { if leftPods := totalPods - movedPods; leftPods > 0 {
c.logger.Warnf("could not move %d/%d pods from the %q node", c.logger.Warnf("could not move %d/%d pods from the %q node",
leftPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta)) leftPods, totalPods, nodeName)
} }
} }

View File

@ -0,0 +1,65 @@
package controller
import (
"testing"
"github.com/zalando-incubator/postgres-operator/pkg/spec"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/pkg/api/v1"
)
const (
readyLabel = "lifecycle-status"
readyValue = "ready"
)
func initializeController() *Controller {
var c = NewController(&spec.ControllerConfig{})
c.opConfig.NodeReadinessLabel = map[string]string{readyLabel: readyValue}
return c
}
func makeNode(labels map[string]string, isSchedulable bool) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Namespace: v1.NamespaceDefault,
Labels: labels,
},
Spec: v1.NodeSpec{
Unschedulable: !isSchedulable,
},
}
}
var c = initializeController()
func TestNodeIsReady(t *testing.T) {
testName := "TestNodeIsReady"
var testTable = []struct {
in *v1.Node
out bool
}{
{
in: makeNode(map[string]string{"foo": "bar"}, true),
out: true,
},
{
in: makeNode(map[string]string{"foo": "bar"}, false),
out: false,
},
{
in: makeNode(map[string]string{readyLabel: readyValue}, false),
out: true,
},
{
in: makeNode(map[string]string{"foo": "bar", "master": "true"}, false),
out: true,
},
}
for _, tt := range testTable {
if isReady := c.nodeIsReady(tt.in); isReady != tt.out {
t.Errorf("%s: expected response %t doesn't match the actual %t for the node %#v",
testName, tt.out, isReady, tt.in)
}
}
}

View File

@ -32,7 +32,6 @@ type Resources struct {
DefaultCPULimit string `name:"default_cpu_limit" default:"3"` DefaultCPULimit string `name:"default_cpu_limit" default:"3"`
DefaultMemoryLimit string `name:"default_memory_limit" default:"1Gi"` DefaultMemoryLimit string `name:"default_memory_limit" default:"1Gi"`
PodEnvironmentConfigMap string `name:"pod_environment_configmap" default:""` PodEnvironmentConfigMap string `name:"pod_environment_configmap" default:""`
NodeEOLLabel map[string]string `name:"node_eol_label" default:"lifecycle-status:pending-decommission"`
NodeReadinessLabel map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"` NodeReadinessLabel map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"`
MaxInstances int32 `name:"max_instances" default:"-1"` MaxInstances int32 `name:"max_instances" default:"-1"`
MinInstances int32 `name:"min_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"`