Improve behavior on node decomissionining (#184)
* Trigger the node migration on the lack of the readiness label. * Examine the node's readiness status on node add. Make sure we don't miss the not ready node, especially when the operator is killed during the migration.
This commit is contained in:
parent
1109cfa7a1
commit
8e99518eeb
|
|
@ -319,6 +319,6 @@ func (c *Cluster) podIsEndOfLife(pod *v1.Pod) (bool, error) {
|
|||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return node.Spec.Unschedulable || util.MapContains(node.Labels, c.OpConfig.NodeEOLLabel), nil
|
||||
return node.Spec.Unschedulable || !util.MapContains(node.Labels, c.OpConfig.NodeReadinessLabel), nil
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,10 @@ func (c *Controller) nodeAdd(obj interface{}) {
|
|||
}
|
||||
|
||||
c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID)
|
||||
// check if the node became not ready while the operator was down (otherwise we would have caught it in nodeUpdate)
|
||||
if !c.nodeIsReady(node) {
|
||||
c.movePodsOffNode(node)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Controller) nodeUpdate(prev, cur interface{}) {
|
||||
|
|
@ -55,13 +59,23 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
|
|||
return
|
||||
}
|
||||
|
||||
if nodePrev.Spec.Unschedulable && util.MapContains(nodePrev.Labels, c.opConfig.NodeEOLLabel) ||
|
||||
!nodeCur.Spec.Unschedulable || !util.MapContains(nodeCur.Labels, c.opConfig.NodeEOLLabel) {
|
||||
// do nothing if the node should have already triggered an update or
|
||||
// if only one of the label and the unschedulability criteria are met.
|
||||
if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) {
|
||||
return
|
||||
}
|
||||
c.movePodsOffNode(nodeCur)
|
||||
}
|
||||
|
||||
c.logger.Infof("node %q became unschedulable and has EOL labels: %q", util.NameFromMeta(nodeCur.ObjectMeta),
|
||||
c.opConfig.NodeEOLLabel)
|
||||
func (c *Controller) nodeIsReady(node *v1.Node) bool {
|
||||
return (!node.Spec.Unschedulable || util.MapContains(node.Labels, c.opConfig.NodeReadinessLabel) ||
|
||||
util.MapContains(node.Labels, map[string]string{"master": "true"}))
|
||||
}
|
||||
|
||||
func (c *Controller) movePodsOffNode(node *v1.Node) {
|
||||
nodeName := util.NameFromMeta(node.ObjectMeta)
|
||||
c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q",
|
||||
nodeName, c.opConfig.NodeReadinessLabel)
|
||||
|
||||
opts := metav1.ListOptions{
|
||||
LabelSelector: labels.Set(c.opConfig.ClusterLabels).String(),
|
||||
|
|
@ -74,7 +88,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
|
|||
|
||||
nodePods := make([]*v1.Pod, 0)
|
||||
for i, pod := range podList.Items {
|
||||
if pod.Spec.NodeName == nodeCur.Name {
|
||||
if pod.Spec.NodeName == node.Name {
|
||||
nodePods = append(nodePods, &podList.Items[i])
|
||||
}
|
||||
}
|
||||
|
|
@ -131,7 +145,7 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
|
|||
for pod, cl := range replicaPods {
|
||||
podName := util.NameFromMeta(pod.ObjectMeta)
|
||||
|
||||
if err := cl.MigrateReplicaPod(podName, nodeCur.Name); err != nil {
|
||||
if err := cl.MigrateReplicaPod(podName, node.Name); err != nil {
|
||||
c.logger.Errorf("could not move replica pod %q: %v", podName, err)
|
||||
movedPods--
|
||||
}
|
||||
|
|
@ -144,11 +158,11 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) {
|
|||
totalPods := len(nodePods)
|
||||
|
||||
c.logger.Infof("%d/%d pods have been moved out from the %q node",
|
||||
movedPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta))
|
||||
movedPods, totalPods, nodeName)
|
||||
|
||||
if leftPods := totalPods - movedPods; leftPods > 0 {
|
||||
c.logger.Warnf("could not move %d/%d pods from the %q node",
|
||||
leftPods, totalPods, util.NameFromMeta(nodeCur.ObjectMeta))
|
||||
leftPods, totalPods, nodeName)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
package controller
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/zalando-incubator/postgres-operator/pkg/spec"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/pkg/api/v1"
|
||||
)
|
||||
|
||||
const (
|
||||
readyLabel = "lifecycle-status"
|
||||
readyValue = "ready"
|
||||
)
|
||||
|
||||
func initializeController() *Controller {
|
||||
var c = NewController(&spec.ControllerConfig{})
|
||||
c.opConfig.NodeReadinessLabel = map[string]string{readyLabel: readyValue}
|
||||
return c
|
||||
}
|
||||
|
||||
func makeNode(labels map[string]string, isSchedulable bool) *v1.Node {
|
||||
return &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Namespace: v1.NamespaceDefault,
|
||||
Labels: labels,
|
||||
},
|
||||
Spec: v1.NodeSpec{
|
||||
Unschedulable: !isSchedulable,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
var c = initializeController()
|
||||
|
||||
func TestNodeIsReady(t *testing.T) {
|
||||
testName := "TestNodeIsReady"
|
||||
var testTable = []struct {
|
||||
in *v1.Node
|
||||
out bool
|
||||
}{
|
||||
{
|
||||
in: makeNode(map[string]string{"foo": "bar"}, true),
|
||||
out: true,
|
||||
},
|
||||
{
|
||||
in: makeNode(map[string]string{"foo": "bar"}, false),
|
||||
out: false,
|
||||
},
|
||||
{
|
||||
in: makeNode(map[string]string{readyLabel: readyValue}, false),
|
||||
out: true,
|
||||
},
|
||||
{
|
||||
in: makeNode(map[string]string{"foo": "bar", "master": "true"}, false),
|
||||
out: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range testTable {
|
||||
if isReady := c.nodeIsReady(tt.in); isReady != tt.out {
|
||||
t.Errorf("%s: expected response %t doesn't match the actual %t for the node %#v",
|
||||
testName, tt.out, isReady, tt.in)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -32,7 +32,6 @@ type Resources struct {
|
|||
DefaultCPULimit string `name:"default_cpu_limit" default:"3"`
|
||||
DefaultMemoryLimit string `name:"default_memory_limit" default:"1Gi"`
|
||||
PodEnvironmentConfigMap string `name:"pod_environment_configmap" default:""`
|
||||
NodeEOLLabel map[string]string `name:"node_eol_label" default:"lifecycle-status:pending-decommission"`
|
||||
NodeReadinessLabel map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"`
|
||||
MaxInstances int32 `name:"max_instances" default:"-1"`
|
||||
MinInstances int32 `name:"min_instances" default:"-1"`
|
||||
|
|
|
|||
Loading…
Reference in New Issue