From f400539b690bf6eeaea459a7984bcedb8bce53a8 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Thu, 28 Feb 2019 16:19:27 +0100 Subject: [PATCH] Retry moving master pods (#463) * Retry moving master pods * bump up master pod wait timeout --- docs/reference/operator_parameters.md | 4 +++ manifests/configmap.yaml | 1 + .../v1/operator_configuration_type.go | 1 + pkg/controller/node.go | 33 +++++++++++++++++-- pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + 6 files changed, 38 insertions(+), 3 deletions(-) diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index cff935c29..5b5f33199 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -212,6 +212,10 @@ configuration they are grouped under the `kubernetes` key. class](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass) that should be assigned to the Postgres pods. The priority class itself must be defined in advance. Default is empty (use the default priority class). + + * **master_pod_move_timeout** + The period of time to wait for the success of migration of master pods from an unschedulable node. + The migration includes Patroni switchovers to respective replicas on healthy nodes. The situation where master pods still exist on the old node after this timeout expires has to be fixed manually. The default is 20 minutes. 
* **enable_pod_antiaffinity** toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid multiple pods diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 19eb3f5de..97ff9a2b2 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -46,6 +46,7 @@ data: pod_label_wait_timeout: 10m ready_wait_interval: 3s ready_wait_timeout: 30s + # master_pod_move_timeout: 10m replication_username: standby resource_check_interval: 3s resource_check_timeout: 10m diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 3f37a2673..3c406d2e3 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -60,6 +60,7 @@ type KubernetesMetaConfiguration struct { // TODO: use namespacedname PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` + MasterPodMoveTimeout time.Duration `json:"master_pod_move_timeout,omitempty"` EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity" default:"false"` PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"` } diff --git a/pkg/controller/node.go b/pkg/controller/node.go index e443dc704..1849954ef 100644 --- a/pkg/controller/node.go +++ b/pkg/controller/node.go @@ -1,6 +1,10 @@ package controller import ( + "fmt" + "time" + + "github.com/zalando/postgres-operator/pkg/util/retryutil" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -38,6 +42,7 @@ func (c *Controller) nodeAdd(obj interface{}) { } c.logger.Debugf("new node has been added: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) + // check if the node became not ready while the operator was down (otherwise we would have caught it in 
nodeUpdate) if !c.nodeIsReady(node) { c.moveMasterPodsOffNode(node) @@ -64,7 +69,9 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) { return } + c.moveMasterPodsOffNode(nodeCur) + } func (c *Controller) nodeIsReady(node *v1.Node) bool { @@ -72,7 +79,7 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool { util.MapContains(node.Labels, map[string]string{"master": "true"})) } -func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { +func (c *Controller) attemptToMoveMasterPodsOffNode(node *v1.Node) error { nodeName := util.NameFromMeta(node.ObjectMeta) c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q", nodeName, c.opConfig.NodeReadinessLabel) @@ -83,7 +90,7 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { podList, err := c.KubeClient.Pods(c.opConfig.WatchedNamespace).List(opts) if err != nil { c.logger.Errorf("could not fetch list of the pods: %v", err) - return + return err } nodePods := make([]*v1.Pod, 0) @@ -148,9 +155,11 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { movedPods, totalPods, nodeName) if leftPods := totalPods - movedPods; leftPods > 0 { - c.logger.Warnf("could not move master %d/%d pods from the %q node", + return fmt.Errorf("could not move master %d/%d pods from the %q node", leftPods, totalPods, nodeName) } + + return nil } func (c *Controller) nodeDelete(obj interface{}) { @@ -161,3 +170,21 @@ func (c *Controller) nodeDelete(obj interface{}) { c.logger.Debugf("node has been deleted: %q (%s)", util.NameFromMeta(node.ObjectMeta), node.Spec.ProviderID) } + +// moveMasterPodsOffNode runs attemptToMoveMasterPodsOffNode through retryutil.Retry (1-minute interval, MasterPodMoveTimeout limit) and logs a warning, including the cause, if Retry returns an error. +func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { + // NOTE(review): confirm that retryutil.Retry keeps polling when the callback returns a non-nil error; otherwise return (false, nil) here instead. + err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout, + func() (bool, error) { + err := c.attemptToMoveMasterPodsOffNode(node) + if err != nil { + return false, fmt.Errorf("unable to move master pods off the unschedulable node (%v); will retry after delay of 1 minute", err) + } +
return true, nil + }, + ) + + if err != nil { + c.logger.Warnf("failed to move master pods from the node %q within %v: %v", node.Name, c.opConfig.MasterPodMoveTimeout, err) + } +} diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index f381e8a5b..85535fa32 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -52,6 +52,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.ClusterNameLabel = fromCRD.Kubernetes.ClusterNameLabel result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName + result.MasterPodMoveTimeout = fromCRD.Kubernetes.MasterPodMoveTimeout result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity; result.PodAntiAffinityTopologyKey = fromCRD.Kubernetes.PodAntiAffinityTopologyKey; diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 54b04d3f7..a60c8e673 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -81,6 +81,7 @@ type Config struct { // value of this string must be valid JSON or YAML; see initPodServiceAccount PodServiceAccountDefinition string `name:"pod_service_account_definition" default:""` PodServiceAccountRoleBindingDefinition string `name:"pod_service_account_role_binding_definition" default:""` + MasterPodMoveTimeout time.Duration `name:"master_pod_move_timeout" default:"20m"` DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` AWSRegion string `name:"aws_region" default:"eu-central-1"` WALES3Bucket string `name:"wal_s3_bucket"`