From b90a36c9090121ca28f8354d81516f67c4c70354 Mon Sep 17 00:00:00 2001 From: Oleksii Kliukin Date: Tue, 16 Jan 2018 15:43:03 +0100 Subject: [PATCH] Set node_readiness_label default to an empty value. (#204) Previously, it was set to the lifecycle-status:ready, breaking a lot of minikube deployments. Also it was not possible before to run with this label set to an empty value. Document the effect of the label in the new section of the documentation. --- README.md | 21 +++++++++++++++++++++ manifests/configmap.yaml | 2 +- pkg/cluster/k8sres.go | 8 +++++++- pkg/util/config/config.go | 2 +- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 17ca5f8cc..dc7b60db5 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,27 @@ spec: Please be aware that the taint and toleration only ensures that no other pod gets scheduled to a PostgreSQL node but not that PostgreSQL pods are placed on such a node. This can be achieved by setting a node affinity rule in the ConfigMap. +### Using the operator to minimize the number of failovers during the cluster upgrade + +Postgres operator moves master pods out of Kubernetes nodes that are about to be decommissioned. The decommission status of the node is derived +from the presence of the set of labels defined by the `node_readiness_label` parameter. The operator makes sure that the Postgres +master pods are moved elsewhere from the node that is pending to be decommissioned, but not to another node that is also +about to be shut down. It achieves that via a combination of several properties set on the postgres pods: + +* [nodeAffinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#node-affinity-beta-feature) is configured to avoid scheduling the pod on nodes without all labels from the `node_readiness_label` set. 
+* [PodDisruptionBudget](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#how-disruption-budgets-work) is defined to keep the master pods running until they are moved out by the operator. + +The operator starts moving master pods when the node is drained and doesn't have all labels from the `node_readiness_label` set. +By default this parameter is set to an empty string, disabling this feature altogether. It can be set to a string containing one +or more key:value parameters, e.g.: +``` +node_readiness_label: "lifecycle-status:ready,diagnostic-checks:ok" + +``` + +When multiple labels are set, the operator will require all of them to be present on a node (and set to the specified value) in order to consider +it ready. + #### Custom Pod Environment Variables It is possible to configure a config map which is used by the Postgres pods as an additional provider for environment variables. diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index d996a473c..a5f9c33ff 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -39,5 +39,5 @@ data: pod_terminate_grace_period: 5m pdb_name_format: "postgres-{cluster}-pdb" node_eol_label: "lifecycle-status:pending-decommission" - node_readiness_label: "lifecycle-status:ready" + node_readiness_label: "" team_api_role_configuration: "log_statement:all" diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 203786875..1bc18422e 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -238,6 +238,9 @@ PatroniInitDBParams: func (c *Cluster) nodeAffinity() *v1.Affinity { matchExpressions := make([]v1.NodeSelectorRequirement, 0) + if len(c.OpConfig.NodeReadinessLabel) == 0 { + return nil + } for k, v := range c.OpConfig.NodeReadinessLabel { matchExpressions = append(matchExpressions, v1.NodeSelectorRequirement{ Key: k, @@ -431,10 +434,13 @@ func (c *Cluster) generatePodTemplate( ServiceAccountName: c.OpConfig.ServiceAccountName, TerminationGracePeriodSeconds: 
&terminateGracePeriodSeconds, Containers: []v1.Container{container}, - Affinity: c.nodeAffinity(), Tolerations: c.tolerations(tolerationsSpec), } + if affinity := c.nodeAffinity(); affinity != nil { + podSpec.Affinity = affinity + } + if c.OpConfig.ScalyrAPIKey != "" && c.OpConfig.ScalyrImage != "" { podSpec.Containers = append( podSpec.Containers, diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 8cec41417..e0a520b08 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -32,7 +32,7 @@ type Resources struct { DefaultCPULimit string `name:"default_cpu_limit" default:"3"` DefaultMemoryLimit string `name:"default_memory_limit" default:"1Gi"` PodEnvironmentConfigMap string `name:"pod_environment_configmap" default:""` - NodeReadinessLabel map[string]string `name:"node_readiness_label" default:"lifecycle-status:ready"` + NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` }