From ea531ff1405d52109c97c1e41c06c305c0b8f54a Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 6 Jan 2022 12:12:58 +0100
Subject: [PATCH] extend docs and reflect review feedback

---
 docs/administrator.md                 | 72 +++++++++++++++++++++++++++
 docs/reference/operator_parameters.md | 15 ++----
 docs/user.md                          |  7 +--
 pkg/cluster/cluster.go                |  1 -
 pkg/cluster/k8sres.go                 |  4 +-
 5 files changed, 83 insertions(+), 16 deletions(-)

diff --git a/docs/administrator.md b/docs/administrator.md
index 551ee5523..c15b324c7 100644
--- a/docs/administrator.md
+++ b/docs/administrator.md
@@ -339,6 +339,78 @@ master pods from being evicted by the K8s runtime. To prevent eviction
 completely, specify the toleration by leaving out the `tolerationSeconds`
 value (similar to how Kubernetes' own DaemonSets are configured)
 
+## Node readiness labels
+
+The operator can watch certain node labels to detect e.g. the start of a
+Kubernetes cluster upgrade procedure and move master pods off the nodes that
+are about to be decommissioned. Key-value pairs for these node readiness
+labels can be specified in the configuration (the option name is singular):
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: postgres-operator
+data:
+  node_readiness_label: "status1:ready,status2:ready"
+```
+
+```yaml
+apiVersion: "acid.zalan.do/v1"
+kind: OperatorConfiguration
+metadata:
+  name: postgresql-configuration
+configuration:
+  kubernetes:
+    node_readiness_label:
+      status1: ready
+      status2: ready
+```
+
+The operator will create a `nodeAffinity` on the pods. This makes the
+`node_readiness_label` option the global configuration for defining node
+affinities for all Postgres clusters. Cluster-specific and global affinities
+can both be defined; they will get merged on the pods (AND condition):
+
+```yaml
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+        - matchExpressions:
+          - key: environment
+            operator: In
+            values:
+            - pci
+          - key: status1
+            operator: In
+            values:
+            - ready
+          - key: status2
+            ...
+```
+
+If the manifest defines multiple `matchExpressions` blocks (OR condition),
+the readiness label configuration is appended as its own expressions block:
+
+```yaml
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+        - matchExpressions:
+          - key: environment
+            ...
+        - matchExpressions:
+          - key: storage
+            ...
+        - matchExpressions:
+          - key: status1
+            ...
+          - key: status2
+            ...
+```
+
 ## Enable pod anti affinity
 
 To ensure Postgres pods are running on different topologies, you can use
diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md
index de3b370f3..5bcbdef97 100644
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@@ -340,16 +340,11 @@ configuration they are grouped under the `kubernetes` key.
 
 * **node_readiness_label**
   a set of labels that a running and active node should possess to be
-  considered `ready`. The operator uses values of those labels to detect the
-  start of the Kubernetes cluster upgrade procedure and move master pods off
-  the nodes to be decommissioned. When the set is not empty, the operator also
-  assigns the `nodeAffinity` clause to the Postgres pods to be scheduled only
-  on `ready` nodes. If a `nodeAffinity` is specified in the postgres cluster
-  manifest as well the `nodeSelectorTerms` will get merged. If the
-  `nodeAffinity` of the manifest contains only one `matchExpressions` slice
-  the node readiniess label expressions will be moved there (AND condition).
-  When multiple selector expressions are defined in the manifest an extra
-  `matchExpressions` section is appended (OR condition). The default is empty.
+  considered `ready`. When the set is not empty, the operator assigns the
+  `nodeAffinity` clause to the Postgres pods to be scheduled only on `ready`
+  nodes. If a `nodeAffinity` is also specified in the Postgres cluster
+  manifest, both affinities will get merged on the pods. See the [user docs](../user.md#use-taints-tolerations-and-node-affinity-for-dedicated-postgresql-nodes)
+  for more details. The default is empty.
 
 * **toleration**
   a dictionary that should contain `key`, `operator`, `value` and
diff --git a/docs/user.md b/docs/user.md
index 052c4c618..20db45979 100644
--- a/docs/user.md
+++ b/docs/user.md
@@ -671,7 +671,9 @@ configured [default requests](reference/operator_parameters.md#kubernetes-resour
 
 To ensure Postgres pods are running on nodes without any other application
 pods, you can use [taints and tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)
-and configure the required toleration in the manifest.
+and configure the required toleration in the manifest. Tolerations can also be
+defined in the [operator config](administrator.md#use-taints-and-tolerations-for-dedicated-postgresql-nodes)
+to apply to all Postgres clusters.
 
 ```yaml
 spec:
@@ -704,8 +706,7 @@ spec:
 ```
 
 If you need to define a `nodeAffinity` for all your Postgres clusters use the
-`node_readiness_label` configuration option, which allows you to define a list
-of key-value pairs.
+`node_readiness_label` [configuration](administrator.md#node-readiness-labels).
 
 ## In-place major version upgrade
 
diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go
index 9895d6ba4..ca58c10a0 100644
--- a/pkg/cluster/cluster.go
+++ b/pkg/cluster/cluster.go
@@ -375,7 +375,6 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa
 		reasons = append(reasons, "new statefulset's number of replicas does not match the current one")
 	}
 	if !reflect.DeepEqual(c.Statefulset.Annotations, statefulSet.Annotations) {
-		match = false
 		needsReplace = true
 		reasons = append(reasons, "new statefulset's annotations do not match the current one")
 	}
diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go
index ed6af9229..f67a89a71 100644
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@@ -352,15 +352,15 @@ func nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinity *v1.NodeAff
 			},
 		}
 	} else {
-		// if there are multiple node selector terms specified, append the node readiness label expressions (OR condition)
 		if len(nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) > 1 {
+			// if there are multiple node selector terms specified, append the node readiness label expressions (OR condition)
 			manifestTerms := nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
 			manifestTerms = append(manifestTerms, nodeReadinessSelectorTerm)
 			nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{
 				NodeSelectorTerms: manifestTerms,
 			}
-		// if there's just one term defined merge it with the readiness label term (AND condition)
 		} else {
+			// if there is just one term defined merge it with the readiness label term (AND condition)
 			manifestExpressions := nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions
 			manifestExpressions = append(manifestExpressions, matchExpressions...)
 			nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{
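
---

For reviewers who want to try the merge semantics outside the operator, below is a minimal, self-contained sketch of the behavior the `nodeAffinity()` hunk above implements, built only on the upstream `k8s.io/api/core/v1` types. The helper name `mergeNodeAffinity`, the `main` driver, and the sample labels are illustrative assumptions, not identifiers from the operator's code; like the real function, it works on a deep copy of the manifest affinity.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// mergeNodeAffinity mirrors the merge rules from the k8sres.go hunk above:
// one manifest term    -> readiness expressions are merged into it (AND),
// multiple terms       -> readiness expressions become an extra term (OR),
// no manifest affinity -> the readiness term is used on its own.
func mergeNodeAffinity(readinessLabels map[string]string, manifest *v1.NodeAffinity) *v1.NodeAffinity {
	expressions := make([]v1.NodeSelectorRequirement, 0, len(readinessLabels))
	for key, value := range readinessLabels {
		expressions = append(expressions, v1.NodeSelectorRequirement{
			Key:      key,
			Operator: v1.NodeSelectorOpIn,
			Values:   []string{value},
		})
	}
	readinessTerm := v1.NodeSelectorTerm{MatchExpressions: expressions}

	if manifest == nil || manifest.RequiredDuringSchedulingIgnoredDuringExecution == nil {
		return &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
				NodeSelectorTerms: []v1.NodeSelectorTerm{readinessTerm},
			},
		}
	}

	// work on a deep copy, as the operator does, so the manifest stays untouched
	merged := manifest.DeepCopy()
	terms := merged.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
	if len(terms) > 1 {
		// multiple selector terms: append the readiness labels as their own term (OR)
		terms = append(terms, readinessTerm)
	} else {
		// a single term: merge the readiness expressions into it (AND)
		terms[0].MatchExpressions = append(terms[0].MatchExpressions, expressions...)
	}
	merged.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{NodeSelectorTerms: terms}
	return merged
}

func main() {
	manifest := &v1.NodeAffinity{
		RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
			NodeSelectorTerms: []v1.NodeSelectorTerm{{
				MatchExpressions: []v1.NodeSelectorRequirement{{
					Key:      "environment",
					Operator: v1.NodeSelectorOpIn,
					Values:   []string{"pci"},
				}},
			}},
		},
	}
	merged := mergeNodeAffinity(map[string]string{"status1": "ready"}, manifest)
	terms := merged.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
	fmt.Printf("terms: %d, expressions in first term: %d\n",
		len(terms), len(terms[0].MatchExpressions))
}
```

Running it should print `terms: 1, expressions in first term: 2`, i.e. the AND case from the administrator docs; with a second `matchExpressions` term in the manifest, the readiness labels would instead land in their own third term (the OR case).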