From 596ad5375d6a7deeac573f39cbf99e1141c86182 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Wed, 12 Jan 2022 13:21:19 +0100 Subject: [PATCH] add config option to change affinity merge behavior --- .../crds/operatorconfigurations.yaml | 5 +++++ charts/postgres-operator/values.yaml | 3 +++ docs/administrator.md | 9 ++++++--- docs/reference/operator_parameters.md | 11 ++++++++--- e2e/tests/test_e2e.py | 1 + manifests/configmap.yaml | 3 ++- manifests/operatorconfiguration.crd.yaml | 5 +++++ ...tgresql-operator-default-configuration.yaml | 1 + pkg/apis/acid.zalan.do/v1/crds.go | 11 +++++++++++ .../v1/operator_configuration_type.go | 1 + pkg/cluster/k8sres.go | 18 +++++++----------- pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + 13 files changed, 52 insertions(+), 18 deletions(-) diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 043129516..e99dce3b9 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -230,6 +230,11 @@ spec: type: object additionalProperties: type: string + node_readiness_label_merge: + type: string + enum: + - "AND" + - "OR" oauth_token_secret_name: type: string default: "postgresql-operator" diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 65619845a..595009bac 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -130,6 +130,9 @@ configKubernetes: # node_readiness_label: # status: ready + # defines how nodeAffinity from manifest should be merged with node_readiness_label + # node_readiness_label_merge: "OR" + # namespaced name of the secret containing the OAuth2 token to pass to the teams API # oauth_token_secret_name: postgresql-operator diff --git a/docs/administrator.md b/docs/administrator.md index c15b324c7..879b677b9 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -370,7 +370,10 @@ configuration: The operator will create a `nodeAffinity` on the pods. This makes the `node_readiness_label` option the global configuration for defining node affinities for all Postgres clusters. You can have both, cluster-specific and -global affinity, defined and they will get merged on the pods (AND condition). +global affinity, defined and they will get merged on the pods. If +`node_readiness_label_merge` is configured to `"AND"` the node readiness +affinity will end up under the same `matchExpressions` section(s) from the +manifest affinity. ```yaml affinity: @@ -390,8 +393,8 @@ global affinity, defined and they will get merged on the pods (AND condition). ... ``` -If multiple `matchExpressions` are defined in the manifest (OR condition) the -readiness label configuration will be appended with its own expressions block: +If `node_readiness_label_merge` is set to `"OR"` (default) the readiness label +affinty will be appended with its own expressions block: ```yaml affinity: diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 5bcbdef97..e6a0f19b8 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -342,9 +342,14 @@ configuration they are grouped under the `kubernetes` key. a set of labels that a running and active node should possess to be considered `ready`. When the set is not empty, the operator assigns the `nodeAffinity` clause to the Postgres pods to be scheduled only on `ready` - nodes. If a `nodeAffinity` is also specified in the postgres cluster - manifest both affinities will get merged on the pods. See [user docs](../user.md#use-taints-tolerations-and-node-affinity-for-dedicated-postgresql-nodes) - for more details. The default is empty. + nodes. The default is empty. + +* **node_readiness_label_merge** + If a `nodeAffinity` is also specified in the postgres cluster manifest + it will get merged with the `node_readiness_label` affinity on the pods. + The merge strategy can be configured - it can either be "AND" or "OR". + See [user docs](../user.md#use-taints-tolerations-and-node-affinity-for-dedicated-postgresql-nodes) + for more details. Default is "OR". * **toleration** a dictionary that should contain `key`, `operator`, `value` and diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 88cad63a0..3047de259 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -1010,6 +1010,7 @@ class EndToEndTestCase(unittest.TestCase): patch_readiness_label_config = { "data": { "node_readiness_label": readiness_label + ':' + readiness_value, + "node_readiness_label_merge": "AND", } } k8s.update_config(patch_readiness_label_config, "setting readiness label") diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 7d3e14ce3..332c184ed 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -85,7 +85,8 @@ data: # min_cpu_limit: 250m # min_memory_limit: 250Mi # minimal_major_version: "9.6" - # node_readiness_label: "" + # node_readiness_label: "status:ready" + # node_readiness_label_merge: "OR" # oauth_token_secret_name: postgresql-operator # pam_configuration: | # https://info.example.com/oauth2/tokeninfo?access_token= uid realm=/employees diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index bb64995ab..84a8024e9 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -225,6 +225,11 @@ spec: type: object additionalProperties: type: string + node_readiness_label_merge: + type: string + enum: + - "AND" + - "OR" oauth_token_secret_name: type: string default: "postgresql-operator" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 02d558543..a82226394 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -69,6 +69,7 @@ configuration: master_pod_move_timeout: 20m # node_readiness_label: # status: ready + # node_readiness_label_merge: "OR" oauth_token_secret_name: postgresql-operator pdb_name_format: "postgres-{cluster}-pdb" pod_antiaffinity_topology_key: "kubernetes.io/hostname" diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index fae5a09f2..61efdcd0f 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -1164,6 +1164,17 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "node_readiness_label_merge": { + Type: "string", + Enum: []apiextv1.JSON{ + { + Raw: []byte(`"AND"`), + }, + { + Raw: []byte(`"OR"`), + }, + }, + }, "oauth_token_secret_name": { Type: "string", }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index f8eb5b5d1..a11cba0a5 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -82,6 +82,7 @@ type KubernetesMetaConfiguration struct { DeleteAnnotationDateKey string `json:"delete_annotation_date_key,omitempty"` DeleteAnnotationNameKey string `json:"delete_annotation_name_key,omitempty"` NodeReadinessLabel map[string]string `json:"node_readiness_label,omitempty"` + NodeReadinessLabelMerge string `json:"node_readiness_label_merge,omitempty"` CustomPodAnnotations map[string]string `json:"custom_pod_annotations,omitempty"` // TODO: use a proper toleration structure? PodToleration map[string]string `json:"toleration,omitempty"` diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index f67a89a71..0749dc691 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -327,7 +327,7 @@ func generateCapabilities(capabilities []string) *v1.Capabilities { return nil } -func nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinity *v1.NodeAffinity) *v1.Affinity { +func (c *Cluster) nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinity *v1.NodeAffinity) *v1.Affinity { if len(nodeReadinessLabel) == 0 && nodeAffinity == nil { return nil } @@ -352,21 +352,17 @@ func nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinity *v1.NodeAff }, } } else { - if len(nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) > 1 { - // if there are multiple node selector terms specified, append the node readiness label expressions (OR condition) + if c.OpConfig.NodeReadinessLabelMerge == "OR" { manifestTerms := nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms manifestTerms = append(manifestTerms, nodeReadinessSelectorTerm) nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{ NodeSelectorTerms: manifestTerms, } - } else { - // if there is just one term defined merge it with the readiness label term (AND condition) - manifestExpressions := nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions - manifestExpressions = append(manifestExpressions, matchExpressions...) - nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{ - NodeSelectorTerms: []v1.NodeSelectorTerm{ - v1.NodeSelectorTerm{MatchExpressions: manifestExpressions}, - }, + } else if c.OpConfig.NodeReadinessLabelMerge == "AND" { + for i, nodeSelectorTerm := range nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms { + manifestExpressions := nodeSelectorTerm.MatchExpressions + manifestExpressions = append(manifestExpressions, matchExpressions...) + nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[i] = v1.NodeSelectorTerm{MatchExpressions: manifestExpressions} } } } diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 275898d8e..f94f3b96c 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -109,6 +109,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.DeleteAnnotationDateKey = fromCRD.Kubernetes.DeleteAnnotationDateKey result.DeleteAnnotationNameKey = fromCRD.Kubernetes.DeleteAnnotationNameKey result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel + result.NodeReadinessLabelMerge = fromCRD.Kubernetes.NodeReadinessLabelMerge result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName result.PodManagementPolicy = util.Coalesce(fromCRD.Kubernetes.PodManagementPolicy, "ordered_ready") result.MasterPodMoveTimeout = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.MasterPodMoveTimeout), "10m") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 71bf406e4..e676a3c1e 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -54,6 +54,7 @@ type Resources struct { PodEnvironmentConfigMap spec.NamespacedName `name:"pod_environment_configmap"` PodEnvironmentSecret string `name:"pod_environment_secret"` NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` + NodeReadinessLabelMerge string `name:"node_readiness_label_merge" default:"OR"` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` ShmVolume *bool `name:"enable_shm_volume" default:"true"`