add config option to change affinity merge behavior

Felix Kunde 2022-01-12 13:21:19 +01:00
parent 92db09182e
commit 596ad5375d
13 changed files with 52 additions and 18 deletions

View File

@@ -230,6 +230,11 @@ spec:
type: object
additionalProperties:
type: string
node_readiness_label_merge:
type: string
enum:
- "AND"
- "OR"
oauth_token_secret_name:
type: string
default: "postgresql-operator"

View File

@@ -130,6 +130,9 @@ configKubernetes:
# node_readiness_label:
# status: ready
# defines how nodeAffinity from the manifest should be merged with node_readiness_label
# node_readiness_label_merge: "OR"
# namespaced name of the secret containing the OAuth2 token to pass to the teams API
# oauth_token_secret_name: postgresql-operator

View File

@@ -370,7 +370,10 @@ configuration:
The operator will create a `nodeAffinity` on the pods. This makes the
`node_readiness_label` option the global configuration for defining node
affinities for all Postgres clusters. You can have both, cluster-specific and
global affinity, defined and they will get merged on the pods. If
`node_readiness_label_merge` is configured to `"AND"` the node readiness
affinity will end up under the same `matchExpressions` section(s) as the
manifest affinity.
```yaml
affinity:
@@ -390,8 +393,8 @@ global affinity, defined and they will get merged on the pods (AND condition).
...
```
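For illustration, a minimal sketch of the merged result under `"AND"`,
assuming the `status: ready` readiness label used elsewhere in this commit
and a hypothetical `environment in (pci)` expression from the cluster
manifest; both sets of expressions end up in one `matchExpressions` list,
so a node must satisfy all of them:
```yaml
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: environment
              operator: In
              values:
                - pci
            - key: status
              operator: In
              values:
                - ready
```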
If `node_readiness_label_merge` is set to `"OR"` (the default) the readiness
label affinity will be appended as its own expressions block:
```yaml
affinity:
...
```
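Under the same assumptions, a sketch of the `"OR"` result; the readiness
label becomes its own `nodeSelectorTerms` entry, so a node matching either
term is eligible:
```yaml
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: environment
              operator: In
              values:
                - pci
        - matchExpressions:
            - key: status
              operator: In
              values:
                - ready
```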

View File

@@ -342,9 +342,14 @@ configuration they are grouped under the `kubernetes` key.
a set of labels that a running and active node should possess to be
considered `ready`. When the set is not empty, the operator assigns the
`nodeAffinity` clause to the Postgres pods to be scheduled only on `ready`
nodes. The default is empty.
* **node_readiness_label_merge**
If a `nodeAffinity` is also specified in the postgres cluster manifest
it will be merged with the `node_readiness_label` affinity on the pods.
The merge strategy can be either `"AND"` or `"OR"`; see the
[user docs](../user.md#use-taints-tolerations-and-node-affinity-for-dedicated-postgresql-nodes)
for more details. The default is `"OR"`. A minimal sketch follows below.
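As a sketch only, both options combined in the operator ConfigMap
(reusing the `status:ready` label shown in the manifest examples of this
commit) could look like:
```yaml
node_readiness_label: "status:ready"
node_readiness_label_merge: "AND"
```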
* **toleration**
a dictionary that should contain `key`, `operator`, `value` and

View File

@@ -1010,6 +1010,7 @@ class EndToEndTestCase(unittest.TestCase):
patch_readiness_label_config = {
"data": {
"node_readiness_label": readiness_label + ':' + readiness_value,
"node_readiness_label_merge": "AND",
}
}
k8s.update_config(patch_readiness_label_config, "setting readiness label")

View File

@@ -85,7 +85,8 @@ data:
# min_cpu_limit: 250m
# min_memory_limit: 250Mi
# minimal_major_version: "9.6"
# node_readiness_label: ""
# node_readiness_label: "status:ready"
# node_readiness_label_merge: "OR"
# oauth_token_secret_name: postgresql-operator
# pam_configuration: |
# https://info.example.com/oauth2/tokeninfo?access_token= uid realm=/employees

View File

@@ -225,6 +225,11 @@ spec:
type: object
additionalProperties:
type: string
node_readiness_label_merge:
type: string
enum:
- "AND"
- "OR"
oauth_token_secret_name:
type: string
default: "postgresql-operator"

View File

@@ -69,6 +69,7 @@ configuration:
master_pod_move_timeout: 20m
# node_readiness_label:
# status: ready
# node_readiness_label_merge: "OR"
oauth_token_secret_name: postgresql-operator
pdb_name_format: "postgres-{cluster}-pdb"
pod_antiaffinity_topology_key: "kubernetes.io/hostname"

View File

@@ -1164,6 +1164,17 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
},
},
},
"node_readiness_label_merge": {
Type: "string",
Enum: []apiextv1.JSON{
{
Raw: []byte(`"AND"`),
},
{
Raw: []byte(`"OR"`),
},
},
},
"oauth_token_secret_name": {
Type: "string",
},

View File

@@ -82,6 +82,7 @@ type KubernetesMetaConfiguration struct {
DeleteAnnotationDateKey string `json:"delete_annotation_date_key,omitempty"`
DeleteAnnotationNameKey string `json:"delete_annotation_name_key,omitempty"`
NodeReadinessLabel map[string]string `json:"node_readiness_label,omitempty"`
NodeReadinessLabelMerge string `json:"node_readiness_label_merge,omitempty"`
CustomPodAnnotations map[string]string `json:"custom_pod_annotations,omitempty"`
// TODO: use a proper toleration structure?
PodToleration map[string]string `json:"toleration,omitempty"`

View File

@@ -327,7 +327,7 @@ func generateCapabilities(capabilities []string) *v1.Capabilities {
return nil
}
func (c *Cluster) nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinity *v1.NodeAffinity) *v1.Affinity {
if len(nodeReadinessLabel) == 0 && nodeAffinity == nil {
return nil
}
@@ -352,21 +352,17 @@ func nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinity *v1.NodeAff
},
}
} else {
if c.OpConfig.NodeReadinessLabelMerge == "OR" {
// append the node readiness label expressions as their own node selector term (OR condition)
manifestTerms := nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
manifestTerms = append(manifestTerms, nodeReadinessSelectorTerm)
nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{
NodeSelectorTerms: manifestTerms,
}
} else if c.OpConfig.NodeReadinessLabelMerge == "AND" {
// merge the node readiness label expressions into every node selector term (AND condition)
for i, nodeSelectorTerm := range nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms {
manifestExpressions := nodeSelectorTerm.MatchExpressions
manifestExpressions = append(manifestExpressions, matchExpressions...)
nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[i] = v1.NodeSelectorTerm{MatchExpressions: manifestExpressions}
}
}
}
}

View File

@@ -109,6 +109,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.DeleteAnnotationDateKey = fromCRD.Kubernetes.DeleteAnnotationDateKey
result.DeleteAnnotationNameKey = fromCRD.Kubernetes.DeleteAnnotationNameKey
result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel
result.NodeReadinessLabelMerge = fromCRD.Kubernetes.NodeReadinessLabelMerge
result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName
result.PodManagementPolicy = util.Coalesce(fromCRD.Kubernetes.PodManagementPolicy, "ordered_ready")
result.MasterPodMoveTimeout = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.MasterPodMoveTimeout), "10m")

View File

@@ -54,6 +54,7 @@ type Resources struct {
PodEnvironmentConfigMap spec.NamespacedName `name:"pod_environment_configmap"`
PodEnvironmentSecret string `name:"pod_environment_secret"`
NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""`
NodeReadinessLabelMerge string `name:"node_readiness_label_merge" default:"OR"`
MaxInstances int32 `name:"max_instances" default:"-1"`
MinInstances int32 `name:"min_instances" default:"-1"`
ShmVolume *bool `name:"enable_shm_volume" default:"true"`