From 101d42f7d88a442990e5d29bde8755cc4bf1a28d Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sun, 4 Feb 2024 20:32:44 +0700 Subject: [PATCH 01/15] Add topologySpreadConstraints configuration to pod spec. --- e2e/tests/test_e2e.py | 73 ++++++++++++++++---- manifests/postgresql.crd.yaml | 6 ++ pkg/apis/acid.zalan.do/v1/postgresql_type.go | 23 +++--- pkg/cluster/cluster.go | 5 ++ pkg/cluster/k8sres.go | 14 ++++ pkg/cluster/k8sres_test.go | 43 ++++++++++++ 6 files changed, 141 insertions(+), 23 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 70145f3e4..316eaefa3 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -560,7 +560,7 @@ class EndToEndTestCase(unittest.TestCase): pg_patch_config["spec"]["patroni"]["slots"][slot_to_change]["database"] = "bar" del pg_patch_config["spec"]["patroni"]["slots"][slot_to_remove] - + k8s.api.custom_objects_api.patch_namespaced_custom_object( "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_delete_slot_patch) @@ -577,7 +577,7 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", get_slot_query%("database", slot_to_change))[0], "bar", "The replication slot cannot be updated", 10, 5) - + # make sure slot from Patroni didn't get deleted self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", get_slot_query%("slot_name", patroni_slot))), 1, "The replication slot from Patroni gets deleted", 10, 5) @@ -933,7 +933,7 @@ class EndToEndTestCase(unittest.TestCase): }, } } - + old_sts_creation_timestamp = sts.metadata.creation_timestamp k8s.api.apps_v1.patch_namespaced_stateful_set(sts.metadata.name, sts.metadata.namespace, annotation_patch) old_svc_creation_timestamp = svc.metadata.creation_timestamp @@ -1371,7 +1371,7 @@ class EndToEndTestCase(unittest.TestCase): } k8s.update_config(patch_scaled_policy_retain) self.eventuallyEqual(lambda: k8s.get_operator_state(), 
{"0": "idle"}, "Operator does not get in sync") - + # decrease the number of instances k8s.api.custom_objects_api.patch_namespaced_custom_object( 'acid.zalan.do', 'v1', 'default', 'postgresqls', 'acid-minimal-cluster', pg_patch_scale_down_instances) @@ -1648,7 +1648,6 @@ class EndToEndTestCase(unittest.TestCase): # toggle pod anti affinity to move replica away from master node self.assert_distributed_pods(master_nodes) - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_overwrite_pooler_deployment(self): pooler_name = 'acid-minimal-cluster-pooler' @@ -1801,7 +1800,7 @@ class EndToEndTestCase(unittest.TestCase): }, } k8s.api.core_v1.patch_namespaced_secret( - name="foo-user.acid-minimal-cluster.credentials.postgresql.acid.zalan.do", + name="foo-user.acid-minimal-cluster.credentials.postgresql.acid.zalan.do", namespace="default", body=secret_fake_rotation) @@ -1818,7 +1817,7 @@ class EndToEndTestCase(unittest.TestCase): "enable_password_rotation": "true", "inherited_annotations": "environment", "password_rotation_interval": "30", - "password_rotation_user_retention": "30", # should be set to 60 + "password_rotation_user_retention": "30", # should be set to 60 }, } k8s.update_config(enable_password_rotation) @@ -1887,7 +1886,7 @@ class EndToEndTestCase(unittest.TestCase): self.assertTrue("environment" in db_user_secret.metadata.annotations, "Added annotation was not propagated to secret") # disable password rotation for all other users (foo_user) - # and pick smaller intervals to see if the third fake rotation user is dropped + # and pick smaller intervals to see if the third fake rotation user is dropped enable_password_rotation = { "data": { "enable_password_rotation": "false", @@ -2387,6 +2386,56 @@ class EndToEndTestCase(unittest.TestCase): # toggle pod anti affinity to move replica away from master node self.assert_distributed_pods(master_nodes) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_topology_spread_constraints(self): + ''' + Enable 
topologySpreadConstraints for pods + ''' + k8s = self.k8s + cluster_labels = "application=spilo,cluster-name=acid-minimal-cluster" + + # Verify we are in good state from potential previous tests + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") + + master_nodes, replica_nodes = k8s.get_cluster_nodes() + self.assertNotEqual(master_nodes, []) + self.assertNotEqual(replica_nodes, []) + + # Patch label to nodes for topologySpreadConstraints + patch_node_label = { + "metadata": { + "labels": { + "topology.kubernetes.io/zone": "zalando" + } + } + } + k8s.api.core_v1.patch_node(master_nodes[0], patch_node_label) + k8s.api.core_v1.patch_node(replica_nodes[0], patch_node_label) + + # Scale-out postgresql pods + k8s.api.custom_objects_api.patch_namespaced_custom_object("acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", + {"spec": {"numberOfInstances": 6}}) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_pods_with_label(cluster_labels), 6, "Postgresql StatefulSet are scale to 6") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 6, "All pods are running") + + worker_node_1 = 0 + worker_node_2 = 0 + pods = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_labels) + for pod in pods.items: + if pod.spec.node_name == 'postgres-operator-e2e-tests-worker': + worker_node_1 += 1 + elif pod.spec.node_name == 'postgres-operator-e2e-tests-worker2': + worker_node_2 += 1 + + self.assertEqual(worker_node_1, worker_node_2) + self.assertEqual(worker_node_1, 3) + self.assertEqual(worker_node_2, 3) + + # Scale-it postgresql pods to previous replicas + k8s.api.custom_objects_api.patch_namespaced_custom_object("acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", + {"spec": {"numberOfInstances": 2}}) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_zz_cluster_deletion(self): ''' @@ -2462,7 
+2511,7 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.count_deployments_with_label(cluster_label), 0, "Deployments not deleted") self.eventuallyEqual(lambda: k8s.count_pdbs_with_label(cluster_label), 0, "Pod disruption budget not deleted") self.eventuallyEqual(lambda: k8s.count_secrets_with_label(cluster_label), 8, "Secrets were deleted although disabled in config") - self.eventuallyEqual(lambda: k8s.count_pvcs_with_label(cluster_label), 3, "PVCs were deleted although disabled in config") + self.eventuallyEqual(lambda: k8s.count_pvcs_with_label(cluster_label), 6, "PVCs were deleted although disabled in config") except timeout_decorator.TimeoutError: print('Operator log: {}'.format(k8s.get_operator_log())) @@ -2504,7 +2553,7 @@ class EndToEndTestCase(unittest.TestCase): # if nodes are different we can quit here if master_nodes[0] not in replica_nodes: - return True + return True # enable pod anti affintiy in config map which should trigger movement of replica patch_enable_antiaffinity = { @@ -2528,7 +2577,7 @@ class EndToEndTestCase(unittest.TestCase): } k8s.update_config(patch_disable_antiaffinity, "disable antiaffinity") self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_labels) k8s.wait_for_running_pods(cluster_labels, 2) @@ -2539,7 +2588,7 @@ class EndToEndTestCase(unittest.TestCase): # if nodes are different we can quit here for target_node in target_nodes: if (target_node not in master_nodes or target_node not in replica_nodes) and master_nodes[0] in replica_nodes: - print('Pods run on the same node') + print('Pods run on the same node') return False except timeout_decorator.TimeoutError: diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index 39811824e..bfc66b160 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -4056,6 +4056,12 @@ spec: type: string type: 
object type: array + topologySpreadConstraints: + type: array + nullable: true + items: + type: object + x-kubernetes-preserve-unknown-fields: true useLoadBalancer: description: |- deprecated load balancer settings maintained for backward compatibility diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index 1dadfd06c..40aa0fd18 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -92,17 +92,18 @@ type PostgresSpec struct { Clone *CloneDescription `json:"clone,omitempty"` // Note: usernames specified here as database owners must be declared // in the users key of the spec key. - Databases map[string]string `json:"databases,omitempty"` - PreparedDatabases map[string]PreparedDatabase `json:"preparedDatabases,omitempty"` - SchedulerName *string `json:"schedulerName,omitempty"` - NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"` - Tolerations []v1.Toleration `json:"tolerations,omitempty"` - Sidecars []Sidecar `json:"sidecars,omitempty"` - InitContainers []v1.Container `json:"initContainers,omitempty"` - PodPriorityClassName string `json:"podPriorityClassName,omitempty"` - ShmVolume *bool `json:"enableShmVolume,omitempty"` - EnableLogicalBackup bool `json:"enableLogicalBackup,omitempty"` - LogicalBackupRetention string `json:"logicalBackupRetention,omitempty"` + Databases map[string]string `json:"databases,omitempty"` + PreparedDatabases map[string]PreparedDatabase `json:"preparedDatabases,omitempty"` + SchedulerName *string `json:"schedulerName,omitempty"` + NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"` + TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"` + Tolerations []v1.Toleration `json:"tolerations,omitempty"` + Sidecars []Sidecar `json:"sidecars,omitempty"` + InitContainers []v1.Container `json:"initContainers,omitempty"` + PodPriorityClassName string 
`json:"podPriorityClassName,omitempty"` + ShmVolume *bool `json:"enableShmVolume,omitempty"` + EnableLogicalBackup bool `json:"enableLogicalBackup,omitempty"` + LogicalBackupRetention string `json:"logicalBackupRetention,omitempty"` // +kubebuilder:validation:Pattern=`^(\d+|\*)(/\d+)?(\s+(\d+|\*)(/\d+)?){4}$` LogicalBackupSchedule string `json:"logicalBackupSchedule,omitempty"` StandbyCluster *StandbyDescription `json:"standby,omitempty"` diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index f629d1528..0fb3771ff 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -504,6 +504,11 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa needsRollUpdate = true reasons = append(reasons, "new statefulset's pod affinity does not match the current one") } + if !reflect.DeepEqual(c.Statefulset.Spec.Template.Spec.TopologySpreadConstraints, statefulSet.Spec.Template.Spec.TopologySpreadConstraints) { + needsReplace = true + needsRollUpdate = true + reasons = append(reasons, "new statefulset's pod topologySpreadConstraints does not match the current one") + } if len(c.Statefulset.Spec.Template.Spec.Tolerations) != len(statefulSet.Spec.Template.Spec.Tolerations) { needsReplace = true needsRollUpdate = true diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 724986dbc..9e7043308 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -612,6 +612,13 @@ func generatePodAntiAffinity(podAffinityTerm v1.PodAffinityTerm, preferredDuring return podAntiAffinity } +func generateTopologySpreadConstraints(labels labels.Set, topologySpreadConstraints []v1.TopologySpreadConstraint) []v1.TopologySpreadConstraint { + for _, topologySpreadConstraint := range topologySpreadConstraints { + topologySpreadConstraint.LabelSelector = &metav1.LabelSelector{MatchLabels: labels} + } + return topologySpreadConstraints +} + func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]string) []v1.Toleration { // 
allow to override tolerations by postgresql manifest if len(*tolerationsSpec) > 0 { @@ -817,6 +824,7 @@ func (c *Cluster) generatePodTemplate( initContainers []v1.Container, sidecarContainers []v1.Container, sharePgSocketWithSidecars *bool, + topologySpreadConstraintsSpec []v1.TopologySpreadConstraint, tolerationsSpec *[]v1.Toleration, spiloRunAsUser *int64, spiloRunAsGroup *int64, @@ -886,6 +894,10 @@ func (c *Cluster) generatePodTemplate( podSpec.PriorityClassName = priorityClassName } + if len(topologySpreadConstraintsSpec) > 0 { + podSpec.TopologySpreadConstraints = generateTopologySpreadConstraints(labels, topologySpreadConstraintsSpec) + } + if sharePgSocketWithSidecars != nil && *sharePgSocketWithSidecars { addVarRunVolume(&podSpec) } @@ -1484,6 +1496,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef initContainers, sidecarContainers, c.OpConfig.SharePgSocketWithSidecars, + spec.TopologySpreadConstraints, &tolerationSpec, effectiveRunAsUser, effectiveRunAsGroup, @@ -2378,6 +2391,7 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) { []v1.Container{}, []v1.Container{}, util.False(), + []v1.TopologySpreadConstraint{}, &tolerationsSpec, nil, nil, diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 04f6476a6..9220329fe 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -4271,3 +4271,46 @@ func TestGenerateCapabilities(t *testing.T) { } } } + +func TestTopologySpreadConstraints(t *testing.T) { + clusterName := "acid-test-cluster" + namespace := "default" + + pg := acidv1.Postgresql{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: namespace, + }, + Spec: acidv1.PostgresSpec{ + NumberOfInstances: 1, + Resources: &acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{CPU: k8sutil.StringToPointer("1"), Memory: k8sutil.StringToPointer("10")}, + ResourceLimits: acidv1.ResourceDescription{CPU: k8sutil.StringToPointer("1"), Memory: 
k8sutil.StringToPointer("10")}, + }, + Volume: acidv1.Volume{ + Size: "1G", + }, + }, + } + + cluster := New( + Config{ + OpConfig: config.Config{ + PodManagementPolicy: "ordered_ready", + }, + }, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder) + cluster.Name = clusterName + cluster.Namespace = namespace + cluster.labelsSet(true) + + s, err := cluster.generateStatefulSet(&pg.Spec) + assert.NoError(t, err) + assert.Contains(t, s.Spec.Template.Spec.TopologySpreadConstraints, v1.TopologySpreadConstraint{ + MaxSkew: int32(1), + TopologyKey: "topology.kubernetes.io/zone", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: cluster.labelsSet(true), + }, + }) +} From 30d880225ea6d4061b8634c573465d7a497af5ad Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Mon, 18 Nov 2024 14:16:48 +0700 Subject: [PATCH 02/15] Modify the UT. --- pkg/cluster/k8sres_test.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 9220329fe..3ebbafb67 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -4290,6 +4290,16 @@ func TestTopologySpreadConstraints(t *testing.T) { Volume: acidv1.Volume{ Size: "1G", }, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "topology.kubernetes.io/zone", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: cluster.labelsSet(true), + }, + }, + }, }, } From 72f643f2439958270f5131faa75ec1611576ac9d Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Mon, 18 Nov 2024 14:23:36 +0700 Subject: [PATCH 03/15] Correct the assert with the list of constraints. 
--- pkg/cluster/k8sres_test.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 3ebbafb67..4ffd9cb5b 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -4315,12 +4315,14 @@ func TestTopologySpreadConstraints(t *testing.T) { s, err := cluster.generateStatefulSet(&pg.Spec) assert.NoError(t, err) - assert.Contains(t, s.Spec.Template.Spec.TopologySpreadConstraints, v1.TopologySpreadConstraint{ - MaxSkew: int32(1), - TopologyKey: "topology.kubernetes.io/zone", - WhenUnsatisfiable: v1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: cluster.labelsSet(true), + assert.Contains(t, s.Spec.Template.Spec.TopologySpreadConstraints, []v1.TopologySpreadConstraint{ + { + MaxSkew: int32(1), + TopologyKey: "topology.kubernetes.io/zone", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: cluster.labelsSet(true), + }, }, }) } From 0412edf164abd815f90a1ec5b26ab0824dd4490f Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Mon, 18 Nov 2024 15:24:49 +0700 Subject: [PATCH 04/15] Fix UT --- pkg/cluster/k8sres_test.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 4ffd9cb5b..919e69027 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -4275,6 +4275,9 @@ func TestGenerateCapabilities(t *testing.T) { func TestTopologySpreadConstraints(t *testing.T) { clusterName := "acid-test-cluster" namespace := "default" + labelSelector := &metav1.LabelSelector{ + MatchLabels: cluster.labelsSet(true), + } pg := acidv1.Postgresql{ ObjectMeta: metav1.ObjectMeta{ @@ -4295,9 +4298,7 @@ func TestTopologySpreadConstraints(t *testing.T) { MaxSkew: 1, TopologyKey: "topology.kubernetes.io/zone", WhenUnsatisfiable: v1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: 
cluster.labelsSet(true), - }, + LabelSelector: labelSelector, }, }, }, @@ -4315,14 +4316,11 @@ func TestTopologySpreadConstraints(t *testing.T) { s, err := cluster.generateStatefulSet(&pg.Spec) assert.NoError(t, err) - assert.Contains(t, s.Spec.Template.Spec.TopologySpreadConstraints, []v1.TopologySpreadConstraint{ - { - MaxSkew: int32(1), - TopologyKey: "topology.kubernetes.io/zone", - WhenUnsatisfiable: v1.DoNotSchedule, - LabelSelector: &metav1.LabelSelector{ - MatchLabels: cluster.labelsSet(true), - }, - }, - }) + assert.Contains(t, s.Spec.Template.Spec.TopologySpreadConstraints, v1.TopologySpreadConstraint{ + MaxSkew: int32(1), + TopologyKey: "topology.kubernetes.io/zone", + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: labelSelector, + }, + ) } From 6c669fe038147f577f4e8684b4a526ea2b208b8a Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sat, 8 Mar 2025 13:03:15 +0700 Subject: [PATCH 05/15] Run update-codegen.sh to add deepcopy for new field to the api. --- pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 159a87f35..5044bb1f7 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -796,6 +796,13 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { *out = new(corev1.NodeAffinity) (*in).DeepCopyInto(*out) } + if in.TopologySpreadConstraints != nil { + in, out := &in.TopologySpreadConstraints, &out.TopologySpreadConstraints + *out = make([]corev1.TopologySpreadConstraint, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Tolerations != nil { in, out := &in.Tolerations, &out.Tolerations *out = make([]corev1.Toleration, len(*in)) From 612f7695d4c3dd5fdbd2bdd718c921b17db3055c Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sat, 8 Mar 2025 13:10:22 +0700 Subject: [PATCH 06/15] 
Reuse configured TopologySpreadConstraints for logical backup. --- pkg/cluster/k8sres.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 9e7043308..482efa41c 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -2382,6 +2382,8 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) { tolerationsSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration) + topologySpreadConstraintsSpec := generateTopologySpreadConstraints(labels, spec.TopologySpreadConstraints) + // re-use the method that generates DB pod templates if podTemplate, err = c.generatePodTemplate( c.Namespace, @@ -2391,7 +2393,7 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) { []v1.Container{}, []v1.Container{}, util.False(), - []v1.TopologySpreadConstraint{}, + topologySpreadConstraintsSpec, &tolerationsSpec, nil, nil, From 37bfa71808c58720cd4cdca28e98c9a307514448 Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sat, 8 Mar 2025 13:27:29 +0700 Subject: [PATCH 07/15] Remove x-kubernetes-preserve-unknown-fields and XPreserveUnknownFields. 
--- manifests/postgresql.crd.yaml | 12 +++++++++++- pkg/apis/acid.zalan.do/v1/crds.go | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index bfc66b160..8cf360340 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -4061,7 +4061,17 @@ spec: nullable: true items: type: object - x-kubernetes-preserve-unknown-fields: true + properties: + maxSkew: + type: integer + format: int32 + topologyKey: + type: string + whenUnsatisfiable: + type: string + enum: + - DoNotSchedule + - ScheduleAnyway useLoadBalancer: description: |- deprecated load balancer settings maintained for backward compatibility diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 46739e46d..ef7efee56 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -56,6 +56,7 @@ var OperatorConfigCRDResourceColumns = []apiextv1.CustomResourceColumnDefinition } var min1 = 1.0 +var minLength1 int64 = 1 var minDisable = -1.0 // OperatorConfigCRDResourceValidation to check applied manifest parameters From 0518b6d930fa5095b5568b9a1656cab04dc41fb8 Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sat, 18 Oct 2025 22:36:36 +0700 Subject: [PATCH 08/15] Remove unnecessary len check. 
--- pkg/cluster/k8sres.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 482efa41c..cc4873ce5 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -894,9 +894,7 @@ func (c *Cluster) generatePodTemplate( podSpec.PriorityClassName = priorityClassName } - if len(topologySpreadConstraintsSpec) > 0 { - podSpec.TopologySpreadConstraints = generateTopologySpreadConstraints(labels, topologySpreadConstraintsSpec) - } + podSpec.TopologySpreadConstraints = generateTopologySpreadConstraints(labels, topologySpreadConstraintsSpec) if sharePgSocketWithSidecars != nil && *sharePgSocketWithSidecars { addVarRunVolume(&podSpec) From 25001c76b64a76163bdba484c3f8ab538d11b600 Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sat, 18 Oct 2025 22:56:38 +0700 Subject: [PATCH 09/15] Add topologySpreadConstraint example in the complete manifest. --- manifests/complete-postgres-manifest.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 7b347a9c8..93797e0e1 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -232,6 +232,12 @@ spec: # values: # - enabled +# Add topology spread constraint to distribute PostgreSQL pods across all nodes labeled with "topology.kubernetes.io/zone". +# topologySpreadConstraints: +# - maxSkew: 1 +# topologyKey: topology.kubernetes.io/zone +# whenUnsatisfiable: DoNotSchedule + # Enables change data capture streams for defined database tables # streams: # - applicationId: test-app From a7d49212f8731ae2205f40618c3075cd901b9aa8 Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sun, 19 Oct 2025 13:08:09 +0700 Subject: [PATCH 10/15] Add support for helm chart. 
--- charts/postgres-operator/crds/postgresqls.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index c801346e4..ec12bda77 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -589,6 +589,23 @@ spec: - PreferNoSchedule tolerationSeconds: type: integer + topologySpreadConstraints: + type: array + nullable: true + items: + type: object + properties: + maxSkew: + type: integer + format: int32 + minimum: 1 + topologyKey: + type: string + whenUnsatisfiable: + type: string + enum: + - DoNotSchedule + - ScheduleAnyway useLoadBalancer: type: boolean description: deprecated From c08ce4df178bf911f01bf9799d77ad177ec6b74b Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sun, 19 Oct 2025 13:29:26 +0700 Subject: [PATCH 11/15] Add documentation for topologySpreadConstraint. --- docs/user.md | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/docs/user.md b/docs/user.md index 236b439a8..cf5089a89 100644 --- a/docs/user.md +++ b/docs/user.md @@ -714,7 +714,7 @@ but Kubernetes will not spin up the pod if the requested HugePages cannot be all For more information on HugePages in Kubernetes, see also [https://kubernetes.io/docs/tasks/manage-hugepages/scheduling-hugepages/](https://kubernetes.io/docs/tasks/manage-hugepages/scheduling-hugepages/) -## Use taints, tolerations and node affinity for dedicated PostgreSQL nodes +## Use taints, tolerations, node affinity and topology spread constraint for dedicated PostgreSQL nodes To ensure Postgres pods are running on nodes without any other application pods, you can use [taints and tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) @@ -755,6 +755,23 @@ spec: If you need to define a `nodeAffinity` for all your Postgres clusters use the `node_readiness_label`
[configuration](administrator.md#node-readiness-labels). +If you need PostgreSQL Pods to run on separate nodes, you can use the +[topologySpreadConstraints](https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/) to control how they are distributed across your cluster. +This ensures they are spread among failure domains such as +regions, zones, nodes, or other user-defined topology domains. + +```yaml +apiVersion: "acid.zalan.do/v1" +kind: postgresql +metadata: + name: acid-minimal-cluster +spec: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: DoNotSchedule +``` + ## In-place major version upgrade Starting with Spilo 14, operator supports in-place major version upgrade to a @@ -1064,7 +1081,7 @@ spec: - all volumeSource: emptyDir: {} - sidecars: + sidecars: - name: "container-name" image: "company/image:tag" volumeMounts: From d09bd09b97f96d42fddf515c4667099b59b73ca8 Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sun, 19 Oct 2025 14:24:18 +0700 Subject: [PATCH 12/15] Update e2e test to patch topologySpreadConstraints into the postgresqls manifest. --- e2e/tests/test_e2e.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 316eaefa3..2f76ffa74 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -2412,9 +2412,23 @@ class EndToEndTestCase(unittest.TestCase): k8s.api.core_v1.patch_node(master_nodes[0], patch_node_label) k8s.api.core_v1.patch_node(replica_nodes[0], patch_node_label) - # Scale-out postgresql pods - k8s.api.custom_objects_api.patch_namespaced_custom_object("acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", - {"spec": {"numberOfInstances": 6}}) + # Patch topologySpreadConstraint and scale-out postgresql pods to postgresqls manifest. 
+ patch_topologySpreadConstraint_config = { + "spec": { + "numberOfInstances": 6, + "topologySpreadConstraints": [ + { + "maxSkew": 1, + "topologyKey": "topology.kubernetes.io/zone", + "whenUnsatisfiable": "DoNotSchedule" + } + ] + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", + "postgresqls", "acid-minimal-cluster", + patch_topologySpreadConstraint_config) self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") self.eventuallyEqual(lambda: k8s.count_pods_with_label(cluster_labels), 6, "Postgresql StatefulSet are scale to 6") self.eventuallyEqual(lambda: k8s.count_running_pods(), 6, "All pods are running") @@ -2432,9 +2446,17 @@ class EndToEndTestCase(unittest.TestCase): self.assertEqual(worker_node_1, 3) self.assertEqual(worker_node_2, 3) - # Scale-it postgresql pods to previous replicas - k8s.api.custom_objects_api.patch_namespaced_custom_object("acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", - {"spec": {"numberOfInstances": 2}}) + # Reset configurations + patch_topologySpreadConstraint_config = { + "spec": { + "numberOfInstances": 2, + "topologySpreadConstraints": [] + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", + "postgresqls", "acid-minimal-cluster", + patch_topologySpreadConstraint_config) @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_zz_cluster_deletion(self): From 69304648752f430df5fe9f2559b3c147dd8e86bf Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Mon, 26 Jan 2026 21:48:08 +0700 Subject: [PATCH 13/15] Revert unnecessary change. 
--- e2e/tests/test_e2e.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 2f76ffa74..dfb8e54c0 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -2533,7 +2533,7 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.count_deployments_with_label(cluster_label), 0, "Deployments not deleted") self.eventuallyEqual(lambda: k8s.count_pdbs_with_label(cluster_label), 0, "Pod disruption budget not deleted") self.eventuallyEqual(lambda: k8s.count_secrets_with_label(cluster_label), 8, "Secrets were deleted although disabled in config") - self.eventuallyEqual(lambda: k8s.count_pvcs_with_label(cluster_label), 6, "PVCs were deleted although disabled in config") + self.eventuallyEqual(lambda: k8s.count_pvcs_with_label(cluster_label), 3, "PVCs were deleted although disabled in config") except timeout_decorator.TimeoutError: print('Operator log: {}'.format(k8s.get_operator_log())) From c162b5907aad61a9bce1a2d932fdd632054da8c9 Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Fri, 30 Jan 2026 21:48:53 +0700 Subject: [PATCH 14/15] Updated the PVC retention policy to remove redundant PVCs. 
--- e2e/tests/test_e2e.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index dfb8e54c0..36cc78fd3 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -2397,6 +2397,15 @@ class EndToEndTestCase(unittest.TestCase): # Verify we are in good state from potential previous tests self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") + # patch the pvc retention policy to enable delete when scale down + patch_scaled_policy_delete = { + "data": { + "persistent_volume_claim_retention_policy": "when_deleted:retain,when_scaled:delete" + } + } + k8s.update_config(patch_scaled_policy_delete) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + master_nodes, replica_nodes = k8s.get_cluster_nodes() self.assertNotEqual(master_nodes, []) self.assertNotEqual(replica_nodes, []) @@ -2457,6 +2466,9 @@ class EndToEndTestCase(unittest.TestCase): "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", patch_topologySpreadConstraint_config) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_pods_with_label(cluster_labels), 2, "Postgresql StatefulSet are scale to 2") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "All pods are running") @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_zz_cluster_deletion(self): From 4ea42a0c6eb9a258d8c614d7f31b493607670c33 Mon Sep 17 00:00:00 2001 From: Trung Minh Lai Date: Sun, 1 Feb 2026 09:16:06 +0700 Subject: [PATCH 15/15] Fix expected PVC count in end-to-end test after config changes. 
--- e2e/tests/test_e2e.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 36cc78fd3..53348a9b1 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -2545,7 +2545,7 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.count_deployments_with_label(cluster_label), 0, "Deployments not deleted") self.eventuallyEqual(lambda: k8s.count_pdbs_with_label(cluster_label), 0, "Pod disruption budget not deleted") self.eventuallyEqual(lambda: k8s.count_secrets_with_label(cluster_label), 8, "Secrets were deleted although disabled in config") - self.eventuallyEqual(lambda: k8s.count_pvcs_with_label(cluster_label), 3, "PVCs were deleted although disabled in config") + self.eventuallyEqual(lambda: k8s.count_pvcs_with_label(cluster_label), 2, "PVCs were deleted although disabled in config") except timeout_decorator.TimeoutError: print('Operator log: {}'.format(k8s.get_operator_log()))