add preferred during scheduling pod anti affinity (#2048)

* add preferred during scheduling pod anti affinity

Co-authored-by: Felix Kunde <felix-kunde@gmx.de>

parent 93a253bde1
commit be7b52db92
@@ -281,6 +281,9 @@ spec:
         pod_antiaffinity_topology_key:
           type: string
           default: "kubernetes.io/hostname"
+        pod_antiaffinity_preferred_during_scheduling:
+          type: boolean
+          default: false
         pod_environment_configmap:
           type: string
         pod_environment_secret:
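The new schema entry maps to a boolean option of the OperatorConfiguration resource. A minimal sketch of setting it there (the metadata name is hypothetical and surrounding keys are omitted):

```yaml
apiVersion: "acid.zalan.do/v1"
kind: OperatorConfiguration
metadata:
  name: postgresql-operator-configuration  # hypothetical name
configuration:
  kubernetes:
    enable_pod_antiaffinity: true
    pod_antiaffinity_preferred_during_scheduling: true
```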
@@ -167,6 +167,8 @@ configKubernetes:
   pdb_name_format: "postgres-{cluster}-pdb"
   # override topology key for pod anti affinity
   pod_antiaffinity_topology_key: "kubernetes.io/hostname"
+  # switches pod anti affinity type to `preferredDuringSchedulingIgnoredDuringExecution`
+  # pod_antiaffinity_preferred_during_scheduling: true
   # namespaced name of the ConfigMap with environment variables to populate on every pod
   # pod_environment_configmap: "default/my-custom-config"
   # name of the Secret (in cluster namespace) with environment variables to populate on every pod
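For Helm users, uncommenting the new line together with enabling anti-affinity itself would look roughly like this (a sketch; all other chart values omitted):

```yaml
configKubernetes:
  enable_pod_antiaffinity: true
  pod_antiaffinity_topology_key: "kubernetes.io/hostname"
  pod_antiaffinity_preferred_during_scheduling: true
```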
@@ -516,6 +516,9 @@ configuration:
     enable_pod_antiaffinity: true
 ```
 
+By default the type of pod anti-affinity is `requiredDuringSchedulingIgnoredDuringExecution`;
+you can switch to `preferredDuringSchedulingIgnoredDuringExecution` by setting `pod_antiaffinity_preferred_during_scheduling: true`.
+
 By default the topology key for the pod anti-affinity is set to
 `kubernetes.io/hostname`; you can set another topology key, e.g.
 `failure-domain.beta.kubernetes.io/zone`. See [built-in node labels](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels) for available topology keys.
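For example, to spread pods across availability zones on a best-effort basis instead of as a hard scheduling constraint, the two options can presumably be combined like this (a sketch of the relevant fragment):

```yaml
configuration:
  kubernetes:
    enable_pod_antiaffinity: true
    pod_antiaffinity_topology_key: "failure-domain.beta.kubernetes.io/zone"
    pod_antiaffinity_preferred_during_scheduling: true
```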
@@ -1378,6 +1378,9 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
 				"pod_antiaffinity_topology_key": {
 					Type: "string",
 				},
+				"pod_antiaffinity_preferred_during_scheduling": {
+					Type: "boolean",
+				},
 				"pod_environment_configmap": {
 					Type: "string",
 				},
@@ -98,6 +98,7 @@ type KubernetesMetaConfiguration struct {
 	MasterPodMoveTimeout Duration `json:"master_pod_move_timeout,omitempty"`
 	EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity,omitempty"`
 	PodAntiAffinityTopologyKey string `json:"pod_antiaffinity_topology_key,omitempty"`
+	PodAntiAffinityPreferredDuringScheduling bool `json:"pod_antiaffinity_preferred_during_scheduling,omitempty"`
 	PodManagementPolicy string `json:"pod_management_policy,omitempty"`
 	EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"`
 	EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"`
@@ -354,7 +354,12 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) (
 	nodeAffinity := c.nodeAffinity(c.OpConfig.NodeReadinessLabel, spec.NodeAffinity)
 	if c.OpConfig.EnablePodAntiAffinity {
 		labelsSet := labels.Set(c.connectionPoolerLabels(role, false).MatchLabels)
-		podTemplate.Spec.Affinity = generatePodAffinity(labelsSet, c.OpConfig.PodAntiAffinityTopologyKey, nodeAffinity)
+		podTemplate.Spec.Affinity = generatePodAffinity(
+			labelsSet,
+			c.OpConfig.PodAntiAffinityTopologyKey,
+			nodeAffinity,
+			c.OpConfig.PodAntiAffinityPreferredDuringScheduling,
+		)
 	} else if nodeAffinity != nil {
 		podTemplate.Spec.Affinity = nodeAffinity
 	}
@@ -495,17 +495,27 @@ func (c *Cluster) nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinit
 	}
 }
 
-func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity) *v1.Affinity {
+func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity, preferredDuringScheduling bool) *v1.Affinity {
 	// generate pod anti-affinity to avoid multiple pods of the same Postgres cluster in the same topology, e.g. node
-	podAffinity := v1.Affinity{
-		PodAntiAffinity: &v1.PodAntiAffinity{
-			RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
-				LabelSelector: &metav1.LabelSelector{
-					MatchLabels: labels,
-				},
-				TopologyKey: topologyKey,
-			}},
-		},
+	podAffinityTerm := v1.PodAffinityTerm{
+		LabelSelector: &metav1.LabelSelector{
+			MatchLabels: labels,
+		},
+		TopologyKey: topologyKey,
+	}
+
+	podAffinity := v1.Affinity{
+		PodAntiAffinity: &v1.PodAntiAffinity{},
+	}
+
+	if preferredDuringScheduling {
+		podAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution = []v1.WeightedPodAffinityTerm{{
+			Weight:          1,
+			PodAffinityTerm: podAffinityTerm,
+		}}
+	} else {
+		podAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = []v1.PodAffinityTerm{podAffinityTerm}
 	}
 
 	if nodeAffinity != nil && nodeAffinity.NodeAffinity != nil {
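Rendered onto the pod template, the two branches of the refactored function yield anti-affinity blocks roughly like the following (a sketch; the matchLabels values are hypothetical cluster labels):

```yaml
# preferredDuringScheduling == false (default): hard requirement
podAntiAffinity:
  requiredDuringSchedulingIgnoredDuringExecution:
  - labelSelector:
      matchLabels:
        cluster-name: acid-minimal-cluster  # hypothetical label
    topologyKey: kubernetes.io/hostname
---
# preferredDuringScheduling == true: soft preference with weight 1
podAntiAffinity:
  preferredDuringSchedulingIgnoredDuringExecution:
  - weight: 1
    podAffinityTerm:
      labelSelector:
        matchLabels:
          cluster-name: acid-minimal-cluster  # hypothetical label
      topologyKey: kubernetes.io/hostname
```

Extracting the shared `podAffinityTerm` keeps the selector and topology key identical in both modes; only the scheduling strictness differs.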
@@ -727,6 +737,7 @@ func (c *Cluster) generatePodTemplate(
 	shmVolume *bool,
 	podAntiAffinity bool,
 	podAntiAffinityTopologyKey string,
+	podAntiAffinityPreferredDuringScheduling bool,
 	additionalSecretMount string,
 	additionalSecretMountPath string,
 	additionalVolumes []acidv1.AdditionalVolume,
@@ -767,7 +778,12 @@ func (c *Cluster) generatePodTemplate(
 	}
 
 	if podAntiAffinity {
-		podSpec.Affinity = generatePodAffinity(labels, podAntiAffinityTopologyKey, nodeAffinity)
+		podSpec.Affinity = generatePodAffinity(
+			labels,
+			podAntiAffinityTopologyKey,
+			nodeAffinity,
+			podAntiAffinityPreferredDuringScheduling,
+		)
 	} else if nodeAffinity != nil {
 		podSpec.Affinity = nodeAffinity
 	}
@@ -1376,6 +1392,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
 		mountShmVolumeNeeded(c.OpConfig, spec),
 		c.OpConfig.EnablePodAntiAffinity,
 		c.OpConfig.PodAntiAffinityTopologyKey,
+		c.OpConfig.PodAntiAffinityPreferredDuringScheduling,
 		c.OpConfig.AdditionalSecretMount,
 		c.OpConfig.AdditionalSecretMountPath,
 		additionalVolumes)
@@ -2122,6 +2139,7 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
 		util.False(),
 		false,
 		"",
+		false,
 		c.OpConfig.AdditionalSecretMount,
 		c.OpConfig.AdditionalSecretMountPath,
 		[]acidv1.AdditionalVolume{}); err != nil {
@@ -1360,6 +1360,95 @@ func TestNodeAffinity(t *testing.T) {
 	assert.Equal(t, s.Spec.Template.Spec.Affinity.NodeAffinity, nodeAff, "cluster template has correct node affinity")
 }
 
+func TestPodAntiAffinityRequiredDuringScheduling(t *testing.T) {
+	var err error
+	var spiloRunAsUser = int64(101)
+	var spiloRunAsGroup = int64(103)
+	var spiloFSGroup = int64(103)
+
+	spec := acidv1.PostgresSpec{
+		TeamID: "myapp", NumberOfInstances: 1,
+		Resources: &acidv1.Resources{
+			ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+			ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+		},
+		Volume: acidv1.Volume{
+			Size: "1G",
+		},
+	}
+
+	cluster := New(
+		Config{
+			OpConfig: config.Config{
+				PodManagementPolicy: "ordered_ready",
+				ProtectedRoles:      []string{"admin"},
+				Auth: config.Auth{
+					SuperUsername:       superUserName,
+					ReplicationUsername: replicationUserName,
+				},
+				Resources: config.Resources{
+					SpiloRunAsUser:  &spiloRunAsUser,
+					SpiloRunAsGroup: &spiloRunAsGroup,
+					SpiloFSGroup:    &spiloFSGroup,
+				},
+				EnablePodAntiAffinity: true,
+			},
+		}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
+
+	s, err := cluster.generateStatefulSet(&spec)
+	if err != nil {
+		assert.NoError(t, err)
+	}
+
+	assert.Nil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should not use preferredDuringScheduling")
+	assert.NotNil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should use requiredDuringScheduling")
+}
+
+func TestPodAntiAffinityPreferredDuringScheduling(t *testing.T) {
+	var err error
+	var spiloRunAsUser = int64(101)
+	var spiloRunAsGroup = int64(103)
+	var spiloFSGroup = int64(103)
+
+	spec := acidv1.PostgresSpec{
+		TeamID: "myapp", NumberOfInstances: 1,
+		Resources: &acidv1.Resources{
+			ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+			ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "10"},
+		},
+		Volume: acidv1.Volume{
+			Size: "1G",
+		},
+	}
+
+	cluster := New(
+		Config{
+			OpConfig: config.Config{
+				PodManagementPolicy: "ordered_ready",
+				ProtectedRoles:      []string{"admin"},
+				Auth: config.Auth{
+					SuperUsername:       superUserName,
+					ReplicationUsername: replicationUserName,
+				},
+				Resources: config.Resources{
+					SpiloRunAsUser:  &spiloRunAsUser,
+					SpiloRunAsGroup: &spiloRunAsGroup,
+					SpiloFSGroup:    &spiloFSGroup,
+				},
+				EnablePodAntiAffinity:                    true,
+				PodAntiAffinityPreferredDuringScheduling: true,
+			},
+		}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
+
+	s, err := cluster.generateStatefulSet(&spec)
+	if err != nil {
+		assert.NoError(t, err)
+	}
+
+	assert.NotNil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should use preferredDuringScheduling")
+	assert.Nil(t, s.Spec.Template.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution, "pod anti-affinity should not use requiredDuringScheduling")
+}
+
 func testDeploymentOwnerReference(cluster *Cluster, deployment *appsv1.Deployment) error {
 	owner := deployment.ObjectMeta.OwnerReferences[0]
@@ -123,6 +123,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
 	result.MasterPodMoveTimeout = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.MasterPodMoveTimeout), "10m")
 	result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity
 	result.PodAntiAffinityTopologyKey = util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname")
+	result.PodAntiAffinityPreferredDuringScheduling = fromCRD.Kubernetes.PodAntiAffinityPreferredDuringScheduling
 	result.PodToleration = fromCRD.Kubernetes.PodToleration
 
 	// Postgres Pod resources
@@ -203,6 +203,7 @@ type Config struct {
 	CustomPodAnnotations map[string]string `name:"custom_pod_annotations"`
 	EnablePodAntiAffinity bool `name:"enable_pod_antiaffinity" default:"false"`
 	PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
+	PodAntiAffinityPreferredDuringScheduling bool `name:"pod_antiaffinity_preferred_during_scheduling" default:"false"`
 	StorageResizeMode string `name:"storage_resize_mode" default:"pvc"`
 	EnableLoadBalancer *bool `name:"enable_load_balancer"` // deprecated and kept for backward compatibility
 	ExternalTrafficPolicy string `name:"external_traffic_policy" default:"Cluster"`
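Since the `name:` struct tags also serve as keys of the ConfigMap-based operator configuration, the new option should presumably be settable there as well (a sketch; the ConfigMap name is hypothetical and values must be strings):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-operator  # hypothetical name
data:
  enable_pod_antiaffinity: "true"
  pod_antiaffinity_preferred_during_scheduling: "true"
```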
@@ -231,7 +232,7 @@ type Config struct {
 	EnableTeamIdClusternamePrefix bool `name:"enable_team_id_clustername_prefix" default:"false"`
 	MajorVersionUpgradeMode string `name:"major_version_upgrade_mode" default:"off"`
 	MajorVersionUpgradeTeamAllowList []string `name:"major_version_upgrade_team_allow_list" default:""`
-	MinimalMajorVersion string `name:"minimal_major_version" default:"11"`
+	MinimalMajorVersion string `name:"minimal_major_version" default:"9.6"`
 	TargetMajorVersion string `name:"target_major_version" default:"14"`
 	PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"`
 	PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"`