Add Pod Anti Affinity (#489)

* Add Pod Anti Affinity
This commit is contained in:
teuto.net Netzdienste GmbH 2019-02-21 16:37:03 +01:00 committed by Sergey Dudoladov
parent 2e9b6533e7
commit 26a7fdfa9f
6 changed files with 73 additions and 2 deletions

View File

@ -151,6 +151,36 @@ Postgres pods by default receive tolerations for `unreachable` and `noExecute` t
Depending on your setup, you may want to adjust these parameters to prevent master pods from being evicted by the Kubernetes runtime. Depending on your setup, you may want to adjust these parameters to prevent master pods from being evicted by the Kubernetes runtime.
To prevent eviction completely, specify the toleration by leaving out the `tolerationSeconds` value (similar to how Kubernetes' own DaemonSets are configured) To prevent eviction completely, specify the toleration by leaving out the `tolerationSeconds` value (similar to how Kubernetes' own DaemonSets are configured)
### Enable pod anti affinity
To ensure that the Postgres pods of a cluster run in different topology domains (e.g. on different nodes), you can use [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/)
and configure the required topology in the operator ConfigMap.
Enable pod anti affinity by adding the following line to the operator ConfigMap:
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-operator
data:
  enable_pod_antiaffinity: "true"
```
By default, the topology key for the pod anti affinity is set to `kubernetes.io/hostname`.
You can set another topology key, e.g. `failure-domain.beta.kubernetes.io/zone`, by adding the following line
to the operator ConfigMap. See [built-in node labels](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels) for available topology keys:
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-operator
data:
  enable_pod_antiaffinity: "true"
  pod_antiaffinity_topology_key: "failure-domain.beta.kubernetes.io/zone"
```
### Add cluster-specific labels ### Add cluster-specific labels
In some cases, you might want to add `labels` that are specific to a given In some cases, you might want to add `labels` that are specific to a given

View File

@ -213,6 +213,14 @@ configuration they are grouped under the `kubernetes` key.
that should be assigned to the Postgres pods. The priority class itself must be defined in advance. that should be assigned to the Postgres pods. The priority class itself must be defined in advance.
Default is empty (use the default priority class). Default is empty (use the default priority class).
* **enable_pod_antiaffinity**
toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid scheduling multiple pods
of the same Postgres cluster in the same topology domain, e.g. on the same node. The default is `false`.
* **pod_antiaffinity_topology_key**
overrides the
[topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels)
used for pod anti affinity. The default is `kubernetes.io/hostname`.
## Kubernetes resource requests ## Kubernetes resource requests

View File

@ -60,6 +60,8 @@ type KubernetesMetaConfiguration struct {
// TODO: use namespacedname // TODO: use namespacedname
PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"` PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"`
PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"`
EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity" default:"false"`
PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
} }
// PostgresPodResourcesDefaults defines the spec of default resources // PostgresPodResourcesDefaults defines the spec of default resources

View File

@ -290,6 +290,26 @@ func nodeAffinity(nodeReadinessLabel map[string]string) *v1.Affinity {
} }
} }
// generatePodAffinity builds the Affinity used when pod anti-affinity is
// enabled. The result carries a single required
// (RequiredDuringSchedulingIgnoredDuringExecution) anti-affinity term that
// matches pods by the given labels within topologyKey, so that multiple pods
// of the same Postgres cluster do not land in the same topology, e.g. node.
// When nodeAffinity is non-nil and has its NodeAffinity set, that NodeAffinity
// is carried over into the returned Affinity.
func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity) *v1.Affinity {
	antiAffinityTerm := v1.PodAffinityTerm{
		LabelSelector: &metav1.LabelSelector{MatchLabels: labels},
		TopologyKey:   topologyKey,
	}

	result := &v1.Affinity{
		PodAntiAffinity: &v1.PodAntiAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{antiAffinityTerm},
		},
	}

	// Nothing to merge: return the anti-affinity on its own.
	if nodeAffinity == nil || nodeAffinity.NodeAffinity == nil {
		return result
	}

	result.NodeAffinity = nodeAffinity.NodeAffinity
	return result
}
func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]string) []v1.Toleration { func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]string) []v1.Toleration {
// allow to override tolerations by postgresql manifest // allow to override tolerations by postgresql manifest
if len(*tolerationsSpec) > 0 { if len(*tolerationsSpec) > 0 {
@ -419,6 +439,8 @@ func generatePodTemplate(
kubeIAMRole string, kubeIAMRole string,
priorityClassName string, priorityClassName string,
shmVolume bool, shmVolume bool,
podAntiAffinity bool,
podAntiAffinityTopologyKey string,
) (*v1.PodTemplateSpec, error) { ) (*v1.PodTemplateSpec, error) {
terminateGracePeriodSeconds := terminateGracePeriod terminateGracePeriodSeconds := terminateGracePeriod
@ -437,7 +459,9 @@ func generatePodTemplate(
addShmVolume(&podSpec) addShmVolume(&podSpec)
} }
if nodeAffinity != nil { if podAntiAffinity {
podSpec.Affinity = generatePodAffinity(labels, podAntiAffinityTopologyKey, nodeAffinity)
} else if nodeAffinity != nil {
podSpec.Affinity = nodeAffinity podSpec.Affinity = nodeAffinity
} }
@ -813,7 +837,9 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
c.OpConfig.PodServiceAccountName, c.OpConfig.PodServiceAccountName,
c.OpConfig.KubeIAMRole, c.OpConfig.KubeIAMRole,
effectivePodPriorityClassName, effectivePodPriorityClassName,
mountShmVolumeNeeded(c.OpConfig, spec)); err != nil { mountShmVolumeNeeded(c.OpConfig, spec),
c.OpConfig.EnablePodAntiAffinity,
c.OpConfig.PodAntiAffinityTopologyKey); err != nil {
return nil, fmt.Errorf("could not generate pod template: %v", err) return nil, fmt.Errorf("could not generate pod template: %v", err)
} }

View File

@ -53,6 +53,9 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel
result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName
result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity;
result.PodAntiAffinityTopologyKey = fromCRD.Kubernetes.PodAntiAffinityTopologyKey;
result.DefaultCPURequest = fromCRD.PostgresPodResources.DefaultCPURequest result.DefaultCPURequest = fromCRD.PostgresPodResources.DefaultCPURequest
result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest
result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit

View File

@ -95,6 +95,8 @@ type Config struct {
EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"` EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"`
EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"` EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"`
CustomServiceAnnotations map[string]string `name:"custom_service_annotations"` CustomServiceAnnotations map[string]string `name:"custom_service_annotations"`
EnablePodAntiAffinity bool `name:"enable_pod_antiaffinity" default:"false"`
PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
// deprecated and kept for backward compatibility // deprecated and kept for backward compatibility
EnableLoadBalancer *bool `name:"enable_load_balancer"` EnableLoadBalancer *bool `name:"enable_load_balancer"`
MasterDNSNameFormat StringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"` MasterDNSNameFormat StringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"`