From 47dd766fa773d4616890070216593d68de7c3cb9 Mon Sep 17 00:00:00 2001 From: Georg Kunz Date: Thu, 2 Nov 2017 19:10:44 +0100 Subject: [PATCH] Add node toleration config to PodSpec (#151) * Add node toleration config to PodSpec This allows to taint nodes dedicated to Postgres and prevents other pods from running on these nodes. * Document taint and toleration setup And remove setting from default operator ConfigMap * Allow to overwrite tolerations with Postgres manifest --- README.md | 44 +++++++++++++++++++++++++++++++++++++++ pkg/cluster/k8sres.go | 25 +++++++++++++++++++++- pkg/spec/postgresql.go | 2 ++ pkg/util/config/config.go | 1 + 4 files changed, 71 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ac3cb438..eeea5e790 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,50 @@ We can use the generated secret of the `postgres` robot user to connect to our ` $ psql -U postgres +### Configuration Options + +The operator can be configured with the provided ConfigMap (`manifests/configmap.yaml`). + +#### Use Taints and Tolerations for Dedicated Postgres Nodes + +To ensure Postgres pods are running on nodes without any other application pods, you can use [taints and tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) and configure the required toleration in the operator ConfigMap. + +As an example you can set the following node taint: + +``` +$ kubectl taint nodes <nodeName> postgres=:NoSchedule +``` + +And configure the toleration for the Postgres pods by adding the following line to the ConfigMap: + +``` +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgres-operator +data: + toleration: "key:postgres,operator:Exists,effect:NoSchedule" + ... 
+``` + +Or you can specify and/or overwrite the tolerations for each postgres instance in the postgres manifest: + +``` +apiVersion: "acid.zalan.do/v1" +kind: postgresql +metadata: + name: acid-minimal-cluster +spec: + teamId: "ACID" + tolerations: + - key: postgres + operator: Exists + effect: NoSchedule +``` + +Please be aware that the taint and toleration only ensure that no other pod gets scheduled to the "postgres" node, but not that Postgres pods are placed on such a node. This can be achieved by setting a node affinity rule in the ConfigMap. + + # Setup development environment The following steps guide you through the setup to work on the operator itself. diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 49cf28cbb..1ed94cb2d 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -250,7 +250,29 @@ func (c *Cluster) nodeAffinity() *v1.Affinity { } } +func (c *Cluster) tolerations(tolerationsSpec *[]v1.Toleration) []v1.Toleration { + // allow to override tolerations by postgresql manifest + if len(*tolerationsSpec) > 0 { + return *tolerationsSpec + } + + podToleration := c.Config.OpConfig.PodToleration + if (len(podToleration["key"]) > 0 || len(podToleration["operator"]) > 0 || len(podToleration["value"]) > 0 || len(podToleration["effect"]) > 0) { + return []v1.Toleration{ + { + Key: podToleration["key"], + Operator: v1.TolerationOperator(podToleration["operator"]), + Value: podToleration["value"], + Effect: v1.TaintEffect(podToleration["effect"]), + }, + } + } else { + return []v1.Toleration{} + } +} + func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequirements, + tolerationsSpec *[]v1.Toleration, pgParameters *spec.PostgresqlParam, patroniParameters *spec.Patroni, cloneDescription *spec.CloneDescription) *v1.PodTemplateSpec { @@ -372,6 +394,7 @@ func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequireme TerminationGracePeriodSeconds: &terminateGracePeriodSeconds, Containers: 
[]v1.Container{container}, Affinity: c.nodeAffinity(), + Tolerations: c.tolerations(tolerationsSpec), } template := v1.PodTemplateSpec{ @@ -394,7 +417,7 @@ func (c *Cluster) generateStatefulSet(spec spec.PostgresSpec) (*v1beta1.Stateful return nil, fmt.Errorf("could not generate resource requirements: %v", err) } - podTemplate := c.generatePodTemplate(resourceRequirements, &spec.PostgresqlParam, &spec.Patroni, &spec.Clone) + podTemplate := c.generatePodTemplate(resourceRequirements, &spec.Tolerations, &spec.PostgresqlParam, &spec.Patroni, &spec.Clone) volumeClaimTemplate, err := generatePersistentVolumeClaimTemplate(spec.Volume.Size, spec.Volume.StorageClass) if err != nil { return nil, fmt.Errorf("could not generate volume claim template: %v", err) diff --git a/pkg/spec/postgresql.go b/pkg/spec/postgresql.go index 1965e6479..e6d7b712f 100644 --- a/pkg/spec/postgresql.go +++ b/pkg/spec/postgresql.go @@ -7,6 +7,7 @@ import ( "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/pkg/api/v1" ) // MaintenanceWindow describes the time window when the operator is allowed to do maintenance on a cluster. @@ -102,6 +103,7 @@ type PostgresSpec struct { Clone CloneDescription `json:"clone"` ClusterName string `json:"-"` Databases map[string]string `json:"databases,omitempty"` + Tolerations []v1.Toleration `json:"tolerations,omitempty"` } // PostgresqlList defines a list of PostgreSQL clusters. 
diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 93cbaae34..caf617aea 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -25,6 +25,7 @@ type Resources struct { ClusterLabels map[string]string `name:"cluster_labels" default:"application:spilo"` ClusterNameLabel string `name:"cluster_name_label" default:"cluster-name"` PodRoleLabel string `name:"pod_role_label" default:"spilo-role"` + PodToleration map[string]string `name:"toleration" default:""` DefaultCPURequest string `name:"default_cpu_request" default:"100m"` DefaultMemoryRequest string `name:"default_memory_request" default:"100Mi"` DefaultCPULimit string `name:"default_cpu_limit" default:"3"`