From a119772efbddb15d3eabcd7f057f3af909573f4a Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Wed, 5 Oct 2022 19:25:24 +0300 Subject: [PATCH] add toggle to turn off readiness probes (#2004) * add toggle to turn off readiness probes * include PodManagementPolicy and ReadinessProbe in stateful set comparison * add URI scheme to generated readiness probe --- .../crds/operatorconfigurations.yaml | 3 +++ charts/postgres-operator/values.yaml | 2 ++ docs/reference/operator_parameters.md | 8 ++++++++ manifests/configmap.yaml | 1 + manifests/operatorconfiguration.crd.yaml | 3 +++ .../postgresql-operator-default-configuration.yaml | 1 + pkg/apis/acid.zalan.do/v1/crds.go | 3 +++ .../acid.zalan.do/v1/operator_configuration_type.go | 1 + pkg/cluster/cluster.go | 7 +++++++ pkg/cluster/k8sres.go | 13 ++++++++----- pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + 12 files changed, 39 insertions(+), 5 deletions(-) diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index ca47ab3b8..4ee1c6919 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -214,6 +214,9 @@ spec: enable_pod_disruption_budget: type: boolean default: true + enable_readiness_probe: + type: boolean + default: false enable_sidecars: type: boolean default: true diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 9870c707f..0bcc5b7e0 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -129,6 +129,8 @@ configKubernetes: enable_pod_antiaffinity: false # toggles PDB to set to MinAvailabe 0 or 1 enable_pod_disruption_budget: true + # toggles readiness probe for database pods + enable_readiness_probe: false # enables sidecar containers to run alongside Spilo in the same pod enable_sidecars: true diff --git a/docs/reference/operator_parameters.md 
b/docs/reference/operator_parameters.md index a923ad71c..af09a5da4 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -489,6 +489,14 @@ configuration they are grouped under the `kubernetes` key. of stateful sets of PG clusters. The default is `ordered_ready`, the second possible value is `parallel`. +* **enable_readiness_probe** + the operator can set a readiness probe on the statefulset for the database + pods with `InitialDelaySeconds: 6`, `PeriodSeconds: 10`, `TimeoutSeconds: 5`, + `SuccessThreshold: 1` and `FailureThreshold: 3`. When enabling readiness + probes it is recommended to switch the `pod_management_policy` to `parallel` + to avoid unnecessary waiting times in case of multiple instances failing. + The default is `false`. + * **storage_resize_mode** defines how operator handles the difference between the requested volume size and the actual size. Available options are: diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index e8823ee0e..bd5f5af12 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -52,6 +52,7 @@ data: # enable_pod_disruption_budget: "true" # enable_postgres_team_crd: "false" # enable_postgres_team_crd_superusers: "false" + enable_readiness_probe: "false" enable_replica_load_balancer: "false" enable_replica_pooler_load_balancer: "false" # enable_shm_volume: "true" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 1f6d48a4d..0f0d47d63 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -212,6 +212,9 @@ spec: enable_pod_disruption_budget: type: boolean default: true + enable_readiness_probe: + type: boolean + default: false enable_sidecars: type: boolean default: true diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 7c8ac61ae..070bbc4df 100644 --- 
a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -60,6 +60,7 @@ configuration: enable_init_containers: true enable_pod_antiaffinity: false enable_pod_disruption_budget: true + enable_readiness_probe: false enable_sidecars: true # ignored_annotations: # - k8s.v1.cni.cncf.io/network-status diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 7b06ee233..58186a5e5 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -1275,6 +1275,9 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ "enable_pod_disruption_budget": { Type: "boolean", }, + "enable_readiness_probe": { + Type: "boolean", + }, "enable_sidecars": { Type: "boolean", }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index de149033a..85ba25e34 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -98,6 +98,7 @@ type KubernetesMetaConfiguration struct { EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity,omitempty"` PodAntiAffinityTopologyKey string `json:"pod_antiaffinity_topology_key,omitempty"` PodManagementPolicy string `json:"pod_management_policy,omitempty"` + EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"` EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"` } diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 93ae3ec35..d993cfa0d 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -389,6 +389,11 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa needsReplace = true reasons = append(reasons, "new statefulset's annotations do not match: "+reason) } + if c.Statefulset.Spec.PodManagementPolicy != statefulSet.Spec.PodManagementPolicy { + match = false + 
needsReplace = true + reasons = append(reasons, "new statefulset's pod management policy do not match") + } needsRollUpdate, reasons = c.compareContainers("initContainers", c.Statefulset.Spec.Template.Spec.InitContainers, statefulSet.Spec.Template.Spec.InitContainers, needsRollUpdate, reasons) needsRollUpdate, reasons = c.compareContainers("containers", c.Statefulset.Spec.Template.Spec.Containers, statefulSet.Spec.Template.Spec.Containers, needsRollUpdate, reasons) @@ -528,6 +533,8 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe checks := []containerCheck{ newCheck("new statefulset %s's %s (index %d) name does not match the current one", func(a, b v1.Container) bool { return a.Name != b.Name }), + newCheck("new statefulset %s's %s (index %d) readiness probe does not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.ReadinessProbe, b.ReadinessProbe) }), newCheck("new statefulset %s's %s (index %d) ports do not match the current one", func(a, b v1.Container) bool { return !comparePorts(a.Ports, b.Ports) }), newCheck("new statefulset %s's %s (index %d) resources do not match the current ones", diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 337cd3a63..ac0ae59c6 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1119,17 +1119,18 @@ func extractPgVersionFromBinPath(binPath string, template string) (string, error func generateSpiloReadinessProbe() *v1.Probe { return &v1.Probe{ + FailureThreshold: 3, Handler: v1.Handler{ HTTPGet: &v1.HTTPGetAction{ - Path: "/readiness", - Port: intstr.IntOrString{IntVal: patroni.ApiPort}, + Path: "/readiness", + Port: intstr.IntOrString{IntVal: patroni.ApiPort}, + Scheme: v1.URISchemeHTTP, }, }, InitialDelaySeconds: 6, PeriodSeconds: 10, - TimeoutSeconds: 5, SuccessThreshold: 1, - FailureThreshold: 3, + TimeoutSeconds: 5, } } @@ -1280,7 +1281,9 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef ) // 
Patroni responds 200 to probe only if it either owns the leader lock or postgres is running and DCS is accessible - spiloContainer.ReadinessProbe = generateSpiloReadinessProbe() + if c.OpConfig.EnableReadinessProbe { + spiloContainer.ReadinessProbe = generateSpiloReadinessProbe() + } // generate container specs for sidecars specified in the cluster manifest clusterSpecificSidecars := []v1.Container{} diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index d66ff7f11..b035573a0 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -118,6 +118,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.NodeReadinessLabelMerge = fromCRD.Kubernetes.NodeReadinessLabelMerge result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName result.PodManagementPolicy = util.Coalesce(fromCRD.Kubernetes.PodManagementPolicy, "ordered_ready") + result.EnableReadinessProbe = fromCRD.Kubernetes.EnableReadinessProbe result.MasterPodMoveTimeout = util.CoalesceDuration(time.Duration(fromCRD.Kubernetes.MasterPodMoveTimeout), "10m") result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity result.PodAntiAffinityTopologyKey = util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index e1fde13f4..447acbc92 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -219,6 +219,7 @@ type Config struct { TeamAPIRoleConfiguration map[string]string `name:"team_api_role_configuration" default:"log_statement:all"` PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"` PodManagementPolicy string `name:"pod_management_policy" default:"ordered_ready"` + EnableReadinessProbe bool `name:"enable_readiness_probe" default:"false"` ProtectedRoles []string `name:"protected_role_names" default:"admin,cron_admin"` PostgresSuperuserTeams 
[]string `name:"postgres_superuser_teams" default:""` SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`