diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml
index f007240d7..d00aba065 100644
--- a/charts/postgres-operator/values.yaml
+++ b/charts/postgres-operator/values.yaml
@@ -114,6 +114,7 @@ configKubernetesCRD:
   cluster_name_label: cluster-name
   enable_pod_antiaffinity: false
   pod_antiaffinity_topology_key: "kubernetes.io/hostname"
+  enable_pod_disruption_budget: true
   secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}"
   # inherited_labels:
   # - application
@@ -161,7 +162,7 @@ serviceAccount:
   # The name of the ServiceAccount to use.
   # If not set and create is true, a name is generated using the fullname template
   # When relying solely on the OperatorConfiguration CRD, set this value to "operator"
-  # Otherwise, the operator tries to use the "default" service account which is forbidden
+  # Otherwise, the operator tries to use the "default" service account which is forbidden
   name: ""
 
 priorityClassName: ""
diff --git a/docs/administrator.md b/docs/administrator.md
index 4629d1284..1b51091fe 100644
--- a/docs/administrator.md
+++ b/docs/administrator.md
@@ -154,6 +154,22 @@ data:
   pod_antiaffinity_topology_key: "failure-domain.beta.kubernetes.io/zone"
 ```
 
+### Pod Disruption Budget
+
+By default the operator uses a PodDisruptionBudget (PDB) to protect the cluster
+from voluntary disruptions and hence from unwanted DB downtime. The
+`MinAvailable` parameter of the PDB is set to `1`, which prevents the master of
+a single-node cluster, or the last remaining running instance of a multi-node
+cluster, from being evicted.
+
+The PDB is only relaxed in two scenarios:
+* If a cluster is scaled down to `0` instances (e.g. for draining nodes)
+* If the PDB is disabled in the configuration (`enable_pod_disruption_budget`)
+
+In both cases the PDB remains in place, but with `MinAvailable` set to `0`.
+Once the cluster is scaled up again and the PDB is not disabled, `MinAvailable`
+is automatically set back to `1`. Disabling PDBs helps to avoid blocking
+Kubernetes upgrades in managed K8s environments, at the cost of prolonged DB
+downtime. See PR #384 for the use case.
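+
+For example, with the ConfigMap-based operator configuration the PDB can be
+disabled by setting the option to a string value (a minimal sketch showing only
+the relevant `data` entry):
+
+```yaml
+data:
+  enable_pod_disruption_budget: "false"
+```
+
+With the CRD-based configuration the same option is a boolean under the
+`kubernetes` section of the `OperatorConfiguration`.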
+
 ### Add cluster-specific labels
 
 In some cases, you might want to add `labels` that are specific to a given
diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md
index c9a9db753..e44a0f8cf 100644
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@@ -161,6 +161,13 @@ configuration they are grouped under the `kubernetes` key.
   replaced by the cluster name. Only the `{cluster}` placeholders is allowed in
   the template.
 
+* **enable_pod_disruption_budget**
+  The PDB is enabled by default to protect the cluster from voluntary
+  disruptions and hence from unwanted DB downtime. However, on some cloud
+  providers it may be necessary to temporarily disable it, e.g. for node
+  updates. See the [admin docs](../administrator.md#pod-disruption-budget) for
+  more information. The default is true.
+
 * **secret_name_template**
   a template for the name of the database user secrets generated by the
   operator. `{username}` is replaced with name of the secret, `{cluster}` with
diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml
index 956ab7f0f..77f2756f6 100644
--- a/manifests/postgresql-operator-default-configuration.yaml
+++ b/manifests/postgresql-operator-default-configuration.yaml
@@ -20,6 +20,7 @@ configuration:
     pod_service_account_name: operator
     pod_terminate_grace_period: 5m
     pdb_name_format: "postgres-{cluster}-pdb"
+    enable_pod_disruption_budget: true
     secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}"
     cluster_domain: cluster.local
     oauth_token_secret_name: postgresql-operator
diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
index 136be7843..b6fade388 100644
--- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
+++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
@@ -49,6 +49,7 @@ type KubernetesMetaConfiguration struct {
 	SpiloFSGroup *int64 `json:"spilo_fsgroup,omitempty"`
 	WatchedNamespace string `json:"watched_namespace,omitempty"`
 	PDBNameFormat config.StringTemplate `json:"pdb_name_format,omitempty"`
+	EnablePodDisruptionBudget *bool `json:"enable_pod_disruption_budget,omitempty"`
 	SecretNameTemplate config.StringTemplate `json:"secret_name_template,omitempty"`
 	ClusterDomain string `json:"cluster_domain"`
 	OAuthTokenSecretName spec.NamespacedName `json:"oauth_token_secret_name,omitempty"`
diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go
index 65d8ee925..6a554a2d8 100644
--- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go
+++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go
@@ -76,6 +76,11 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfiguration) {
 		*out = new(int64)
 		**out = **in
 	}
+	if in.EnablePodDisruptionBudget != nil {
+		in, out := &in.EnablePodDisruptionBudget, &out.EnablePodDisruptionBudget
+		*out = new(bool)
+		**out = **in
+	}
 	out.OAuthTokenSecretName = in.OAuthTokenSecretName
 	out.InfrastructureRolesSecretName = in.InfrastructureRolesSecretName
 	if in.ClusterLabels != nil {
diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go
index b9759ca86..a6644b868 100644
--- a/pkg/cluster/cluster.go
+++ b/pkg/cluster/cluster.go
@@ -579,6 +579,15 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
 		}
 	}()
 
+	// pod disruption budget
+	if oldSpec.Spec.NumberOfInstances != newSpec.Spec.NumberOfInstances {
+		c.logger.Debug("syncing pod disruption budgets")
+		if err := c.syncPodDisruptionBudget(true); err != nil {
+			c.logger.Errorf("could not sync pod disruption budget: %v", err)
+			updateFailed = true
+		}
+	}
+
 	// logical backup job
 	func() {
diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go
index 808105a6d..b7fdbe50e 100644
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@@ -1272,26 +1272,26 @@ func (c *Cluster) generateCloneEnvironment(description *acidv1.CloneDescription)
 	result = append(result, v1.EnvVar{Name: "CLONE_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
 
 	if description.S3Endpoint != "" {
-		result = append(result, v1.EnvVar{Name: "CLONE_AWS_ENDPOINT", Value: description.S3Endpoint})
-		result = append(result, v1.EnvVar{Name: "CLONE_WALE_S3_ENDPOINT", Value: description.S3Endpoint})
+		result = append(result, v1.EnvVar{Name: "CLONE_AWS_ENDPOINT", Value: description.S3Endpoint})
+		result = append(result, v1.EnvVar{Name: "CLONE_WALE_S3_ENDPOINT", Value: description.S3Endpoint})
 	}
 
 	if description.S3AccessKeyId != "" {
-		result = append(result, v1.EnvVar{Name: "CLONE_AWS_ACCESS_KEY_ID", Value: description.S3AccessKeyId})
+		result = append(result, v1.EnvVar{Name: "CLONE_AWS_ACCESS_KEY_ID", Value: description.S3AccessKeyId})
 	}
 
 	if description.S3SecretAccessKey != "" {
-		result = append(result, v1.EnvVar{Name: "CLONE_AWS_SECRET_ACCESS_KEY", Value: description.S3SecretAccessKey})
+		result = append(result, v1.EnvVar{Name: "CLONE_AWS_SECRET_ACCESS_KEY", Value: description.S3SecretAccessKey})
 	}
 
 	if description.S3ForcePathStyle != nil {
-		s3ForcePathStyle := "0"
+		s3ForcePathStyle := "0"
 
-		if *description.S3ForcePathStyle {
-			s3ForcePathStyle = "1"
-		}
+		if *description.S3ForcePathStyle {
+			s3ForcePathStyle = "1"
+		}
 
-		result = append(result, v1.EnvVar{Name: "CLONE_AWS_S3_FORCE_PATH_STYLE", Value: s3ForcePathStyle})
+		result = append(result, v1.EnvVar{Name: "CLONE_AWS_S3_FORCE_PATH_STYLE", Value: s3ForcePathStyle})
 	}
 }
@@ -1300,6 +1300,12 @@ func (c *Cluster) generateCloneEnvironment(description *acidv1.CloneDescription)
 
 func (c *Cluster) generatePodDisruptionBudget() *policybeta1.PodDisruptionBudget {
 	minAvailable := intstr.FromInt(1)
+	pdbEnabled := c.OpConfig.EnablePodDisruptionBudget
+
+	// if PodDisruptionBudget is disabled or if there are no DB pods, set the budget to 0.
+	if (pdbEnabled != nil && !*pdbEnabled) || c.Spec.NumberOfInstances <= 0 {
+		minAvailable = intstr.FromInt(0)
+	}
 
 	return &policybeta1.PodDisruptionBudget{
 		ObjectMeta: metav1.ObjectMeta{
diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go
index 7ccbcb51d..0ddb3c55f 100644
--- a/pkg/cluster/k8sres_test.go
+++ b/pkg/cluster/k8sres_test.go
@@ -1,6 +1,8 @@
 package cluster
 
 import (
+	"reflect"
+
 	"k8s.io/api/core/v1"
 	"testing"
 
@@ -9,6 +11,10 @@ import (
 	"github.com/zalando/postgres-operator/pkg/util/config"
 	"github.com/zalando/postgres-operator/pkg/util/constants"
 	"github.com/zalando/postgres-operator/pkg/util/k8sutil"
+
+	policyv1beta1 "k8s.io/api/policy/v1beta1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
 )
 
 func True() *bool {
@@ -21,6 +27,11 @@ func False() *bool {
 	return &b
 }
 
+func toIntStr(val int) *intstr.IntOrString {
+	b := intstr.FromInt(val)
+	return &b
+}
+
 func TestGenerateSpiloJSONConfiguration(t *testing.T) {
 	var cluster = New(
 		Config{
@@ -143,6 +154,113 @@ func TestCreateLoadBalancerLogic(t *testing.T) {
 	}
 }
 
+func TestGeneratePodDisruptionBudget(t *testing.T) {
+	tests := []struct {
+		c   *Cluster
+		out policyv1beta1.PodDisruptionBudget
+	}{
+		// With multiple instances.
+		{
+			New(
+				Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-pdb"}},
+				k8sutil.KubernetesClient{},
+				acidv1.Postgresql{
+					ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
+					Spec:       acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 3}},
+				logger),
+			policyv1beta1.PodDisruptionBudget{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "postgres-myapp-database-pdb",
+					Namespace: "myapp",
+					Labels:    map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
+				},
+				Spec: policyv1beta1.PodDisruptionBudgetSpec{
+					MinAvailable: toIntStr(1),
+					Selector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
+					},
+				},
+			},
+		},
+		// With zero instances.
+		{
+			New(
+				Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-pdb"}},
+				k8sutil.KubernetesClient{},
+				acidv1.Postgresql{
+					ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
+					Spec:       acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 0}},
+				logger),
+			policyv1beta1.PodDisruptionBudget{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "postgres-myapp-database-pdb",
+					Namespace: "myapp",
+					Labels:    map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
+				},
+				Spec: policyv1beta1.PodDisruptionBudgetSpec{
+					MinAvailable: toIntStr(0),
+					Selector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
+					},
+				},
+			},
+		},
+		// With PodDisruptionBudget disabled.
+		{
+			New(
+				Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-pdb", EnablePodDisruptionBudget: False()}},
+				k8sutil.KubernetesClient{},
+				acidv1.Postgresql{
+					ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
+					Spec:       acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 3}},
+				logger),
+			policyv1beta1.PodDisruptionBudget{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "postgres-myapp-database-pdb",
+					Namespace: "myapp",
+					Labels:    map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
+				},
+				Spec: policyv1beta1.PodDisruptionBudgetSpec{
+					MinAvailable: toIntStr(0),
+					Selector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
+					},
+				},
+			},
+		},
+		// With non-default PDBNameFormat and PodDisruptionBudget explicitly enabled.
+		{
+			New(
+				Config{OpConfig: config.Config{Resources: config.Resources{ClusterNameLabel: "cluster-name", PodRoleLabel: "spilo-role"}, PDBNameFormat: "postgres-{cluster}-databass-budget", EnablePodDisruptionBudget: True()}},
+				k8sutil.KubernetesClient{},
+				acidv1.Postgresql{
+					ObjectMeta: metav1.ObjectMeta{Name: "myapp-database", Namespace: "myapp"},
+					Spec:       acidv1.PostgresSpec{TeamID: "myapp", NumberOfInstances: 3}},
+				logger),
+			policyv1beta1.PodDisruptionBudget{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "postgres-myapp-database-databass-budget",
+					Namespace: "myapp",
+					Labels:    map[string]string{"team": "myapp", "cluster-name": "myapp-database"},
+				},
+				Spec: policyv1beta1.PodDisruptionBudgetSpec{
+					MinAvailable: toIntStr(1),
+					Selector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"spilo-role": "master", "cluster-name": "myapp-database"},
+					},
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		result := tt.c.generatePodDisruptionBudget()
+		if !reflect.DeepEqual(*result, tt.out) {
+			t.Errorf("Expected PodDisruptionBudget: %#v, got %#v", tt.out, *result)
+		}
+	}
+}
+
 func TestShmVolume(t *testing.T) {
 	testName := "TestShmVolume"
 	tests := []struct {
@@ -269,6 +387,5 @@ func TestCloneEnv(t *testing.T) {
 			t.Errorf("%s %s: Expected env value %s, have %s instead",
 				testName, tt.subTest, tt.env.Value, env.Value)
 		}
-
 	}
 }
diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index f5ae30b81..f69bdd2d9 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -73,20 +73,6 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
 		}
 	}
 
-	// create database objects unless we are running without pods or disabled that feature explicitly
-	if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&newSpec.Spec) <= 0) {
-		c.logger.Debugf("syncing roles")
-		if err = c.syncRoles(); err != nil {
-			err = fmt.Errorf("could not sync roles: %v", err)
-			return err
-		}
-		c.logger.Debugf("syncing databases")
-		if err = c.syncDatabases(); err != nil {
-			err = fmt.Errorf("could not sync databases: %v", err)
-			return err
-		}
-	}
-
 	c.logger.Debug("syncing pod disruption budgets")
 	if err = c.syncPodDisruptionBudget(false); err != nil {
 		err = fmt.Errorf("could not sync pod disruption budget: %v", err)
@@ -103,6 +89,20 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
 		}
 	}
 
+	// create database objects unless we are running without pods or disabled that feature explicitly
+	if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&newSpec.Spec) <= 0) {
+		c.logger.Debugf("syncing roles")
+		if err = c.syncRoles(); err != nil {
+			err = fmt.Errorf("could not sync roles: %v", err)
+			return err
+		}
+		c.logger.Debugf("syncing databases")
+		if err = c.syncDatabases(); err != nil {
+			err = fmt.Errorf("could not sync databases: %v", err)
+			return err
+		}
+	}
+
 	return err
 }
diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go
index 66d8bddd6..a6d2173f1 100644
--- a/pkg/controller/operator_config.go
+++ b/pkg/controller/operator_config.go
@@ -46,6 +46,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigurationData) *config.Config {
 	result.ClusterDomain = fromCRD.Kubernetes.ClusterDomain
 	result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace
 	result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat
+	result.EnablePodDisruptionBudget = fromCRD.Kubernetes.EnablePodDisruptionBudget
 	result.SecretNameTemplate = fromCRD.Kubernetes.SecretNameTemplate
 	result.OAuthTokenSecretName = fromCRD.Kubernetes.OAuthTokenSecretName
 	result.InfrastructureRolesSecretName = fromCRD.Kubernetes.InfrastructureRolesSecretName
diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go
index 82249eb46..b8188e44e 100644
--- a/pkg/util/config/config.go
+++ b/pkg/util/config/config.go
@@ -110,20 +110,21 @@ type Config struct {
 	EnablePodAntiAffinity      bool   `name:"enable_pod_antiaffinity" default:"false"`
 	PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
 	// deprecated and kept for backward compatibility
-	EnableLoadBalancer        *bool             `name:"enable_load_balancer"`
-	MasterDNSNameFormat       StringTemplate    `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"`
-	ReplicaDNSNameFormat      StringTemplate    `name:"replica_dns_name_format" default:"{cluster}-repl.{team}.{hostedzone}"`
-	PDBNameFormat             StringTemplate    `name:"pdb_name_format" default:"postgres-{cluster}-pdb"`
-	Workers                   uint32            `name:"workers" default:"4"`
-	APIPort                   int               `name:"api_port" default:"8080"`
-	RingLogLines              int               `name:"ring_log_lines" default:"100"`
-	ClusterHistoryEntries     int               `name:"cluster_history_entries" default:"1000"`
-	TeamAPIRoleConfiguration  map[string]string `name:"team_api_role_configuration" default:"log_statement:all"`
-	PodTerminateGracePeriod   time.Duration     `name:"pod_terminate_grace_period" default:"5m"`
-	PodManagementPolicy       string            `name:"pod_management_policy" default:"ordered_ready"`
-	ProtectedRoles            []string          `name:"protected_role_names" default:"admin"`
-	PostgresSuperuserTeams    []string          `name:"postgres_superuser_teams" default:""`
-	SetMemoryRequestToLimit   bool              `name:"set_memory_request_to_limit" defaults:"false"`
+	EnableLoadBalancer        *bool             `name:"enable_load_balancer"`
+	MasterDNSNameFormat       StringTemplate    `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"`
+	ReplicaDNSNameFormat      StringTemplate    `name:"replica_dns_name_format" default:"{cluster}-repl.{team}.{hostedzone}"`
+	PDBNameFormat             StringTemplate    `name:"pdb_name_format" default:"postgres-{cluster}-pdb"`
+	EnablePodDisruptionBudget *bool             `name:"enable_pod_disruption_budget" default:"true"`
+	Workers                   uint32            `name:"workers" default:"4"`
+	APIPort                   int               `name:"api_port" default:"8080"`
+	RingLogLines              int               `name:"ring_log_lines" default:"100"`
+	ClusterHistoryEntries     int               `name:"cluster_history_entries" default:"1000"`
+	TeamAPIRoleConfiguration  map[string]string `name:"team_api_role_configuration" default:"log_statement:all"`
+	PodTerminateGracePeriod   time.Duration     `name:"pod_terminate_grace_period" default:"5m"`
+	PodManagementPolicy       string            `name:"pod_management_policy" default:"ordered_ready"`
+	ProtectedRoles            []string          `name:"protected_role_names" default:"admin"`
+	PostgresSuperuserTeams    []string          `name:"postgres_superuser_teams" default:""`
+	SetMemoryRequestToLimit   bool              `name:"set_memory_request_to_limit" default:"false"`
 }
 
 // MustMarshal marshals the config or panics
diff --git a/pkg/util/k8sutil/k8sutil.go b/pkg/util/k8sutil/k8sutil.go
index bd10256e0..66b51dd1f 100644
--- a/pkg/util/k8sutil/k8sutil.go
+++ b/pkg/util/k8sutil/k8sutil.go
@@ -158,7 +158,7 @@ func SamePDB(cur, new *policybeta1.PodDisruptionBudget) (match bool, reason string) {
 	//TODO: improve comparison
 	match = reflect.DeepEqual(new.Spec, cur.Spec)
 	if !match {
-		reason = "new service spec doesn't match the current one"
+		reason = "new PDB spec doesn't match the current one"
 	}
 	return