From 330c2c4c0bda5169ccad5bf0837564e87947a155 Mon Sep 17 00:00:00 2001 From: Jan Mussler Date: Wed, 30 Jun 2021 15:01:55 +0200 Subject: [PATCH 1/9] Do not modify if values are below gp3 minimum throughput. (#1543) * Do not modify if values are below gp3 minimum throughput. --- pkg/cluster/volumes.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cluster/volumes.go b/pkg/cluster/volumes.go index 9a41f5f05..5837375da 100644 --- a/pkg/cluster/volumes.go +++ b/pkg/cluster/volumes.go @@ -96,13 +96,13 @@ func (c *Cluster) syncUnderlyingEBSVolume() error { var modifySize *int64 var modifyType *string - if targetValue.Iops != nil { + if targetValue.Iops != nil && *targetValue.Iops >= int64(3000) { if volume.Iops != *targetValue.Iops { modifyIops = targetValue.Iops } } - if targetValue.Throughput != nil { + if targetValue.Throughput != nil && *targetValue.Throughput >= int64(125) { if volume.Throughput != *targetValue.Throughput { modifyThroughput = targetValue.Throughput } From fa604027cf52bb911df91188837a2d028b4dc022 Mon Sep 17 00:00:00 2001 From: Rafia Sabih Date: Fri, 2 Jul 2021 08:46:21 +0200 Subject: [PATCH 2/9] Move flag to configmap (#1540) * Move flag to configmap Co-authored-by: Rafia Sabih Co-authored-by: Felix Kunde --- .../crds/operatorconfigurations.yaml | 3 ++ .../postgres-operator/crds/postgresqls.yaml | 2 - charts/postgres-operator/values.yaml | 4 +- docs/reference/operator_parameters.md | 20 ++++++--- docs/user.md | 2 +- e2e/tests/test_e2e.py | 23 ++++++---- manifests/complete-postgres-manifest.yaml | 1 - manifests/configmap.yaml | 1 + ...gresql-operator-default-configuration.yaml | 1 + pkg/apis/acid.zalan.do/v1/crds.go | 6 +-- .../v1/operator_configuration_type.go | 1 + pkg/apis/acid.zalan.do/v1/postgresql_type.go | 43 +++++++++---------- .../acid.zalan.do/v1/zz_generated.deepcopy.go | 5 --- pkg/cluster/cluster.go | 3 +- pkg/cluster/cluster_test.go | 2 +- pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + 17 files changed, 66 insertions(+), 53 deletions(-) diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 82a737ae6..9ae7b1c91 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -173,6 +173,9 @@ spec: enable_init_containers: type: boolean default: true + enable_cross_namespace_secret: + type: boolean + default: false enable_pod_antiaffinity: type: boolean default: false diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index eb628863d..aead7fe69 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -515,8 +515,6 @@ spec: type: integer useLoadBalancer: # deprecated type: boolean - enableNamespacedSecret: - type: boolean users: type: object additionalProperties: diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 287835a24..32160cf5a 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -97,6 +97,8 @@ configKubernetes: # - deployment-time # - downscaler/* + # allow user secrets in other namespaces than the Postgres cluster + enable_cross_namespace_secret: false # enables initContainers to run actions before Spilo is started enable_init_containers: true # toggles pod anti affinity on the Postgres pods @@ -151,7 +153,7 @@ configKubernetes: # template for database 
user secrets generated by the operator, # here username contains the namespace in the format namespace.username # if the user is in different namespace than cluster and cross namespace secrets - # are enabled via EnableNamespacedSecret flag. + # are enabled via `enable_cross_namespace_secret` flag in the configuration. secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}" # set user and group for the spilo container (required to run Spilo as non-root process) # spilo_runasuser: 101 diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 1b1ae852e..2217d87bb 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -264,6 +264,13 @@ configuration they are grouped under the `kubernetes` key. [admin docs](../administrator.md#pod-disruption-budget) for more information. Default is true. +* **enable_cross_namespace_secrets** + To allow secrets in a different namespace other than the Postgres cluster + namespace. Once enabled, specify the namespace in the user name under the + `users` section in the form `{namespace}.{username}`. The operator will then + create the user secret in that namespace. The part after the first `.` is + considered to be the user name. The default is `false`. + * **enable_init_containers** global option to allow for creating init containers in the cluster manifest to run actions before Spilo is started. Default is true. @@ -275,13 +282,12 @@ configuration they are grouped under the `kubernetes` key. * **secret_name_template** a template for the name of the database user secrets generated by the - operator. `{namespace}` is replaced with name of the namespace (if cross - namespace secrets are enabled via EnableNamespacedSecret flag, otherwise the - secret is in cluster's namespace and in that case it is not present in secret - name), `{username}` is replaced with name of the secret, `{cluster}` with the - name of the cluster, `{tprkind}` with the kind of CRD (formerly known as TPR) - and `{tprgroup}` with the group of the CRD. No other placeholders are allowed. - The default is + operator. `{namespace}` is replaced with name of the namespace if + `enable_cross_namespace_secret` is set, otherwise the + secret is in cluster's namespace. `{username}` is replaced with name of the + secret, `{cluster}` with the name of the cluster, `{tprkind}` with the kind + of CRD (formerly known as TPR) and `{tprgroup}` with the group of the CRD. + No other placeholders are allowed. The default is `{namespace}.{username}.{cluster}.credentials.{tprkind}.{tprgroup}`. * **cluster_domain** diff --git a/docs/user.md b/docs/user.md index 245385a2b..47d10e7e0 100644 --- a/docs/user.md +++ b/docs/user.md @@ -140,7 +140,7 @@ At the moment it is not possible to define membership of the manifest role in other roles. 
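Putting the configuration toggle and the namespace-qualified user name together, a minimal sketch looks like the following (the ConfigMap metadata name is assumed here, and the `appspace.db_user` entry mirrors the e2e test; both are placeholders rather than required values):

```yaml
# Operator ConfigMap: enable the feature (string value, as in manifests/configmap.yaml).
# In the OperatorConfiguration CRD the same option is a boolean under the kubernetes section.
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-operator   # assumed name, adjust to your deployment
data:
  enable_cross_namespace_secret: "true"
---
# Cluster manifest: the part before the first "." is taken as the target namespace,
# the rest as the user name, so the secret for db_user is created in "appspace"
# instead of the cluster's own namespace.
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
  name: acid-minimal-cluster
  namespace: default
spec:
  users:
    appspace.db_user: []
```
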
To define the secrets for the users in a different namespace than that of the cluster, -one can use the flag `EnableNamespacedSecret` and declare the namespace for the +one can set `enable_cross_namespace_secret` and declare the namespace for the secrets in the manifest in the following manner, ```yaml diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 30d0cfe2f..5815af24d 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -598,29 +598,36 @@ class EndToEndTestCase(unittest.TestCase): self.k8s.api.core_v1.create_namespace(v1_appnamespace) self.k8s.wait_for_namespace_creation(app_namespace) + patch_cross_namespace_secret = { + "data": { + "enable_cross_namespace_secret": "true" + } + } + self.k8s.update_config(patch_cross_namespace_secret, + step="cross namespace secrets enabled") + self.k8s.api.custom_objects_api.patch_namespaced_custom_object( 'acid.zalan.do', 'v1', 'default', 'postgresqls', 'acid-minimal-cluster', { 'spec': { - 'enableNamespacedSecret': True, 'users':{ 'appspace.db_user': [], } } }) + self.eventuallyEqual(lambda: self.k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", app_namespace), 1, "Secret not created for user in namespace") #reset the flag - self.k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableNamespacedSecret': False, + unpatch_cross_namespace_secret = { + "data": { + "enable_cross_namespace_secret": "false", } - }) + } + self.k8s.update_config(unpatch_cross_namespace_secret, step="disable cross namespace secrets") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_lazy_spilo_upgrade(self): diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 5f995de15..6e2acbdd3 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -12,7 +12,6 @@ spec: dockerImage: registry.opensource.zalan.do/acid/spilo-13:2.0-p7 teamId: "acid" numberOfInstances: 2 - enableNamespacedSecret: False users: # Application/Robot users zalando: - superuser diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 7a05135ab..96072644d 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -36,6 +36,7 @@ data: # downscaler_annotations: "deployment-time,downscaler/*" # enable_admin_role_for_users: "true" # enable_crd_validation: "true" + # enable_cross_namespace_secret: "false" # enable_database_access: "true" enable_ebs_gp3_migration: "false" # enable_ebs_gp3_migration_max_size: "1000" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index bd6f321dd..e869498ba 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -45,6 +45,7 @@ configuration: # downscaler_annotations: # - deployment-time # - downscaler/* + # enable_cross_namespace_secret: "false" enable_init_containers: true enable_pod_antiaffinity: false enable_pod_disruption_budget: true diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index ae91a9f38..6ca754bbb 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -730,9 +730,6 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ Type: "boolean", Description: "Deprecated", }, - "enableNamespacedSecret": { - Type: "boolean", - }, "users": { Type: 
"object", AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ @@ -1029,6 +1026,9 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "enable_cross_namespace_secret": { + Type: "boolean", + }, "enable_init_containers": { Type: "boolean", }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index cf581431b..8023864cf 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -91,6 +91,7 @@ type KubernetesMetaConfiguration struct { EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity,omitempty"` PodAntiAffinityTopologyKey string `json:"pod_antiaffinity_topology_key,omitempty"` PodManagementPolicy string `json:"pod_management_policy,omitempty"` + EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"` } // PostgresPodResourcesDefaults defines the spec of default resources diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index 1787f5b4e..7346fb0e5 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -53,28 +53,27 @@ type PostgresSpec struct { // load balancers' source ranges are the same for master and replica services AllowedSourceRanges []string `json:"allowedSourceRanges"` - NumberOfInstances int32 `json:"numberOfInstances"` - EnableNamespacedSecret *bool `json:"enableNamespacedSecret,omitempty"` - Users map[string]UserFlags `json:"users,omitempty"` - MaintenanceWindows []MaintenanceWindow `json:"maintenanceWindows,omitempty"` - Clone *CloneDescription `json:"clone,omitempty"` - ClusterName string `json:"-"` - Databases map[string]string `json:"databases,omitempty"` - PreparedDatabases map[string]PreparedDatabase `json:"preparedDatabases,omitempty"` - SchedulerName *string `json:"schedulerName,omitempty"` - NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"` - Tolerations []v1.Toleration `json:"tolerations,omitempty"` - Sidecars []Sidecar `json:"sidecars,omitempty"` - InitContainers []v1.Container `json:"initContainers,omitempty"` - PodPriorityClassName string `json:"podPriorityClassName,omitempty"` - ShmVolume *bool `json:"enableShmVolume,omitempty"` - EnableLogicalBackup bool `json:"enableLogicalBackup,omitempty"` - LogicalBackupSchedule string `json:"logicalBackupSchedule,omitempty"` - StandbyCluster *StandbyDescription `json:"standby,omitempty"` - PodAnnotations map[string]string `json:"podAnnotations,omitempty"` - ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"` - TLS *TLSDescription `json:"tls,omitempty"` - AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"` + NumberOfInstances int32 `json:"numberOfInstances"` + Users map[string]UserFlags `json:"users,omitempty"` + MaintenanceWindows []MaintenanceWindow `json:"maintenanceWindows,omitempty"` + Clone *CloneDescription `json:"clone,omitempty"` + ClusterName string `json:"-"` + Databases map[string]string `json:"databases,omitempty"` + PreparedDatabases map[string]PreparedDatabase `json:"preparedDatabases,omitempty"` + SchedulerName *string `json:"schedulerName,omitempty"` + NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"` + Tolerations []v1.Toleration `json:"tolerations,omitempty"` + Sidecars []Sidecar `json:"sidecars,omitempty"` + InitContainers []v1.Container `json:"initContainers,omitempty"` + PodPriorityClassName string 
`json:"podPriorityClassName,omitempty"` + ShmVolume *bool `json:"enableShmVolume,omitempty"` + EnableLogicalBackup bool `json:"enableLogicalBackup,omitempty"` + LogicalBackupSchedule string `json:"logicalBackupSchedule,omitempty"` + StandbyCluster *StandbyDescription `json:"standby,omitempty"` + PodAnnotations map[string]string `json:"podAnnotations,omitempty"` + ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"` + TLS *TLSDescription `json:"tls,omitempty"` + AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"` // deprecated json tags InitContainersOld []v1.Container `json:"init_containers,omitempty"` diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index bad75ffd8..584a72143 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -614,11 +614,6 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { *out = make([]string, len(*in)) copy(*out, *in) } - if in.EnableNamespacedSecret != nil { - in, out := &in.EnableNamespacedSecret, &out.EnableNamespacedSecret - *out = new(bool) - **out = **in - } if in.Users != nil { in, out := &in.Users, &out.Users *out = make(map[string]UserFlags, len(*in)) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index f56a0d1e4..c9abb10fd 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -1163,8 +1163,7 @@ func (c *Cluster) initRobotUsers() error { namespace := c.Namespace //if namespaced secrets are allowed - if c.Postgresql.Spec.EnableNamespacedSecret != nil && - *c.Postgresql.Spec.EnableNamespacedSecret { + if c.Config.OpConfig.EnableCrossNamespaceSecret { if strings.Contains(username, ".") { splits := strings.Split(username, ".") namespace = splits[0] diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go index 9e7f60906..dc1f5ff03 100644 --- a/pkg/cluster/cluster_test.go +++ b/pkg/cluster/cluster_test.go @@ -1024,7 +1024,6 @@ func TestCrossNamespacedSecrets(t *testing.T) { Volume: acidv1.Volume{ Size: "1Gi", }, - EnableNamespacedSecret: boolToPointer(true), Users: map[string]acidv1.UserFlags{ "appspace.db_user": {}, "db_user": {}, @@ -1052,6 +1051,7 @@ func TestCrossNamespacedSecrets(t *testing.T) { DefaultMemoryLimit: "300Mi", PodRoleLabel: "spilo-role", }, + EnableCrossNamespaceSecret: true, }, }, client, pg, logger, eventRecorder) diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 761cf1b60..1b9cfba96 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -82,6 +82,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.EnableSidecars = util.CoalesceBool(fromCRD.Kubernetes.EnableSidecars, util.True()) result.SecretNameTemplate = fromCRD.Kubernetes.SecretNameTemplate result.OAuthTokenSecretName = fromCRD.Kubernetes.OAuthTokenSecretName + result.EnableCrossNamespaceSecret = fromCRD.Kubernetes.EnableCrossNamespaceSecret result.InfrastructureRolesSecretName = fromCRD.Kubernetes.InfrastructureRolesSecretName if fromCRD.Kubernetes.InfrastructureRolesDefs != nil { diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 52530fb12..662161053 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -207,6 +207,7 @@ type Config struct { PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""` SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"` 
EnableLazySpiloUpgrade bool `name:"enable_lazy_spilo_upgrade" default:"false"` + EnableCrossNamespaceSecret bool `name:"enable_cross_namespace_secret" default:"false"` EnablePgVersionEnvVar bool `name:"enable_pgversion_env_var" default:"true"` EnableSpiloWalPathCompat bool `name:"enable_spilo_wal_path_compat" default:"false"` MajorVersionUpgradeMode string `name:"major_version_upgrade_mode" default:"off"` From b9d6c4ebed881272b76e8f361742880800faee1c Mon Sep 17 00:00:00 2001 From: Julian Date: Wed, 7 Jul 2021 14:47:49 +0200 Subject: [PATCH 3/9] Add namespace to helm chart resources that need them (#1531) --- charts/postgres-operator-ui/templates/deployment.yaml | 1 + charts/postgres-operator-ui/templates/ingress.yaml | 1 + charts/postgres-operator-ui/templates/service.yaml | 1 + charts/postgres-operator-ui/templates/serviceaccount.yaml | 1 + charts/postgres-operator/templates/configmap.yaml | 1 + charts/postgres-operator/templates/deployment.yaml | 1 + charts/postgres-operator/templates/operatorconfiguration.yaml | 1 + .../postgres-operator/templates/postgres-pod-priority-class.yaml | 1 + charts/postgres-operator/templates/service.yaml | 1 + charts/postgres-operator/templates/serviceaccount.yaml | 1 + 10 files changed, 10 insertions(+) diff --git a/charts/postgres-operator-ui/templates/deployment.yaml b/charts/postgres-operator-ui/templates/deployment.yaml index b72b108e0..91c27fee5 100644 --- a/charts/postgres-operator-ui/templates/deployment.yaml +++ b/charts/postgres-operator-ui/templates/deployment.yaml @@ -7,6 +7,7 @@ metadata: app.kubernetes.io/managed-by: {{ .Release.Service }} app.kubernetes.io/instance: {{ .Release.Name }} name: {{ template "postgres-operator-ui.fullname" . }} + namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: diff --git a/charts/postgres-operator-ui/templates/ingress.yaml b/charts/postgres-operator-ui/templates/ingress.yaml index 73fa2e817..873cfed0f 100644 --- a/charts/postgres-operator-ui/templates/ingress.yaml +++ b/charts/postgres-operator-ui/templates/ingress.yaml @@ -9,6 +9,7 @@ apiVersion: extensions/v1beta1 kind: Ingress metadata: name: {{ $fullName }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: {{ template "postgres-operator-ui.name" . }} helm.sh/chart: {{ template "postgres-operator-ui.chart" . }} diff --git a/charts/postgres-operator-ui/templates/service.yaml b/charts/postgres-operator-ui/templates/service.yaml index bc40fbbb1..e14603720 100644 --- a/charts/postgres-operator-ui/templates/service.yaml +++ b/charts/postgres-operator-ui/templates/service.yaml @@ -7,6 +7,7 @@ metadata: app.kubernetes.io/managed-by: {{ .Release.Service }} app.kubernetes.io/instance: {{ .Release.Name }} name: {{ template "postgres-operator-ui.fullname" . }} + namespace: {{ .Release.Namespace }} spec: ports: - port: {{ .Values.service.port }} diff --git a/charts/postgres-operator-ui/templates/serviceaccount.yaml b/charts/postgres-operator-ui/templates/serviceaccount.yaml index 4c5a25543..94a9ca52e 100644 --- a/charts/postgres-operator-ui/templates/serviceaccount.yaml +++ b/charts/postgres-operator-ui/templates/serviceaccount.yaml @@ -3,6 +3,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "postgres-operator-ui.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: {{ template "postgres-operator-ui.name" . }} helm.sh/chart: {{ template "postgres-operator-ui.chart" . 
}} diff --git a/charts/postgres-operator/templates/configmap.yaml b/charts/postgres-operator/templates/configmap.yaml index 836babe1d..094652a21 100644 --- a/charts/postgres-operator/templates/configmap.yaml +++ b/charts/postgres-operator/templates/configmap.yaml @@ -3,6 +3,7 @@ apiVersion: v1 kind: ConfigMap metadata: name: {{ template "postgres-operator.fullname" . }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: {{ template "postgres-operator.name" . }} helm.sh/chart: {{ template "postgres-operator.chart" . }} diff --git a/charts/postgres-operator/templates/deployment.yaml b/charts/postgres-operator/templates/deployment.yaml index 89500ae94..b91062666 100644 --- a/charts/postgres-operator/templates/deployment.yaml +++ b/charts/postgres-operator/templates/deployment.yaml @@ -7,6 +7,7 @@ metadata: app.kubernetes.io/managed-by: {{ .Release.Service }} app.kubernetes.io/instance: {{ .Release.Name }} name: {{ template "postgres-operator.fullname" . }} + namespace: {{ .Release.Namespace }} spec: replicas: 1 selector: diff --git a/charts/postgres-operator/templates/operatorconfiguration.yaml b/charts/postgres-operator/templates/operatorconfiguration.yaml index be1608297..4e380f448 100644 --- a/charts/postgres-operator/templates/operatorconfiguration.yaml +++ b/charts/postgres-operator/templates/operatorconfiguration.yaml @@ -3,6 +3,7 @@ apiVersion: "acid.zalan.do/v1" kind: OperatorConfiguration metadata: name: {{ template "postgres-operator.fullname" . }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: {{ template "postgres-operator.name" . }} helm.sh/chart: {{ template "postgres-operator.chart" . }} diff --git a/charts/postgres-operator/templates/postgres-pod-priority-class.yaml b/charts/postgres-operator/templates/postgres-pod-priority-class.yaml index 7ee0f2e55..583639eca 100644 --- a/charts/postgres-operator/templates/postgres-pod-priority-class.yaml +++ b/charts/postgres-operator/templates/postgres-pod-priority-class.yaml @@ -9,6 +9,7 @@ metadata: app.kubernetes.io/managed-by: {{ .Release.Service }} app.kubernetes.io/instance: {{ .Release.Name }} name: {{ .Values.podPriorityClassName }} + namespace: {{ .Release.Namespace }} preemptionPolicy: PreemptLowerPriority globalDefault: false value: 1000000 diff --git a/charts/postgres-operator/templates/service.yaml b/charts/postgres-operator/templates/service.yaml index 38ea9a062..c1b52744c 100644 --- a/charts/postgres-operator/templates/service.yaml +++ b/charts/postgres-operator/templates/service.yaml @@ -7,6 +7,7 @@ metadata: app.kubernetes.io/managed-by: {{ .Release.Service }} app.kubernetes.io/instance: {{ .Release.Name }} name: {{ template "postgres-operator.fullname" . }} + namespace: {{ .Release.Namespace }} spec: type: ClusterIP ports: diff --git a/charts/postgres-operator/templates/serviceaccount.yaml b/charts/postgres-operator/templates/serviceaccount.yaml index e04e8ad62..4f42559c9 100644 --- a/charts/postgres-operator/templates/serviceaccount.yaml +++ b/charts/postgres-operator/templates/serviceaccount.yaml @@ -3,6 +3,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: {{ include "postgres-operator.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} labels: app.kubernetes.io/name: {{ template "postgres-operator.name" . }} helm.sh/chart: {{ template "postgres-operator.chart" . 
}} From 8f03cd1aa9399c0bf4ac696e2e10e5ef0883784b Mon Sep 17 00:00:00 2001 From: yelhouti Date: Tue, 20 Jul 2021 10:53:48 +0200 Subject: [PATCH 4/9] document where the chart is hosted (#1554) * document where the chart is hosted --- docs/quickstart.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/quickstart.md b/docs/quickstart.md index a90c90f42..a715bf945 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -85,6 +85,8 @@ The chart works with both Helm 2 and Helm 3. The `crd-install` hook from v2 will be skipped with warning when using v3. Documentation for installing applications with Helm 2 can be found in the [v2 docs](https://v2.helm.sh/docs/). +The chart is also hosted at: https://opensource.zalando.com/postgres-operator/charts/postgres-operator/ + ## Check if Postgres Operator is running Starting the operator may take a few seconds. Check if the operator pod is From b1fab703ee1676cb6b6f00b43bf785cb7a95e30a Mon Sep 17 00:00:00 2001 From: Remo Wenger Date: Tue, 20 Jul 2021 10:54:06 +0200 Subject: [PATCH 5/9] Fix typo (#1559) --- docs/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index a715bf945..bdcc6dfad 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -14,7 +14,7 @@ solutions: * [kind](https://kind.sigs.k8s.io/) and [k3d](https://k3d.io), which allows creating multi-nodes K8s clusters running on Docker (requires Docker) -To interact with the K8s infrastructure install it's CLI runtime [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/#install-kubectl-binary-via-curl). +To interact with the K8s infrastructure install its CLI runtime [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/#install-kubectl-binary-via-curl). This quickstart assumes that you have started minikube or created a local kind cluster. 
Note that you can also use built-in K8s support in the Docker Desktop From 58bab073da9bb14535214b27164b55a8fc16f53a Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Tue, 27 Jul 2021 09:46:55 +0200 Subject: [PATCH 6/9] fix searching for users with namespace in name (#1569) * fix searching for users with namespace in name and improve e2e test * remove reformatting username to query --- e2e/tests/test_e2e.py | 29 +++++++++++------------------ pkg/cluster/sync.go | 6 ------ 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 5815af24d..9e7df1aa2 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -588,16 +588,13 @@ class EndToEndTestCase(unittest.TestCase): raise @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_zz_cross_namespace_secrets(self): + def test_cross_namespace_secrets(self): ''' Test secrets in different namespace ''' - app_namespace = "appspace" - - v1_appnamespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=app_namespace)) - self.k8s.api.core_v1.create_namespace(v1_appnamespace) - self.k8s.wait_for_namespace_creation(app_namespace) + k8s = self.k8s + # enable secret creation in separate namespace patch_cross_namespace_secret = { "data": { "enable_cross_namespace_secret": "true" @@ -605,30 +602,26 @@ class EndToEndTestCase(unittest.TestCase): } self.k8s.update_config(patch_cross_namespace_secret, step="cross namespace secrets enabled") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + # create secret in test namespace self.k8s.api.custom_objects_api.patch_namespaced_custom_object( 'acid.zalan.do', 'v1', 'default', 'postgresqls', 'acid-minimal-cluster', { 'spec': { 'users':{ - 'appspace.db_user': [], + 'test.db_user': [], } } }) - - self.eventuallyEqual(lambda: self.k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", app_namespace), + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + self.eventuallyEqual(lambda: self.k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), 1, "Secret not created for user in namespace") - #reset the flag - unpatch_cross_namespace_secret = { - "data": { - "enable_cross_namespace_secret": "false", - } - } - self.k8s.update_config(unpatch_cross_namespace_secret, step="disable cross namespace secrets") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_lazy_spilo_upgrade(self): ''' diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 53552f558..85d87b35a 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -386,7 +386,6 @@ func (c *Cluster) syncStatefulSet() error { return fmt.Errorf("could not set cluster-wide PostgreSQL configuration options: %v", err) } - if instancesRestartRequired { c.logger.Debugln("restarting Postgres server within pods") c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pods") @@ -623,11 +622,6 @@ func (c *Cluster) syncRoles() (err error) { // create list of database roles to query for _, u := range c.pgUsers { pgRole := u.Name - if u.Namespace != c.Namespace && u.Namespace != "" { - // to avoid the conflict of having multiple users of same name - // but each in different namespace. 
- pgRole = fmt.Sprintf("%s.%s", u.Name, u.Namespace) - } userNames = append(userNames, pgRole) // add team member role name with rename suffix in case we need to rename it back if u.Origin == spec.RoleOriginTeamsAPI && c.OpConfig.EnableTeamMemberDeprecation { From 2057ad8191217037f8f3eaae2ee45f1bb5572ab2 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Tue, 27 Jul 2021 11:41:27 +0200 Subject: [PATCH 7/9] reorder e2e tests to follow alphabetical sorting (#1567) * reorder e2e tests to follow alphabetical sorting * e2e: finish waiting for pod failover only if all pods were replaced * wait for sync in rolling update timeout test --- .../postgres-operator/crds/postgresqls.yaml | 182 ++-- e2e/tests/k8s_api.py | 18 + e2e/tests/test_e2e.py | 940 +++++++++--------- manifests/postgresql.crd.yaml | 182 ++-- pkg/apis/acid.zalan.do/v1/crds.go | 170 ++-- 5 files changed, 761 insertions(+), 731 deletions(-) diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index aead7fe69..7604e8d5a 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -223,6 +223,97 @@ spec: items: type: string pattern: '^\ *((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))-((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))\ *$' + nodeAffinity: + type: object + properties: + preferredDuringSchedulingIgnoredDuringExecution: + type: array + items: + type: object + required: + - weight + - preference + properties: + preference: + type: object + properties: + matchExpressions: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string + matchFields: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string + weight: + format: int32 + type: integer + requiredDuringSchedulingIgnoredDuringExecution: + type: object + required: + - nodeSelectorTerms + properties: + nodeSelectorTerms: + type: array + items: + type: object + properties: + matchExpressions: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string + matchFields: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string numberOfInstances: type: integer minimum: 0 @@ -396,97 +487,6 @@ spec: type: string caSecretName: type: string - nodeAffinity: - type: object - properties: - preferredDuringSchedulingIgnoredDuringExecution: - type: array - items: - type: object - required: - - weight - - preference - properties: - preference: - type: object - properties: - matchExpressions: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string - matchFields: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string - weight: - format: int32 - type: integer - requiredDuringSchedulingIgnoredDuringExecution: - type: object - required: - - nodeSelectorTerms - properties: - 
nodeSelectorTerms: - type: array - items: - type: object - properties: - matchExpressions: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string - matchFields: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string tolerations: type: array items: diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py index d28ea69ad..9937add88 100644 --- a/e2e/tests/k8s_api.py +++ b/e2e/tests/k8s_api.py @@ -156,6 +156,10 @@ class K8s: while not get_services(): time.sleep(self.RETRY_TIMEOUT_SEC) + def count_pods_with_rolling_update_flag(self, labels, namespace='default'): + pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items + return len(list(filter(lambda x: "zalando-postgres-operator-rolling-update-required" in x.metadata.annotations, pods))) + def count_pods_with_label(self, labels, namespace='default'): return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items) @@ -189,6 +193,7 @@ class K8s: def wait_for_pod_failover(self, failover_targets, labels, namespace='default'): pod_phase = 'Failing over' new_pod_node = '' + pods_with_update_flag = self.count_pods_with_rolling_update_flag(labels, namespace) while (pod_phase != 'Running') or (new_pod_node not in failover_targets): pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items @@ -196,6 +201,10 @@ class K8s: new_pod_node = pods[0].spec.node_name pod_phase = pods[0].status.phase time.sleep(self.RETRY_TIMEOUT_SEC) + + while pods_with_update_flag != 0: + pods_with_update_flag = self.count_pods_with_rolling_update_flag(labels, namespace) + time.sleep(self.RETRY_TIMEOUT_SEC) def wait_for_namespace_creation(self, namespace='default'): ns_found = False @@ -423,6 +432,10 @@ class K8sBase: while not get_services(): time.sleep(self.RETRY_TIMEOUT_SEC) + def count_pods_with_rolling_update_flag(self, labels, namespace='default'): + pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items + return len(list(filter(lambda x: "zalando-postgres-operator-rolling-update-required" in x.metadata.annotations, pods))) + def count_pods_with_label(self, labels, namespace='default'): return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items) @@ -456,6 +469,7 @@ class K8sBase: def wait_for_pod_failover(self, failover_targets, labels, namespace='default'): pod_phase = 'Failing over' new_pod_node = '' + pods_with_update_flag = self.count_pods_with_rolling_update_flag(labels, namespace) while (pod_phase != 'Running') or (new_pod_node not in failover_targets): pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items @@ -464,6 +478,10 @@ class K8sBase: pod_phase = pods[0].status.phase time.sleep(self.RETRY_TIMEOUT_SEC) + while pods_with_update_flag != 0: + pods_with_update_flag = self.count_pods_with_rolling_update_flag(labels, namespace) + time.sleep(self.RETRY_TIMEOUT_SEC) + def get_logical_backup_job(self, namespace='default'): return self.api.batch_v1_beta1.list_namespaced_cron_job(namespace, label_selector="application=spilo") diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 9e7df1aa2..08d2864d2 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -290,37 +290,98 @@ class EndToEndTestCase(unittest.TestCase): "Operator does not get 
in sync") @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_overwrite_pooler_deployment(self): - self.k8s.create_with_kubectl("manifests/minimal-fake-pooler-deployment.yaml") - self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: self.k8s.get_deployment_replica_count(name="acid-minimal-cluster-pooler"), 1, - "Initial broken deployment not rolled out") + def test_cross_namespace_secrets(self): + ''' + Test secrets in different namespace + ''' + k8s = self.k8s - self.k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': True + # enable secret creation in separate namespace + patch_cross_namespace_secret = { + "data": { + "enable_cross_namespace_secret": "true" } - }) - - self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: self.k8s.get_deployment_replica_count(name="acid-minimal-cluster-pooler"), 2, - "Operator did not succeed in overwriting labels") + } + self.k8s.update_config(patch_cross_namespace_secret, + step="cross namespace secrets enabled") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + # create secret in test namespace self.k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': False - } - }) + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'users':{ + 'test.db_user': [], + } + } + }) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + self.eventuallyEqual(lambda: self.k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), + 1, "Secret not created for user in namespace") - self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: self.k8s.count_running_pods("connection-pooler=acid-minimal-cluster-pooler"), - 0, "Pooler pods not scaled down") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_decrease_max_connections(self): + ''' + Test decreasing max_connections and restarting cluster through rest api + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + labels = 'spilo-role=master,' + cluster_label + new_max_connections_value = "99" + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector=labels).items + self.assert_master_is_unique() + masterPod = pods[0] + creationTimestamp = masterPod.metadata.creation_timestamp + + # adjust max_connection + pg_patch_max_connections = { + "spec": { + "postgresql": { + "parameters": { + "max_connections": new_max_connections_value + } + } + } + } + + try: + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) + + def get_max_connections(): + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector=labels).items + self.assert_master_is_unique() + masterPod = pods[0] + get_max_connections_cmd = '''psql -At -U postgres -c "SELECT setting FROM pg_settings WHERE name = 'max_connections';"''' + result = k8s.exec_with_kubectl(masterPod.metadata.name, 
get_max_connections_cmd) + max_connections_value = int(result.stdout) + return max_connections_value + + #Make sure that max_connections decreased + self.eventuallyEqual(get_max_connections, int(new_max_connections_value), "max_connections didn't decrease") + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector=labels).items + self.assert_master_is_unique() + masterPod = pods[0] + #Make sure that pod didn't restart + self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp, + "Master pod creation timestamp is updated") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + # make sure cluster is in a good state for further tests + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, + "No 2 pods running") @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_enable_disable_connection_pooler(self): @@ -587,41 +648,6 @@ class EndToEndTestCase(unittest.TestCase): print('Operator log: {}'.format(k8s.get_operator_log())) raise - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_cross_namespace_secrets(self): - ''' - Test secrets in different namespace - ''' - k8s = self.k8s - - # enable secret creation in separate namespace - patch_cross_namespace_secret = { - "data": { - "enable_cross_namespace_secret": "true" - } - } - self.k8s.update_config(patch_cross_namespace_secret, - step="cross namespace secrets enabled") - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - # create secret in test namespace - self.k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'users':{ - 'test.db_user': [], - } - } - }) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - self.eventuallyEqual(lambda: self.k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), - 1, "Secret not created for user in namespace") - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_lazy_spilo_upgrade(self): ''' @@ -788,6 +814,33 @@ class EndToEndTestCase(unittest.TestCase): # ensure cluster is healthy after tests self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + @unittest.skip("Skipping this test until fixed") + def test_major_version_upgrade(self): + k8s = self.k8s + result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest-12.yaml") + self.eventuallyEqual(lambda: k8s.count_running_pods(labels="application=spilo,cluster-name=acid-upgrade-test"), 2, "No 2 pods running") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + pg_patch_version = { + "spec": { + "postgres": { + "version": "13" + } + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + def check_version_13(): + p = k8s.get_patroni_state("acid-upgrade-test-0") + version = p["server_version"][0:2] + return version + + self.evantuallyEqual(check_version_13, "13", "Version was not upgrade to 
13") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_min_resource_limits(self): ''' @@ -858,6 +911,7 @@ class EndToEndTestCase(unittest.TestCase): try: k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") k8s.wait_for_pod_start("spilo-role=master", self.test_namespace) + k8s.wait_for_pod_start("spilo-role=replica", self.test_namespace) self.assert_master_is_unique(self.test_namespace, "acid-test-cluster") except timeout_decorator.TimeoutError: @@ -871,360 +925,6 @@ class EndToEndTestCase(unittest.TestCase): "acid.zalan.do", "v1", self.test_namespace, "postgresqls", "acid-test-cluster") time.sleep(5) - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_rolling_update_flag(self): - ''' - Add rolling update flag to only the master and see it failing over - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - - # get node and replica (expected target of new master) - _, replica_nodes = k8s.get_pg_nodes(cluster_label) - - # rolling update annotation - flag = { - "metadata": { - "annotations": { - "zalando-postgres-operator-rolling-update-required": "true", - } - } - } - - try: - podsList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) - for pod in podsList.items: - # add flag only to the master to make it appear to the operator as a leftover from a rolling update - if pod.metadata.labels.get('spilo-role') == 'master': - old_creation_timestamp = pod.metadata.creation_timestamp - k8s.patch_pod(flag, pod.metadata.name, pod.metadata.namespace) - else: - # remember replica name to check if operator does a switchover - switchover_target = pod.metadata.name - - # do not wait until the next sync - k8s.delete_operator_pod() - - # operator should now recreate the master pod and do a switchover before - k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) - - # check if the former replica is now the new master - leader = k8s.get_cluster_leader_pod() - self.eventuallyEqual(lambda: leader.metadata.name, switchover_target, "Rolling update flag did not trigger switchover") - - # check that the old master has been recreated - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - replica = k8s.get_cluster_replica_pod() - self.assertTrue(replica.metadata.creation_timestamp > old_creation_timestamp, "Old master pod was not recreated") - - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_rolling_update_label_timeout(self): - ''' - Simulate case when replica does not receive label in time and rolling update does not finish - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - flag = "zalando-postgres-operator-rolling-update-required" - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - - # get node and replica (expected target of new master) - _, replica_nodes = k8s.get_pg_nodes(cluster_label) - - # rolling update annotation - rolling_update_patch = { - "metadata": { - "annotations": { - flag: "true", - } - } - } - - # make pod_label_wait_timeout so short that rolling update fails on first try - # temporarily lower resync interval to reduce waiting for further tests - # pods should get 
healthy in the meantime - patch_resync_config = { - "data": { - "pod_label_wait_timeout": "2s", - "resync_period": "20s", - } - } - - try: - # patch both pods for rolling update - podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) - for pod in podList.items: - k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace) - if pod.metadata.labels.get('spilo-role') == 'replica': - switchover_target = pod.metadata.name - - # update config and restart operator - k8s.update_config(patch_resync_config, "update resync interval and pod_label_wait_timeout") - - # operator should now recreate the replica pod first and do a switchover after - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - # pod_label_wait_timeout should have been exceeded hence the rolling update is continued on next sync - # check if the cluster state is "SyncFailed" - self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail") - - # wait for next sync, replica should be running normally by now and be ready for switchover - k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) - - # check if the former replica is now the new master - leader = k8s.get_cluster_leader_pod() - self.eventuallyEqual(lambda: leader.metadata.name, switchover_target, "Rolling update flag did not trigger switchover") - - # wait for the old master to get restarted - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - # status should again be "SyncFailed" but turn into "Running" on the next sync - time.sleep(10) - self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs") - - # revert config changes - patch_resync_config = { - "data": { - "pod_label_wait_timeout": "10m", - "resync_period": "30m", - } - } - k8s.update_config(patch_resync_config, "revert resync interval and pod_label_wait_timeout") - - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_zz_node_readiness_label(self): - ''' - Remove node readiness label from master node. This must cause a failover. 
- ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - readiness_label = 'lifecycle-status' - readiness_value = 'ready' - - try: - # get nodes of master and replica(s) (expected target of new master) - current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) - num_replicas = len(current_replica_nodes) - failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) - - # add node_readiness_label to potential failover nodes - patch_readiness_label = { - "metadata": { - "labels": { - readiness_label: readiness_value - } - } - } - self.assertTrue(len(failover_targets) > 0, "No failover targets available") - for failover_target in failover_targets: - k8s.api.core_v1.patch_node(failover_target, patch_readiness_label) - - # define node_readiness_label in config map which should trigger a failover of the master - patch_readiness_label_config = { - "data": { - "node_readiness_label": readiness_label + ':' + readiness_value, - } - } - k8s.update_config(patch_readiness_label_config, "setting readiness label") - new_master_node, new_replica_nodes = self.assert_failover( - current_master_node, num_replicas, failover_targets, cluster_label) - - # patch also node where master ran before - k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label) - - # toggle pod anti affinity to move replica away from master node - self.eventuallyTrue(lambda: self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label), "Pods are redistributed") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_scaling(self): - ''' - Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. - ''' - k8s = self.k8s - pod = "acid-minimal-cluster-0" - - k8s.scale_cluster(3) - self.eventuallyEqual(lambda: k8s.count_running_pods(), 3, "Scale up to 3 failed") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod)), 3, "Not all 3 nodes healthy") - - k8s.scale_cluster(2) - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "Scale down to 2 failed") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod)), 2, "Not all members 2 healthy") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_service_annotations(self): - ''' - Create a Postgres cluster with service annotations and check them. 
- ''' - k8s = self.k8s - patch_custom_service_annotations = { - "data": { - "custom_service_annotations": "foo:bar", - } - } - k8s.update_config(patch_custom_service_annotations) - - pg_patch_custom_annotations = { - "spec": { - "serviceAnnotations": { - "annotation.key": "value", - "alice": "bob", - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_custom_annotations) - - annotations = { - "annotation.key": "value", - "foo": "bar", - "alice": "bob" - } - - self.eventuallyTrue(lambda: k8s.check_service_annotations("cluster-name=acid-minimal-cluster,spilo-role=master", annotations), "Wrong annotations") - self.eventuallyTrue(lambda: k8s.check_service_annotations("cluster-name=acid-minimal-cluster,spilo-role=replica", annotations), "Wrong annotations") - - # clean up - unpatch_custom_service_annotations = { - "data": { - "custom_service_annotations": "", - } - } - k8s.update_config(unpatch_custom_service_annotations) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_statefulset_annotation_propagation(self): - ''' - Inject annotation to Postgresql CRD and check it's propagation to stateful set - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - - patch_sset_propagate_annotations = { - "data": { - "downscaler_annotations": "deployment-time,downscaler/*", - "inherited_annotations": "owned-by", - } - } - k8s.update_config(patch_sset_propagate_annotations) - - pg_crd_annotations = { - "metadata": { - "annotations": { - "deployment-time": "2020-04-30 12:00:00", - "downscaler/downtime_replicas": "0", - "owned-by": "acid", - }, - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_crd_annotations) - - annotations = { - "deployment-time": "2020-04-30 12:00:00", - "downscaler/downtime_replicas": "0", - "owned-by": "acid", - } - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyTrue(lambda: k8s.check_statefulset_annotations(cluster_label, annotations), "Annotations missing") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - @unittest.skip("Skipping this test until fixed") - def test_zaa_test_major_version_upgrade(self): - k8s = self.k8s - result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest-12.yaml") - self.eventuallyEqual(lambda: k8s.count_running_pods(labels="application=spilo,cluster-name=acid-upgrade-test"), 2, "No 2 pods running") - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - pg_patch_version = { - "spec": { - "postgres": { - "version": "13" - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - def check_version_13(): - p = k8s.get_patroni_state("acid-upgrade-test-0") - version = p["server_version"][0:2] - return version - - self.evantuallyEqual(check_version_13, "13", "Version was not upgrade to 13") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - @unittest.skip("Skipping this test until fixed") - def test_zzz_taint_based_eviction(self): - ''' - Add taint "postgres=:NoExecute" to node with master. This must cause a failover. 
- ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - - # verify we are in good state from potential previous tests - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") - - # get nodes of master and replica(s) (expected target of new master) - master_nodes, replica_nodes = k8s.get_cluster_nodes() - - self.assertNotEqual(master_nodes, []) - self.assertNotEqual(replica_nodes, []) - - # taint node with postgres=:NoExecute to force failover - body = { - "spec": { - "taints": [ - { - "effect": "NoExecute", - "key": "postgres" - } - ] - } - } - - k8s.api.core_v1.patch_node(master_nodes[0], body) - self.eventuallyTrue(lambda: k8s.get_cluster_nodes()[0], replica_nodes) - self.assertNotEqual(lambda: k8s.get_cluster_nodes()[0], master_nodes) - - # add toleration to pods - patch_toleration_config = { - "data": { - "toleration": "key:postgres,operator:Exists,effect:NoExecute" - } - } - - k8s.update_config(patch_toleration_config, step="allow tainted nodes") - - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") - - # toggle pod anti affinity to move replica away from master node - nm, new_replica_nodes = k8s.get_cluster_nodes() - new_master_node = nm[0] - self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_node_affinity(self): ''' @@ -1334,7 +1034,367 @@ class EndToEndTestCase(unittest.TestCase): raise @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_zzzz_cluster_deletion(self): + def test_node_readiness_label(self): + ''' + Remove node readiness label from master node. This must cause a failover. 
+ ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + readiness_label = 'lifecycle-status' + readiness_value = 'ready' + + try: + # get nodes of master and replica(s) (expected target of new master) + current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) + num_replicas = len(current_replica_nodes) + failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) + + # add node_readiness_label to potential failover nodes + patch_readiness_label = { + "metadata": { + "labels": { + readiness_label: readiness_value + } + } + } + self.assertTrue(len(failover_targets) > 0, "No failover targets available") + for failover_target in failover_targets: + k8s.api.core_v1.patch_node(failover_target, patch_readiness_label) + + # define node_readiness_label in config map which should trigger a failover of the master + patch_readiness_label_config = { + "data": { + "node_readiness_label": readiness_label + ':' + readiness_value, + } + } + k8s.update_config(patch_readiness_label_config, "setting readiness label") + new_master_node, new_replica_nodes = self.assert_failover( + current_master_node, num_replicas, failover_targets, cluster_label) + + # patch also node where master ran before + k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label) + + # toggle pod anti affinity to move replica away from master node + self.eventuallyTrue(lambda: self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label), "Pods are redistributed") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_overwrite_pooler_deployment(self): + self.k8s.create_with_kubectl("manifests/minimal-fake-pooler-deployment.yaml") + self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: self.k8s.get_deployment_replica_count(name="acid-minimal-cluster-pooler"), 1, + "Initial broken deployment not rolled out") + + self.k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': True + } + }) + + self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: self.k8s.get_deployment_replica_count(name="acid-minimal-cluster-pooler"), 2, + "Operator did not succeed in overwriting labels") + + self.k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': False + } + }) + + self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: self.k8s.count_running_pods("connection-pooler=acid-minimal-cluster-pooler"), + 0, "Pooler pods not scaled down") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_rolling_update_flag(self): + ''' + Add rolling update flag to only the master and see it failing over + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + + # verify we are in good state from potential previous tests + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") + + # get node and replica (expected target of new master) + _, replica_nodes = 
k8s.get_pg_nodes(cluster_label) + + # rolling update annotation + flag = { + "metadata": { + "annotations": { + "zalando-postgres-operator-rolling-update-required": "true", + } + } + } + + try: + podsList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) + for pod in podsList.items: + # add flag only to the master to make it appear to the operator as a leftover from a rolling update + if pod.metadata.labels.get('spilo-role') == 'master': + old_creation_timestamp = pod.metadata.creation_timestamp + k8s.patch_pod(flag, pod.metadata.name, pod.metadata.namespace) + else: + # remember replica name to check if operator does a switchover + switchover_target = pod.metadata.name + + # do not wait until the next sync + k8s.delete_operator_pod() + + # operator should now recreate the master pod and do a switchover before + k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) + + # check if the former replica is now the new master + leader = k8s.get_cluster_leader_pod() + self.eventuallyEqual(lambda: leader.metadata.name, switchover_target, "Rolling update flag did not trigger switchover") + + # check that the old master has been recreated + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + replica = k8s.get_cluster_replica_pod() + self.assertTrue(replica.metadata.creation_timestamp > old_creation_timestamp, "Old master pod was not recreated") + + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_rolling_update_label_timeout(self): + ''' + Simulate case when replica does not receive label in time and rolling update does not finish + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + flag = "zalando-postgres-operator-rolling-update-required" + + # verify we are in good state from potential previous tests + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") + + # get node and replica (expected target of new master) + _, replica_nodes = k8s.get_pg_nodes(cluster_label) + + # rolling update annotation + rolling_update_patch = { + "metadata": { + "annotations": { + flag: "true", + } + } + } + + # make pod_label_wait_timeout so short that rolling update fails on first try + # temporarily lower resync interval to reduce waiting for further tests + # pods should get healthy in the meantime + patch_resync_config = { + "data": { + "pod_label_wait_timeout": "2s", + "resync_period": "30s", + } + } + + try: + # patch both pods for rolling update + podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label) + for pod in podList.items: + k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace) + if pod.metadata.labels.get('spilo-role') == 'replica': + switchover_target = pod.metadata.name + + # update config and restart operator + k8s.update_config(patch_resync_config, "update resync interval and pod_label_wait_timeout") + + # operator should now recreate the replica pod first and do a switchover after + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + + # pod_label_wait_timeout should have been exceeded hence the rolling update is continued on next sync + # check if the cluster state is "SyncFailed" + self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail") + + # wait for next sync, replica should be running normally by now and be ready for switchover + 
k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) + + # check if the former replica is now the new master + leader = k8s.get_cluster_leader_pod() + self.eventuallyEqual(lambda: leader.metadata.name, switchover_target, "Rolling update flag did not trigger switchover") + + # wait for the old master to get restarted + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + + # status should again be "SyncFailed" but turn into "Running" on the next sync + time.sleep(30) + self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs") + + # revert config changes + patch_resync_config = { + "data": { + "pod_label_wait_timeout": "10m", + "resync_period": "30m", + } + } + k8s.update_config(patch_resync_config, "revert resync interval and pod_label_wait_timeout") + + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_scaling(self): + ''' + Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. + ''' + k8s = self.k8s + pod = "acid-minimal-cluster-0" + + k8s.scale_cluster(3) + self.eventuallyEqual(lambda: k8s.count_running_pods(), 3, "Scale up to 3 failed") + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod)), 3, "Not all 3 nodes healthy") + + k8s.scale_cluster(2) + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "Scale down to 2 failed") + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod)), 2, "Not all members 2 healthy") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_service_annotations(self): + ''' + Create a Postgres cluster with service annotations and check them. + ''' + k8s = self.k8s + patch_custom_service_annotations = { + "data": { + "custom_service_annotations": "foo:bar", + } + } + k8s.update_config(patch_custom_service_annotations) + + pg_patch_custom_annotations = { + "spec": { + "serviceAnnotations": { + "annotation.key": "value", + "alice": "bob", + } + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_custom_annotations) + + annotations = { + "annotation.key": "value", + "foo": "bar", + "alice": "bob" + } + + self.eventuallyTrue(lambda: k8s.check_service_annotations("cluster-name=acid-minimal-cluster,spilo-role=master", annotations), "Wrong annotations") + self.eventuallyTrue(lambda: k8s.check_service_annotations("cluster-name=acid-minimal-cluster,spilo-role=replica", annotations), "Wrong annotations") + + # clean up + unpatch_custom_service_annotations = { + "data": { + "custom_service_annotations": "", + } + } + k8s.update_config(unpatch_custom_service_annotations) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_statefulset_annotation_propagation(self): + ''' + Inject annotation to Postgresql CRD and check it's propagation to stateful set + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + + patch_sset_propagate_annotations = { + "data": { + "downscaler_annotations": "deployment-time,downscaler/*", + "inherited_annotations": "owned-by", + } + } + k8s.update_config(patch_sset_propagate_annotations) + + pg_crd_annotations = { + "metadata": { + "annotations": { + "deployment-time": "2020-04-30 12:00:00", + "downscaler/downtime_replicas": "0", + "owned-by": "acid", + }, + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 
"acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_crd_annotations) + + annotations = { + "deployment-time": "2020-04-30 12:00:00", + "downscaler/downtime_replicas": "0", + "owned-by": "acid", + } + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyTrue(lambda: k8s.check_statefulset_annotations(cluster_label, annotations), "Annotations missing") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + @unittest.skip("Skipping this test until fixed") + def test_taint_based_eviction(self): + ''' + Add taint "postgres=:NoExecute" to node with master. This must cause a failover. + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + + # verify we are in good state from potential previous tests + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") + + # get nodes of master and replica(s) (expected target of new master) + master_nodes, replica_nodes = k8s.get_cluster_nodes() + + self.assertNotEqual(master_nodes, []) + self.assertNotEqual(replica_nodes, []) + + # taint node with postgres=:NoExecute to force failover + body = { + "spec": { + "taints": [ + { + "effect": "NoExecute", + "key": "postgres" + } + ] + } + } + + k8s.api.core_v1.patch_node(master_nodes[0], body) + self.eventuallyTrue(lambda: k8s.get_cluster_nodes()[0], replica_nodes) + self.assertNotEqual(lambda: k8s.get_cluster_nodes()[0], master_nodes) + + # add toleration to pods + patch_toleration_config = { + "data": { + "toleration": "key:postgres,operator:Exists,effect:NoExecute" + } + } + + k8s.update_config(patch_toleration_config, step="allow tainted nodes") + + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running") + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") + + # toggle pod anti affinity to move replica away from master node + nm, new_replica_nodes = k8s.get_cluster_nodes() + new_master_node = nm[0] + self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_zz_cluster_deletion(self): ''' Test deletion with configured protection ''' @@ -1418,54 +1478,6 @@ class EndToEndTestCase(unittest.TestCase): } k8s.update_config(patch_delete_annotations) - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_decrease_max_connections(self): - ''' - Test decreasing max_connections and restarting cluster through rest api - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - labels = 'spilo-role=master,' + cluster_label - new_max_connections_value = "99" - pods = k8s.api.core_v1.list_namespaced_pod( - 'default', label_selector=labels).items - self.assert_master_is_unique() - masterPod = pods[0] - creationTimestamp = masterPod.metadata.creation_timestamp - - # adjust max_connection - pg_patch_max_connections = { - "spec": { - "postgresql": { - "parameters": { - "max_connections": new_max_connections_value - } - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) - - def get_max_connections(): - pods = k8s.api.core_v1.list_namespaced_pod( - 'default', label_selector=labels).items - 
self.assert_master_is_unique() - masterPod = pods[0] - get_max_connections_cmd = '''psql -At -U postgres -c "SELECT setting FROM pg_settings WHERE name = 'max_connections';"''' - result = k8s.exec_with_kubectl(masterPod.metadata.name, get_max_connections_cmd) - max_connections_value = int(result.stdout) - return max_connections_value - - #Make sure that max_connections decreased - self.eventuallyEqual(get_max_connections, int(new_max_connections_value), "max_connections didn't decrease") - pods = k8s.api.core_v1.list_namespaced_pod( - 'default', label_selector=labels).items - self.assert_master_is_unique() - masterPod = pods[0] - #Make sure that pod didn't restart - self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp, - "Master pod creation timestamp is updated") - def get_failover_targets(self, master_node, replica_nodes): ''' If all pods live on the same node, failover will happen to other worker(s) diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index 30b41d392..652a66fda 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -219,6 +219,97 @@ spec: items: type: string pattern: '^\ *((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))-((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))\ *$' + nodeAffinity: + type: object + properties: + preferredDuringSchedulingIgnoredDuringExecution: + type: array + items: + type: object + required: + - weight + - preference + properties: + preference: + type: object + properties: + matchExpressions: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string + matchFields: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string + weight: + format: int32 + type: integer + requiredDuringSchedulingIgnoredDuringExecution: + type: object + required: + - nodeSelectorTerms + properties: + nodeSelectorTerms: + type: array + items: + type: object + properties: + matchExpressions: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string + matchFields: + type: array + items: + type: object + required: + - key + - operator + properties: + key: + type: string + operator: + type: string + values: + type: array + items: + type: string numberOfInstances: type: integer minimum: 0 @@ -392,97 +483,6 @@ spec: type: string caSecretName: type: string - nodeAffinity: - type: object - properties: - preferredDuringSchedulingIgnoredDuringExecution: - type: array - items: - type: object - required: - - weight - - preference - properties: - preference: - type: object - properties: - matchExpressions: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string - matchFields: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string - weight: - format: int32 - type: integer - requiredDuringSchedulingIgnoredDuringExecution: - type: object - required: - - nodeSelectorTerms - properties: - nodeSelectorTerms: - type: array - items: - 
type: object - properties: - matchExpressions: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string - matchFields: - type: array - items: - type: object - required: - - key - - operator - properties: - key: - type: string - operator: - type: string - values: - type: array - items: - type: string tolerations: type: array items: diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 6ca754bbb..a95eeab20 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -341,6 +341,91 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "nodeAffinity": { + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "preferredDuringSchedulingIgnoredDuringExecution": { + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "object", + Required: []string{"preference", "weight"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "preference": { + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "matchExpressions": { + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "object", + AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ + Allows: true, + }, + }, + }, + }, + "matchFields": { + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "object", + AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ + Allows: true, + }, + }, + }, + }, + }, + }, + "weight": { + Type: "integer", + Format: "int32", + }, + }, + }, + }, + }, + "requiredDuringSchedulingIgnoredDuringExecution": { + Type: "object", + Required: []string{"nodeSelectorTerms"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "nodeSelectorTerms": { + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "matchExpressions": { + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "object", + AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ + Allows: true, + }, + }, + }, + }, + "matchFields": { + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "object", + AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ + Allows: true, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, "numberOfInstances": { Type: "integer", Minimum: &min0, @@ -596,91 +681,6 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, - "nodeAffinity": { - Type: "object", - Properties: map[string]apiextv1.JSONSchemaProps{ - "preferredDuringSchedulingIgnoredDuringExecution": { - Type: "array", - Items: &apiextv1.JSONSchemaPropsOrArray{ - Schema: &apiextv1.JSONSchemaProps{ - Type: "object", - Required: []string{"preference", "weight"}, - Properties: map[string]apiextv1.JSONSchemaProps{ - "preference": { - Type: "object", - Properties: map[string]apiextv1.JSONSchemaProps{ - "matchExpressions": { - Type: "array", - Items: &apiextv1.JSONSchemaPropsOrArray{ - Schema: &apiextv1.JSONSchemaProps{ - Type: "object", - AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ - Allows: true, - }, - }, - }, - }, - "matchFields": { - Type: "array", - Items: &apiextv1.JSONSchemaPropsOrArray{ - Schema: &apiextv1.JSONSchemaProps{ - 
Type: "object", - AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ - Allows: true, - }, - }, - }, - }, - }, - }, - "weight": { - Type: "integer", - Format: "int32", - }, - }, - }, - }, - }, - "requiredDuringSchedulingIgnoredDuringExecution": { - Type: "object", - Required: []string{"nodeSelectorTerms"}, - Properties: map[string]apiextv1.JSONSchemaProps{ - "nodeSelectorTerms": { - Type: "array", - Items: &apiextv1.JSONSchemaPropsOrArray{ - Schema: &apiextv1.JSONSchemaProps{ - Type: "object", - Properties: map[string]apiextv1.JSONSchemaProps{ - "matchExpressions": { - Type: "array", - Items: &apiextv1.JSONSchemaPropsOrArray{ - Schema: &apiextv1.JSONSchemaProps{ - Type: "object", - AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ - Allows: true, - }, - }, - }, - }, - "matchFields": { - Type: "array", - Items: &apiextv1.JSONSchemaPropsOrArray{ - Schema: &apiextv1.JSONSchemaProps{ - Type: "object", - AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ - Allows: true, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, "tolerations": { Type: "array", Items: &apiextv1.JSONSchemaPropsOrArray{ From 66620d5049d807656ad3322f87e88f9a33d56859 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Mon, 9 Aug 2021 16:23:41 +0200 Subject: [PATCH 8/9] refactor restarting instances (#1535) * refactor restarting instances and reduce listPods calls * only add parameters to set if it differs from effective config * update e2e test for updating Postgres config * patch config only once --- e2e/tests/k8s_api.py | 14 ++ e2e/tests/test_e2e.py | 141 +++++++++++-------- pkg/apis/acid.zalan.do/v1/marshal.go | 2 +- pkg/cluster/sync.go | 203 +++++++++++++++------------ pkg/util/patroni/patroni.go | 15 ++ 5 files changed, 228 insertions(+), 147 deletions(-) diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py index 9937add88..c3ad1c999 100644 --- a/e2e/tests/k8s_api.py +++ b/e2e/tests/k8s_api.py @@ -252,6 +252,13 @@ class K8s: stdout=subprocess.PIPE, stderr=subprocess.PIPE) + def patroni_rest(self, pod, path): + r = self.exec_with_kubectl(pod, "curl localhost:8008/" + path) + if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{": + return None + + return json.loads(r.stdout.decode()) + def get_patroni_state(self, pod): r = self.exec_with_kubectl(pod, "patronictl list -f json") if not r.returncode == 0 or not r.stdout.decode()[0:1] == "[": @@ -514,6 +521,13 @@ class K8sBase: stdout=subprocess.PIPE, stderr=subprocess.PIPE) + def patroni_rest(self, pod, path): + r = self.exec_with_kubectl(pod, "curl localhost:8008/" + path) + if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{": + return None + + return json.loads(r.stdout.decode()) + def get_patroni_state(self, pod): r = self.exec_with_kubectl(pod, "patronictl list -f json") if not r.returncode == 0 or not r.stdout.decode()[0:1] == "[": diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 08d2864d2..6a4bf78ca 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -324,65 +324,6 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: self.k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), 1, "Secret not created for user in namespace") - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_decrease_max_connections(self): - ''' - Test decreasing max_connections and restarting cluster through rest api - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - labels = 'spilo-role=master,' + cluster_label 
- new_max_connections_value = "99" - pods = k8s.api.core_v1.list_namespaced_pod( - 'default', label_selector=labels).items - self.assert_master_is_unique() - masterPod = pods[0] - creationTimestamp = masterPod.metadata.creation_timestamp - - # adjust max_connection - pg_patch_max_connections = { - "spec": { - "postgresql": { - "parameters": { - "max_connections": new_max_connections_value - } - } - } - } - - try: - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) - - def get_max_connections(): - pods = k8s.api.core_v1.list_namespaced_pod( - 'default', label_selector=labels).items - self.assert_master_is_unique() - masterPod = pods[0] - get_max_connections_cmd = '''psql -At -U postgres -c "SELECT setting FROM pg_settings WHERE name = 'max_connections';"''' - result = k8s.exec_with_kubectl(masterPod.metadata.name, get_max_connections_cmd) - max_connections_value = int(result.stdout) - return max_connections_value - - #Make sure that max_connections decreased - self.eventuallyEqual(get_max_connections, int(new_max_connections_value), "max_connections didn't decrease") - pods = k8s.api.core_v1.list_namespaced_pod( - 'default', label_selector=labels).items - self.assert_master_is_unique() - masterPod = pods[0] - #Make sure that pod didn't restart - self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp, - "Master pod creation timestamp is updated") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - # make sure cluster is in a good state for further tests - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No 2 pods running") - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_enable_disable_connection_pooler(self): ''' @@ -1114,6 +1055,88 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: self.k8s.count_running_pods("connection-pooler=acid-minimal-cluster-pooler"), 0, "Pooler pods not scaled down") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_patroni_config_update(self): + ''' + Change Postgres config under Spec.Postgresql.Parameters and Spec.Patroni + and query Patroni config endpoint to check if manifest changes got applied + via restarting cluster through Patroni's rest api + ''' + k8s = self.k8s + masterPod = k8s.get_cluster_leader_pod() + labels = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role=master' + creationTimestamp = masterPod.metadata.creation_timestamp + new_max_connections_value = "50" + + # adjust max_connection + pg_patch_config = { + "spec": { + "postgresql": { + "parameters": { + "max_connections": new_max_connections_value + } + }, + "patroni": { + "slots": { + "test_slot": { + "type": "physical" + } + }, + "ttl": 29, + "loop_wait": 9, + "retry_timeout": 9, + "synchronous_mode": True + } + } + } + + try: + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config) + + self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + def compare_config(): + effective_config = k8s.patroni_rest(masterPod.metadata.name, "config") + desired_patroni = pg_patch_config["spec"]["patroni"] + desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"] + effective_parameters = 
effective_config["postgresql"]["parameters"] + self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"], + "max_connections not updated") + self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added") + self.assertEqual(desired_patroni["ttl"], effective_config["ttl"], + "ttl not updated") + self.assertEqual(desired_patroni["loop_wait"], effective_config["loop_wait"], + "loop_wait not updated") + self.assertEqual(desired_patroni["retry_timeout"], effective_config["retry_timeout"], + "retry_timeout not updated") + self.assertEqual(desired_patroni["synchronous_mode"], effective_config["synchronous_mode"], + "synchronous_mode not updated") + return True + + self.eventuallyTrue(compare_config, "Postgres config not applied") + + setting_query = """ + SELECT setting + FROM pg_settings + WHERE name = 'max_connections'; + """ + self.eventuallyEqual(lambda: self.query_database(masterPod.metadata.name, "postgres", setting_query)[0], new_max_connections_value, + "New max_connections setting not applied", 10, 5) + + # make sure that pod wasn't recreated + self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp, + "Master pod creation timestamp is updated") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + # make sure cluster is in a good state for further tests + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, + "No 2 pods running") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_rolling_update_flag(self): ''' diff --git a/pkg/apis/acid.zalan.do/v1/marshal.go b/pkg/apis/acid.zalan.do/v1/marshal.go index 9521082fc..f4167ce92 100644 --- a/pkg/apis/acid.zalan.do/v1/marshal.go +++ b/pkg/apis/acid.zalan.do/v1/marshal.go @@ -81,7 +81,7 @@ func (ps *PostgresStatus) UnmarshalJSON(data []byte) error { if err != nil { metaErr := json.Unmarshal(data, &status) if metaErr != nil { - return fmt.Errorf("Could not parse status: %v; err %v", string(data), metaErr) + return fmt.Errorf("could not parse status: %v; err %v", string(data), metaErr) } tmp.PostgresClusterStatus = status } diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 85d87b35a..4937a2034 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -2,7 +2,9 @@ package cluster import ( "context" + "encoding/json" "fmt" + "reflect" "regexp" "strings" "time" @@ -261,14 +263,18 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error { } func (c *Cluster) syncStatefulSet() error { - var instancesRestartRequired bool + var ( + masterPod *v1.Pod + postgresConfig map[string]interface{} + instanceRestartRequired bool + ) podsToRecreate := make([]v1.Pod, 0) switchoverCandidates := make([]spec.NamespacedName, 0) pods, err := c.listPods() if err != nil { - c.logger.Infof("could not list pods of the statefulset: %v", err) + c.logger.Warnf("could not list pods of the statefulset: %v", err) } // NB: Be careful to consider the codepath that acts on podsRollingUpdateRequired before returning early. @@ -381,20 +387,50 @@ func (c *Cluster) syncStatefulSet() error { // Apply special PostgreSQL parameters that can only be set via the Patroni API. // it is important to do it after the statefulset pods are there, but before the rolling update // since those parameters require PostgreSQL restart. 
- instancesRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration() + pods, err = c.listPods() if err != nil { - return fmt.Errorf("could not set cluster-wide PostgreSQL configuration options: %v", err) + c.logger.Warnf("could not get list of pods to apply special PostgreSQL parameters only to be set via Patroni API: %v", err) } - if instancesRestartRequired { - c.logger.Debugln("restarting Postgres server within pods") - c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pods") - if err := c.restartInstances(); err != nil { - c.logger.Warningf("could not restart Postgres server within pods: %v", err) + // get Postgres config, compare with manifest and update via Patroni PATCH endpoint if it differs + // Patroni's config endpoint is just a "proxy" to DCS. It is enough to patch it only once and it doesn't matter which pod is used. + for i, pod := range pods { + podName := util.NameFromMeta(pods[i].ObjectMeta) + config, err := c.patroni.GetConfig(&pod) + if err != nil { + c.logger.Warningf("could not get Postgres config from pod %s: %v", podName, err) + continue } - c.logger.Infof("Postgres server successfuly restarted on all pods") - c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Postgres server restart done - all instances have been restarted") + instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, config) + if err != nil { + c.logger.Warningf("could not set PostgreSQL configuration options for pod %s: %v", podName, err) + continue + } + break } + + // if the config update requires a restart, call Patroni restart for replicas first, then master + if instanceRestartRequired { + c.logger.Debug("restarting Postgres server within pods") + ttl, ok := postgresConfig["ttl"].(int32) + if !ok { + ttl = 30 + } + for i, pod := range pods { + role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) + if role == Master { + masterPod = &pods[i] + continue + } + c.restartInstance(&pod) + time.Sleep(time.Duration(ttl) * time.Second) + } + + if masterPod != nil { + c.restartInstance(masterPod) + } + } + // if we get here we also need to re-create the pods (either leftovers from the old // statefulset or those that got their configuration from the outdated statefulset) if len(podsToRecreate) > 0 { @@ -408,55 +444,19 @@ func (c *Cluster) syncStatefulSet() error { return nil } -func (c *Cluster) restartInstances() error { - c.setProcessName("starting to restart Postgres servers") - ls := c.labelsSet(false) - namespace := c.Namespace +func (c *Cluster) restartInstance(pod *v1.Pod) { + podName := util.NameFromMeta(pod.ObjectMeta) + role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) - listOptions := metav1.ListOptions{ - LabelSelector: ls.String(), + c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name)) + + if err := c.patroni.Restart(pod); err != nil { + c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err) + return } - pods, err := c.KubeClient.Pods(namespace).List(context.TODO(), listOptions) - if err != nil { - return fmt.Errorf("could not get the list of pods: %v", err) - } - c.logger.Infof("there are %d pods in the cluster which resquire Postgres server restart", len(pods.Items)) - - var ( - masterPod *v1.Pod - ) - for i, pod := range pods.Items { - role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) - - if role == Master { - masterPod = 
&pods.Items[i] - continue - } - - podName := util.NameFromMeta(pods.Items[i].ObjectMeta) - config, err := c.patroni.GetConfig(&pod) - if err != nil { - return fmt.Errorf("could not get config for pod %s: %v", podName, err) - } - ttl, ok := config["ttl"].(int32) - if !ok { - ttl = 30 - } - if err = c.patroni.Restart(&pod); err != nil { - return fmt.Errorf("could not restart Postgres server on pod %s: %v", podName, err) - } - time.Sleep(time.Duration(ttl) * time.Second) - } - - if masterPod != nil { - podName := util.NameFromMeta(masterPod.ObjectMeta) - if err = c.patroni.Restart(masterPod); err != nil { - return fmt.Errorf("could not restart postgres server on masterPod %s: %v", podName, err) - } - } - - return nil + c.logger.Debugf("Postgres server successfuly restarted in %s pod %s", role, podName) + c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, pod.Name)) } // AnnotationsToPropagate get the annotations to update if required @@ -492,48 +492,77 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[stri } // checkAndSetGlobalPostgreSQLConfiguration checks whether cluster-wide API parameters -// (like max_connections) has changed and if necessary sets it via the Patroni API -func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration() (bool, error) { - var ( - err error - pods []v1.Pod - restartRequired bool - ) +// (like max_connections) have changed and if necessary sets it via the Patroni API +func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig map[string]interface{}) (bool, error) { + configToSet := make(map[string]interface{}) + parametersToSet := make(map[string]string) + effectivePgParameters := make(map[string]interface{}) - // we need to extract those options from the cluster manifest. 
- optionsToSet := make(map[string]string) - pgOptions := c.Spec.Parameters + // read effective Patroni config if set + if patroniConfig != nil { + effectivePostgresql := patroniConfig["postgresql"].(map[string]interface{}) + effectivePgParameters = effectivePostgresql[patroniPGParametersParameterName].(map[string]interface{}) + } - for k, v := range pgOptions { - if isBootstrapOnlyParameter(k) { - optionsToSet[k] = v + // compare parameters under postgresql section with c.Spec.Postgresql.Parameters from manifest + desiredPgParameters := c.Spec.Parameters + for desiredOption, desiredValue := range desiredPgParameters { + effectiveValue := effectivePgParameters[desiredOption] + if isBootstrapOnlyParameter(desiredOption) && (effectiveValue != desiredValue) { + parametersToSet[desiredOption] = desiredValue } } - if len(optionsToSet) == 0 { - return restartRequired, nil + if len(parametersToSet) > 0 { + configToSet["postgresql"] = map[string]interface{}{patroniPGParametersParameterName: parametersToSet} } - if pods, err = c.listPods(); err != nil { - return restartRequired, err + // compare other options from config with c.Spec.Patroni from manifest + desiredPatroniConfig := c.Spec.Patroni + if desiredPatroniConfig.LoopWait > 0 && desiredPatroniConfig.LoopWait != uint32(patroniConfig["loop_wait"].(float64)) { + configToSet["loop_wait"] = desiredPatroniConfig.LoopWait } - if len(pods) == 0 { - return restartRequired, fmt.Errorf("could not call Patroni API: cluster has no pods") + if desiredPatroniConfig.MaximumLagOnFailover > 0 && desiredPatroniConfig.MaximumLagOnFailover != float32(patroniConfig["maximum_lag_on_failover"].(float64)) { + configToSet["maximum_lag_on_failover"] = desiredPatroniConfig.MaximumLagOnFailover } + if desiredPatroniConfig.PgHba != nil && !reflect.DeepEqual(desiredPatroniConfig.PgHba, (patroniConfig["pg_hba"])) { + configToSet["pg_hba"] = desiredPatroniConfig.PgHba + } + if desiredPatroniConfig.RetryTimeout > 0 && desiredPatroniConfig.RetryTimeout != uint32(patroniConfig["retry_timeout"].(float64)) { + configToSet["retry_timeout"] = desiredPatroniConfig.RetryTimeout + } + if desiredPatroniConfig.Slots != nil && !reflect.DeepEqual(desiredPatroniConfig.Slots, patroniConfig["slots"]) { + configToSet["slots"] = desiredPatroniConfig.Slots + } + if desiredPatroniConfig.SynchronousMode != patroniConfig["synchronous_mode"] { + configToSet["synchronous_mode"] = desiredPatroniConfig.SynchronousMode + } + if desiredPatroniConfig.SynchronousModeStrict != patroniConfig["synchronous_mode_strict"] { + configToSet["synchronous_mode_strict"] = desiredPatroniConfig.SynchronousModeStrict + } + if desiredPatroniConfig.TTL > 0 && desiredPatroniConfig.TTL != uint32(patroniConfig["ttl"].(float64)) { + configToSet["ttl"] = desiredPatroniConfig.TTL + } + + if len(configToSet) == 0 { + return false, nil + } + + configToSetJson, err := json.Marshal(configToSet) + if err != nil { + c.logger.Debugf("could not convert config patch to JSON: %v", err) + } + // try all pods until the first one that is successful, as it doesn't matter which pod // carries the request to change configuration through - for _, pod := range pods { - podName := util.NameFromMeta(pod.ObjectMeta) - c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %v", - podName, optionsToSet) - if err = c.patroni.SetPostgresParameters(&pod, optionsToSet); err == nil { - restartRequired = true - return restartRequired, nil - } - c.logger.Warningf("could not patch postgres parameters with a pod %s: %v", 
podName, err) + podName := util.NameFromMeta(pod.ObjectMeta) + c.logger.Debugf("patching Postgres config via Patroni API on pod %s with following options: %s", + podName, configToSetJson) + if err = c.patroni.SetConfig(pod, configToSet); err != nil { + return true, fmt.Errorf("could not patch postgres parameters with a pod %s: %v", podName, err) } - return restartRequired, fmt.Errorf("could not reach Patroni API to set Postgres options: failed on every pod (%d total)", - len(pods)) + + return true, nil } func (c *Cluster) syncSecrets() error { diff --git a/pkg/util/patroni/patroni.go b/pkg/util/patroni/patroni.go index 1f2c95552..a9cadafba 100644 --- a/pkg/util/patroni/patroni.go +++ b/pkg/util/patroni/patroni.go @@ -32,6 +32,7 @@ type Interface interface { GetMemberData(server *v1.Pod) (MemberData, error) Restart(server *v1.Pod) error GetConfig(server *v1.Pod) (map[string]interface{}, error) + SetConfig(server *v1.Pod, config map[string]interface{}) error } // Patroni API client @@ -163,6 +164,20 @@ func (p *Patroni) SetPostgresParameters(server *v1.Pod, parameters map[string]st return p.httpPostOrPatch(http.MethodPatch, apiURLString+configPath, buf) } +//SetConfig sets Patroni options via Patroni patch API call. +func (p *Patroni) SetConfig(server *v1.Pod, config map[string]interface{}) error { + buf := &bytes.Buffer{} + err := json.NewEncoder(buf).Encode(config) + if err != nil { + return fmt.Errorf("could not encode json: %v", err) + } + apiURLString, err := apiURL(server) + if err != nil { + return err + } + return p.httpPostOrPatch(http.MethodPatch, apiURLString+configPath, buf) +} + // MemberDataPatroni child element type MemberDataPatroni struct { Version string `json:"version"` From 47dc0a9aee78b3372dd104c5b930ff15b980fa2f Mon Sep 17 00:00:00 2001 From: Markus Heinemann Date: Thu, 12 Aug 2021 11:52:32 +0200 Subject: [PATCH 9/9] =?UTF-8?q?fix(ui):=20update=20ingress=20api=20version?= =?UTF-8?q?s=20in=20helm=20chart=20for=20newer=20k8s=20vers=E2=80=A6=20(#1?= =?UTF-8?q?575)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(ui): update ingress api versions in helm chart for newer k8s versions * fix(ui): update ingress api version in plain manifest --- charts/postgres-operator-ui/templates/ingress.yaml | 14 +++++++++++++- ui/manifests/ingress.yaml | 12 ++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/charts/postgres-operator-ui/templates/ingress.yaml b/charts/postgres-operator-ui/templates/ingress.yaml index 873cfed0f..21e7dbea2 100644 --- a/charts/postgres-operator-ui/templates/ingress.yaml +++ b/charts/postgres-operator-ui/templates/ingress.yaml @@ -1,7 +1,10 @@ {{- if .Values.ingress.enabled -}} {{- $fullName := include "postgres-operator-ui.fullname" . -}} {{- $svcPort := .Values.service.port -}} -{{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} + +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} apiVersion: networking.k8s.io/v1beta1 {{- else -}} apiVersion: extensions/v1beta1 @@ -37,9 +40,18 @@ spec: paths: {{- range .paths }} - path: {{ . 
}} + {{ if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion -}} + pathType: ImplementationSpecific + backend: + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else -}} backend: serviceName: {{ $fullName }} servicePort: {{ $svcPort }} + {{- end -}} {{- end }} {{- end }} {{- end }} diff --git a/ui/manifests/ingress.yaml b/ui/manifests/ingress.yaml index 4efac53ac..a5e6f0fab 100644 --- a/ui/manifests/ingress.yaml +++ b/ui/manifests/ingress.yaml @@ -1,4 +1,4 @@ -apiVersion: "networking.k8s.io/v1beta1" +apiVersion: "networking.k8s.io/v1" kind: "Ingress" metadata: name: "postgres-operator-ui" @@ -10,6 +10,10 @@ spec: - host: "ui.example.org" http: paths: - - backend: - serviceName: "postgres-operator-ui" - servicePort: 80 + - path: / + pathType: ImplementationSpecific + backend: + service: + name: "postgres-operator-ui" + port: + number: 80
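
For reference, on a cluster at or above 1.14 but below 1.19 the updated chart template above falls back to networking.k8s.io/v1beta1 and the legacy serviceName/servicePort backend form. A sketch of what it would render there, assuming the chart default fullname postgres-operator-ui, a single path "/" and the example host from the plain manifest (all illustrative values, not requirements):

apiVersion: "networking.k8s.io/v1beta1"
kind: "Ingress"
metadata:
  name: "postgres-operator-ui"
spec:
  rules:
    - host: "ui.example.org"
      http:
        paths:
          - path: /
            backend:
              serviceName: "postgres-operator-ui"
              servicePort: 80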
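
Circling back to the sync refactoring in patch 8/9: rather than restarting every pod unconditionally, the operator now reads the effective configuration from Patroni's /config endpoint (the same endpoint the new patroni_rest e2e helper curls on localhost:8008), compares it with the cluster manifest, PATCHes only the keys that differ once per cluster, and restarts instances (replicas first, then the master) only when something was actually patched. For the manifest patch used in test_patroni_config_update, the PATCH body built from configToSet would look roughly like the sketch below; it is shown as YAML for readability while the operator actually sends the json.Marshal output, and max_connections only appears because it is a bootstrap-only parameter whose desired value differs from the effective one:

ttl: 29
loop_wait: 9
retry_timeout: 9
synchronous_mode: true
slots:
  test_slot:
    type: physical
postgresql:
  parameters:
    max_connections: "50"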