From b300fca364466485ffc07432e9b2a4ae95930177 Mon Sep 17 00:00:00 2001
From: Damiano Albani
Date: Thu, 3 Jun 2021 09:28:18 +0200
Subject: [PATCH 1/5] Keep single values.yaml in operator chart while
 supporting ConfigMap & CRD (#1224)

* Keep single values.yaml while supporting ConfigMap & CRD

Co-authored-by: Damiano Albani
---
 .../postgres-operator/templates/_helpers.tpl  |  21 +
 .../templates/configmap.yaml                  |  24 +-
 charts/postgres-operator/values-crd.yaml      | 403 ------------------
 charts/postgres-operator/values.yaml          | 139 +++---
 docs/developer.md                             |   9 +-
 docs/quickstart.md                            |   6 -
 6 files changed, 114 insertions(+), 488 deletions(-)

diff --git a/charts/postgres-operator/templates/_helpers.tpl b/charts/postgres-operator/templates/_helpers.tpl
index e49670763..c958a99a6 100644
--- a/charts/postgres-operator/templates/_helpers.tpl
+++ b/charts/postgres-operator/templates/_helpers.tpl
@@ -51,3 +51,24 @@ Create chart name and version as used by the chart label.
 {{- define "postgres-operator.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
 {{- end -}}
+
+{{/*
+Flatten nested config options when ConfigMap is used as ConfigTarget
+*/}}
+{{- define "flattenValuesForConfigMap" }}
+{{- range $key, $value := . }}
+  {{- if or (kindIs "string" $value) (kindIs "int" $value) }}
+{{ $key }}: {{ $value | quote }}
+  {{- end }}
+  {{- if kindIs "slice" $value }}
+{{ $key }}: {{ join "," $value | quote }}
+  {{- end }}
+  {{- if kindIs "map" $value }}
+  {{- $list := list }}
+  {{- range $subKey, $subValue := $value }}
+  {{- $list = append $list (printf "%s:%s" $subKey $subValue) }}
+  {{- end }}
+{{ $key }}: {{ join "," $list | quote }}
+  {{- end }}
+{{- end }}
+{{- end }}
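The helper above is what lets a single nested values.yaml serve both configuration targets: scalar values are quoted as-is, lists are joined with commas, and maps are collapsed into comma-separated key:value pairs. A rough sketch of the mapping, with illustrative input modeled on the chart's configKubernetes section:

```yaml
# nested chart values
cluster_name_label: cluster-name
cluster_labels:
  application: spilo
inherited_labels:
  - application
  - environment

# flattened ConfigMap entries produced by the helper
cluster_name_label: "cluster-name"
cluster_labels: "application:spilo"
inherited_labels: "application,environment"
```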
diff --git a/charts/postgres-operator/templates/configmap.yaml b/charts/postgres-operator/templates/configmap.yaml
index 7b762390c..836babe1d 100644
--- a/charts/postgres-operator/templates/configmap.yaml
+++ b/charts/postgres-operator/templates/configmap.yaml
@@ -13,16 +13,16 @@ data:
   pod_priority_class_name: {{ .Values.podPriorityClassName }}
 {{- end }}
   pod_service_account_name: {{ include "postgres-pod.serviceAccountName" . }}
-{{ toYaml .Values.configGeneral | indent 2 }}
-{{ toYaml .Values.configUsers | indent 2 }}
-{{ toYaml .Values.configMajorVersionUpgrade | indent 2 }}
-{{ toYaml .Values.configKubernetes | indent 2 }}
-{{ toYaml .Values.configTimeouts | indent 2 }}
-{{ toYaml .Values.configLoadBalancer | indent 2 }}
-{{ toYaml .Values.configAwsOrGcp | indent 2 }}
-{{ toYaml .Values.configLogicalBackup | indent 2 }}
-{{ toYaml .Values.configDebug | indent 2 }}
-{{ toYaml .Values.configLoggingRestApi | indent 2 }}
-{{ toYaml .Values.configTeamsApi | indent 2 }}
-{{ toYaml .Values.configConnectionPooler | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configGeneral | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configUsers | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configMajorVersionUpgrade | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configKubernetes | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configTimeouts | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configLoadBalancer | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configAwsOrGcp | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configLogicalBackup | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configDebug | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configLoggingRestApi | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configTeamsApi | indent 2 }}
+{{- include "flattenValuesForConfigMap" .Values.configConnectionPooler | indent 2 }}
 {{- end }}
diff --git a/charts/postgres-operator/values-crd.yaml b/charts/postgres-operator/values-crd.yaml
index b1fc43261..e69de29bb 100644
--- a/charts/postgres-operator/values-crd.yaml
+++ b/charts/postgres-operator/values-crd.yaml
@@ -1,403 +0,0 @@
-image:
-  registry: registry.opensource.zalan.do
-  repository: acid/postgres-operator
-  tag: v1.6.3
-  pullPolicy: "IfNotPresent"
-
-# Optionally specify an array of imagePullSecrets.
-# Secrets must be manually created in the namespace.
-# ref: https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod
-# imagePullSecrets:
-  # - name: myRegistryKeySecretName
-
-podAnnotations: {}
-podLabels: {}
-
-configTarget: "OperatorConfigurationCRD"
-
-# general top-level configuration parameters
-configGeneral:
-  # choose if deployment creates/updates CRDs with OpenAPIV3Validation
-  enable_crd_validation: true
-  # update only the statefulsets without immediately doing the rolling update
-  enable_lazy_spilo_upgrade: false
-  # set the PGVERSION env var instead of providing the version via postgresql.bin_dir in SPILO_CONFIGURATION
-  enable_pgversion_env_var: true
-  # start any new database pod without limitations on shm memory
-  enable_shm_volume: true
-  # enables backwards compatible path between Spilo 12 and Spilo 13 images
-  enable_spilo_wal_path_compat: false
-  # etcd connection string for Patroni. Empty uses K8s-native DCS.
-  etcd_host: ""
-  # Select if setup uses endpoints (default), or configmaps to manage leader (DCS=k8s)
-  # kubernetes_use_configmaps: false
-  # Spilo docker image
-  docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p7
-  # min number of instances in Postgres cluster. -1 = no limit
-  min_instances: -1
-  # max number of instances in Postgres cluster.
-1 = no limit - max_instances: -1 - # period between consecutive repair requests - repair_period: 5m - # period between consecutive sync requests - resync_period: 30m - # can prevent certain cases of memory overcommitment - # set_memory_request_to_limit: false - - # map of sidecar names to docker images - # sidecar_docker_images - # example: "exampleimage:exampletag" - - # number of routines the operator spawns to process requests concurrently - workers: 8 - -# parameters describing Postgres users -configUsers: - # postgres username used for replication between instances - replication_username: standby - # postgres superuser name to be created by initdb - super_username: postgres - -configMajorVersionUpgrade: - # "off": no upgrade, "manual": manifest triggers action, "full": minimal version violation triggers too - major_version_upgrade_mode: "off" - # minimal Postgres major version that will not automatically be upgraded - minimal_major_version: "9.5" - # target Postgres major version when upgrading clusters automatically - target_major_version: "13" - -configKubernetes: - # list of additional capabilities for postgres container - # additional_pod_capabilities: - # - "SYS_NICE" - - # default DNS domain of K8s cluster where operator is running - cluster_domain: cluster.local - # additional labels assigned to the cluster objects - cluster_labels: - application: spilo - # label assigned to Kubernetes objects created by the operator - cluster_name_label: cluster-name - # additional annotations to add to every database pod - # custom_pod_annotations: - # keya: valuea - # keyb: valueb - - # key name for annotation that compares manifest value with current date - # delete_annotation_date_key: "delete-date" - - # key name for annotation that compares manifest value with cluster name - # delete_annotation_name_key: "delete-clustername" - - # list of annotations propagated from cluster manifest to statefulset and deployment - # downscaler_annotations: - # - deployment-time - # - downscaler/* - - # enables initContainers to run actions before Spilo is started - enable_init_containers: true - # toggles pod anti affinity on the Postgres pods - enable_pod_antiaffinity: false - # toggles PDB to set to MinAvailabe 0 or 1 - enable_pod_disruption_budget: true - # enables sidecar containers to run alongside Spilo in the same pod - enable_sidecars: true - # namespaced name of the secret containing infrastructure roles names and passwords - # infrastructure_roles_secret_name: postgresql-infrastructure-roles - - # list of annotation keys that can be inherited from the cluster manifest - # inherited_annotations: - # - owned-by - - # list of label keys that can be inherited from the cluster manifest - # inherited_labels: - # - application - # - environment - - # timeout for successful migration of master pods from unschedulable node - # master_pod_move_timeout: 20m - - # set of labels that a running and active node should possess to be considered ready - # node_readiness_label: - # status: ready - - # namespaced name of the secret containing the OAuth2 token to pass to the teams API - # oauth_token_secret_name: postgresql-operator - - # defines the template for PDB (Pod Disruption Budget) names - pdb_name_format: "postgres-{cluster}-pdb" - # override topology key for pod anti affinity - pod_antiaffinity_topology_key: "kubernetes.io/hostname" - # namespaced name of the ConfigMap with environment variables to populate on every pod - # pod_environment_configmap: "default/my-custom-config" - # name of the Secret (in 
cluster namespace) with environment variables to populate on every pod - # pod_environment_secret: "my-custom-secret" - - # specify the pod management policy of stateful sets of Postgres clusters - pod_management_policy: "ordered_ready" - # label assigned to the Postgres pods (and services/endpoints) - pod_role_label: spilo-role - # service account definition as JSON/YAML string to be used by postgres cluster pods - # pod_service_account_definition: "" - - # role binding definition as JSON/YAML string to be used by pod service account - # pod_service_account_role_binding_definition: "" - - # Postgres pods are terminated forcefully after this timeout - pod_terminate_grace_period: 5m - # template for database user secrets generated by the operator - secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}" - # set user and group for the spilo container (required to run Spilo as non-root process) - # spilo_runasuser: "101" - # spilo_runasgroup: "103" - # group ID with write-access to volumes (required to run Spilo as non-root process) - # spilo_fsgroup: 103 - - # whether the Spilo container should run in privileged mode - spilo_privileged: false - # whether the Spilo container should run with additional permissions other than parent. - # required by cron which needs setuid - spilo_allow_privilege_escalation: true - # storage resize strategy, available options are: ebs, pvc, off - storage_resize_mode: pvc - # operator watches for postgres objects in the given namespace - watched_namespace: "*" # listen to all namespaces - -# configure resource requests for the Postgres pods -configPostgresPodResources: - # CPU limits for the postgres containers - default_cpu_limit: "1" - # CPU request value for the postgres containers - default_cpu_request: 100m - # memory limits for the postgres containers - default_memory_limit: 500Mi - # memory request value for the postgres containers - default_memory_request: 100Mi - # hard CPU minimum required to properly run a Postgres cluster - min_cpu_limit: 250m - # hard memory minimum required to properly run a Postgres cluster - min_memory_limit: 250Mi - -# timeouts related to some operator actions -configTimeouts: - # timeout when waiting for the Postgres pods to be deleted - pod_deletion_wait_timeout: 10m - # timeout when waiting for pod role and cluster labels - pod_label_wait_timeout: 10m - # interval between consecutive attempts waiting for postgresql CRD to be created - ready_wait_interval: 3s - # timeout for the complete postgres CRD creation - ready_wait_timeout: 30s - # interval to wait between consecutive attempts to check for some K8s resources - resource_check_interval: 3s - # timeout when waiting for the presence of a certain K8s resource (e.g. 
Sts, PDB) - resource_check_timeout: 10m - -# configure behavior of load balancers -configLoadBalancer: - # DNS zone for cluster DNS name when load balancer is configured for cluster - db_hosted_zone: db.example.com - # annotations to apply to service when load balancing is enabled - # custom_service_annotations: - # keyx: valuez - # keya: valuea - - # toggles service type load balancer pointing to the master pod of the cluster - enable_master_load_balancer: false - # toggles service type load balancer pointing to the replica pod of the cluster - enable_replica_load_balancer: false - # define external traffic policy for the load balancer - external_traffic_policy: "Cluster" - # defines the DNS name string template for the master load balancer cluster - master_dns_name_format: "{cluster}.{team}.{hostedzone}" - # defines the DNS name string template for the replica load balancer cluster - replica_dns_name_format: "{cluster}-repl.{team}.{hostedzone}" - -# options to aid debugging of the operator itself -configDebug: - # toggles verbose debug logs from the operator - debug_logging: true - # toggles operator functionality that require access to the postgres database - enable_database_access: true - -# parameters affecting logging and REST API listener -configLoggingRestApi: - # REST API listener listens to this port - api_port: 8080 - # number of entries in the cluster history ring buffer - cluster_history_entries: 1000 - # number of lines in the ring buffer used to store cluster logs - ring_log_lines: 100 - -# configure interaction with non-Kubernetes objects from AWS or GCP -configAwsOrGcp: - # Additional Secret (aws or gcp credentials) to mount in the pod - # additional_secret_mount: "some-secret-name" - - # Path to mount the above Secret in the filesystem of the container(s) - # additional_secret_mount_path: "/some/dir" - - # AWS region used to store ESB volumes - aws_region: eu-central-1 - - # enable automatic migration on AWS from gp2 to gp3 volumes - enable_ebs_gp3_migration: false - # defines maximum volume size in GB until which auto migration happens - # enable_ebs_gp3_migration_max_size: 1000 - - # GCP credentials that will be used by the operator / pods - # gcp_credentials: "" - - # AWS IAM role to supply in the iam.amazonaws.com/role annotation of Postgres pods - # kube_iam_role: "" - - # S3 bucket to use for shipping postgres daily logs - # log_s3_bucket: "" - - # GCS bucket to use for shipping WAL segments with WAL-E - # wal_gs_bucket: "" - - # S3 bucket to use for shipping WAL segments with WAL-E - # wal_s3_bucket: "" - -# configure K8s cron job managed by the operator -configLogicalBackup: - # image for pods of the logical backup job (example runs pg_dumpall) - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.3" - # path of google cloud service account json file - # logical_backup_google_application_credentials: "" - - # prefix for the backup job name - logical_backup_job_prefix: "logical-backup-" - # storage provider - either "s3" or "gcs" - logical_backup_provider: "s3" - # S3 Access Key ID - logical_backup_s3_access_key_id: "" - # S3 bucket to store backup results - logical_backup_s3_bucket: "my-bucket-url" - # S3 region of bucket - logical_backup_s3_region: "" - # S3 endpoint url when not using AWS - logical_backup_s3_endpoint: "" - # S3 Secret Access Key - logical_backup_s3_secret_access_key: "" - # S3 server side encryption - logical_backup_s3_sse: "AES256" - # backup schedule in the cron format - logical_backup_schedule: "30 00 * * *" - 
-# automate creation of human users with teams API service -configTeamsApi: - # team_admin_role will have the rights to grant roles coming from PG manifests - enable_admin_role_for_users: true - # operator watches for PostgresTeam CRs to assign additional teams and members to clusters - enable_postgres_team_crd: false - # toogle to create additional superuser teams from PostgresTeam CRs - enable_postgres_team_crd_superusers: false - # toggle to automatically rename roles of former team members and deny LOGIN - enable_team_member_deprecation: false - # toggle to grant superuser to team members created from the Teams API - enable_team_superuser: false - # toggles usage of the Teams API by the operator - enable_teams_api: false - # should contain a URL to use for authentication (username and token) - # pam_configuration: "" - - # operator will add all team member roles to this group and add a pg_hba line - pam_role_name: zalandos - # List of teams which members need the superuser role in each Postgres cluster - postgres_superuser_teams: - - postgres_superusers - # List of roles that cannot be overwritten by an application, team or infrastructure role - protected_role_names: - - admin - # Suffix to add if members are removed from TeamsAPI or PostgresTeam CRD - role_deletion_suffix: "_deleted" - # role name to grant to team members created from the Teams API - team_admin_role: admin - # postgres config parameters to apply to each team member role - team_api_role_configuration: - log_statement: all - # URL of the Teams API service - # teams_api_url: http://fake-teams-api.default.svc.cluster.local - -configConnectionPooler: - # db schema to install lookup function into - connection_pooler_schema: "pooler" - # db user for pooler to use - connection_pooler_user: "pooler" - # docker image - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-16" - # max db connections the pooler should hold - connection_pooler_max_db_connections: 60 - # default pooling mode - connection_pooler_mode: "transaction" - # number of pooler instances - connection_pooler_number_of_instances: 2 - # default resources - connection_pooler_default_cpu_request: 500m - connection_pooler_default_memory_request: 100Mi - connection_pooler_default_cpu_limit: "1" - connection_pooler_default_memory_limit: 100Mi - -rbac: - # Specifies whether RBAC resources should be created - create: true - -crd: - # Specifies whether custom resource definitions should be created - # When using helm3, this is ignored; instead use "--skip-crds" to skip. - create: true - -serviceAccount: - # Specifies whether a ServiceAccount should be created - create: true - # The name of the ServiceAccount to use. 
- # If not set and create is true, a name is generated using the fullname template - name: - -podServiceAccount: - # The name of the ServiceAccount to be used by postgres cluster pods - # If not set a name is generated using the fullname template and "-pod" suffix - name: "postgres-pod" - -# priority class for operator pod -priorityClassName: "" - -# priority class for database pods -podPriorityClassName: "" - -resources: - limits: - cpu: 500m - memory: 500Mi - requests: - cpu: 100m - memory: 250Mi - -securityContext: - runAsUser: 1000 - runAsNonRoot: true - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - -# Affinity for pod assignment -# Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity -affinity: {} - -# Node labels for pod assignment -# Ref: https://kubernetes.io/docs/user-guide/node-selection/ -nodeSelector: {} - -# Tolerations for pod assignment -# Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ -tolerations: [] - -controllerID: - # Specifies whether a controller ID should be defined for the operator - # Note, all postgres manifest must then contain the following annotation to be found by this operator - # "acid.zalan.do/controller": - create: false - # The name of the controller ID to use. - # If not set and create is true, a name is generated using the fullname template - name: diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index fde4e203d..d9c2d0e92 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -4,16 +4,16 @@ image: tag: v1.6.3 pullPolicy: "IfNotPresent" -# Optionally specify an array of imagePullSecrets. -# Secrets must be manually created in the namespace. -# ref: https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod -# imagePullSecrets: + # Optionally specify an array of imagePullSecrets. + # Secrets must be manually created in the namespace. + # ref: https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod + # imagePullSecrets: # - name: myRegistryKeySecretName podAnnotations: {} podLabels: {} -configTarget: "ConfigMap" +configTarget: "OperatorConfigurationCRD" # JSON logging format enableJsonLogging: false @@ -21,37 +21,38 @@ enableJsonLogging: false # general configuration parameters configGeneral: # choose if deployment creates/updates CRDs with OpenAPIV3Validation - enable_crd_validation: "true" + enable_crd_validation: true # update only the statefulsets without immediately doing the rolling update - enable_lazy_spilo_upgrade: "false" + enable_lazy_spilo_upgrade: false # set the PGVERSION env var instead of providing the version via postgresql.bin_dir in SPILO_CONFIGURATION - enable_pgversion_env_var: "true" + enable_pgversion_env_var: true # start any new database pod without limitations on shm memory - enable_shm_volume: "true" + enable_shm_volume: true # enables backwards compatible path between Spilo 12 and Spilo 13 images - enable_spilo_wal_path_compat: "false" + enable_spilo_wal_path_compat: false # etcd connection string for Patroni. Empty uses K8s-native DCS. etcd_host: "" # Select if setup uses endpoints (default), or configmaps to manage leader (DCS=k8s) - # kubernetes_use_configmaps: "false" + # kubernetes_use_configmaps: false # Spilo docker image docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p7 # min number of instances in Postgres cluster. 
-1 = no limit - min_instances: "-1" + min_instances: -1 # max number of instances in Postgres cluster. -1 = no limit - max_instances: "-1" + max_instances: -1 # period between consecutive repair requests repair_period: 5m # period between consecutive sync requests resync_period: 30m # can prevent certain cases of memory overcommitment - # set_memory_request_to_limit: "false" + # set_memory_request_to_limit: false # map of sidecar names to docker images - # sidecar_docker_images: "" + # sidecar_docker_images: + # example: "exampleimage:exampletag" # number of routines the operator spawns to process requests concurrently - workers: "8" + workers: 8 # parameters describing Postgres users configUsers: @@ -70,16 +71,20 @@ configMajorVersionUpgrade: configKubernetes: # list of additional capabilities for postgres container - # additional_pod_capabilities: "SYS_NICE" + # additional_pod_capabilities: + # - "SYS_NICE" # default DNS domain of K8s cluster where operator is running cluster_domain: cluster.local # additional labels assigned to the cluster objects - cluster_labels: application:spilo + cluster_labels: + application: spilo # label assigned to Kubernetes objects created by the operator cluster_name_label: cluster-name - # annotations attached to each database pod - # custom_pod_annotations: "keya:valuea,keyb:valueb" + # additional annotations to add to every database pod + # custom_pod_annotations: + # keya: valuea + # keyb: valueb # key name for annotation that compares manifest value with current date # delete_annotation_date_key: "delete-date" @@ -88,30 +93,36 @@ configKubernetes: # delete_annotation_name_key: "delete-clustername" # list of annotations propagated from cluster manifest to statefulset and deployment - # downscaler_annotations: "deployment-time,downscaler/*" + # downscaler_annotations: + # - deployment-time + # - downscaler/* # enables initContainers to run actions before Spilo is started - enable_init_containers: "true" + enable_init_containers: true # toggles pod anti affinity on the Postgres pods - enable_pod_antiaffinity: "false" + enable_pod_antiaffinity: false # toggles PDB to set to MinAvailabe 0 or 1 - enable_pod_disruption_budget: "true" + enable_pod_disruption_budget: true # enables sidecar containers to run alongside Spilo in the same pod - enable_sidecars: "true" + enable_sidecars: true # namespaced name of the secret containing infrastructure roles names and passwords # infrastructure_roles_secret_name: postgresql-infrastructure-roles # list of annotation keys that can be inherited from the cluster manifest - # inherited_annotations: owned-by + # inherited_annotations: + # - owned-by # list of label keys that can be inherited from the cluster manifest - # inherited_labels: application,environment + # inherited_labels: + # - application + # - environment # timeout for successful migration of master pods from unschedulable node # master_pod_move_timeout: 20m # set of labels that a running and active node should possess to be considered ready - # node_readiness_label: "" + # node_readiness_label: + # status: ready # namespaced name of the secret containing the OAuth2 token to pass to the teams API # oauth_token_secret_name: postgresql-operator @@ -140,16 +151,16 @@ configKubernetes: # template for database user secrets generated by the operator secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}" # set user and group for the spilo container (required to run Spilo as non-root process) - # spilo_runasuser: "101" - # spilo_runasgroup: "103" 
+ # spilo_runasuser: 101 + # spilo_runasgroup: 103 # group ID with write-access to volumes (required to run Spilo as non-root process) - # spilo_fsgroup: "103" + # spilo_fsgroup: 103 # whether the Spilo container should run in privileged mode - spilo_privileged: "false" + spilo_privileged: false # whether the Spilo container should run with additional permissions other than parent. # required by cron which needs setuid - spilo_allow_privilege_escalation: "true" + spilo_allow_privilege_escalation: true # storage resize strategy, available options are: ebs, pvc, off storage_resize_mode: pvc # operator watches for postgres objects in the given namespace @@ -190,34 +201,36 @@ configLoadBalancer: # DNS zone for cluster DNS name when load balancer is configured for cluster db_hosted_zone: db.example.com # annotations to apply to service when load balancing is enabled - # custom_service_annotations: "keyx:valuez,keya:valuea" + # custom_service_annotations: + # keyx: valuez + # keya: valuea # toggles service type load balancer pointing to the master pod of the cluster - enable_master_load_balancer: "false" + enable_master_load_balancer: false # toggles service type load balancer pointing to the replica pod of the cluster - enable_replica_load_balancer: "false" + enable_replica_load_balancer: false # define external traffic policy for the load balancer external_traffic_policy: "Cluster" # defines the DNS name string template for the master load balancer cluster - master_dns_name_format: '{cluster}.{team}.{hostedzone}' + master_dns_name_format: "{cluster}.{team}.{hostedzone}" # defines the DNS name string template for the replica load balancer cluster - replica_dns_name_format: '{cluster}-repl.{team}.{hostedzone}' + replica_dns_name_format: "{cluster}-repl.{team}.{hostedzone}" # options to aid debugging of the operator itself configDebug: # toggles verbose debug logs from the operator - debug_logging: "true" + debug_logging: true # toggles operator functionality that require access to the postgres database - enable_database_access: "true" + enable_database_access: true # parameters affecting logging and REST API listener configLoggingRestApi: # REST API listener listens to this port - api_port: "8080" + api_port: 8080 # number of entries in the cluster history ring buffer - cluster_history_entries: "1000" + cluster_history_entries: 1000 # number of lines in the ring buffer used to store cluster logs - ring_log_lines: "100" + ring_log_lines: 100 # configure interaction with non-Kubernetes objects from AWS or GCP configAwsOrGcp: @@ -231,11 +244,11 @@ configAwsOrGcp: aws_region: eu-central-1 # enable automatic migration on AWS from gp2 to gp3 volumes - enable_ebs_gp3_migration: "false" + enable_ebs_gp3_migration: false # defines maximum volume size in GB until which auto migration happens - # enable_ebs_gp3_migration_max_size: "1000" + # enable_ebs_gp3_migration_max_size: 1000 - # GCP credentials for setting the GOOGLE_APPLICATION_CREDNETIALS environment variable + # GCP credentials that will be used by the operator / pods # gcp_credentials: "" # AWS IAM role to supply in the iam.amazonaws.com/role annotation of Postgres pods @@ -265,10 +278,10 @@ configLogicalBackup: logical_backup_s3_access_key_id: "" # S3 bucket to store backup results logical_backup_s3_bucket: "my-bucket-url" - # S3 endpoint url when not using AWS - logical_backup_s3_endpoint: "" # S3 region of bucket logical_backup_s3_region: "" + # S3 endpoint url when not using AWS + logical_backup_s3_endpoint: "" # S3 Secret Access Key 
logical_backup_s3_secret_access_key: "" # S3 server side encryption @@ -276,36 +289,38 @@ configLogicalBackup: # backup schedule in the cron format logical_backup_schedule: "30 00 * * *" - # automate creation of human users with teams API service configTeamsApi: # team_admin_role will have the rights to grant roles coming from PG manifests - enable_admin_role_for_users: "true" + enable_admin_role_for_users: true # operator watches for PostgresTeam CRs to assign additional teams and members to clusters - enable_postgres_team_crd: "false" + enable_postgres_team_crd: false # toogle to create additional superuser teams from PostgresTeam CRs - enable_postgres_team_crd_superusers: "false" + enable_postgres_team_crd_superusers: false # toggle to automatically rename roles of former team members and deny LOGIN - enable_team_member_deprecation: "false" + enable_team_member_deprecation: false # toggle to grant superuser to team members created from the Teams API - enable_team_superuser: "false" + enable_team_superuser: false # toggles usage of the Teams API by the operator - enable_teams_api: "false" + enable_teams_api: false # should contain a URL to use for authentication (username and token) # pam_configuration: https://info.example.com/oauth2/tokeninfo?access_token= uid realm=/employees # operator will add all team member roles to this group and add a pg_hba line - pam_role_name: "zalandos" + pam_role_name: zalandos # List of teams which members need the superuser role in each Postgres cluster - postgres_superuser_teams: "postgres_superusers" + postgres_superuser_teams: + - postgres_superusers # List of roles that cannot be overwritten by an application, team or infrastructure role - protected_role_names: "admin" + protected_role_names: + - admin # Suffix to add if members are removed from TeamsAPI or PostgresTeam CRD role_deletion_suffix: "_deleted" # role name to grant to team members created from the Teams API - team_admin_role: "admin" + team_admin_role: admin # postgres config parameters to apply to each team member role - team_api_role_configuration: "log_statement:all" + team_api_role_configuration: + log_statement: all # URL of the Teams API service # teams_api_url: http://fake-teams-api.default.svc.cluster.local @@ -318,11 +333,11 @@ configConnectionPooler: # docker image connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-16" # max db connections the pooler should hold - connection_pooler_max_db_connections: "60" + connection_pooler_max_db_connections: 60 # default pooling mode connection_pooler_mode: "transaction" # number of pooler instances - connection_pooler_number_of_instances: "2" + connection_pooler_number_of_instances: 2 # default resources connection_pooler_default_cpu_request: 500m connection_pooler_default_memory_request: 100Mi diff --git a/docs/developer.md b/docs/developer.md index 6f4e08935..a9b3b143e 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -314,13 +314,12 @@ Please, reflect your changes in tests, for example in: For the CRD-based configuration, please update the following files: * the default [OperatorConfiguration](../manifests/postgresql-operator-default-configuration.yaml) -* the Helm chart's [values-crd file](../charts/postgres-operator/values.yaml) * the CRD's [validation](../manifests/operatorconfiguration.crd.yaml) +* the CRD's validation in the [Helm chart](../charts/postgres-operator/crds/operatorconfigurations.yaml) -Reflect the changes in the ConfigMap configuration as well (note that numeric -and boolean parameters 
have to use double quotes here):
-* [ConfigMap](../manifests/configmap.yaml) manifest
-* the Helm chart's default [values file](../charts/postgres-operator/values.yaml)
+Add new options also to the Helm chart's [values file](../charts/postgres-operator/values.yaml).
+It follows the OperatorConfiguration CRD layout. Nested values will be flattened for the ConfigMap.
+Last but not least, update the [ConfigMap](../manifests/configmap.yaml) manifest example as well.
 
 ### Updating documentation
 
diff --git a/docs/quickstart.md b/docs/quickstart.md
index fe083a61d..a90c90f42 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -81,12 +81,6 @@ the repo root. With Helm v3 installed you should be able to run:
 helm install postgres-operator ./charts/postgres-operator
 ```
 
-To use CRD-based configuration you need to specify the [values-crd yaml file](../charts/postgres-operator/values-crd.yaml).
-
-```bash
-helm install postgres-operator ./charts/postgres-operator -f ./charts/postgres-operator/values-crd.yaml
-```
-
 The chart works with both Helm 2 and Helm 3. The `crd-install` hook from v2 will
 be skipped with warning when using v3. Documentation for installing applications
 with Helm 2 can be found in the [v2 docs](https://v2.helm.sh/docs/).

From 9668ac21a35bd551e27561c9bf477c31a99230fb Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 4 Jun 2021 16:14:58 +0200
Subject: [PATCH 2/5] using quote in UI deployment template (#1514)

---
 charts/postgres-operator-ui/templates/deployment.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/charts/postgres-operator-ui/templates/deployment.yaml b/charts/postgres-operator-ui/templates/deployment.yaml
index 9a1df3ab4..b72b108e0 100644
--- a/charts/postgres-operator-ui/templates/deployment.yaml
+++ b/charts/postgres-operator-ui/templates/deployment.yaml
@@ -43,13 +43,13 @@ spec:
         - name: "APP_URL"
           value: "http://localhost:8081"
         - name: "OPERATOR_API_URL"
-          value: {{ .Values.envs.operatorApiUrl }}
+          value: {{ .Values.envs.operatorApiUrl | quote }}
         - name: "OPERATOR_CLUSTER_NAME_LABEL"
-          value: {{ .Values.envs.operatorClusterNameLabel }}
+          value: {{ .Values.envs.operatorClusterNameLabel | quote }}
         - name: "RESOURCES_VISIBLE"
-          value: "{{ .Values.envs.resourcesVisible }}"
+          value: {{ .Values.envs.resourcesVisible | quote }}
         - name: "TARGET_NAMESPACE"
-          value: "{{ .Values.envs.targetNamespace }}"
+          value: {{ .Values.envs.targetNamespace | quote }}
         - name: "TEAMS"
           value: |-
             [

From 75a9e2be3833d67888f48e909f122eca24fff036 Mon Sep 17 00:00:00 2001
From: Rafia Sabih
Date: Fri, 11 Jun 2021 10:35:30 +0200
Subject: [PATCH 3/5] Create cross namespace secrets (#1490)

* Create cross namespace secrets

* add test cases

* fixes

* Fixes
  - include namespace in secret name only when namespace is provided
  - use username.namespace as key to pgUsers only when namespace is provided
  - avoid conflict in the role creation in db by checking namespace along with the username

* Update unit tests

* Fix test case

* Fixes
  - update regular expression for usernames
  - add test to allow check for valid usernames
  - create pg roles with namespace (if any) appended in rolename

* add more test cases for valid usernames

* update docs

* fixes as per review comments

* update e2e

* fixes

* Add toggle to allow namespaced secrets

* update docs

* comment update

* Update e2e/tests/test_e2e.py

* few minor fixes

* fix unit tests

* fix e2e

* fix e2e attempt 2

* fix e2e

Co-authored-by: Rafia Sabih
Co-authored-by: Felix Kunde
---
 .../postgres-operator/crds/postgresqls.yaml   |  2 +
 charts/postgres-operator/values-crd.yaml      |  0
 charts/postgres-operator/values.yaml          |  5 +-
 docs/reference/operator_parameters.md         | 17 ++--
 docs/user.md                                  | 21 ++++-
 e2e/tests/k8s_api.py                          | 10 ++
 e2e/tests/test_e2e.py                         | 41 +++++++-
 manifests/complete-postgres-manifest.yaml     |  1 +
 pkg/apis/acid.zalan.do/v1/crds.go             |  3 +
 pkg/apis/acid.zalan.do/v1/postgresql_type.go  | 43 ++++-----
 .../acid.zalan.do/v1/zz_generated.deepcopy.go |  5 +
 pkg/cluster/cluster.go                        | 37 ++++++--
 pkg/cluster/cluster_test.go                   | 93 ++++++++++++++++++-
 pkg/cluster/k8sres.go                         |  5 +-
 pkg/cluster/resources.go                      |  2 +-
 pkg/cluster/sync.go                           | 16 +++-
 pkg/spec/types.go                             |  1 +
 17 files changed, 249 insertions(+), 53 deletions(-)
 delete mode 100644 charts/postgres-operator/values-crd.yaml

diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml
index aead7fe69..eb628863d 100644
--- a/charts/postgres-operator/crds/postgresqls.yaml
+++ b/charts/postgres-operator/crds/postgresqls.yaml
@@ -515,6 +515,8 @@ spec:
                 type: integer
               useLoadBalancer:  # deprecated
                 type: boolean
+              enableNamespacedSecret:
+                type: boolean
               users:
                 type: object
                 additionalProperties:
diff --git a/charts/postgres-operator/values-crd.yaml b/charts/postgres-operator/values-crd.yaml
deleted file mode 100644
index e69de29bb..000000000
diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml
index d9c2d0e92..287835a24 100644
--- a/charts/postgres-operator/values.yaml
+++ b/charts/postgres-operator/values.yaml
@@ -148,7 +148,10 @@ configKubernetes:
   # Postgres pods are terminated forcefully after this timeout
   pod_terminate_grace_period: 5m
-  # template for database user secrets generated by the operator
+  # template for database user secrets generated by the operator,
+  # here {username} contains the namespace in the format namespace.username
+  # if the user lives in a different namespace than the cluster and
+  # cross-namespace secrets are enabled via the EnableNamespacedSecret flag.
   secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}"
   # set user and group for the spilo container (required to run Spilo as non-root process)
   # spilo_runasuser: 101
diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md
index 395007c91..1b1ae852e 100644
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@@ -172,11 +172,11 @@ under the `users` key.
 
 ## Major version upgrades
 
-Parameters configuring automatic major version upgrades. In a 
+Parameters configuring automatic major version upgrades. In a
 CRD-configuration, they are grouped under the `major_version_upgrade` key.
 
 * **major_version_upgrade_mode**
-  Postgres Operator supports [in-place major version upgrade](../administrator.md#in-place-major-version-upgrade) 
+  Postgres Operator supports [in-place major version upgrade](../administrator.md#in-place-major-version-upgrade)
   with three different modes:
   `"off"` = no upgrade by the operator,
   `"manual"` = manifest triggers action,
@@ -275,11 +275,14 @@ configuration they are grouped under the `kubernetes` key.
 
 * **secret_name_template**
   a template for the name of the database user secrets generated by the
-  operator. `{username}` is replaced with name of the secret, `{cluster}` with
-  the name of the cluster, `{tprkind}` with the kind of CRD (formerly known as
-  TPR) and `{tprgroup}` with the group of the CRD. No other placeholders are
-  allowed. The default is
-  `{username}.{cluster}.credentials.{tprkind}.{tprgroup}`.
+  operator. `{namespace}` is replaced with the name of the namespace (only when
+  cross-namespace secrets are enabled via the `EnableNamespacedSecret` flag;
+  otherwise the secret lives in the cluster's namespace and the placeholder is
+  dropped from the secret name), `{username}` is replaced with the name of the
+  secret, `{cluster}` with the name of the cluster, `{tprkind}` with the kind
+  of CRD (formerly known as TPR) and `{tprgroup}` with the group of the CRD.
+  No other placeholders are allowed. The default is
+  `{namespace}.{username}.{cluster}.credentials.{tprkind}.{tprgroup}`.
 
 * **cluster_domain**
   defines the default DNS domain for the kubernetes cluster the operator is
diff --git a/docs/user.md b/docs/user.md
index 8e406ec00..8194d2ced 100644
--- a/docs/user.md
+++ b/docs/user.md
@@ -139,6 +139,25 @@ secret, without ever sharing it outside of the cluster.
 At the moment it is not possible to define membership of the manifest role in
 other roles.
 
+To define the secrets for the users in a different namespace than that of the
+cluster, one can use the flag `EnableNamespacedSecret` and declare the
+namespace for the secrets in the manifest in the following manner:
+
+```yaml
+spec:
+  users:
+    # users with secret in a different namespace
+    appspace.db_user:
+    - createdb
+```
+
+Here, anything before the first dot is taken as the namespace and the text
+after the first dot is the username. Also, the postgres roles of these
+usernames would be in the form of `namespace.username`.
+
+For such usernames, the secret is created in the given namespace and its name
+is of the following form:
+`{namespace}.{username}.{team}-{clustername}.credentials.postgresql.acid.zalan.do`
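For illustration, with a hypothetical cluster named `acid-minimal-cluster` owned by team `acid`, the `appspace.db_user` entry above would yield a secret named:

```
appspace.db_user.acid-minimal-cluster.credentials.postgresql.acid.zalan.do
```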
 
 ### Infrastructure roles
 
 An infrastructure role is a role that should be present on every PostgreSQL
@@ -330,7 +349,7 @@ spec:
 
 This creates roles for members of the `c-team` team not only in all clusters
 owned by `a-team`, but as well in cluster owned by `b-team`, as `a-team` is
-an `additionalTeam` to `b-team` 
+an `additionalTeam` to `b-team`
 
 Note, you can also define `additionalSuperuserTeams` in the `PostgresTeam`
 manifest.
By default, this option is disabled and must be configured with diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py index 85bcb6245..d28ea69ad 100644 --- a/e2e/tests/k8s_api.py +++ b/e2e/tests/k8s_api.py @@ -197,6 +197,16 @@ class K8s: pod_phase = pods[0].status.phase time.sleep(self.RETRY_TIMEOUT_SEC) + def wait_for_namespace_creation(self, namespace='default'): + ns_found = False + while ns_found != True: + ns = self.api.core_v1.list_namespace().items + for n in ns: + if n.metadata.name == namespace: + ns_found = True + break + time.sleep(self.RETRY_TIMEOUT_SEC) + def get_logical_backup_job(self, namespace='default'): return self.api.batch_v1_beta1.list_namespaced_cron_job(namespace, label_selector="application=spilo") diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 2b9e3ad28..fcac70e10 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -253,7 +253,7 @@ class EndToEndTestCase(unittest.TestCase): WHERE (rolname = 'tester' AND rolcanlogin) OR (rolname = 'kind_delete_me' AND NOT rolcanlogin); """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, "Database role of replaced member in PostgresTeam not renamed", 10, 5) # re-add additional member and check if the role is renamed back @@ -276,7 +276,7 @@ class EndToEndTestCase(unittest.TestCase): WHERE (rolname = 'kind' AND rolcanlogin) OR (rolname = 'tester_delete_me' AND NOT rolcanlogin); """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, "Database role of recreated member in PostgresTeam not renamed back to original name", 10, 5) # revert config change @@ -322,7 +322,6 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: self.k8s.count_running_pods("connection-pooler=acid-minimal-cluster-pooler"), 0, "Pooler pods not scaled down") - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_enable_disable_connection_pooler(self): ''' @@ -568,6 +567,7 @@ class EndToEndTestCase(unittest.TestCase): role.pop("Password", None) self.assertDictEqual(role, { "Name": "robot_zmon_acid_monitoring_new", + "Namespace":"", "Flags": None, "MemberOf": ["robot_zmon"], "Parameters": None, @@ -587,6 +587,41 @@ class EndToEndTestCase(unittest.TestCase): print('Operator log: {}'.format(k8s.get_operator_log())) raise + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_zz_cross_namespace_secrets(self): + ''' + Test secrets in different namespace + ''' + app_namespace = "appspace" + + v1_appnamespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=app_namespace)) + self.k8s.api.core_v1.create_namespace(v1_appnamespace) + self.k8s.wait_for_namespace_creation(app_namespace) + + self.k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableNamespacedSecret': True, + 'users':{ + 'appspace.db_user': [], + } + } + }) + self.eventuallyEqual(lambda: self.k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", app_namespace), + 1, "Secret not created for user in namespace") + + #reset the flag + self.k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 
'enableNamespacedSecret': False, + } + }) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_lazy_spilo_upgrade(self): ''' diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 6e2acbdd3..5f995de15 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -12,6 +12,7 @@ spec: dockerImage: registry.opensource.zalan.do/acid/spilo-13:2.0-p7 teamId: "acid" numberOfInstances: 2 + enableNamespacedSecret: False users: # Application/Robot users zalando: - superuser diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 83e7273e4..ae91a9f38 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -730,6 +730,9 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ Type: "boolean", Description: "Deprecated", }, + "enableNamespacedSecret": { + Type: "boolean", + }, "users": { Type: "object", AdditionalProperties: &apiextv1.JSONSchemaPropsOrBool{ diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index 7346fb0e5..1787f5b4e 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -53,27 +53,28 @@ type PostgresSpec struct { // load balancers' source ranges are the same for master and replica services AllowedSourceRanges []string `json:"allowedSourceRanges"` - NumberOfInstances int32 `json:"numberOfInstances"` - Users map[string]UserFlags `json:"users,omitempty"` - MaintenanceWindows []MaintenanceWindow `json:"maintenanceWindows,omitempty"` - Clone *CloneDescription `json:"clone,omitempty"` - ClusterName string `json:"-"` - Databases map[string]string `json:"databases,omitempty"` - PreparedDatabases map[string]PreparedDatabase `json:"preparedDatabases,omitempty"` - SchedulerName *string `json:"schedulerName,omitempty"` - NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"` - Tolerations []v1.Toleration `json:"tolerations,omitempty"` - Sidecars []Sidecar `json:"sidecars,omitempty"` - InitContainers []v1.Container `json:"initContainers,omitempty"` - PodPriorityClassName string `json:"podPriorityClassName,omitempty"` - ShmVolume *bool `json:"enableShmVolume,omitempty"` - EnableLogicalBackup bool `json:"enableLogicalBackup,omitempty"` - LogicalBackupSchedule string `json:"logicalBackupSchedule,omitempty"` - StandbyCluster *StandbyDescription `json:"standby,omitempty"` - PodAnnotations map[string]string `json:"podAnnotations,omitempty"` - ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"` - TLS *TLSDescription `json:"tls,omitempty"` - AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"` + NumberOfInstances int32 `json:"numberOfInstances"` + EnableNamespacedSecret *bool `json:"enableNamespacedSecret,omitempty"` + Users map[string]UserFlags `json:"users,omitempty"` + MaintenanceWindows []MaintenanceWindow `json:"maintenanceWindows,omitempty"` + Clone *CloneDescription `json:"clone,omitempty"` + ClusterName string `json:"-"` + Databases map[string]string `json:"databases,omitempty"` + PreparedDatabases map[string]PreparedDatabase `json:"preparedDatabases,omitempty"` + SchedulerName *string `json:"schedulerName,omitempty"` + NodeAffinity *v1.NodeAffinity `json:"nodeAffinity,omitempty"` + Tolerations []v1.Toleration `json:"tolerations,omitempty"` + Sidecars []Sidecar `json:"sidecars,omitempty"` + InitContainers []v1.Container `json:"initContainers,omitempty"` + 
PodPriorityClassName string `json:"podPriorityClassName,omitempty"` + ShmVolume *bool `json:"enableShmVolume,omitempty"` + EnableLogicalBackup bool `json:"enableLogicalBackup,omitempty"` + LogicalBackupSchedule string `json:"logicalBackupSchedule,omitempty"` + StandbyCluster *StandbyDescription `json:"standby,omitempty"` + PodAnnotations map[string]string `json:"podAnnotations,omitempty"` + ServiceAnnotations map[string]string `json:"serviceAnnotations,omitempty"` + TLS *TLSDescription `json:"tls,omitempty"` + AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"` // deprecated json tags InitContainersOld []v1.Container `json:"init_containers,omitempty"` diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 584a72143..bad75ffd8 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -614,6 +614,11 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.EnableNamespacedSecret != nil { + in, out := &in.EnableNamespacedSecret, &out.EnableNamespacedSecret + *out = new(bool) + **out = **in + } if in.Users != nil { in, out := &in.Users, &out.Users *out = make(map[string]UserFlags, len(*in)) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index ff474884c..ced184877 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -940,14 +940,16 @@ func (c *Cluster) initSystemUsers() { // secrets, therefore, setting flags like SUPERUSER or REPLICATION // is not necessary here c.systemUsers[constants.SuperuserKeyName] = spec.PgUser{ - Origin: spec.RoleOriginSystem, - Name: c.OpConfig.SuperUsername, - Password: util.RandomPassword(constants.PasswordLength), + Origin: spec.RoleOriginSystem, + Name: c.OpConfig.SuperUsername, + Namespace: c.Namespace, + Password: util.RandomPassword(constants.PasswordLength), } c.systemUsers[constants.ReplicationUserKeyName] = spec.PgUser{ - Origin: spec.RoleOriginSystem, - Name: c.OpConfig.ReplicationUsername, - Password: util.RandomPassword(constants.PasswordLength), + Origin: spec.RoleOriginSystem, + Name: c.OpConfig.ReplicationUsername, + Namespace: c.Namespace, + Password: util.RandomPassword(constants.PasswordLength), } // Connection pooler user is an exception, if requested it's going to be @@ -975,10 +977,11 @@ func (c *Cluster) initSystemUsers() { // connection pooler application should be able to login with this role connectionPoolerUser := spec.PgUser{ - Origin: spec.RoleConnectionPooler, - Name: username, - Flags: []string{constants.RoleFlagLogin}, - Password: util.RandomPassword(constants.PasswordLength), + Origin: spec.RoleConnectionPooler, + Name: username, + Namespace: c.Namespace, + Flags: []string{constants.RoleFlagLogin}, + Password: util.RandomPassword(constants.PasswordLength), } if _, exists := c.pgUsers[username]; !exists { @@ -1081,6 +1084,7 @@ func (c *Cluster) initDefaultRoles(defaultRoles map[string]string, admin, prefix newRole := spec.PgUser{ Origin: spec.RoleOriginBootstrap, Name: roleName, + Namespace: c.Namespace, Password: util.RandomPassword(constants.PasswordLength), Flags: flags, MemberOf: memberOf, @@ -1105,6 +1109,17 @@ func (c *Cluster) initRobotUsers() error { if c.shouldAvoidProtectedOrSystemRole(username, "manifest robot role") { continue } + namespace := c.Namespace + + //if namespaced secrets are allowed + if c.Postgresql.Spec.EnableNamespacedSecret != nil && + 
*c.Postgresql.Spec.EnableNamespacedSecret { + if strings.Contains(username, ".") { + splits := strings.Split(username, ".") + namespace = splits[0] + } + } + flags, err := normalizeUserFlags(userFlags) if err != nil { return fmt.Errorf("invalid flags for user %q: %v", username, err) @@ -1116,6 +1131,7 @@ func (c *Cluster) initRobotUsers() error { newRole := spec.PgUser{ Origin: spec.RoleOriginManifest, Name: username, + Namespace: namespace, Password: util.RandomPassword(constants.PasswordLength), Flags: flags, AdminRole: adminRole, @@ -1233,6 +1249,7 @@ func (c *Cluster) initInfrastructureRoles() error { return fmt.Errorf("invalid flags for user '%v': %v", username, err) } newRole.Flags = flags + newRole.Namespace = c.Namespace if currentRole, present := c.pgUsers[username]; present { c.pgUsers[username] = c.resolveNameConflict(¤tRole, &newRole) diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go index 1f6510e65..cb9356f47 100644 --- a/pkg/cluster/cluster_test.go +++ b/pkg/cluster/cluster_test.go @@ -7,12 +7,14 @@ import ( "github.com/sirupsen/logrus" acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1" + fakeacidv1 "github.com/zalando/postgres-operator/pkg/generated/clientset/versioned/fake" "github.com/zalando/postgres-operator/pkg/spec" "github.com/zalando/postgres-operator/pkg/util/config" "github.com/zalando/postgres-operator/pkg/util/constants" "github.com/zalando/postgres-operator/pkg/util/k8sutil" "github.com/zalando/postgres-operator/pkg/util/teams" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/record" ) @@ -79,8 +81,8 @@ func TestInitRobotUsers(t *testing.T) { }{ { manifestUsers: map[string]acidv1.UserFlags{"foo": {"superuser", "createdb"}}, - infraRoles: map[string]spec.PgUser{"foo": {Origin: spec.RoleOriginInfrastructure, Name: "foo", Password: "bar"}}, - result: map[string]spec.PgUser{"foo": {Origin: spec.RoleOriginInfrastructure, Name: "foo", Password: "bar"}}, + infraRoles: map[string]spec.PgUser{"foo": {Origin: spec.RoleOriginInfrastructure, Name: "foo", Namespace: cl.Namespace, Password: "bar"}}, + result: map[string]spec.PgUser{"foo": {Origin: spec.RoleOriginInfrastructure, Name: "foo", Namespace: cl.Namespace, Password: "bar"}}, err: nil, }, { @@ -845,3 +847,90 @@ func TestPreparedDatabases(t *testing.T) { } } } + +func TestCrossNamespacedSecrets(t *testing.T) { + testName := "test secrets in different namespace" + clientSet := fake.NewSimpleClientset() + acidClientSet := fakeacidv1.NewSimpleClientset() + namespace := "default" + + client := k8sutil.KubernetesClient{ + StatefulSetsGetter: clientSet.AppsV1(), + ServicesGetter: clientSet.CoreV1(), + DeploymentsGetter: clientSet.AppsV1(), + PostgresqlsGetter: acidClientSet.AcidV1(), + SecretsGetter: clientSet.CoreV1(), + } + pg := acidv1.Postgresql{ + ObjectMeta: metav1.ObjectMeta{ + Name: "acid-fake-cluster", + Namespace: namespace, + }, + Spec: acidv1.PostgresSpec{ + Volume: acidv1.Volume{ + Size: "1Gi", + }, + EnableNamespacedSecret: boolToPointer(true), + Users: map[string]acidv1.UserFlags{ + "appspace.db_user": {}, + "db_user": {}, + }, + }, + } + + var cluster = New( + Config{ + OpConfig: config.Config{ + ConnectionPooler: config.ConnectionPooler{ + ConnectionPoolerDefaultCPURequest: "100m", + ConnectionPoolerDefaultCPULimit: "100m", + ConnectionPoolerDefaultMemoryRequest: "100Mi", + ConnectionPoolerDefaultMemoryLimit: "100Mi", + NumberOfInstances: int32ToPointer(1), + }, + PodManagementPolicy: "ordered_ready", + 
Resources: config.Resources{ + ClusterLabels: map[string]string{"application": "spilo"}, + ClusterNameLabel: "cluster-name", + DefaultCPURequest: "300m", + DefaultCPULimit: "300m", + DefaultMemoryRequest: "300Mi", + DefaultMemoryLimit: "300Mi", + PodRoleLabel: "spilo-role", + }, + }, + }, client, pg, logger, eventRecorder) + + userNamespaceMap := map[string]string{ + cluster.Namespace: "db_user", + "appspace": "appspace.db_user", + } + + err := cluster.initRobotUsers() + if err != nil { + t.Errorf("Could not create secret for namespaced users with error: %s", err) + } + + for _, u := range cluster.pgUsers { + if u.Name != userNamespaceMap[u.Namespace] { + t.Errorf("%s: Could not create namespaced user in its correct namespaces for user %s in namespace %s", testName, u.Name, u.Namespace) + } + } +} + +func TestValidUsernames(t *testing.T) { + testName := "test username validity" + + invalidUsernames := []string{"_", ".", ".user", "appspace.", "user_", "_user", "-user", "user-", ",", "-", ",user", "user,", "namespace,user"} + validUsernames := []string{"user", "appspace.user", "appspace.dot.user", "user_name", "app_space.user_name"} + for _, username := range invalidUsernames { + if isValidUsername(username) { + t.Errorf("%s Invalid username is allowed: %s", testName, username) + } + } + for _, username := range validUsernames { + if !isValidUsername(username) { + t.Errorf("%s Valid username is not allowed: %s", testName, username) + } + } +} diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index cb11170d6..4dcab3b7e 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1547,10 +1547,11 @@ func (c *Cluster) generateUserSecrets() map[string]*v1.Secret { namespace := c.Namespace for username, pgUser := range c.pgUsers { //Skip users with no password i.e. 
human users (they'll be authenticated using pam) - secret := c.generateSingleUserSecret(namespace, pgUser) + secret := c.generateSingleUserSecret(pgUser.Namespace, pgUser) if secret != nil { secrets[username] = secret } + namespace = pgUser.Namespace } /* special case for the system user */ for _, systemUser := range c.systemUsers { @@ -1590,7 +1591,7 @@ func (c *Cluster) generateSingleUserSecret(namespace string, pgUser spec.PgUser) secret := v1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: c.credentialSecretName(username), - Namespace: namespace, + Namespace: pgUser.Namespace, Labels: lbls, Annotations: c.annotationsSet(nil), }, diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index 48b17f532..f078c6434 100644 --- a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -32,7 +32,7 @@ func (c *Cluster) listResources() error { } for _, obj := range c.Secrets { - c.logger.Infof("found secret: %q (uid: %q)", util.NameFromMeta(obj.ObjectMeta), obj.UID) + c.logger.Infof("found secret: %q (uid: %q), namespace: %s", util.NameFromMeta(obj.ObjectMeta), obj.UID, obj.ObjectMeta.Namespace) } for role, endpoint := range c.Endpoints { diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 94e930290..79dceedd5 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -483,7 +483,7 @@ func (c *Cluster) syncSecrets() error { for secretUsername, secretSpec := range secrets { if secret, err = c.KubeClient.Secrets(secretSpec.Namespace).Create(context.TODO(), secretSpec, metav1.CreateOptions{}); err == nil { c.Secrets[secret.UID] = secret - c.logger.Debugf("created new secret %s, uid: %s", util.NameFromMeta(secret.ObjectMeta), secret.UID) + c.logger.Debugf("created new secret %s, namespace: %s, uid: %s", util.NameFromMeta(secret.ObjectMeta), secretSpec.Namespace, secret.UID) continue } if k8sutil.ResourceAlreadyExists(err) { @@ -521,7 +521,7 @@ func (c *Cluster) syncSecrets() error { userMap[secretUsername] = pwdUser } } else { - return fmt.Errorf("could not create secret for user %s: %v", secretUsername, err) + return fmt.Errorf("could not create secret for user %s in namespace %s: %v", secretUsername, secretSpec.Namespace, err) } }
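For illustration, the naming convention these hunks implement can be sketched in isolation: a manifest user entry of the form "namespace.username" keeps its full name as the role name, while the prefix selects the namespace its credentials secret is created in. The helper splitUserNamespace below is hypothetical (the operator inlines this logic in initRobotUsers), shown only to make the convention explicit:

package main

import (
	"fmt"
	"strings"
)

// splitUserNamespace returns the namespace the user's credentials secret
// should live in, plus the user name as it is stored in pgUsers. Entries
// without a dot default to the cluster's own namespace.
func splitUserNamespace(clusterNamespace, manifestUser string) (string, string) {
	if strings.Contains(manifestUser, ".") {
		return strings.SplitN(manifestUser, ".", 2)[0], manifestUser
	}
	return clusterNamespace, manifestUser
}

func main() {
	for _, u := range []string{"db_user", "appspace.db_user"} {
		ns, name := splitUserNamespace("default", u)
		fmt.Printf("user %q -> secret in namespace %q\n", name, ns)
	}
}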
@@ -556,11 +556,17 @@ func (c *Cluster) syncRoles() (err error) { // create list of database roles to query for _, u := range c.pgUsers { - userNames = append(userNames, u.Name) + pgRole := u.Name + if u.Namespace != c.Namespace && u.Namespace != "" { + // to avoid conflicts when multiple users share the same name + // but live in different namespaces + pgRole = fmt.Sprintf("%s.%s", u.Name, u.Namespace) + } + userNames = append(userNames, pgRole) // add team member role name with rename suffix in case we need to rename it back if u.Origin == spec.RoleOriginTeamsAPI && c.OpConfig.EnableTeamMemberDeprecation { - deletedUsers[u.Name+c.OpConfig.RoleDeletionSuffix] = u.Name - userNames = append(userNames, u.Name+c.OpConfig.RoleDeletionSuffix) + deletedUsers[pgRole+c.OpConfig.RoleDeletionSuffix] = pgRole + userNames = append(userNames, pgRole+c.OpConfig.RoleDeletionSuffix) } } diff --git a/pkg/spec/types.go b/pkg/spec/types.go index 5d7794b42..533aae79f 100644 --- a/pkg/spec/types.go +++ b/pkg/spec/types.go @@ -49,6 +49,7 @@ const ( type PgUser struct { Origin RoleOrigin `yaml:"-"` Name string `yaml:"-"` + Namespace string `yaml:"-"` Password string `yaml:"-"` Flags []string `yaml:"user_flags"` MemberOf []string `yaml:"inrole"` From ebb3204cdd7002742499c85b1df15b43a68f005b Mon Sep 17 00:00:00 2001 From: Igor Yanchenko <1504692+yanchenko-igor@users.noreply.github.com> Date: Mon, 14 Jun 2021 12:00:58 +0300 Subject: [PATCH 4/5] restart instances via rest api instead of recreating pods, fixes bug with being unable to decrease some values, like max_connections (#1103) * restart instances via rest api instead of recreating pods * Ignore differences in bootstrap.dcs when comparing SPILO_CONFIGURATION * isBootstrapOnlyParameter is rewritten, instead of a whitelist it uses a blacklist * added e2e test for max_connections decreasing * documentation updated * pending_restart flag added to restart api call, wait for ttl seconds after restart * refactoring, /restart returns an error if pending_restart is set to true and patroni is not pending a restart * restart postgresql instances within pods only if a pod restart is not required * patroni might need to restart postgresql after pods were recreated if values like max_connections decreased * instancesRestart is not critical, try to restart pods if not successful * cleanup Co-authored-by: Felix Kunde --- docs/administrator.md | 4 + e2e/tests/test_e2e.py | 48 +++++++++++ pkg/cluster/cluster.go | 53 ++++++++++++- pkg/cluster/cluster_test.go | 154 ++++++++++++++++++++++++++++++++++++ pkg/cluster/k8sres.go | 34 ++++++-- pkg/cluster/k8sres_test.go | 6 ++ pkg/cluster/sync.go | 84 +++++++++++++++++--- pkg/util/patroni/patroni.go | 72 +++++++++++++++++ 8 files changed, 438 insertions(+), 17 deletions(-) diff --git a/docs/administrator.md b/docs/administrator.md index db3bae6b9..ad424cab8 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -168,6 +168,10 @@ operator checks during Sync all pods run images specified in their respective statefulsets. The operator triggers a rolling upgrade for PG clusters that violate this condition.
+Changes in $SPILO\_CONFIGURATION under the path bootstrap.dcs are ignored when +StatefulSets are compared. If there are changes under this path, they are +applied through the Patroni REST API, followed by a restart of Patroni. + ## Delete protection via annotations To avoid accidental deletes of Postgres clusters the operator can check the diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index fcac70e10..30d0cfe2f 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -1418,6 +1418,54 @@ class EndToEndTestCase(unittest.TestCase): } k8s.update_config(patch_delete_annotations) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_decrease_max_connections(self): + ''' + Test decreasing max_connections and restarting the cluster through the REST API + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + labels = 'spilo-role=master,' + cluster_label + new_max_connections_value = "99" + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector=labels).items + self.assert_master_is_unique() + masterPod = pods[0] + creationTimestamp = masterPod.metadata.creation_timestamp + + # adjust max_connections + pg_patch_max_connections = { + "spec": { + "postgresql": { + "parameters": { + "max_connections": new_max_connections_value + } + } + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) + + def get_max_connections(): + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector=labels).items + self.assert_master_is_unique() + masterPod = pods[0] + get_max_connections_cmd = '''psql -At -U postgres -c "SELECT setting FROM pg_settings WHERE name = 'max_connections';"''' + result = k8s.exec_with_kubectl(masterPod.metadata.name, get_max_connections_cmd) + max_connections_value = int(result.stdout) + return max_connections_value + + # Make sure that max_connections decreased + self.eventuallyEqual(get_max_connections, int(new_max_connections_value), "max_connections didn't decrease") + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector=labels).items + self.assert_master_is_unique() + masterPod = pods[0] + # Make sure that the pod didn't restart + self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp, + "Master pod creation timestamp is updated") + def get_failover_targets(self, master_node, replica_nodes): ''' If all pods live on the same node, failover will happen to other worker(s) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index ced184877..f56a0d1e4 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -5,6 +5,7 @@ package cluster import ( "context" "database/sql" + "encoding/json" "fmt" "reflect" "regexp" @@ -519,7 +520,7 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe newCheck("new statefulset %s's %s (index %d) resources do not match the current ones", func(a, b v1.Container) bool { return !compareResources(&a.Resources, &b.Resources) }), newCheck("new statefulset %s's %s (index %d) environment does not match the current one", - func(a, b v1.Container) bool { return !reflect.DeepEqual(a.Env, b.Env) }), + func(a, b v1.Container) bool { return !compareEnv(a.Env, b.Env) }), newCheck("new statefulset %s's %s (index %d) environment sources do not match the current one", func(a, b v1.Container) bool { return !reflect.DeepEqual(a.EnvFrom, b.EnvFrom) }), newCheck("new statefulset %s's %s (index %d)
security context does not match the current one", @@ -576,6 +577,56 @@ func compareResourcesAssumeFirstNotNil(a *v1.ResourceRequirements, b *v1.Resourc } +func compareEnv(a, b []v1.EnvVar) bool { + if len(a) != len(b) { + return false + } + equal := true + for _, enva := range a { + hasmatch := false + for _, envb := range b { + if enva.Name == envb.Name { + hasmatch = true + if enva.Name == "SPILO_CONFIGURATION" { + equal = compareSpiloConfiguration(enva.Value, envb.Value) + } else { + if enva.Value == "" && envb.Value == "" { + equal = reflect.DeepEqual(enva.ValueFrom, envb.ValueFrom) + } else { + equal = (enva.Value == envb.Value) + } + } + if !equal { + return false + } + } + } + if !hasmatch { + return false + } + } + return true +} + +func compareSpiloConfiguration(configa, configb string) bool { + var ( + oa, ob spiloConfiguration + ) + + var err error + err = json.Unmarshal([]byte(configa), &oa) + if err != nil { + return false + } + oa.Bootstrap.DCS = patroniDCS{} + err = json.Unmarshal([]byte(configb), &ob) + if err != nil { + return false + } + ob.Bootstrap.DCS = patroniDCS{} + return reflect.DeepEqual(oa, ob) +} + func (c *Cluster) enforceMinResourceLimits(spec *acidv1.PostgresSpec) error { var ( diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go index cb9356f47..9e7f60906 100644 --- a/pkg/cluster/cluster_test.go +++ b/pkg/cluster/cluster_test.go @@ -13,6 +13,7 @@ import ( "github.com/zalando/postgres-operator/pkg/util/constants" "github.com/zalando/postgres-operator/pkg/util/k8sutil" "github.com/zalando/postgres-operator/pkg/util/teams" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/record" @@ -848,6 +849,159 @@ func TestPreparedDatabases(t *testing.T) { } } +func TestCompareSpiloConfiguration(t *testing.T) { + testCases := []struct { + Config string + ExpectedResult bool + }{ + { + `{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_connections":"100","max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + true, + }, + { + `{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_connections":"200","max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + true, + }, + { + `{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 
md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_connections":"200","max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + false, + }, + { + `{}`, + false, + }, + { + `invalidjson`, + false, + }, + } + refCase := testCases[0] + for _, testCase := range testCases { + if result := compareSpiloConfiguration(refCase.Config, testCase.Config); result != testCase.ExpectedResult { + t.Errorf("expected %v got %v", testCase.ExpectedResult, result) + } + } +} + +func TestCompareEnv(t *testing.T) { + testCases := []struct { + Envs []v1.EnvVar + ExpectedResult bool + }{ + { + Envs: []v1.EnvVar{ + { + Name: "VARIABLE1", + Value: "value1", + }, + { + Name: "VARIABLE2", + Value: "value2", + }, + { + Name: "VARIABLE3", + Value: "value3", + }, + { + Name: "SPILO_CONFIGURATION", + Value: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_connections":"100","max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + }, + }, + ExpectedResult: true, + }, + { + Envs: []v1.EnvVar{ + { + Name: "VARIABLE1", + Value: "value1", + }, + { + Name: "VARIABLE2", + Value: "value2", + }, + { + Name: "VARIABLE3", + Value: "value3", + }, + { + Name: "SPILO_CONFIGURATION", + Value: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + }, + }, + ExpectedResult: true, + }, + { + Envs: []v1.EnvVar{ + { + Name: "VARIABLE4", + Value: "value4", + }, + { + Name: "VARIABLE2", + Value: "value2", + }, + { + Name: "VARIABLE3", + Value: "value3", + }, + { + Name: "SPILO_CONFIGURATION", + Value: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + }, + }, + ExpectedResult: false, + }, + { + Envs: []v1.EnvVar{ + { + Name: "VARIABLE1", + Value: "value1", + }, + { + Name: "VARIABLE2", + Value: "value2", + }, + { + Name: "VARIABLE3", + Value: "value3", + }, + { + Name: "VARIABLE4", + Value: "value4", + }, + { + Name: "SPILO_CONFIGURATION", + Value: 
`{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_connections":"100","max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + }, + }, + ExpectedResult: false, + }, + { + Envs: []v1.EnvVar{ + { + Name: "VARIABLE1", + Value: "value1", + }, + { + Name: "VARIABLE2", + Value: "value2", + }, + { + Name: "SPILO_CONFIGURATION", + Value: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/12/bin","parameters":{"autovacuum_analyze_scale_factor":"0.1"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"users":{"test":{"password":"","options":["CREATEDB","NOLOGIN"]}},"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"postgresql":{"parameters":{"max_connections":"100","max_locks_per_transaction":"64","max_worker_processes":"4"}}}}}`, + }, + }, + ExpectedResult: false, + }, + } + refCase := testCases[0] + for _, testCase := range testCases { + if result := compareEnv(refCase.Envs, testCase.Envs); result != testCase.ExpectedResult { + t.Errorf("expected %v got %v", testCase.ExpectedResult, result) + } + } +} + func TestCrossNamespacedSecrets(t *testing.T) { testName := "test secrets in different namespace" clientSet := fake.NewSimpleClientset() diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 4dcab3b7e..c02a64df0 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -412,13 +412,33 @@ func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]stri // Those parameters must go to the bootstrap/dcs/postgresql/parameters section. // See http://patroni.readthedocs.io/en/latest/dynamic_configuration.html. 
func isBootstrapOnlyParameter(param string) bool { - return param == "max_connections" || - param == "max_locks_per_transaction" || - param == "max_worker_processes" || - param == "max_prepared_transactions" || - param == "wal_level" || - param == "wal_log_hints" || - param == "track_commit_timestamp" + params := map[string]bool{ + "archive_command": false, + "shared_buffers": false, + "logging_collector": false, + "log_destination": false, + "log_directory": false, + "log_filename": false, + "log_file_mode": false, + "log_rotation_age": false, + "log_truncate_on_rotation": false, + "ssl": false, + "ssl_ca_file": false, + "ssl_crl_file": false, + "ssl_cert_file": false, + "ssl_key_file": false, + "shared_preload_libraries": false, + "bg_mon.listen_address": false, + "bg_mon.history_buckets": false, + "pg_stat_statements.track_utility": false, + "extwlist.extensions": false, + "extwlist.custom_path": false, + } + result, ok := params[param] + if !ok { + result = true + } + return result } func generateVolumeMounts(volume acidv1.Volume) []v1.VolumeMount { diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 5acd4a159..d411dd004 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1207,6 +1207,12 @@ func TestSidecars(t *testing.T) { } spec = acidv1.PostgresSpec{ + PostgresqlParam: acidv1.PostgresqlParam{ + PgVersion: "12.1", + Parameters: map[string]string{ + "max_connections": "100", + }, + }, TeamID: "myapp", NumberOfInstances: 1, Resources: acidv1.Resources{ ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 79dceedd5..5f1b694ae 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -5,6 +5,7 @@ import ( "fmt" "regexp" "strings" + "time" acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando/postgres-operator/pkg/spec" @@ -260,6 +261,7 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error { } func (c *Cluster) syncStatefulSet() error { + var instancesRestartRequired bool podsToRecreate := make([]v1.Pod, 0) switchoverCandidates := make([]spec.NamespacedName, 0) @@ -379,10 +381,21 @@ func (c *Cluster) syncStatefulSet() error { // Apply special PostgreSQL parameters that can only be set via the Patroni API. // it is important to do it after the statefulset pods are there, but before the rolling update // since those parameters require PostgreSQL restart. 
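The rewritten isBootstrapOnlyParameter above inverts the old whitelist: any parameter not explicitly listed as pod-local is now assumed to belong under bootstrap/dcs and is applied via the Patroni API. A compact sketch of that default-to-true lookup (with an abbreviated list, not the full set from the patch):

package main

import "fmt"

// isBootstrapOnly mirrors the lookup-with-default in the patch: parameters
// missing from the local blacklist fall through to "bootstrap-only".
func isBootstrapOnly(param string) bool {
	local := map[string]bool{ // abbreviated blacklist of pod-local parameters
		"shared_buffers":           true,
		"logging_collector":        true,
		"shared_preload_libraries": true,
	}
	return !local[param] // unknown key -> false -> treated as bootstrap-only
}

func main() {
	fmt.Println(isBootstrapOnly("max_connections")) // true: goes to bootstrap/dcs
	fmt.Println(isBootstrapOnly("shared_buffers"))  // false: stays pod-local
}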
- if err := c.checkAndSetGlobalPostgreSQLConfiguration(); err != nil { + instancesRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration() + if err != nil { return fmt.Errorf("could not set cluster-wide PostgreSQL configuration options: %v", err) } + + if instancesRestartRequired { + c.logger.Debugln("restarting Postgres server within pods") + c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pods") + if err := c.restartInstances(); err != nil { + c.logger.Warningf("could not restart Postgres server within pods: %v", err) + } else { + c.logger.Infof("Postgres server successfully restarted on all pods") + c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Postgres server restart done - all instances have been restarted") + } + } // if we get here we also need to re-create the pods (either leftovers from the old // statefulset or those that got their configuration from the outdated statefulset) if len(podsToRecreate) > 0 { @@ -396,6 +409,57 @@ func (c *Cluster) syncStatefulSet() error { return nil } +func (c *Cluster) restartInstances() error { + c.setProcessName("starting to restart Postgres servers") + ls := c.labelsSet(false) + namespace := c.Namespace + + listOptions := metav1.ListOptions{ + LabelSelector: ls.String(), + } + + pods, err := c.KubeClient.Pods(namespace).List(context.TODO(), listOptions) + if err != nil { + return fmt.Errorf("could not get the list of pods: %v", err) + } + c.logger.Infof("there are %d pods in the cluster which require a Postgres server restart", len(pods.Items)) + + var ( + masterPod *v1.Pod + ) + for i, pod := range pods.Items { + role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel]) + + if role == Master { + masterPod = &pods.Items[i] + continue + } + + podName := util.NameFromMeta(pods.Items[i].ObjectMeta) + config, err := c.patroni.GetConfig(&pod) + if err != nil { + return fmt.Errorf("could not get config for pod %s: %v", podName, err) + } + ttl, ok := config["ttl"].(float64) + if !ok { + ttl = 30 + } + if err = c.patroni.Restart(&pod); err != nil { + return fmt.Errorf("could not restart Postgres server on pod %s: %v", podName, err) + } + time.Sleep(time.Duration(ttl) * time.Second) + } + + if masterPod != nil { + podName := util.NameFromMeta(masterPod.ObjectMeta) + if err = c.patroni.Restart(masterPod); err != nil { + return fmt.Errorf("could not restart Postgres server on master pod %s: %v", podName, err) + } + } + + return nil +} + // AnnotationsToPropagate get the annotations to update if required // based on the annotations in postgres CRD func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[string]string { @@ -430,10 +494,11 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]stri // checkAndSetGlobalPostgreSQLConfiguration checks whether cluster-wide API parameters // (like max_connections) has changed and if necessary sets it via the Patroni API -func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration() error { + var ( - err error - pods []v1.Pod + err error + pods []v1.Pod + restartRequired bool ) // we need to extract those options from the cluster manifest.
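The restartInstances flow above is easier to see stripped of the Kubernetes plumbing: replicas are restarted one by one with a ttl-long pause after each (so the member can settle before the next restart), and the master is restarted last to keep the leader available as long as possible. A simplified sketch, where restart is a hypothetical stand-in for c.patroni.Restart:

package main

import (
	"fmt"
	"time"
)

// restart is a hypothetical stand-in for the Patroni REST call the operator
// issues per pod (c.patroni.Restart).
func restart(member string) error {
	fmt.Println("restarting", member)
	return nil
}

// restartOrdered mirrors the ordering in restartInstances: replicas first,
// with a ttl-long pause after each one, master last.
func restartOrdered(members map[string]string, ttl time.Duration) error {
	var master string
	for name, role := range members {
		if role == "master" {
			master = name
			continue
		}
		if err := restart(name); err != nil {
			return fmt.Errorf("could not restart replica %s: %v", name, err)
		}
		time.Sleep(ttl)
	}
	if master != "" {
		return restart(master)
	}
	return nil
}

func main() {
	members := map[string]string{"pod-0": "master", "pod-1": "replica", "pod-2": "replica"}
	_ = restartOrdered(members, 10*time.Millisecond) // short ttl for the demo
}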
@@ -447,14 +512,14 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration() error { } if len(optionsToSet) == 0 { - return nil + return restartRequired, nil } if pods, err = c.listPods(); err != nil { - return err + return restartRequired, err } if len(pods) == 0 { - return fmt.Errorf("could not call Patroni API: cluster has no pods") + return restartRequired, fmt.Errorf("could not call Patroni API: cluster has no pods") } // try all pods until the first one that is successful, as it doesn't matter which pod // carries the request to change configuration through @@ -463,11 +528,12 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration() error { c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %v", podName, optionsToSet) if err = c.patroni.SetPostgresParameters(&pod, optionsToSet); err == nil { - return nil + restartRequired = true + return restartRequired, nil } c.logger.Warningf("could not patch postgres parameters with a pod %s: %v", podName, err) } - return fmt.Errorf("could not reach Patroni API to set Postgres options: failed on every pod (%d total)", + return restartRequired, fmt.Errorf("could not reach Patroni API to set Postgres options: failed on every pod (%d total)", len(pods)) } diff --git a/pkg/util/patroni/patroni.go b/pkg/util/patroni/patroni.go index b63912e55..1f2c95552 100644 --- a/pkg/util/patroni/patroni.go +++ b/pkg/util/patroni/patroni.go @@ -19,6 +19,8 @@ import ( const ( failoverPath = "/failover" configPath = "/config" + statusPath = "/patroni" + restartPath = "/restart" apiPort = 8008 timeout = 30 * time.Second ) @@ -28,6 +30,8 @@ type Interface interface { Switchover(master *v1.Pod, candidate string) error SetPostgresParameters(server *v1.Pod, options map[string]string) error GetMemberData(server *v1.Pod) (MemberData, error) + Restart(server *v1.Pod) error + GetConfig(server *v1.Pod) (map[string]interface{}, error) } // Patroni API client @@ -103,6 +107,32 @@ func (p *Patroni) httpPostOrPatch(method string, url string, body *bytes.Buffer) return nil } +func (p *Patroni) httpGet(url string) (string, error) { + request, err := http.NewRequest("GET", url, nil) + if err != nil { + return "", fmt.Errorf("could not create request: %v", err) + } + + p.logger.Debugf("making GET http request: %s", request.URL.String()) + + resp, err := p.httpClient.Do(request) + if err != nil { + return "", fmt.Errorf("could not make request: %v", err) + } + bodyBytes, err := ioutil.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("could not read response: %v", err) + } + if err := resp.Body.Close(); err != nil { + return "", fmt.Errorf("could not close response body: %v", err) + } + + if resp.StatusCode != http.StatusOK { + return string(bodyBytes), fmt.Errorf("patroni returned '%d'", resp.StatusCode) + } + return string(bodyBytes), nil +} + // Switchover by calling Patroni REST API func (p *Patroni) Switchover(master *v1.Pod, candidate string) error { buf := &bytes.Buffer{} @@ -149,6 +179,48 @@ type MemberData struct { Patroni MemberDataPatroni `json:"patroni"` } +func (p *Patroni) GetConfigOrStatus(server *v1.Pod, path string) (map[string]interface{}, error) { + result := make(map[string]interface{}) + apiURLString, err := apiURL(server) + if err != nil { + return result, err + } + body, err := p.httpGet(apiURLString + path) + if err != nil { + return result, err + } + if err = json.Unmarshal([]byte(body), &result); err != nil { + return result, err + } + + return result, nil +} + +func (p *Patroni) GetStatus(server *v1.Pod) (map[string]interface{}, error) { + return
p.GetConfigOrStatus(server, statusPath) +} + +func (p *Patroni) GetConfig(server *v1.Pod) (map[string]interface{}, error) { + return p.GetConfigOrStatus(server, configPath) +} + +// Restart restarts a Postgres instance via a Patroni POST API call. +func (p *Patroni) Restart(server *v1.Pod) error { + buf := &bytes.Buffer{} + err := json.NewEncoder(buf).Encode(map[string]interface{}{"restart_pending": true}) + if err != nil { + return fmt.Errorf("could not encode json: %v", err) + } + apiURLString, err := apiURL(server) + if err != nil { + return err + } + status, err := p.GetStatus(server) + if err != nil { + return err + } + pendingRestart, ok := status["pending_restart"].(bool) + if !ok || !pendingRestart { + return nil + } + return p.httpPostOrPatch(http.MethodPost, apiURLString+restartPath, buf) +} + // GetMemberData read member data from patroni API func (p *Patroni) GetMemberData(server *v1.Pod) (MemberData, error) { From 53fb540c3577c2d7c273e2b4fed23e6c874eadfe Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Thu, 17 Jun 2021 08:48:26 +0200 Subject: [PATCH 5/5] Add basic retry around switchover (#1510) * add basic retry around switchover Co-authored-by: Sergey Dudoladov --- pkg/cluster/pod.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index c8dfe2a13..229648dd1 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -304,8 +304,19 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error { } masterCandidateName := util.NameFromMeta(masterCandidatePod.ObjectMeta) - if err := c.Switchover(oldMaster, masterCandidateName); err != nil { - return fmt.Errorf("could not failover to pod %q: %v", masterCandidateName, err) + err = retryutil.Retry(1*time.Minute, 5*time.Minute, + func() (bool, error) { + err := c.Switchover(oldMaster, masterCandidateName) + if err != nil { + c.logger.Errorf("could not failover to pod %q: %v", masterCandidateName, err) + return false, nil + } + return true, nil + }, + ) + + if err != nil { + return fmt.Errorf("could not migrate master pod: %v", err) } return nil
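The retry added around Switchover in MigrateMasterPod follows the interval/timeout contract of the operator's retryutil.Retry: attempt the call, log and swallow the error, and let the loop decide when to give up. A generic sketch of that contract (the retry helper below is a simplified stand-in, not the operator's implementation):

package main

import (
	"errors"
	"fmt"
	"time"
)

// retry is a simplified stand-in for retryutil.Retry: call fn every interval
// until it reports done, it returns a hard error, or timeout elapses.
func retry(interval, timeout time.Duration, fn func() (bool, error)) error {
	deadline := time.Now().Add(timeout)
	for {
		done, err := fn()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		if time.Now().After(deadline) {
			return errors.New("timeout reached")
		}
		time.Sleep(interval)
	}
}

func main() {
	attempts := 0
	err := retry(10*time.Millisecond, time.Second, func() (bool, error) {
		attempts++
		if attempts < 3 { // pretend the first two switchover calls fail
			fmt.Println("switchover failed, retrying")
			return false, nil
		}
		return true, nil
	})
	fmt.Println("result:", err, "after", attempts, "attempts")
}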