From b619569e286b35c297e02e1bbbbf508797a618e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Inge=20Bols=C3=B8?= Date: Mon, 27 May 2019 15:31:52 +0200 Subject: [PATCH 1/3] Improve cluster sidecar documentation (#573) --- docs/reference/cluster_manifest.md | 33 +++++++++++++++++++++++ docs/user.md | 7 +++++ manifests/complete-postgres-manifest.yaml | 14 ++++++++++ 3 files changed, 54 insertions(+) diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 842b50cf9..269a4b31d 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -270,3 +270,36 @@ defined in the sidecar dictionary: a dictionary of environment variables. Use usual Kubernetes definition (https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/) for environment variables. Optional. + +* **resources** see below. Optional. + +#### Sidecar container resources + +Those parameters define [CPU and memory requests and +limits](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) +for the sidecar container. They are grouped under the `resources` key for each sidecar. +There are two subgroups, `requests` and `limits`. + +##### Requests + +CPU and memory requests for the sidecar container. + +* **cpu** + CPU requests for the sidecar container. Optional, overrides the + `default_cpu_request` operator configuration parameter. + +* **memory** + memory requests for the sidecar container. Optional, overrides the + `default_memory_request` operator configuration parameter. + +##### Limits + +CPU and memory limits for the sidecar container. + +* **cpu** + CPU limits for the sidecar container. Optional, overrides the + `default_cpu_limit` operator configuration parameter. + +* **memory** + memory limits for the sidecar container. Optional, overrides the + `default_memory_limit` operator configuration parameter. 
diff --git a/docs/user.md b/docs/user.md index 1942bab16..0f643f4c5 100644 --- a/docs/user.md +++ b/docs/user.md @@ -270,6 +270,13 @@ spec: sidecars: - name: "container-name" image: "company/image:tag" + resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 100m + memory: 100Mi env: - name: "ENV_VAR_NAME" value: "any-k8s-env-things" diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index b2ebe948e..421f5f613 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -71,3 +71,17 @@ spec: maintenanceWindows: - 01:00-06:00 #UTC - Sat:00:00-04:00 + #sidecars: + # - name: "telegraf-sidecar" + # image: "telegraf:latest" + # resources: + # limits: + # cpu: 500m + # memory: 500Mi + # requests: + # cpu: 100m + # memory: 100Mi + # env: + # - name: "USEFUL_VAR" + # value: "perhaps-true" + From 3ffc8ac5fa560a87ae7c4a3a0e37957c26926729 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Fri, 31 May 2019 15:51:58 +0200 Subject: [PATCH 2/3] Sample logical backup image. (#572) * Sample logical backup image. 
Based on the earlier work by Dmitry Dolgov @erthalion --- docker/logical-backup/Dockerfile | 33 ++++++++++ docker/logical-backup/dump.sh | 94 +++++++++++++++++++++++++++ docs/administrator.md | 2 +- docs/reference/operator_parameters.md | 4 +- 4 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 docker/logical-backup/Dockerfile create mode 100755 docker/logical-backup/dump.sh diff --git a/docker/logical-backup/Dockerfile b/docker/logical-backup/Dockerfile new file mode 100644 index 000000000..1da6f7386 --- /dev/null +++ b/docker/logical-backup/Dockerfile @@ -0,0 +1,33 @@ +FROM ubuntu:18.04 +LABEL maintainer="Team ACID @ Zalando " + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +RUN apt-get update \ + && apt-get install --no-install-recommends -y \ + apt-utils \ + ca-certificates \ + lsb-release \ + pigz \ + python3-pip \ + python3-setuptools \ + curl \ + jq \ + gnupg \ + && pip3 install --no-cache-dir awscli --upgrade \ + && echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list \ + && cat /etc/apt/sources.list.d/pgdg.list \ + && curl --silent https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \ + && apt-get update \ + && apt-get install --no-install-recommends -y \ + postgresql-client-11 \ + postgresql-client-10 \ + postgresql-client-9.6 \ + postgresql-client-9.5 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +COPY dump.sh ./ + +ENV PG_DIR=/usr/lib/postgresql/ + +ENTRYPOINT ["/dump.sh"] diff --git a/docker/logical-backup/dump.sh b/docker/logical-backup/dump.sh new file mode 100755 index 000000000..d74c7653b --- /dev/null +++ b/docker/logical-backup/dump.sh @@ -0,0 +1,94 @@ +#! 
/usr/bin/env bash + +# enable unofficial bash strict mode +set -o errexit +set -o nounset +set -o pipefail +IFS=$'\n\t' + +# make script trace visible via `kubectl logs` +set -o xtrace + +ALL_DB_SIZE_QUERY="select sum(pg_database_size(datname)::numeric) from pg_database;" +PG_BIN=$PG_DIR/$PG_VERSION/bin +DUMP_SIZE_COEFF=5 + +TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) +K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1 +CERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +function estimate_size { + "$PG_BIN"/psql -tqAc "${ALL_DB_SIZE_QUERY}" +} + +function dump { + # settings are taken from the environment + "$PG_BIN"/pg_dumpall +} + +function compress { + pigz +} + +function aws_upload { + declare -r EXPECTED_SIZE="$1" + + # mimic bucket setup from Spilo + # to keep logical backups at the same path as WAL + # NB: $LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX already contains the leading "/" when set by the Postgres operator + PATH_TO_BACKUP=s3://$LOGICAL_BACKUP_S3_BUCKET"/spilo/"$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX"/logical_backups/"$(date +%s).sql.gz + + if [ -z "$EXPECTED_SIZE" ]; then + aws s3 cp - "$PATH_TO_BACKUP" --debug --sse="AES256" + else + aws s3 cp - "$PATH_TO_BACKUP" --debug --expected-size "$EXPECTED_SIZE" --sse="AES256" + fi; +} + +function get_pods { + declare -r SELECTOR="$1" + + curl "${K8S_API_URL}/pods?$SELECTOR" \ + --cacert $CERT \ + -H "Authorization: Bearer ${TOKEN}" | jq .items[].status.podIP -r +} + +function get_current_pod { + curl "${K8S_API_URL}/pods?fieldSelector=metadata.name%3D${HOSTNAME}" \ + --cacert $CERT \ + -H "Authorization: Bearer ${TOKEN}" +} + +declare -a search_strategy=( + list_all_replica_pods_current_node + list_all_replica_pods_any_node + get_master_pod +) + +function list_all_replica_pods_current_node { + get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dreplica&fieldSelector=spec.nodeName%3D${CURRENT_NODENAME}" | head -n 1 +} + +function 
list_all_replica_pods_any_node { + get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dreplica" | head -n 1 +} + +function get_master_pod { + get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dmaster" | head -n 1 +} + +CURRENT_NODENAME=$(get_current_pod | jq .items[].spec.nodeName --raw-output) +export CURRENT_NODENAME + +for search in "${search_strategy[@]}"; do + + PGHOST=$(eval "$search") + export PGHOST + + if [ -n "$PGHOST" ]; then + break + fi + +done + +dump | compress | aws_upload $(($(estimate_size) / DUMP_SIZE_COEFF)) diff --git a/docs/administrator.md b/docs/administrator.md index f6f37aafb..ab8a51877 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -346,7 +346,7 @@ The operator logs reasons for a rolling update with the `info` level and a diff The operator can manage k8s cron jobs to run logical backups of Postgres clusters. The cron job periodically spawns a batch job that runs a single pod. The backup script within this pod's container can connect to a DB for a logical backup. The operator updates cron jobs during Sync if the job schedule changes; the job name acts as the job identifier. These jobs are to be enabled for each indvidual Postgres cluster by setting `enableLogicalBackup: true` in its manifest. Notes: -1. The provided `registry.opensource.zalan.do/acid/logical-backup` image implements the backup via `pg_dumpall` and upload of (compressed) results to an S3 bucket; `pg_dumpall` requires a `superuser` access to a DB and runs on the replica when possible. +1. The [example image](../docker/logical-backup/Dockerfile) implements the backup via `pg_dumpall` and upload of compressed and encrypted results to an S3 bucket; the default image ``registry.opensource.zalan.do/acid/logical-backup`` is the same image built with the Zalando-internal CI pipeline. `pg_dumpall` requires a `superuser` access to a DB and runs on the replica when possible. 2. 
Due to the [limitation of Kubernetes cron jobs](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) it is highly advisable to set up additional monitoring for this feature; such monitoring is outside of the scope of operator responsibilities. diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 3e06cf31d..592fef5b8 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -485,9 +485,9 @@ scalyr sidecar. In the CRD-based configuration they are grouped under the * **logical_backup_schedule** Backup schedule in the cron format. Please take [the reference schedule format](https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#schedule) into account. Default: "30 00 \* \* \*" - + * **logical_backup_docker_image** - Docker image for the pods of the cron job. Must implement backup logic and correctly handle pod and job restarts. The default image runs `pg_dumpall` (on a replica if possible) and uploads compressed results to an S3 bucket under the key `/spilo/pg_cluster_name/cluster_k8s_uuid/logical_backups` Default: "registry.opensource.zalan.do/acid/logical-backup" + An image for pods of the logical backup job. The [example image](../../docker/logical-backup/Dockerfile) runs `pg_dumpall` on a replica if possible and uploads compressed results to an S3 bucket under the key `/spilo/pg_cluster_name/cluster_k8s_uuid/logical_backups`. The default image is the same image built with the Zalando-internal CI pipeline. Default: "registry.opensource.zalan.do/acid/logical-backup" * **logical_backup_s3_bucket** S3 bucket to store backup results. The bucket has to be present and accessible by Postgres pods. Default: empty. 
From ebda39368ec6336c930f7fb93d7b428568c5a27d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Inge=20Bols=C3=B8?= Date: Fri, 31 May 2019 16:32:00 +0200 Subject: [PATCH 3/3] database.go: remove hardcoded .svc.cluster.local dns suffix (#561) * database.go: substitute hardcoded .svc.cluster.local dns suffix with config parameter Use the pod's configured dns search path, for clusters where .svc.cluster.local is not correct. --- charts/postgres-operator/values.yaml | 1 + docs/administrator.md | 6 ++++++ docs/reference/operator_parameters.md | 5 +++++ manifests/configmap.yaml | 1 + manifests/postgresql-operator-default-configuration.yaml | 1 + pkg/apis/acid.zalan.do/v1/operator_configuration_type.go | 1 + pkg/cluster/database.go | 2 +- pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + 9 files changed, 18 insertions(+), 1 deletion(-) diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index c5349b55f..ee5d8a5dc 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -23,6 +23,7 @@ config: workers: "4" docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p70 secret_name_template: '{username}.{cluster}.credentials' + cluster_domain: cluster.local super_username: postgres enable_teams_api: "false" spilo_privileged: "false" diff --git a/docs/administrator.md b/docs/administrator.md index ab8a51877..cecde541c 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -103,6 +103,12 @@ In this definition, the operator overwrites the account's name to match `pod_service_account_name` and the `default` namespace to match the target namespace. The operator performs **no** further syncing of this account. +## Non-default cluster domain + +If your cluster uses a DNS domain other than the default `cluster.local`, set +the `cluster_domain` parameter in the operator ConfigMap. The operator uses it +to connect to the clusters after creation. 
+ ## Role-based access control for the operator The `manifests/operator-service-account-rbac.yaml` defines cluster roles and diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 592fef5b8..6807d5520 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -159,6 +159,11 @@ configuration they are grouped under the `kubernetes` key. allowed. The default is `{username}.{cluster}.credentials.{tprkind}.{tprgroup}`. +* **cluster_domain** + defines the default DNS domain for the Kubernetes cluster the operator is + running in. The default is `cluster.local`. Used by the operator to connect + to the Postgres clusters after creation. + * **oauth_token_secret_name** a name of the secret containing the `OAuth2` token to pass to the teams API. The default is `postgresql-operator`. diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index bd7d11c6a..90ea021a0 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -13,6 +13,7 @@ data: docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p70 pod_service_account_name: "zalando-postgres-operator" secret_name_template: '{username}.{cluster}.credentials' + cluster_domain: cluster.local super_username: postgres enable_teams_api: "false" spilo_privileged: "false" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index fa27c6956..52f38937c 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -21,6 +21,7 @@ configuration: pod_terminate_grace_period: 5m pdb_name_format: "postgres-{cluster}-pdb" secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}" + cluster_domain: cluster.local oauth_token_secret_name: postgresql-operator pod_role_label: spilo-role spilo_privileged: false diff --git 
a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index c6e87d8ea..8f34a9786 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -49,6 +49,7 @@ type KubernetesMetaConfiguration struct { WatchedNamespace string `json:"watched_namespace,omitempty"` PDBNameFormat config.StringTemplate `json:"pdb_name_format,omitempty"` SecretNameTemplate config.StringTemplate `json:"secret_name_template,omitempty"` + ClusterDomain string `json:"cluster_domain"` OAuthTokenSecretName spec.NamespacedName `json:"oauth_token_secret_name,omitempty"` InfrastructureRolesSecretName spec.NamespacedName `json:"infrastructure_roles_secret_name,omitempty"` PodRoleLabel string `json:"pod_role_label,omitempty"` diff --git a/pkg/cluster/database.go b/pkg/cluster/database.go index a4633537d..07ea011a6 100644 --- a/pkg/cluster/database.go +++ b/pkg/cluster/database.go @@ -34,7 +34,7 @@ func (c *Cluster) pgConnectionString() string { password := c.systemUsers[constants.SuperuserKeyName].Password return fmt.Sprintf("host='%s' dbname=postgres sslmode=require user='%s' password='%s' connect_timeout='%d'", - fmt.Sprintf("%s.%s.svc.cluster.local", c.Name, c.Namespace), + fmt.Sprintf("%s.%s.svc.%s", c.Name, c.Namespace, c.OpConfig.ClusterDomain), c.systemUsers[constants.SuperuserKeyName].Name, strings.Replace(password, "$", "\\$", -1), constants.PostgresConnectTimeout/time.Second) diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 245754e1c..80c9b94cb 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -42,6 +42,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.PodEnvironmentConfigMap = fromCRD.Kubernetes.PodEnvironmentConfigMap result.PodTerminateGracePeriod = time.Duration(fromCRD.Kubernetes.PodTerminateGracePeriod) result.SpiloPrivileged = 
fromCRD.Kubernetes.SpiloPrivileged + result.ClusterDomain = fromCRD.Kubernetes.ClusterDomain result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat result.SecretNameTemplate = fromCRD.Kubernetes.SecretNameTemplate diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 0cd662a6e..a95369e2d 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -26,6 +26,7 @@ type Resources struct { PodDeletionWaitTimeout time.Duration `name:"pod_deletion_wait_timeout" default:"10m"` PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"` PodPriorityClassName string `name:"pod_priority_class_name"` + ClusterDomain string `name:"cluster_domain" default:"cluster.local"` SpiloPrivileged bool `name:"spilo_privileged" default:"false"` ClusterLabels map[string]string `name:"cluster_labels" default:"application:spilo"` InheritedLabels []string `name:"inherited_labels" default:""`