update docs about helm and cluster configuration

This commit is contained in:
Felix Kunde 2019-06-03 12:09:24 +02:00
commit 19fa91eaac
16 changed files with 343 additions and 129 deletions

View File

@ -25,6 +25,7 @@ configUsers:
super_username: postgres
configKubernetes:
cluster_domain: cluster.local
# inherited_labels: ""
# infrastructure_roles_secret_name: postgresql-infrastructure-roles
# node_readiness_label: ""

View File

@ -0,0 +1,33 @@
# Image for the Postgres operator's logical-backup cron job:
# bundles psql/pg_dumpall clients, pigz for compression and the AWS CLI
# for uploading dumps to S3. Entry point is dump.sh.
FROM ubuntu:18.04
LABEL maintainer="Team ACID @ Zalando <team-acid@zalando.de>"

# Run every RUN step under bash with pipefail so a failure anywhere in a
# pipeline (e.g. curl | apt-key) fails the build instead of being masked.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Single RUN layer: install tooling, add the PGDG apt repo for multiple
# postgresql-client major versions, then clean apt caches to keep the
# image small. pigz = parallel gzip; jq/curl are used by dump.sh to query
# the Kubernetes API for a suitable pod to dump from.
RUN apt-get update \
&& apt-get install --no-install-recommends -y \
apt-utils \
ca-certificates \
lsb-release \
pigz \
python3-pip \
python3-setuptools \
curl \
jq \
gnupg \
&& pip3 install --no-cache-dir awscli --upgrade \
&& echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
&& cat /etc/apt/sources.list.d/pgdg.list \
&& curl --silent https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
postgresql-client-11 \
postgresql-client-10 \
postgresql-client-9.6 \
postgresql-client-9.5 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Backup script lands at /dump.sh (default WORKDIR is /).
COPY dump.sh ./

# dump.sh combines PG_DIR with $PG_VERSION (set by the operator) to pick
# the matching client binaries, e.g. /usr/lib/postgresql/11/bin.
ENV PG_DIR=/usr/lib/postgresql/

ENTRYPOINT ["/dump.sh"]

94
docker/logical-backup/dump.sh Executable file
View File

@ -0,0 +1,94 @@
#! /usr/bin/env bash

# Logical backup entry point run by the operator's k8s cron job.
# Picks a suitable pod of the cluster (preferring a replica on the current
# node), runs pg_dumpall against it, compresses the dump with pigz and
# uploads it to S3 under the same bucket layout Spilo uses for WAL.
#
# Required environment (set by the operator / pod spec):
#   PG_DIR, PG_VERSION        - locate the matching client binaries
#   SCOPE                     - cluster name, used in label selectors and S3 path
#   LOGICAL_BACKUP_S3_BUCKET, LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX
#   plus the usual libpq connection settings (PGUSER, PGPASSWORD, ...)

# enable unofficial bash strict mode
set -o errexit
set -o nounset
set -o pipefail
IFS=$'\n\t'

# make script trace visible via `kubectl logs`
set -o xtrace

ALL_DB_SIZE_QUERY="select sum(pg_database_size(datname)::numeric) from pg_database;"
PG_BIN=$PG_DIR/$PG_VERSION/bin
# dumps compress well; divide the raw size estimate by this factor
DUMP_SIZE_COEFF=5

# in-cluster Kubernetes API credentials from the mounted service account
TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1
CERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt

# Print the summed size of all databases in bytes (may be empty on failure).
function estimate_size {
    "$PG_BIN"/psql -tqAc "${ALL_DB_SIZE_QUERY}"
}

# Dump every database; connection settings are taken from the environment.
function dump {
    "$PG_BIN"/pg_dumpall
}

function compress {
    pigz
}

# Stream stdin to S3. $1 is the expected upload size in bytes; pass an
# empty string when no estimate is available (aws cli then uses defaults
# for multipart sizing).
function aws_upload {
    declare -r EXPECTED_SIZE="$1"

    # mimic bucket setup from Spilo
    # to keep logical backups at the same path as WAL
    # NB: $LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX already contains the leading "/" when set by the Postgres operator
    PATH_TO_BACKUP=s3://$LOGICAL_BACKUP_S3_BUCKET"/spilo/"$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX"/logical_backups/"$(date +%s).sql.gz

    if [ -z "$EXPECTED_SIZE" ]; then
        aws s3 cp - "$PATH_TO_BACKUP" --debug --sse="AES256"
    else
        aws s3 cp - "$PATH_TO_BACKUP" --debug --expected-size "$EXPECTED_SIZE" --sse="AES256"
    fi
}

# List pod IPs matching the given query-string selector, one per line.
function get_pods {
    declare -r SELECTOR="$1"

    # quote $CERT so the path is never word-split or globbed (SC2086)
    curl "${K8S_API_URL}/pods?$SELECTOR" \
        --cacert "$CERT" \
        -H "Authorization: Bearer ${TOKEN}" | jq .items[].status.podIP -r
}

# Fetch this pod's own API object (to learn which node it runs on).
function get_current_pod {
    curl "${K8S_API_URL}/pods?fieldSelector=metadata.name%3D${HOSTNAME}" \
        --cacert "$CERT" \
        -H "Authorization: Bearer ${TOKEN}"
}

# Preference order for the pod to dump from: replica on this node,
# any replica, and only then the master.
declare -a search_strategy=(
    list_all_replica_pods_current_node
    list_all_replica_pods_any_node
    get_master_pod
)

function list_all_replica_pods_current_node {
    get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dreplica&fieldSelector=spec.nodeName%3D${CURRENT_NODENAME}" | head -n 1
}

function list_all_replica_pods_any_node {
    get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dreplica" | head -n 1
}

function get_master_pod {
    get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dmaster" | head -n 1
}

CURRENT_NODENAME=$(get_current_pod | jq .items[].spec.nodeName --raw-output)
export CURRENT_NODENAME

for search in "${search_strategy[@]}"; do
    PGHOST=$(eval "$search")
    export PGHOST

    if [ -n "$PGHOST" ]; then
        break
    fi
done

# Compute the size estimate first: substituting an empty estimate_size
# result directly into $(( ... / DUMP_SIZE_COEFF )) would be a bash
# arithmetic syntax error. With no estimate, fall back to aws_upload's
# explicit empty-size branch.
ESTIMATED_SIZE=$(estimate_size)
if [ -n "$ESTIMATED_SIZE" ]; then
    dump | compress | aws_upload $((ESTIMATED_SIZE / DUMP_SIZE_COEFF))
else
    dump | compress | aws_upload ""
fi

View File

@ -1,47 +1,3 @@
## Create ConfigMap
A ConfigMap is used to store the configuration of the operator.
```bash
$ kubectl create -f manifests/configmap.yaml
```
## Deploying the operator
First you need to install the service account definition in your Minikube cluster.
```bash
$ kubectl create -f manifests/operator-service-account-rbac.yaml
```
Next deploy the postgres-operator from the docker image Zalando is using:
```bash
$ kubectl create -f manifests/postgres-operator.yaml
```
If you prefer to build the image yourself follow up down below.
### - Helm chart
You can install postgres-operator also with a [Helm](https://helm.sh/) chart.
This requires installing the Helm CLI first and then initializing it in the
cluster.
```bash
$ helm init
$ helm install --name my-release ./charts/postgres-operator
```
## Check if CustomResourceDefinition has been registered
```bash
$ kubectl get crd
NAME KIND
postgresqls.acid.zalan.do CustomResourceDefinition.v1beta1.apiextensions.k8s.io
```
# How to configure PostgreSQL operator
## Select the namespace to deploy to
@ -103,6 +59,12 @@ In this definition, the operator overwrites the account's name to match
`pod_service_account_name` and the `default` namespace to match the target
namespace. The operator performs **no** further syncing of this account.
## Non-default cluster domain
If your cluster uses a different dns domain than `cluster.local`, this needs
to be set in the operator ConfigMap. This is used by the operator to connect
to the clusters after creation.
## Role-based access control for the operator
The `manifests/operator-service-account-rbac.yaml` defines cluster roles and
@ -346,12 +308,12 @@ The operator logs reasons for a rolling update with the `info` level and a diff
The operator can manage k8s cron jobs to run logical backups of Postgres clusters. The cron job periodically spawns a batch job that runs a single pod. The backup script within this pod's container can connect to a DB for a logical backup. The operator updates cron jobs during Sync if the job schedule changes; the job name acts as the job identifier. These jobs are to be enabled for each individual Postgres cluster by setting `enableLogicalBackup: true` in its manifest. Notes:
1. The provided `registry.opensource.zalan.do/acid/logical-backup` image implements the backup via `pg_dumpall` and upload of (compressed) results to an S3 bucket; `pg_dumpall` requires a `superuser` access to a DB and runs on the replica when possible.
1. The [example image](../docker/logical-backup/Dockerfile) implements the backup via `pg_dumpall` and upload of compressed and encrypted results to an S3 bucket; the default image ``registry.opensource.zalan.do/acid/logical-backup`` is the same image built with the Zalando-internal CI pipeline. `pg_dumpall` requires a `superuser` access to a DB and runs on the replica when possible.
2. Due to the [limitation of Kubernetes cron jobs](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) it is highly advisable to set up additional monitoring for this feature; such monitoring is outside of the scope of operator responsibilities.
2. Due to the [limitation of Kubernetes cron jobs](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) it is highly advisable to set up additional monitoring for this feature; such monitoring is outside of the scope of operator responsibilities.
3. The operator does not remove old backups.
4. You may use your own image by overwriting the relevant field in the operator configuration. Any such image must ensure the logical backup is able to finish [in presence of pod restarts](https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#handling-pod-and-container-failures) and [simultaneous invocations](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) of the backup cron job.
5. For that feature to work, your RBAC policy must enable operations on the `cronjobs` resource from the `batch` API group for the operator service account. See [example RBAC](../manifests/operator-service-account-rbac.yaml)
5. For that feature to work, your RBAC policy must enable operations on the `cronjobs` resource from the `batch` API group for the operator service account. See [example RBAC](../manifests/operator-service-account-rbac.yaml)

View File

@ -20,18 +20,17 @@ that your setup is working.
Note: if you use multiple Kubernetes clusters, you can switch to Minikube with
`kubectl config use-context minikube`
## Create ConfigMap
## Deploying the operator
ConfigMap is used to store the configuration of the operator
### Kubernetes manifest
A ConfigMap is used to store the configuration of the operator. Alternatively,
a CRD-based configuration can be used, as described [here](reference/operator_parameters).
```bash
$ kubectl --context minikube create -f manifests/configmap.yaml
```
## Deploying the operator
### - Kubernetes manifest
First you need to install the service account definition in your Minikube cluster.
```bash
@ -46,15 +45,23 @@ Next deploy the postgres-operator from the docker image Zalando is using:
If you prefer to build the image yourself follow up down below.
### - Helm chart
### Helm chart
You can install postgres-operator also with a [Helm](https://helm.sh/) chart.
This requires installing the Helm CLI first and then initializing it in the
cluster.
Alternatively, the operator can be installed by using the provided [Helm](https://helm.sh/)
chart which saves you the manual steps. Therefore, you would need to install
the helm CLI on your machine. After initializing helm (and its server
component Tiller) in your local cluster you can install the operator chart.
You can define a release name that is prepended to the operator resource's
names.
Use `--name zalando` to match with the default service account name as older
operator versions do not support custom names for service accounts. When relying
solely on the CRD-based configuration edit the `serviceAccount` section in the
[values yaml file](../charts/values.yaml) by setting the name to `"operator"`.
```bash
$ helm init
$ helm install --name my-release ./charts/postgres-operator
$ helm install --name zalando ./charts/postgres-operator
```
## Check if CustomResourceDefinition has been registered
@ -323,7 +330,7 @@ be updated. As explained [here](reference/operator_parameters.md), it's possible
to configure the operator either with a ConfigMap or CRD, but currently we aim
to synchronize parameters everywhere.
When choosing a parameter name for a new option in a PG manifest, keep in mind
When choosing a parameter name for a new option in a PG manifest, keep in mind
the naming conventions there. The `snake_case` variables come from the Patroni/Postgres world, while the `camelCase` from the k8s world.
Note: If one option is defined in the operator configuration and in the cluster

View File

@ -20,6 +20,12 @@ cd postgres-operator
minikube start
```
If you want to configure the Postgres Operator it must happen before deploying a
Postgres cluster. This can happen in two ways: Via a ConfigMap or a
`OperatorConfiguration` object, which adheres to a CustomResourceDefinition (CRD).
More details on configuration can be found [here](reference/operator_parameters.md).
## Manual deployment setup
The Postgres Operator can be installed simply by applying yaml manifests.
@ -37,8 +43,12 @@ chart which saves you the manual steps. Therefore, you would need to install
the helm CLI on your machine. After initializing helm (and its server
component Tiller) in your local cluster you can install the operator chart.
You can define a release name that is prepended to the operator resource's
names. Use `--name zalando` to match with the default service account name
as older operator versions do not support custom names for service accounts.
names.
Use `--name zalando` to match with the default service account name as older
operator versions do not support custom names for service accounts. When relying
solely on the CRD-based configuration edit the `serviceAccount` section in the
[values yaml file](../charts/values.yaml) by setting the name to `"operator"`.
```bash
# 1) initialize helm

View File

@ -1,4 +1,3 @@
Individual postgres clusters are described by the Kubernetes *cluster manifest*
that has the structure defined by the `postgres CRD` (custom resource
definition). The following section describes the structure of the manifest and
@ -14,7 +13,9 @@ measurements. Please, refer to the [Kubernetes
documentation](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/)
for the possible values of those.
:exclamation: If both operator configmap/CRD and a Postgres cluster manifest define the same parameter, the value from the Postgres cluster manifest is applied.
:exclamation: If both operator configmap/CRD and a Postgres cluster manifest
define the same parameter, the value from the Postgres cluster manifest is
applied.
## Manifest structure
@ -105,7 +106,8 @@ These parameters are grouped directly under the `spec` key in the manifest.
class](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass)
that should be assigned to the cluster pods. When not specified, the value
is taken from the `pod_priority_class_name` operator parameter, if not set
then the default priority class is taken. The priority class itself must be defined in advance.
then the default priority class is taken. The priority class itself must be
defined in advance.
* **enableShmVolume**
Start a database pod without limitations on shm memory. By default docker
@ -120,10 +122,12 @@ These parameters are grouped directly under the `spec` key in the manifest.
configured (so you can override the operator configuration).
* **enableLogicalBackup**
Determines if the logical backup of this cluster should be taken and uploaded to S3. Default: false.
Determines if the logical backup of this cluster should be taken and uploaded
to S3. Default: false.
* **logicalBackupSchedule**
Schedule for the logical backup k8s cron job. Please take [the reference schedule format](https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#schedule) into account. Default: "30 00 \* \* \*"
Schedule for the logical backup k8s cron job. Please take [the reference schedule format](https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#schedule)
into account. Default: "30 00 \* \* \*"
## Postgres parameters
@ -181,7 +185,12 @@ explanation of `ttl` and `loop_wait` parameters.
set by the Spilo docker image. Optional.
* **slots**
permanent replication slots that Patroni preserves after failover by re-creating them on the new primary immediately after doing a promote. Slots could be reconfigured with the help of `patronictl edit-config`. It is the responsibility of a user to avoid clashes in names between replication slots automatically created by Patroni for cluster members and permanent replication slots. Optional.
permanent replication slots that Patroni preserves after failover by
re-creating them on the new primary immediately after doing a promote. Slots
could be reconfigured with the help of `patronictl edit-config`. It is the
responsibility of a user to avoid clashes in names between replication slots
automatically created by Patroni for cluster members and permanent replication
slots. Optional.
## Postgres container resources
@ -270,3 +279,36 @@ defined in the sidecar dictionary:
a dictionary of environment variables. Use usual Kubernetes definition
(https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/)
for environment variables. Optional.
* **resources** see below. Optional.
#### Sidecar container resources
Those parameters define [CPU and memory requests and
limits](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/)
for the sidecar container. They are grouped under the `resources` key for each sidecar.
There are two subgroups, `requests` and `limits`.
##### Requests
CPU and memory requests for the sidecar container.
* **cpu**
CPU requests for the sidecar container. Optional, overrides the
`default_cpu_requests` operator configuration parameter. Optional.
* **memory**
memory requests for the sidecar container. Optional, overrides the
`default_memory_request` operator configuration parameter. Optional.
##### Limits
CPU and memory limits for the sidecar container.
* **cpu**
CPU limits for the sidecar container. Optional, overrides the
`default_cpu_limits` operator configuration parameter. Optional.
* **memory**
memory limits for the sidecar container. Optional, overrides the
`default_memory_limits` operator configuration parameter. Optional.

View File

@ -11,17 +11,18 @@ configuration.
[example](https://github.com/zalando/postgres-operator/blob/master/manifests/configmap.yaml)
* CRD-based configuration. The configuration is stored in a custom YAML
manifest. The manifest is an instance of the custom resource definition (CRD) called
`OperatorConfiguration`. The operator registers this CRD
during the start and uses it for configuration if the [operator deployment manifest ](https://github.com/zalando/postgres-operator/blob/master/manifests/postgres-operator.yaml#L21) sets the `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` env variable to a non-empty value. The variable should point to the
`postgresql-operator-configuration` object in the operator's namespace.
manifest. The manifest is an instance of the custom resource definition (CRD)
called `OperatorConfiguration`. The operator registers this CRD during the
start and uses it for configuration if the [operator deployment manifest ](https://github.com/zalando/postgres-operator/blob/master/manifests/postgres-operator.yaml#L21)
sets the `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` env variable to a non-empty
value. The variable should point to the `postgresql-operator-configuration`
object in the operator's namespace.
The CRD-based configuration is a regular YAML
document; non-scalar keys are simply represented in the usual YAML way.
There are no default values built-in in the operator, each parameter that is
not supplied in the configuration receives an empty value. In order to
create your own configuration just copy the [default
one](https://github.com/zalando/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml)
The CRD-based configuration is a regular YAML document; non-scalar keys are
simply represented in the usual YAML way. There are no default values built-in
in the operator, each parameter that is not supplied in the configuration
receives an empty value. In order to create your own configuration just copy
the [default one](https://github.com/zalando/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml)
and change it.
To test the CRD-based configuration locally, use the following
@ -31,19 +32,23 @@ configuration.
kubectl create -f manifests/postgresql-operator-default-configuration.yaml
kubectl get operatorconfigurations postgresql-operator-default-configuration -o yaml
```
Note that the operator first registers the definition of the CRD `OperatorConfiguration` and then waits for an instance of the CRD to be created. In between these two event the operator pod may be failing since it cannot fetch the not-yet-existing `OperatorConfiguration` instance.
Note that the operator first registers the CRD of the `OperatorConfiguration`
and then waits for an instance to be created. In between these two events the
operator pod may be failing since it cannot fetch the not-yet-existing
`OperatorConfiguration` instance.
The CRD-based configuration is more powerful than the one based on
ConfigMaps and should be used unless there is a compatibility requirement to
use an already existing configuration. Even in that case, it should be rather
straightforward to convert the configmap based configuration into the CRD-based
one and restart the operator. The ConfigMaps-based configuration will be
deprecated and subsequently removed in future releases.
The CRD-based configuration is more powerful than the one based on ConfigMaps
and should be used unless there is a compatibility requirement to use an already
existing configuration. Even in that case, it should be rather straightforward
to convert the configmap based configuration into the CRD-based one and restart
the operator. The ConfigMaps-based configuration will be deprecated and
subsequently removed in future releases.
Note that for the CRD-based configuration groups of configuration options below correspond
to the non-leaf keys in the target YAML (i.e. for the Kubernetes resources the
key is `kubernetes`). The key is mentioned alongside the group description. The
ConfigMap-based configuration is flat and does not allow non-leaf keys.
Note that for the CRD-based configuration groups of configuration options below
correspond to the non-leaf keys in the target YAML (i.e. for the Kubernetes
resources the key is `kubernetes`). The key is mentioned alongside the group
description. The ConfigMap-based configuration is flat and does not allow
non-leaf keys.
Since in the CRD-based case the operator needs to create a CRD first, which is
controlled by the `resource_check_interval` and `resource_check_timeout`
@ -51,7 +56,11 @@ parameters, those parameters have no effect and are replaced by the
`CRD_READY_WAIT_INTERVAL` and `CRD_READY_WAIT_TIMEOUT` environment variables.
They will be deprecated and removed in the future.
For the configmap operator configuration, the [default parameter values](https://github.com/zalando-incubator/postgres-operator/blob/master/pkg/util/config/config.go#L14) mentioned here are likely to be overwritten in your local operator installation via your local version of the operator configmap. In the case you use the operator CRD, all the CRD defaults are provided in the [operator's default configuration manifest](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml)
For the configmap operator configuration, the [default parameter values](https://github.com/zalando-incubator/postgres-operator/blob/master/pkg/util/config/config.go#L14)
mentioned here are likely to be overwritten in your local operator installation
via your local version of the operator configmap. In the case you use the
operator CRD, all the CRD defaults are provided in the
[operator's default configuration manifest](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml)
Variable names are underscore-separated words.
@ -87,8 +96,8 @@ Those are top-level keys, containing both leaf keys and groups.
* **min_instances**
operator will run at least the number of instances for any given postgres
cluster equal to the value of this parameter. When `-1` is specified, no limits
are applied. The default is `-1`.
cluster equal to the value of this parameter. When `-1` is specified, no
limits are applied. The default is `-1`.
* **resync_period**
period between consecutive sync requests. The default is `30m`.
@ -124,7 +133,8 @@ configuration they are grouped under the `kubernetes` key.
* **pod_service_account_definition**
The operator tries to create the pod Service Account in the namespace that
doesn't define such an account using the YAML definition provided by this
option. If not defined, a simple definition that contains only the name will be used. The default is empty.
option. If not defined, a simple definition that contains only the name will
be used. The default is empty.
* **pod_service_account_role_binding_definition**
This definition must bind pod service account to a role with permission
@ -142,8 +152,8 @@ configuration they are grouped under the `kubernetes` key.
* **watched_namespace**
The operator watches for postgres objects in the given namespace. If not
specified, the value is taken from the operator namespace. A special `*`
value makes it watch all namespaces. The default is empty (watch the operator pod
namespace).
value makes it watch all namespaces. The default is empty (watch the operator
pod namespace).
* **pdb_name_format**
defines the template for PDB (Pod Disruption Budget) names created by the
@ -159,6 +169,11 @@ configuration they are grouped under the `kubernetes` key.
allowed. The default is
`{username}.{cluster}.credentials.{tprkind}.{tprgroup}`.
* **cluster_domain**
defines the default dns domain for the kubernetes cluster the operator is
running in. The default is `cluster.local`. Used by the operator to connect
to the postgres clusters after creation.
* **oauth_token_secret_name**
a name of the secret containing the `OAuth2` token to pass to the teams API.
The default is `postgresql-operator`.
@ -176,8 +191,8 @@ configuration they are grouped under the `kubernetes` key.
* **inherited_labels**
list of labels that can be inherited from the cluster manifest, and added to
each child objects (`StatefulSet`, `Pod`, `Service` and `Endpoints`) created by
the opertor.
each child objects (`StatefulSet`, `Pod`, `Service` and `Endpoints`) created
by the operator.
Typical use case is to dynamically pass labels that are specific to a given
postgres cluster, in order to implement `NetworkPolicy`.
The default is empty.
@ -198,8 +213,7 @@ configuration they are grouped under the `kubernetes` key.
* **toleration**
a dictionary that should contain `key`, `operator`, `value` and
`effect` keys. In that case, the operator defines a pod toleration
according to the values of those keys. See [kubernetes
documentation](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)
according to the values of those keys. See [kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/)
for details on taints and tolerations. The default is empty.
* **pod_environment_configmap**
@ -210,31 +224,35 @@ configuration they are grouped under the `kubernetes` key.
operator. The default is empty.
* **pod_priority_class_name**
a name of the [priority
class](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass)
that should be assigned to the Postgres pods. The priority class itself must be defined in advance.
Default is empty (use the default priority class).
a name of the [priority class](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass)
that should be assigned to the Postgres pods. The priority class itself must
be defined in advance. Default is empty (use the default priority class).
* **spilo_privileged**
whether the Spilo container should run in privileged mode. Privileged mode is used for AWS volume resizing and not required if you don't need that capability. The default is `false`.
whether the Spilo container should run in privileged mode. Privileged mode is
used for AWS volume resizing and not required if you don't need that
capability. The default is `false`.
* **master_pod_move_timeout**
The period of time to wait for the success of migration of master pods from an unschedulable node.
The migration includes Patroni switchovers to respective replicas on healthy nodes. The situation where master pods still exist on the old node after this timeout expires has to be fixed manually. The default is 20 minutes.
The period of time to wait for the success of migration of master pods from
an unschedulable node. The migration includes Patroni switchovers to
respective replicas on healthy nodes. The situation where master pods still
exist on the old node after this timeout expires has to be fixed manually.
The default is 20 minutes.
* **enable_pod_antiaffinity**
toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid multiple pods
of the same Postgres cluster in the same topology , e.g. node. The default is `false`.
toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/)
on the Postgres pods, to avoid multiple pods of the same Postgres cluster in
the same topology, e.g. node. The default is `false`.
* **pod_antiaffinity_topology_key**
override
[topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels)
override [topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels)
for pod anti affinity. The default is `kubernetes.io/hostname`.
* **pod_management_policy**
specify the
[pod management policy](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies)
of stateful sets of PG clusters. The default is `ordered_ready`, the second possible value is `parallel`.
specify the [pod management policy](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies)
of stateful sets of PG clusters. The default is `ordered_ready`, the second
possible value is `parallel`.
## Kubernetes resource requests
@ -259,7 +277,14 @@ CRD-based configuration.
settings. The default is `1Gi`.
* **set_memory_request_to_limit**
Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers created by the operator (Postgres, Scalyr sidecar, and other sidecars); to set resources for the operator's own container, change the [operator deployment manually](https://github.com/zalando/postgres-operator/blob/master/manifests/postgres-operator.yaml#L13). The default is `false`.
Set `memory_request` to `memory_limit` for all Postgres clusters (the default
value is also increased). This prevents certain cases of memory overcommitment
at the cost of overprovisioning memory and potential scheduling problems for
containers with high memory limits due to the lack of memory on Kubernetes
cluster nodes. This affects all containers created by the operator (Postgres,
Scalyr sidecar, and other sidecars); to set resources for the operator's own
container, change the [operator deployment manually](https://github.com/zalando/postgres-operator/blob/master/manifests/postgres-operator.yaml#L13).
The default is `false`.
* **enable_shm_volume**
Instruct operator to start any new database pod without limitations on shm
@ -346,9 +371,10 @@ In the CRD-based configuration they are grouped under the `load_balancer` key.
## AWS or GCP interaction
The options in this group configure operator interactions with non-Kubernetes
objects from Amazon Web Services (AWS) or Google Cloud Platform (GCP). They have no effect unless you are using
either. In the CRD-based configuration those options are grouped under the
`aws_or_gcp` key. Note the GCP integration is not yet officially supported.
objects from Amazon Web Services (AWS) or Google Cloud Platform (GCP). They have
no effect unless you are using either. In the CRD-based configuration those
options are grouped under the `aws_or_gcp` key. Note the GCP integration is not
yet officially supported.
* **wal_s3_bucket**
S3 bucket to use for shipping WAL segments with WAL-E. A bucket has to be
@ -357,7 +383,8 @@ either. In the CRD-based configuration those options are grouped under the
* **log_s3_bucket**
S3 bucket to use for shipping postgres daily logs. Works only with S3 on AWS.
The bucket has to be present and accessible by Postgres pods. The default is empty.
The bucket has to be present and accessible by Postgres pods. The default is
empty.
* **kube_iam_role**
AWS IAM role to supply in the `iam.amazonaws.com/role` annotation of Postgres
@ -378,8 +405,8 @@ Options to aid debugging of the operator itself. Grouped under the `debug` key.
* **enable_database_access**
boolean parameter that toggles the functionality of the operator that require
access to the postgres database, i.e. creating databases and users. The default
is `true`.
access to the postgres database, i.e. creating databases and users. The
default is `true`.
## Automatic creation of human users in the database
@ -416,7 +443,10 @@ key.
`admin`, that role is created by Spilo as a `NOLOGIN` role.
* **enable_admin_role_for_users**
if `true`, the `team_admin_role` will have the rights to grant roles coming from PG manifests. Such roles will be created as in "CREATE ROLE 'role_from_manifest' ... ADMIN 'team_admin_role'". The default is `true`.
if `true`, the `team_admin_role` will have the rights to grant roles coming
from PG manifests. Such roles will be created as in
"CREATE ROLE 'role_from_manifest' ... ADMIN 'team_admin_role'".
The default is `true`.
* **pam_role_name**
when set, the operator will add all team member roles to this group and add a
@ -435,11 +465,14 @@ key.
infrastructure role. The default is `admin`.
* **postgres_superuser_teams**
List of teams which members need the superuser role in each PG database cluster to administer Postgres and maintain infrastructure built around it. The default is empty.
List of teams whose members need the superuser role in each PG database
cluster to administer Postgres and maintain infrastructure built around it.
The default is empty.
## Logging and REST API
Parameters affecting logging and REST API listener. In the CRD-based configuration they are grouped under the `logging_rest_api` key.
Parameters affecting logging and REST API listener. In the CRD-based
configuration they are grouped under the `logging_rest_api` key.
* **api_port**
REST API listener listens to this port. The default is `8080`.
@ -480,14 +513,20 @@ scalyr sidecar. In the CRD-based configuration they are grouped under the
## Logical backup
These parameters configure a k8s cron job managed by the operator to produce Postgres logical backups.
In the CRD-based configuration those parameters are grouped under the `logical_backup` key.
These parameters configure a k8s cron job managed by the operator to produce
Postgres logical backups. In the CRD-based configuration those parameters are
grouped under the `logical_backup` key.
* **logical_backup_schedule**
Backup schedule in the cron format. Please take [the reference schedule format](https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#schedule) into account. Default: "30 00 \* \* \*"
* **logical_backup_docker_image**
Docker image for the pods of the cron job. Must implement backup logic and correctly handle pod and job restarts. The default image runs `pg_dumpall` (on a replica if possible) and uploads compressed results to an S3 bucket under the key `/spilo/pg_cluster_name/cluster_k8s_uuid/logical_backups` Default: "registry.opensource.zalan.do/acid/logical-backup"
An image for pods of the logical backup job. The [example image](../../docker/logical-backup/Dockerfile)
runs `pg_dumpall` on a replica if possible and uploads compressed results to
an S3 bucket under the key `/spilo/pg_cluster_name/cluster_k8s_uuid/logical_backups`.
The default image is the same image built with the Zalando-internal CI
pipeline. Default: "registry.opensource.zalan.do/acid/logical-backup"
* **logical_backup_s3_bucket**
S3 bucket to store backup results. The bucket has to be present and accessible by Postgres pods. Default: empty.
S3 bucket to store backup results. The bucket has to be present and
accessible by Postgres pods. Default: empty.

View File

@ -270,6 +270,13 @@ spec:
sidecars:
- name: "container-name"
image: "company/image:tag"
resources:
limits:
cpu: 500m
memory: 500Mi
requests:
cpu: 100m
memory: 100Mi
env:
- name: "ENV_VAR_NAME"
value: "any-k8s-env-things"

View File

@ -71,3 +71,17 @@ spec:
maintenanceWindows:
- 01:00-06:00 #UTC
- Sat:00:00-04:00
#sidecars:
# - name: "telegraf-sidecar"
# image: "telegraf:latest"
# resources:
# limits:
# cpu: 500m
# memory: 500Mi
# requests:
# cpu: 100m
# memory: 100Mi
# env:
# - name: "USEFUL_VAR"
# value: "perhaps-true"

View File

@ -13,6 +13,7 @@ data:
docker_image: registry.opensource.zalan.do/acid/spilo-11:1.5-p7
pod_service_account_name: "zalando-postgres-operator"
secret_name_template: '{username}.{cluster}.credentials'
cluster_domain: cluster.local
super_username: postgres
enable_teams_api: "false"
spilo_privileged: "false"

View File

@ -21,6 +21,7 @@ configuration:
pod_terminate_grace_period: 5m
pdb_name_format: "postgres-{cluster}-pdb"
secret_name_template: "{username}.{cluster}.credentials.{tprkind}.{tprgroup}"
cluster_domain: cluster.local
oauth_token_secret_name: postgresql-operator
pod_role_label: spilo-role
spilo_privileged: false

View File

@ -49,6 +49,7 @@ type KubernetesMetaConfiguration struct {
WatchedNamespace string `json:"watched_namespace,omitempty"`
PDBNameFormat config.StringTemplate `json:"pdb_name_format,omitempty"`
SecretNameTemplate config.StringTemplate `json:"secret_name_template,omitempty"`
ClusterDomain string `json:"cluster_domain"`
OAuthTokenSecretName spec.NamespacedName `json:"oauth_token_secret_name,omitempty"`
InfrastructureRolesSecretName spec.NamespacedName `json:"infrastructure_roles_secret_name,omitempty"`
PodRoleLabel string `json:"pod_role_label,omitempty"`

View File

@ -34,7 +34,7 @@ func (c *Cluster) pgConnectionString() string {
password := c.systemUsers[constants.SuperuserKeyName].Password
return fmt.Sprintf("host='%s' dbname=postgres sslmode=require user='%s' password='%s' connect_timeout='%d'",
fmt.Sprintf("%s.%s.svc.cluster.local", c.Name, c.Namespace),
fmt.Sprintf("%s.%s.svc.%s", c.Name, c.Namespace, c.OpConfig.ClusterDomain),
c.systemUsers[constants.SuperuserKeyName].Name,
strings.Replace(password, "$", "\\$", -1),
constants.PostgresConnectTimeout/time.Second)

View File

@ -42,6 +42,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.PodEnvironmentConfigMap = fromCRD.Kubernetes.PodEnvironmentConfigMap
result.PodTerminateGracePeriod = time.Duration(fromCRD.Kubernetes.PodTerminateGracePeriod)
result.SpiloPrivileged = fromCRD.Kubernetes.SpiloPrivileged
result.ClusterDomain = fromCRD.Kubernetes.ClusterDomain
result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace
result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat
result.SecretNameTemplate = fromCRD.Kubernetes.SecretNameTemplate

View File

@ -26,6 +26,7 @@ type Resources struct {
PodDeletionWaitTimeout time.Duration `name:"pod_deletion_wait_timeout" default:"10m"`
PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"`
PodPriorityClassName string `name:"pod_priority_class_name"`
ClusterDomain string `name:"cluster_domain" default:"cluster.local"`
SpiloPrivileged bool `name:"spilo_privileged" default:"false"`
ClusterLabels map[string]string `name:"cluster_labels" default:"application:spilo"`
InheritedLabels []string `name:"inherited_labels" default:""`