Merge branch 'master' into feature/connection-pooler

commit e6f1e8b7fc
@@ -1,7 +1,7 @@
 apiVersion: v1
 name: postgres-operator-ui
-version: 0.1.0
-appVersion: 1.3.0
+version: 1.4.0
+appVersion: 1.4.0
 home: https://github.com/zalando/postgres-operator
 description: Postgres Operator UI provides a graphical interface for a convenient database-as-a-service user experience
 keywords:
@@ -0,0 +1,29 @@
+apiVersion: v1
+entries:
+  postgres-operator-ui:
+  - apiVersion: v1
+    appVersion: 1.4.0
+    created: "2020-02-24T15:32:47.610967635+01:00"
+    description: Postgres Operator UI provides a graphical interface for a convenient
+      database-as-a-service user experience
+    digest: 00e0eff7056d56467cd5c975657fbb76c8d01accd25a4b7aca81bc42aeac961d
+    home: https://github.com/zalando/postgres-operator
+    keywords:
+    - postgres
+    - operator
+    - ui
+    - cloud-native
+    - patroni
+    - spilo
+    maintainers:
+    - email: opensource@zalando.de
+      name: Zalando
+    - email: sk@sik-net.de
+      name: siku4
+    name: postgres-operator-ui
+    sources:
+    - https://github.com/zalando/postgres-operator
+    urls:
+    - postgres-operator-ui-1.4.0.tgz
+    version: 1.4.0
+generated: "2020-02-24T15:32:47.610348278+01:00"
Binary file not shown.
@@ -8,7 +8,7 @@ replicaCount: 1
 image:
   registry: registry.opensource.zalan.do
   repository: acid/postgres-operator-ui
-  tag: v1.2.0
+  tag: v1.4.0
   pullPolicy: "IfNotPresent"
 
 rbac:
@@ -1,7 +1,7 @@
 apiVersion: v1
 name: postgres-operator
-version: 1.3.0
-appVersion: 1.3.0
+version: 1.4.0
+appVersion: 1.4.0
 home: https://github.com/zalando/postgres-operator
 description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes
 keywords:
@@ -94,7 +94,7 @@ spec:
 s3_secret_access_key:
   type: string
 s3_force_path_style:
-  type: string
+  type: boolean
 s3_wal_path:
   type: string
 timestamp:
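The CRD change above tightens validation of `s3_force_path_style` from string to boolean. For orientation, this is the clone-from-S3 shape that exercises the field, as documented later in this same diff; the endpoint and cluster name here are illustrative:

```yaml
spec:
  clone:
    cluster: "acid-batman"            # hypothetical source cluster
    s3_endpoint: https://s3.acme.org  # non-AWS S3 implementation
    s3_force_path_style: true         # now validated as a boolean
```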
@@ -1,9 +1,31 @@
 apiVersion: v1
 entries:
   postgres-operator:
+  - apiVersion: v1
+    appVersion: 1.4.0
+    created: "2020-02-20T17:39:25.443276193+01:00"
+    description: Postgres Operator creates and manages PostgreSQL clusters running
+      in Kubernetes
+    digest: b93ccde5581deb8ed0857136b8ce74ca3f1b7240438fa4415f705764a1300bed
+    home: https://github.com/zalando/postgres-operator
+    keywords:
+    - postgres
+    - operator
+    - cloud-native
+    - patroni
+    - spilo
+    maintainers:
+    - email: opensource@zalando.de
+      name: Zalando
+    name: postgres-operator
+    sources:
+    - https://github.com/zalando/postgres-operator
+    urls:
+    - postgres-operator-1.4.0.tgz
+    version: 1.4.0
   - apiVersion: v1
     appVersion: 1.3.0
-    created: "2019-12-17T12:58:49.477140129+01:00"
+    created: "2020-02-20T17:39:25.441532163+01:00"
     description: Postgres Operator creates and manages PostgreSQL clusters running
       in Kubernetes
     digest: 7e788fd37daec76a01f6d6f9fe5be5b54f5035e4eba0041e80a760d656537325
@@ -25,7 +47,7 @@ entries:
     version: 1.3.0
   - apiVersion: v1
     appVersion: 1.2.0
-    created: "2019-12-17T12:58:49.475844233+01:00"
+    created: "2020-02-20T17:39:25.440278302+01:00"
     description: Postgres Operator creates and manages PostgreSQL clusters running
       in Kubernetes
     digest: d10710c7cf19f4e266e7704f5d1e98dcfc61bee3919522326c35c22ca7d2f2bf
@@ -47,4 +69,4 @@ entries:
     urls:
     - postgres-operator-1.2.0.tgz
     version: 1.2.0
-generated: "2019-12-17T12:58:49.474719294+01:00"
+generated: "2020-02-20T17:39:25.439168098+01:00"
Binary file not shown.
@@ -63,9 +63,9 @@ rules:
   - secrets
   verbs:
   - create
-  - update
   - delete
   - get
+  - update
 # to check nodes for node readiness label
 - apiGroups:
   - ""
@@ -102,9 +102,9 @@ rules:
   - delete
   - get
   - list
-  - watch
-  - update
   - patch
+  - update
+  - watch
 # to resize the filesystem in Spilo pods when increasing volume size
 - apiGroups:
   - ""
@@ -1,7 +1,7 @@
 image:
   registry: registry.opensource.zalan.do
   repository: acid/postgres-operator
-  tag: v1.3.1
+  tag: v1.4.0
   pullPolicy: "IfNotPresent"
 
 # Optionally specify an array of imagePullSecrets.
@@ -24,7 +24,7 @@ configGeneral:
   # etcd connection string for Patroni. Empty uses K8s-native DCS.
   etcd_host: ""
   # Spilo docker image
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # max number of instances in Postgres cluster. -1 = no limit
   min_instances: -1
   # min number of instances in Postgres cluster. -1 = no limit
@@ -100,8 +100,14 @@ configKubernetes:
   pod_management_policy: "ordered_ready"
   # label assigned to the Postgres pods (and services/endpoints)
   pod_role_label: spilo-role
+  # service account definition as JSON/YAML string to be used by postgres cluster pods
+  # pod_service_account_definition: ""
+
   # name of service account to be used by postgres cluster pods
   pod_service_account_name: "postgres-pod"
+  # role binding definition as JSON/YAML string to be used by pod service account
+  # pod_service_account_role_binding_definition: ""
+
   # Postgres pods are terminated forcefully after this timeout
   pod_terminate_grace_period: 5m
   # template for database user secrets generated by the operator
@@ -1,7 +1,7 @@
 image:
   registry: registry.opensource.zalan.do
   repository: acid/postgres-operator
-  tag: v1.3.1
+  tag: v1.4.0
   pullPolicy: "IfNotPresent"
 
 # Optionally specify an array of imagePullSecrets.
@@ -24,7 +24,7 @@ configGeneral:
   # etcd connection string for Patroni. Empty uses K8s-native DCS.
   etcd_host: ""
   # Spilo docker image
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # max number of instances in Postgres cluster. -1 = no limit
   min_instances: "-1"
   # min number of instances in Postgres cluster. -1 = no limit
@@ -93,8 +93,14 @@ configKubernetes:
   pod_management_policy: "ordered_ready"
   # label assigned to the Postgres pods (and services/endpoints)
   pod_role_label: spilo-role
+  # service account definition as JSON/YAML string to be used by postgres cluster pods
+  # pod_service_account_definition: ""
+
   # name of service account to be used by postgres cluster pods
   pod_service_account_name: "postgres-pod"
+  # role binding definition as JSON/YAML string to be used by pod service account
+  # pod_service_account_role_binding_definition: ""
+
   # Postgres pods are terminated forcefully after this timeout
   pod_terminate_grace_period: 5m
   # template for database user secrets generated by the operator
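For context on the two commented options added above: each value must be a JSON/YAML string that the operator can parse into the corresponding Kubernetes object (the controller's init code rejects definitions of any other kind, as the `initRoleBinding` change later in this diff shows for role bindings). A minimal sketch, assuming the default `postgres-pod` account name; the exact body is hypothetical:

```yaml
configKubernetes:
  # hypothetical example; the string must parse to a ServiceAccount object
  pod_service_account_definition: |
    apiVersion: v1
    kind: ServiceAccount
    metadata:
      name: postgres-pod
```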
@@ -66,20 +66,13 @@ pipeline:
       - desc: 'Build and push Docker image'
         cmd: |
           cd ui
-          image_base='registry-write.opensource.zalan.do/acid/postgres-operator-ui'
-          if [[ "${CDP_TARGET_BRANCH}" == 'master' && -z "${CDP_PULL_REQUEST_NUMBER}" ]]
+          IS_PR_BUILD=${CDP_PULL_REQUEST_NUMBER+"true"}
+          if [[ ${CDP_TARGET_BRANCH} == "master" && ${IS_PR_BUILD} != "true" ]]
           then
-            image="${image_base}"
+            IMAGE=registry-write.opensource.zalan.do/acid/postgres-operator-ui
           else
-            image="${image_base}-test"
+            IMAGE=registry-write.opensource.zalan.do/acid/postgres-operator-ui-test
           fi
-          image_with_tag="${image}:c${CDP_BUILD_VERSION}"
-
-          if docker pull "${image}"
-          then
-            docker build --cache-from="${image}" -t "${image_with_tag}" .
-          else
-            docker build -t "${image_with_tag}" .
-          fi
-
-          docker push "${image_with_tag}"
+          export IMAGE
+          make docker
+          make push
@@ -11,11 +11,11 @@ switchover (planned failover) of the master to the Pod with new minor version.
 The switch should usually take less than 5 seconds, still clients have to
 reconnect.
 
-Major version upgrades are supported via [cloning](user.md#clone-directly). The
-new cluster manifest must have a higher `version` string than the source cluster
-and will be created from a basebackup. Depending of the cluster size, downtime
-in this case can be significant as writes to the database should be stopped and
-all WAL files should be archived first before cloning is started.
+Major version upgrades are supported via [cloning](user.md#how-to-clone-an-existing-postgresql-cluster).
+The new cluster manifest must have a higher `version` string than the source
+cluster and will be created from a basebackup. Depending of the cluster size,
+downtime in this case can be significant as writes to the database should be
+stopped and all WAL files should be archived first before cloning is started.
 
 Note, that simply changing the version string in the `postgresql` manifest does
 not work at present and leads to errors. Neither Patroni nor Postgres Operator
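As a sketch of the cloning-based upgrade described above, with hypothetical cluster names, sizes, and versions: the new manifest references the source cluster in its `clone` section and requests a higher `version`:

```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
  name: acid-upgraded-cluster
spec:
  teamId: "acid"
  numberOfInstances: 2
  volume:
    size: 5Gi
  postgresql:
    version: "12"                # must be higher than in the source cluster
  clone:
    cluster: "acid-old-cluster"  # created from a basebackup of this cluster
```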
@@ -110,8 +110,10 @@ Those are top-level keys, containing both leaf keys and groups.
 
 * **min_instances**
   operator will run at least the number of instances for any given Postgres
-  cluster equal to the value of this parameter. When `-1` is specified, no
-  limits are applied. The default is `-1`.
+  cluster equal to the value of this parameter. Standby clusters can still run
+  with `numberOfInstances: 1` as this is the [recommended setup](../user.md#setting-up-a-standby-cluster).
+  When `-1` is specified for `min_instances`, no limits are applied. The default
+  is `-1`.
 
 * **resync_period**
   period between consecutive sync requests. The default is `30m`.
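In the ConfigMap-based operator configuration the same knob is a string value; a minimal sketch with illustrative numbers:

```yaml
data:
  min_instances: "2"   # operator bumps smaller clusters up to 2 pods
  max_instances: "-1"  # -1 = no upper limit
```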
docs/user.md (137 changed lines)
@@ -254,29 +254,22 @@ spec:
 
 ## How to clone an existing PostgreSQL cluster
 
-You can spin up a new cluster as a clone of the existing one, using a clone
+You can spin up a new cluster as a clone of the existing one, using a `clone`
 section in the spec. There are two options here:
 
-* Clone directly from a source cluster using `pg_basebackup`
-* Clone from an S3 bucket
+* Clone from an S3 bucket (recommended)
+* Clone directly from a source cluster
 
-### Clone directly
-
-```yaml
-spec:
-  clone:
-    cluster: "acid-batman"
-```
-
-Here `cluster` is a name of a source cluster that is going to be cloned. The
-cluster to clone is assumed to be running and the clone procedure invokes
-`pg_basebackup` from it. The operator will setup the cluster to be cloned to
-connect to the service of the source cluster by name (if the cluster is called
-test, then the connection string will look like host=test port=5432), which
-means that you can clone only from clusters within the same namespace.
+Note, that cloning can also be used for [major version upgrades](administrator.md#minor-and-major-version-upgrade)
+of PostgreSQL.
 
 ### Clone from S3
 
+Cloning from S3 has the advantage that there is no impact on your production
+database. A new Postgres cluster is created by restoring the data of another
+source cluster. If you create it in the same Kubernetes environment, use a
+different name.
+
 ```yaml
 spec:
   clone:
@@ -287,7 +280,8 @@ spec:
 
 Here `cluster` is a name of a source cluster that is going to be cloned. A new
 cluster will be cloned from S3, using the latest backup before the `timestamp`.
-In this case, `uid` field is also mandatory - operator will use it to find a
+Note, that a time zone is required for `timestamp` in the format of +00:00 which
+is UTC. The `uid` field is also mandatory. The operator will use it to find a
 correct key inside an S3 bucket. You can find this field in the metadata of the
 source cluster:
 
@@ -299,9 +293,6 @@ metadata:
   uid: efd12e58-5786-11e8-b5a7-06148230260c
 ```
 
-Note that timezone is required for `timestamp`. Otherwise, offset is relative
-to UTC, see [RFC 3339 section 5.6](https://www.ietf.org/rfc/rfc3339.txt).
-
 For non AWS S3 following settings can be set to support cloning from other S3
 implementations:
 
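Combining the two requirements just stated (a time-zone-qualified `timestamp` plus the source cluster's `uid`), a complete S3 clone section would look roughly like this; the values are illustrative:

```yaml
spec:
  clone:
    cluster: "acid-batman"
    timestamp: "2020-02-04T12:49:03+00:00"     # time zone required (+00:00 is UTC)
    uid: efd12e58-5786-11e8-b5a7-06148230260c  # from the source cluster metadata
```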
@@ -317,14 +308,35 @@ spec:
     s3_force_path_style: true
 ```
 
+### Clone directly
+
+Another way to get a fresh copy of your source DB cluster is via basebackup. To
+use this feature simply leave out the timestamp field from the clone section.
+The operator will connect to the service of the source cluster by name. If the
+cluster is called test, then the connection string will look like host=test
+port=5432), which means that you can clone only from clusters within the same
+namespace.
+
+```yaml
+spec:
+  clone:
+    cluster: "acid-batman"
+```
+
+Be aware that on a busy source database this can result in an elevated load!
+
 ## Setting up a standby cluster
 
-Standby clusters are like normal cluster but they are streaming from a remote
-cluster. As the first version of this feature, the only scenario covered by
-operator is to stream from a WAL archive of the master. Following the more
-popular infrastructure of using Amazon's S3 buckets, it is mentioned as
-`s3_wal_path` here. To start a cluster as standby add the following `standby`
-section in the YAML file:
+Standby cluster is a [Patroni feature](https://github.com/zalando/patroni/blob/master/docs/replica_bootstrap.rst#standby-cluster)
+that first clones a database, and keeps replicating changes afterwards. As the
+replication is happening by the means of archived WAL files (stored on S3 or
+the equivalent of other cloud providers), the standby cluster can exist in a
+different location than its source database. Unlike cloning, the PostgreSQL
+version between source and target cluster has to be the same.
 
+To start a cluster as standby, add the following `standby` section in the YAML
+file and specify the S3 bucket path. An empty path will result in an error and
+no statefulset will be created.
+
 ```yaml
 spec:
@@ -332,20 +344,65 @@ spec:
     s3_wal_path: "s3 bucket path to the master"
 ```
 
-Things to note:
+At the moment, the operator only allows to stream from the WAL archive of the
+master. Thus, it is recommended to deploy standby clusters with only [one pod](../manifests/standby-manifest.yaml#L10).
+You can raise the instance count when detaching. Note, that the same pod role
+labels like for normal clusters are used: The standby leader is labeled as
+`master`.
 
-- An empty string in the `s3_wal_path` field of the standby cluster will result
-  in an error and no statefulset will be created.
-- Only one pod can be deployed for stand-by cluster.
-- To manually promote the standby_cluster, use `patronictl` and remove config
-  entry.
-- There is no way to transform a non-standby cluster to a standby cluster
-  through the operator. Adding the standby section to the manifest of a running
-  Postgres cluster will have no effect. However, it can be done through Patroni
-  by adding the [standby_cluster](https://github.com/zalando/patroni/blob/bd2c54581abb42a7d3a3da551edf0b8732eefd27/docs/replica_bootstrap.rst#standby-cluster)
-  section using `patronictl edit-config`. Note that the transformed standby
-  cluster will not be doing any streaming. It will be in standby mode and allow
-  read-only transactions only.
+### Providing credentials of source cluster
+
+A standby cluster is replicating the data (including users and passwords) from
+the source database and is read-only. The system and application users (like
+standby, postgres etc.) all have a password that does not match the credentials
+stored in secrets which are created by the operator. One solution is to create
+secrets beforehand and paste in the credentials of the source cluster.
+Otherwise, you will see errors in the Postgres logs saying users cannot log in
+and the operator logs will complain about not being able to sync resources.
+
+When you only run a standby leader, you can safely ignore this, as it will be
+sorted out once the cluster is detached from the source. It is also harmless if
+you don’t plan it. But, when you created a standby replica, too, fix the
+credentials right away. WAL files will pile up on the standby leader if no
+connection can be established between standby replica(s). You can also edit the
+secrets after their creation. Find them by:
+
+```bash
+kubectl get secrets --all-namespaces | grep <standby-cluster-name>
+```
+
+### Promote the standby
+
+One big advantage of standby clusters is that they can be promoted to a proper
+database cluster. This means it will stop replicating changes from the source,
+and start accept writes itself. This mechanism makes it possible to move
+databases from one place to another with minimal downtime. Currently, the
+operator does not support promoting a standby cluster. It has to be done
+manually using `patronictl edit-config` inside the postgres container of the
+standby leader pod. Remove the following lines from the YAML structure and the
+leader promotion happens immediately. Before doing so, make sure that the
+standby is not behind the source database.
+
+```yaml
+standby_cluster:
+  create_replica_methods:
+    - bootstrap_standby_with_wale
+    - basebackup_fast_xlog
+  restore_command: envdir "/home/postgres/etc/wal-e.d/env-standby" /scripts/restore_command.sh
+    "%f" "%p"
+```
+
+Finally, remove the `standby` section from the postgres cluster manifest.
+
+### Turn a normal cluster into a standby
+
+There is no way to transform a non-standby cluster to a standby cluster through
+the operator. Adding the `standby` section to the manifest of a running
+Postgres cluster will have no effect. But, as explained in the previous
+paragraph it can be done manually through `patronictl edit-config`. This time,
+by adding the `standby_cluster` section to the Patroni configuration. However,
+the transformed standby cluster will not be doing any streaming. It will be in
+standby mode and allow read-only transactions only.
 
 ## Sidecar Support
 
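A standby manifest along the lines of `manifests/standby-manifest.yaml` (referenced above) might look as follows; the S3 path is a placeholder and the Postgres version must match the source cluster:

```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
  name: acid-standby-cluster
spec:
  teamId: "acid"
  numberOfInstances: 1   # the recommended single-pod setup
  volume:
    size: 1Gi
  postgresql:
    version: "11"
  standby:
    s3_wal_path: "s3://<bucket>/spilo/<source-cluster>/<uid>/wal/"
```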
@@ -60,10 +60,11 @@ class EndToEndTestCase(unittest.TestCase):
             'default', label_selector='name=postgres-operator').items[0].spec.containers[0].image
         print("Tested operator image: {}".format(actual_operator_image))  # shows up after tests finish
 
-        result = k8s.create_with_kubectl('manifests/minimal-postgres-manifest.yaml')
+        result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         print('stdout: {}, stderr: {}'.format(result.stdout, result.stderr))
         try:
             k8s.wait_for_pod_start('spilo-role=master')
+            k8s.wait_for_pod_start('spilo-role=replica')
         except timeout_decorator.TimeoutError:
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
@@ -117,152 +118,6 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(repl_svc_type, 'ClusterIP',
                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type))
 
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
-        '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
-        _, failover_targets = k8s.get_pg_nodes(cluster_label)
-
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '500m'
-        minMemoryLimit = '500Mi'
-        patch_min_resource_limits = {
-            "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
-            }
-        }
-        k8s.update_config(patch_min_resource_limits)
-
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        k8s.wait_for_master_failover(failover_targets)
-
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector='spilo-role=master,' + cluster_label).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
-
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
-                         "Expected CPU limit {}, found {}"
-                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
-                         "Expected memory limit {}, found {}"
-                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
-        '''
-        k8s = self.k8s
-
-        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
-            pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
-            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
-
-        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_scaling(self):
-        '''
-        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
-        '''
-        k8s = self.k8s
-        labels = "application=spilo,cluster-name=acid-minimal-cluster"
-
-        try:
-            k8s.wait_for_pg_to_scale(3)
-            self.assertEqual(3, k8s.count_pods_with_label(labels))
-            self.assert_master_is_unique()
-
-            k8s.wait_for_pg_to_scale(2)
-            self.assertEqual(2, k8s.count_pods_with_label(labels))
-            self.assert_master_is_unique()
-        except timeout_decorator.TimeoutError:
-            print('Operator log: {}'.format(k8s.get_operator_log()))
-            pods = k8s.api.core_v1.list_namespaced_pod('default').items
-            for p in pods:
-                response = k8s.api.core_v1.read_namespaced_pod(
-                    name=p.metadata.name,
-                    namespace='default'
-                )
-                print('Pod: {}'.format(response))
-            raise
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_taint_based_eviction(self):
-        '''
-        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
-        '''
-        k8s = self.k8s
-        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
-
-        # taint node with postgres=:NoExecute to force failover
-        body = {
-            "spec": {
-                "taints": [
-                    {
-                        "effect": "NoExecute",
-                        "key": "postgres"
-                    }
-                ]
-            }
-        }
-
-        # patch node and test if master is failing over to one of the expected nodes
-        k8s.api.core_v1.patch_node(current_master_node, body)
-        k8s.wait_for_master_failover(failover_targets)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-
-        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(current_master_node, new_master_node,
-                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
-        self.assertEqual(num_replicas, len(new_replica_nodes),
-                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
-        self.assert_master_is_unique()
-
-        # undo the tainting
-        body = {
-            "spec": {
-                "taints": []
-            }
-        }
-        k8s.api.core_v1.patch_node(new_master_node, body)
-
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_logical_backup_cron_job(self):
         '''
@@ -327,6 +182,133 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(0, len(jobs),
                          "Expected 0 logical backup jobs, found {}".format(len(jobs)))
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_min_resource_limits(self):
+        '''
+        Lower resource limits below configured minimum and let operator fix it
+        '''
+        k8s = self.k8s
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+
+        # configure minimum boundaries for CPU and memory limits
+        minCPULimit = '500m'
+        minMemoryLimit = '500Mi'
+        patch_min_resource_limits = {
+            "data": {
+                "min_cpu_limit": minCPULimit,
+                "min_memory_limit": minMemoryLimit
+            }
+        }
+        k8s.update_config(patch_min_resource_limits)
+
+        # lower resource limits below minimum
+        pg_patch_resources = {
+            "spec": {
+                "resources": {
+                    "requests": {
+                        "cpu": "10m",
+                        "memory": "50Mi"
+                    },
+                    "limits": {
+                        "cpu": "200m",
+                        "memory": "200Mi"
+                    }
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
+        k8s.wait_for_pod_failover(failover_targets, labels)
+        k8s.wait_for_pod_start('spilo-role=replica')
+
+        pods = k8s.api.core_v1.list_namespaced_pod(
+            'default', label_selector=labels).items
+        self.assert_master_is_unique()
+        masterPod = pods[0]
+
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
+                         "Expected CPU limit {}, found {}"
+                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
+                         "Expected memory limit {}, found {}"
+                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_multi_namespace_support(self):
+        '''
+        Create a customized Postgres cluster in a non-default namespace.
+        '''
+        k8s = self.k8s
+
+        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
+            pg_manifest = yaml.safe_load(f)
+            pg_manifest["metadata"]["namespace"] = self.namespace
+            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
+
+        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
+        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
+        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+        Remove node readiness label from master node. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in config map which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # patch also node where master ran before
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_scaling(self):
+        '''
+        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
+        '''
+        k8s = self.k8s
+        labels = "cluster-name=acid-minimal-cluster"
+
+        k8s.wait_for_pg_to_scale(3)
+        self.assertEqual(3, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
+        k8s.wait_for_pg_to_scale(2)
+        self.assertEqual(2, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_service_annotations(self):
         '''
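The `test_node_readiness_label` case added above toggles the operator's `node_readiness_label` option; expressed directly in the operator ConfigMap, the patch it applies corresponds to:

```yaml
data:
  node_readiness_label: "lifecycle-status:ready"
```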
@@ -453,18 +435,117 @@ class EndToEndTestCase(unittest.TestCase):
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_taint_based_eviction(self):
+        '''
+        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # taint node with postgres=:NoExecute to force failover
+        body = {
+            "spec": {
+                "taints": [
+                    {
+                        "effect": "NoExecute",
+                        "key": "postgres"
+                    }
+                ]
+            }
+        }
+
+        # patch node and test if master is failing over to one of the expected nodes
+        k8s.api.core_v1.patch_node(current_master_node, body)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # add toleration to pods
+        patch_toleration_config = {
+            "data": {
+                "toleration": "key:postgres,operator:Exists,effect:NoExecute"
+            }
+        }
+        k8s.update_config(patch_toleration_config)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+        If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
+    def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label):
+        '''
+        Check if master is failing over. The replica should move first to be the switchover target
+        '''
+        k8s = self.k8s
+        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        return new_master_node, new_replica_nodes
+>>>>>>> master
 
     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
         Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
         To be called manually after operations that affect pods
         '''
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName
 
         num_of_master_pods = k8s.count_pods_with_label(labels, namespace)
         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))
+
+    def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
+        '''
+        Other tests can lead to the situation that master and replica are on the same node.
+        Toggle pod anti affinty to distribute pods accross nodes (replica in particular).
+        '''
+        k8s = self.k8s
+        failover_targets = self.get_failover_targets(master_node, replica_nodes)
+
+        # enable pod anti affintiy in config map which should trigger movement of replica
+        patch_enable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "true"
+            }
+        }
+        k8s.update_config(patch_enable_antiaffinity)
+        self.assert_failover(
+            master_node, len(replica_nodes), failover_targets, cluster_label)
+
+        # disable pod anti affintiy again
+        patch_disable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "false"
+            }
+        }
+        k8s.update_config(patch_disable_antiaffinity)
+
 
 class K8sApi:
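For clarity, the `toleration` string patched into the operator config in `test_taint_based_eviction` above ("key:postgres,operator:Exists,effect:NoExecute") describes the standard Kubernetes toleration the Postgres pods need to survive the taint, i.e.:

```yaml
tolerations:
- key: postgres
  operator: Exists
  effect: NoExecute
```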
@@ -596,15 +677,14 @@ class K8s:
     def count_pods_with_label(self, labels, namespace='default'):
         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items)
 
-    def wait_for_master_failover(self, expected_master_nodes, namespace='default'):
+    def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
-        new_master_node = ''
-        labels = 'spilo-role=master,cluster-name=acid-minimal-cluster'
+        new_pod_node = ''
 
-        while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
+        while (pod_phase != 'Running') or (new_pod_node not in failover_targets):
             pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
             if pods:
-                new_master_node = pods[0].spec.node_name
+                new_pod_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
             time.sleep(self.RETRY_TIMEOUT_SEC)
@@ -5,7 +5,7 @@ metadata:
 # labels:
 #   environment: demo
 spec:
-  dockerImage: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  dockerImage: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   teamId: "acid"
   volume:
     size: 1Gi
@@ -29,7 +29,7 @@ data:
   # default_cpu_request: 100m
   # default_memory_limit: 500Mi
   # default_memory_request: 100Mi
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # enable_admin_role_for_users: "true"
   # enable_crd_validation: "true"
   # enable_database_access: "true"
@@ -73,7 +73,9 @@ data:
   pod_label_wait_timeout: 10m
   pod_management_policy: "ordered_ready"
   pod_role_label: spilo-role
+  # pod_service_account_definition: ""
   pod_service_account_name: "postgres-pod"
+  # pod_service_account_role_binding_definition: ""
   pod_terminate_grace_period: 5m
   # postgres_superuser_teams: "postgres_superusers"
   # protected_role_names: "admin"
@@ -64,9 +64,9 @@ rules:
   - secrets
   verbs:
   - create
-  - update
   - delete
   - get
+  - update
 # to check nodes for node readiness label
 - apiGroups:
   - ""
@@ -103,9 +103,9 @@ rules:
   - delete
   - get
   - list
-  - watch
-  - update
   - patch
+  - update
+  - watch
 # to resize the filesystem in Spilo pods when increasing volume size
 - apiGroups:
   - ""
@@ -15,7 +15,7 @@ spec:
       serviceAccountName: postgres-operator
       containers:
       - name: postgres-operator
-        image: registry.opensource.zalan.do/acid/postgres-operator:v1.3.1
+        image: registry.opensource.zalan.do/acid/postgres-operator:v1.4.0
         imagePullPolicy: IfNotPresent
         resources:
           requests:
@@ -5,7 +5,7 @@ metadata:
 configuration:
   # enable_crd_validation: true
   etcd_host: ""
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # enable_shm_volume: true
   max_instances: -1
   min_instances: -1
@@ -110,7 +110,7 @@ configuration:
     log_statement: all
   # teams_api_url: ""
   logging_rest_api:
-    api_port: 8008
+    api_port: 8080
     cluster_history_entries: 1000
     ring_log_lines: 100
   scalyr:
@@ -58,7 +58,7 @@ spec:
 s3_secret_access_key:
   type: string
 s3_force_path_style:
-  type: string
+  type: boolean
 s3_wal_path:
   type: string
 timestamp:
@@ -160,7 +160,7 @@ var PostgresCRDResourceValidation = apiextv1beta1.CustomResourceValidation{
 				Type: "string",
 			},
 			"s3_force_path_style": {
-				Type: "string",
+				Type: "boolean",
 			},
 			"s3_wal_path": {
 				Type: "string",
@@ -1110,11 +1110,13 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
 	cur := spec.NumberOfInstances
 	newcur := cur
 
-	/* Limit the max number of pods to one, if this is standby-cluster */
 	if spec.StandbyCluster != nil {
-		c.logger.Info("Standby cluster can have maximum of 1 pod")
-		min = 1
-		max = 1
+		if newcur == 1 {
+			min = newcur
+			max = newcur
+		} else {
+			c.logger.Warningf("operator only supports standby clusters with 1 pod")
+		}
 	}
 	if max >= 0 && newcur > max {
 		newcur = max
@@ -224,7 +224,7 @@ func (c *Controller) initRoleBinding() {
 
 	switch {
 	case err != nil:
-		panic(fmt.Errorf("unable to parse the definition of the role binding for the pod service account definition from the operator configuration: %v", err))
+		panic(fmt.Errorf("unable to parse the role binding definition from the operator configuration: %v", err))
 	case groupVersionKind.Kind != "RoleBinding":
 		panic(fmt.Errorf("role binding definition in the operator configuration defines another type of resource: %v", groupVersionKind.Kind))
 	default:
@@ -5,7 +5,7 @@ import (
 	"time"
 
 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }
 
 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)
 
 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 
 }
@@ -505,11 +505,11 @@ func (c *Controller) submitRBACCredentials(event ClusterEvent) error {
 	namespace := event.NewSpec.GetNamespace()
 
 	if err := c.createPodServiceAccount(namespace); err != nil {
-		return fmt.Errorf("could not create pod service account %v : %v", c.opConfig.PodServiceAccountName, err)
+		return fmt.Errorf("could not create pod service account %q : %v", c.opConfig.PodServiceAccountName, err)
 	}
 
 	if err := c.createRoleBindings(namespace); err != nil {
-		return fmt.Errorf("could not create role binding %v : %v", c.PodServiceAccountRoleBinding.Name, err)
+		return fmt.Errorf("could not create role binding %q : %v", c.PodServiceAccountRoleBinding.Name, err)
 	}
 	return nil
 }
@@ -520,16 +520,16 @@ func (c *Controller) createPodServiceAccount(namespace string) error {
 	_, err := c.KubeClient.ServiceAccounts(namespace).Get(podServiceAccountName, metav1.GetOptions{})
 	if k8sutil.ResourceNotFound(err) {
 
-		c.logger.Infof(fmt.Sprintf("creating pod service account in the namespace %v", namespace))
+		c.logger.Infof(fmt.Sprintf("creating pod service account %q in the %q namespace", podServiceAccountName, namespace))
 
 		// get a separate copy of service account
 		// to prevent a race condition when setting a namespace for many clusters
 		sa := *c.PodServiceAccount
 		if _, err = c.KubeClient.ServiceAccounts(namespace).Create(&sa); err != nil {
-			return fmt.Errorf("cannot deploy the pod service account %v defined in the config map to the %v namespace: %v", podServiceAccountName, namespace, err)
+			return fmt.Errorf("cannot deploy the pod service account %q defined in the configuration to the %q namespace: %v", podServiceAccountName, namespace, err)
 		}
 
-		c.logger.Infof("successfully deployed the pod service account %v to the %v namespace", podServiceAccountName, namespace)
+		c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccountName, namespace)
 	} else if k8sutil.ResourceAlreadyExists(err) {
 		return nil
 	}
@@ -545,14 +545,14 @@ func (c *Controller) createRoleBindings(namespace string) error {
 	_, err := c.KubeClient.RoleBindings(namespace).Get(podServiceAccountRoleBindingName, metav1.GetOptions{})
 	if k8sutil.ResourceNotFound(err) {
 
-		c.logger.Infof("Creating the role binding %v in the namespace %v", podServiceAccountRoleBindingName, namespace)
+		c.logger.Infof("Creating the role binding %q in the %q namespace", podServiceAccountRoleBindingName, namespace)
 
 		// get a separate copy of role binding
 		// to prevent a race condition when setting a namespace for many clusters
 		rb := *c.PodServiceAccountRoleBinding
 		_, err = c.KubeClient.RoleBindings(namespace).Create(&rb)
 		if err != nil {
-			return fmt.Errorf("cannot bind the pod service account %q defined in the config map to the cluster role in the %q namespace: %v", podServiceAccountName, namespace, err)
+			return fmt.Errorf("cannot bind the pod service account %q defined in the configuration to the cluster role in the %q namespace: %v", podServiceAccountName, namespace, err)
 		}
 
 		c.logger.Infof("successfully deployed the role binding for the pod service account %q to the %q namespace", podServiceAccountName, namespace)
@@ -108,9 +108,8 @@ type Config struct {
 
 	WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to'
 	EtcdHost         string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS
-	DockerImage      string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"`
+	DockerImage      string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-12:1.6-p2"`
 	Sidecars         map[string]string `name:"sidecar_docker_images"`
-	// default name `operator` enables backward compatibility with the older ServiceAccountName field
 	PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"`
 	// value of this string must be valid JSON or YAML; see initPodServiceAccount
 	PodServiceAccountDefinition string `name:"pod_service_account_definition" default:""`
ui/Makefile (13 changed lines)
@@ -5,9 +5,13 @@ VERSION ?= $(shell git describe --tags --always --dirty)
 TAG ?= $(VERSION)
 GITHEAD = $(shell git rev-parse --short HEAD)
 GITURL = $(shell git config --get remote.origin.url)
-GITSTATU = $(shell git status --porcelain || echo 'no changes')
+GITSTATUS = $(shell git status --porcelain || echo 'no changes')
 TTYFLAGS = $(shell test -t 0 && echo '-it')
 
+ifdef CDP_PULL_REQUEST_NUMBER
+CDP_TAG := -${CDP_BUILD_VERSION}
+endif
+
 default: docker
 
 clean:
@@ -24,11 +28,12 @@ docker: appjs
 	echo `(env)`
 	echo "Tag ${TAG}"
 	echo "Version ${VERSION}"
+	echo "CDP tag ${CDP_TAG}"
 	echo "git describe $(shell git describe --tags --always --dirty)"
-	docker build --rm -t "$(IMAGE):$(TAG)" -f Dockerfile .
+	docker build --rm -t "$(IMAGE):$(TAG)$(CDP_TAG)" -f Dockerfile .
 
-push: docker
-	docker push "$(IMAGE):$(TAG)"
+push:
+	docker push "$(IMAGE):$(TAG)$(CDP_TAG)"
 
 mock:
 	docker run -it -p 8080:8080 "$(IMAGE):$(TAG)" --mock