diff --git a/charts/postgres-operator-ui/Chart.yaml b/charts/postgres-operator-ui/Chart.yaml index 4418675b6..a6e46ab3e 100644 --- a/charts/postgres-operator-ui/Chart.yaml +++ b/charts/postgres-operator-ui/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 name: postgres-operator-ui -version: 0.1.0 -appVersion: 1.3.0 +version: 1.4.0 +appVersion: 1.4.0 home: https://github.com/zalando/postgres-operator description: Postgres Operator UI provides a graphical interface for a convenient database-as-a-service user experience keywords: diff --git a/charts/postgres-operator-ui/index.yaml b/charts/postgres-operator-ui/index.yaml new file mode 100644 index 000000000..0cd03d6e5 --- /dev/null +++ b/charts/postgres-operator-ui/index.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +entries: + postgres-operator-ui: + - apiVersion: v1 + appVersion: 1.4.0 + created: "2020-02-24T15:32:47.610967635+01:00" + description: Postgres Operator UI provides a graphical interface for a convenient + database-as-a-service user experience + digest: 00e0eff7056d56467cd5c975657fbb76c8d01accd25a4b7aca81bc42aeac961d + home: https://github.com/zalando/postgres-operator + keywords: + - postgres + - operator + - ui + - cloud-native + - patroni + - spilo + maintainers: + - email: opensource@zalando.de + name: Zalando + - email: sk@sik-net.de + name: siku4 + name: postgres-operator-ui + sources: + - https://github.com/zalando/postgres-operator + urls: + - postgres-operator-ui-1.4.0.tgz + version: 1.4.0 +generated: "2020-02-24T15:32:47.610348278+01:00" diff --git a/charts/postgres-operator-ui/postgres-operator-ui-1.4.0.tgz b/charts/postgres-operator-ui/postgres-operator-ui-1.4.0.tgz new file mode 100644 index 000000000..8d1276dd1 Binary files /dev/null and b/charts/postgres-operator-ui/postgres-operator-ui-1.4.0.tgz differ diff --git a/charts/postgres-operator-ui/values.yaml b/charts/postgres-operator-ui/values.yaml index dca093410..148a687c3 100644 --- a/charts/postgres-operator-ui/values.yaml +++ b/charts/postgres-operator-ui/values.yaml @@ -8,7 +8,7 @@ replicaCount: 1 image: registry: registry.opensource.zalan.do repository: acid/postgres-operator-ui - tag: v1.2.0 + tag: v1.4.0 pullPolicy: "IfNotPresent" rbac: diff --git a/charts/postgres-operator/Chart.yaml b/charts/postgres-operator/Chart.yaml index 08e242a53..89468dfa4 100644 --- a/charts/postgres-operator/Chart.yaml +++ b/charts/postgres-operator/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 name: postgres-operator -version: 1.3.0 -appVersion: 1.3.0 +version: 1.4.0 +appVersion: 1.4.0 home: https://github.com/zalando/postgres-operator description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes keywords: diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index b4b676236..af535e2c8 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -94,7 +94,7 @@ spec: s3_secret_access_key: type: string s3_force_path_style: - type: string + type: boolean s3_wal_path: type: string timestamp: diff --git a/charts/postgres-operator/index.yaml b/charts/postgres-operator/index.yaml index 84502f6a6..53181d74a 100644 --- a/charts/postgres-operator/index.yaml +++ b/charts/postgres-operator/index.yaml @@ -1,9 +1,31 @@ apiVersion: v1 entries: postgres-operator: + - apiVersion: v1 + appVersion: 1.4.0 + created: "2020-02-20T17:39:25.443276193+01:00" + description: Postgres Operator creates and manages PostgreSQL clusters running + in Kubernetes + digest: 
b93ccde5581deb8ed0857136b8ce74ca3f1b7240438fa4415f705764a1300bed + home: https://github.com/zalando/postgres-operator + keywords: + - postgres + - operator + - cloud-native + - patroni + - spilo + maintainers: + - email: opensource@zalando.de + name: Zalando + name: postgres-operator + sources: + - https://github.com/zalando/postgres-operator + urls: + - postgres-operator-1.4.0.tgz + version: 1.4.0 - apiVersion: v1 appVersion: 1.3.0 - created: "2019-12-17T12:58:49.477140129+01:00" + created: "2020-02-20T17:39:25.441532163+01:00" description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes digest: 7e788fd37daec76a01f6d6f9fe5be5b54f5035e4eba0041e80a760d656537325 @@ -25,7 +47,7 @@ entries: version: 1.3.0 - apiVersion: v1 appVersion: 1.2.0 - created: "2019-12-17T12:58:49.475844233+01:00" + created: "2020-02-20T17:39:25.440278302+01:00" description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes digest: d10710c7cf19f4e266e7704f5d1e98dcfc61bee3919522326c35c22ca7d2f2bf @@ -47,4 +69,4 @@ entries: urls: - postgres-operator-1.2.0.tgz version: 1.2.0 -generated: "2019-12-17T12:58:49.474719294+01:00" +generated: "2020-02-20T17:39:25.439168098+01:00" diff --git a/charts/postgres-operator/postgres-operator-1.4.0.tgz b/charts/postgres-operator/postgres-operator-1.4.0.tgz new file mode 100644 index 000000000..a988ed236 Binary files /dev/null and b/charts/postgres-operator/postgres-operator-1.4.0.tgz differ diff --git a/charts/postgres-operator/templates/clusterrole.yaml b/charts/postgres-operator/templates/clusterrole.yaml index 9a4165797..7b3dd462d 100644 --- a/charts/postgres-operator/templates/clusterrole.yaml +++ b/charts/postgres-operator/templates/clusterrole.yaml @@ -63,9 +63,9 @@ rules: - secrets verbs: - create - - update - delete - get + - update # to check nodes for node readiness label - apiGroups: - "" @@ -102,9 +102,9 @@ rules: - delete - get - list - - watch - - update - patch + - update + - watch # to resize the filesystem in Spilo pods when increasing volume size - apiGroups: - "" diff --git a/charts/postgres-operator/values-crd.yaml b/charts/postgres-operator/values-crd.yaml index 08c255a04..b5d561807 100644 --- a/charts/postgres-operator/values-crd.yaml +++ b/charts/postgres-operator/values-crd.yaml @@ -1,7 +1,7 @@ image: registry: registry.opensource.zalan.do repository: acid/postgres-operator - tag: v1.3.1 + tag: v1.4.0 pullPolicy: "IfNotPresent" # Optionally specify an array of imagePullSecrets. @@ -24,7 +24,7 @@ configGeneral: # etcd connection string for Patroni. Empty uses K8s-native DCS. etcd_host: "" # Spilo docker image - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16 + docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2 # max number of instances in Postgres cluster. -1 = no limit min_instances: -1 # min number of instances in Postgres cluster. 
-1 = no limit @@ -100,8 +100,14 @@ configKubernetes: pod_management_policy: "ordered_ready" # label assigned to the Postgres pods (and services/endpoints) pod_role_label: spilo-role + # service account definition as JSON/YAML string to be used by postgres cluster pods + # pod_service_account_definition: "" + # name of service account to be used by postgres cluster pods pod_service_account_name: "postgres-pod" + # role binding definition as JSON/YAML string to be used by pod service account + # pod_service_account_role_binding_definition: "" + # Postgres pods are terminated forcefully after this timeout pod_terminate_grace_period: 5m # template for database user secrets generated by the operator diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 78624e0bd..07ba76285 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -1,7 +1,7 @@ image: registry: registry.opensource.zalan.do repository: acid/postgres-operator - tag: v1.3.1 + tag: v1.4.0 pullPolicy: "IfNotPresent" # Optionally specify an array of imagePullSecrets. @@ -24,7 +24,7 @@ configGeneral: # etcd connection string for Patroni. Empty uses K8s-native DCS. etcd_host: "" # Spilo docker image - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16 + docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2 # max number of instances in Postgres cluster. -1 = no limit min_instances: "-1" # min number of instances in Postgres cluster. -1 = no limit @@ -93,8 +93,14 @@ configKubernetes: pod_management_policy: "ordered_ready" # label assigned to the Postgres pods (and services/endpoints) pod_role_label: spilo-role + # service account definition as JSON/YAML string to be used by postgres cluster pods + # pod_service_account_definition: "" + # name of service account to be used by postgres cluster pods pod_service_account_name: "postgres-pod" + # role binding definition as JSON/YAML string to be used by pod service account + # pod_service_account_role_binding_definition: "" + # Postgres pods are terminated forcefully after this timeout pod_terminate_grace_period: 5m # template for database user secrets generated by the operator diff --git a/delivery.yaml b/delivery.yaml index be35d3e27..144448ea9 100644 --- a/delivery.yaml +++ b/delivery.yaml @@ -66,20 +66,13 @@ pipeline: - desc: 'Build and push Docker image' cmd: | cd ui - image_base='registry-write.opensource.zalan.do/acid/postgres-operator-ui' - if [[ "${CDP_TARGET_BRANCH}" == 'master' && -z "${CDP_PULL_REQUEST_NUMBER}" ]] + IS_PR_BUILD=${CDP_PULL_REQUEST_NUMBER+"true"} + if [[ ${CDP_TARGET_BRANCH} == "master" && ${IS_PR_BUILD} != "true" ]] then - image="${image_base}" + IMAGE=registry-write.opensource.zalan.do/acid/postgres-operator-ui else - image="${image_base}-test" + IMAGE=registry-write.opensource.zalan.do/acid/postgres-operator-ui-test fi - image_with_tag="${image}:c${CDP_BUILD_VERSION}" - - if docker pull "${image}" - then - docker build --cache-from="${image}" -t "${image_with_tag}" . - else - docker build -t "${image_with_tag}" . - fi - - docker push "${image_with_tag}" + export IMAGE + make docker + make push diff --git a/docs/administrator.md b/docs/administrator.md index 7492fe93b..5f56c8be5 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -11,11 +11,11 @@ switchover (planned failover) of the master to the Pod with new minor version. The switch should usually take less than 5 seconds, still clients have to reconnect. 
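A note on the `delivery.yaml` change above: `IS_PR_BUILD=${CDP_PULL_REQUEST_NUMBER+"true"}` relies on bash's `${parameter+word}` expansion, which yields `word` only when the variable is set at all. A minimal sketch of that behavior (the `echo` lines are illustration only, not part of the pipeline):

```bash
#!/usr/bin/env bash
# ${VAR+word} expands to "word" if VAR is set (even to the empty string),
# and to nothing if VAR is unset
unset CDP_PULL_REQUEST_NUMBER
IS_PR_BUILD=${CDP_PULL_REQUEST_NUMBER+"true"}
echo "unset -> '${IS_PR_BUILD}'"   # prints: unset -> ''

CDP_PULL_REQUEST_NUMBER=42
IS_PR_BUILD=${CDP_PULL_REQUEST_NUMBER+"true"}
echo "set   -> '${IS_PR_BUILD}'"   # prints: set   -> 'true'
```

This is why PR builds push to the `-test` image repository, while only non-PR builds of `master` push to the main one.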
-Major version upgrades are supported via [cloning](user.md#clone-directly). The -new cluster manifest must have a higher `version` string than the source cluster -and will be created from a basebackup. Depending of the cluster size, downtime -in this case can be significant as writes to the database should be stopped and -all WAL files should be archived first before cloning is started. +Major version upgrades are supported via [cloning](user.md#how-to-clone-an-existing-postgresql-cluster). +The new cluster manifest must have a higher `version` string than the source +cluster and will be created from a basebackup. Depending on the cluster size, +downtime in this case can be significant as writes to the database should be +stopped and all WAL files should be archived first before cloning is started. Note, that simply changing the version string in the `postgresql` manifest does not work at present and leads to errors. Neither Patroni nor Postgres Operator diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index ca972c22b..ad519b657 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -110,8 +110,10 @@ Those are top-level keys, containing both leaf keys and groups. * **min_instances** operator will run at least the number of instances for any given Postgres - cluster equal to the value of this parameter. When `-1` is specified, no - limits are applied. The default is `-1`. + cluster equal to the value of this parameter. Standby clusters can still run + with `numberOfInstances: 1` as this is the [recommended setup](../user.md#setting-up-a-standby-cluster). + When `-1` is specified for `min_instances`, no limits are applied. The default + is `-1`. * **resync_period** period between consecutive sync requests. The default is `30m`. diff --git a/docs/user.md b/docs/user.md index e1baf9ad1..295c149bd 100644 --- a/docs/user.md +++ b/docs/user.md @@ -254,29 +254,22 @@ spec: ## How to clone an existing PostgreSQL cluster -You can spin up a new cluster as a clone of the existing one, using a clone +You can spin up a new cluster as a clone of the existing one, using a `clone` section in the spec. There are two options here: -* Clone directly from a source cluster using `pg_basebackup` -* Clone from an S3 bucket +* Clone from an S3 bucket (recommended) +* Clone directly from a source cluster -### Clone directly - -```yaml -spec: - clone: - cluster: "acid-batman" -``` - -Here `cluster` is a name of a source cluster that is going to be cloned. The -cluster to clone is assumed to be running and the clone procedure invokes -`pg_basebackup` from it. The operator will setup the cluster to be cloned to -connect to the service of the source cluster by name (if the cluster is called -test, then the connection string will look like host=test port=5432), which -means that you can clone only from clusters within the same namespace. +Note that cloning can also be used for [major version upgrades](administrator.md#minor-and-major-version-upgrade) +of PostgreSQL. ### Clone from S3 +Cloning from S3 has the advantage that there is no impact on your production +database. A new Postgres cluster is created by restoring the data of another +source cluster. If you create it in the same Kubernetes environment, use a +different name. + ```yaml spec: clone: @@ -287,7 +280,8 @@ spec: Here `cluster` is a name of a source cluster that is going to be cloned. A new cluster will be cloned from S3, using the latest backup before the `timestamp`.
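To make the S3 variant concrete, a complete `clone` section could look like the following sketch (the cluster name, `uid` and `timestamp` values are made up for illustration; the role of `uid` is explained right below):

```yaml
spec:
  clone:
    # name and uid of the source cluster; the uid comes from its metadata
    cluster: "acid-batman"
    uid: "efd12e58-5786-11e8-b5a7-06148230260c"
    # restore from the latest backup taken before this point in time;
    # the +00:00 (UTC) time zone suffix is required
    timestamp: "2020-02-04T12:49:03+00:00"
```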
-In this case, `uid` field is also mandatory - operator will use it to find a +Note that a time zone is required for `timestamp` in the format of +00:00, which +is UTC. The `uid` field is also mandatory. The operator will use it to find a correct key inside an S3 bucket. You can find this field in the metadata of the source cluster: @@ -299,9 +293,6 @@ metadata: uid: efd12e58-5786-11e8-b5a7-06148230260c ``` -Note that timezone is required for `timestamp`. Otherwise, offset is relative -to UTC, see [RFC 3339 section 5.6) 3339 section 5.6](https://www.ietf.org/rfc/rfc3339.txt). - For non AWS S3 following settings can be set to support cloning from other S3 implementations: @@ -317,14 +308,35 @@ spec: s3_force_path_style: true ``` +### Clone directly + +Another way to get a fresh copy of your source DB cluster is via basebackup. To +use this feature, simply leave out the timestamp field from the clone section. +The operator will connect to the service of the source cluster by name. If the +cluster is called test, then the connection string will look like host=test +port=5432, which means that you can clone only from clusters within the same +namespace. + +```yaml +spec: + clone: + cluster: "acid-batman" +``` + +Be aware that on a busy source database this can result in an elevated load! + ## Setting up a standby cluster -Standby clusters are like normal cluster but they are streaming from a remote -cluster. As the first version of this feature, the only scenario covered by -operator is to stream from a WAL archive of the master. Following the more -popular infrastructure of using Amazon's S3 buckets, it is mentioned as -`s3_wal_path` here. To start a cluster as standby add the following `standby` -section in the YAML file: +A standby cluster is a [Patroni feature](https://github.com/zalando/patroni/blob/master/docs/replica_bootstrap.rst#standby-cluster) +that first clones a database and keeps replicating changes afterwards. As the +replication happens by means of archived WAL files (stored on S3 or +the equivalent at other cloud providers), the standby cluster can exist in a +different location than its source database. Unlike cloning, the PostgreSQL +version between source and target cluster has to be the same. + +To start a cluster as standby, add the following `standby` section in the YAML +file and specify the S3 bucket path. An empty path will result in an error and +no statefulset will be created. ```yaml spec: @@ -332,20 +344,65 @@ spec: s3_wal_path: "s3 bucket path to the master" ``` -Things to note: +At the moment, the operator only allows streaming from the WAL archive of the +master. Thus, it is recommended to deploy standby clusters with only [one pod](../manifests/standby-manifest.yaml#L10). +You can raise the instance count when detaching. Note that the same pod role +labels as for normal clusters are used: the standby leader is labeled as +`master`. -- An empty string in the `s3_wal_path` field of the standby cluster will result - in an error and no statefulset will be created. -- Only one pod can be deployed for stand-by cluster. -- To manually promote the standby_cluster, use `patronictl` and remove config - entry. -- There is no way to transform a non-standby cluster to a standby cluster - through the operator. Adding the standby section to the manifest of a running - Postgres cluster will have no effect.
However, it can be done through Patroni - by adding the [standby_cluster](https://github.com/zalando/patroni/blob/bd2c54581abb42a7d3a3da551edf0b8732eefd27/docs/replica_bootstrap.rst#standby-cluster) - section using `patronictl edit-config`. Note that the transformed standby - cluster will not be doing any streaming. It will be in standby mode and allow - read-only transactions only. +### Providing credentials of source cluster + +A standby cluster replicates the data (including users and passwords) from +the source database and is read-only. The system and application users (like +standby, postgres, etc.) all have a password that does not match the credentials +stored in the secrets created by the operator. One solution is to create +secrets beforehand and paste in the credentials of the source cluster. +Otherwise, you will see errors in the Postgres logs saying users cannot log in +and the operator logs will complain about not being able to sync resources. + +When you only run a standby leader, you can safely ignore this, as it will be +sorted out once the cluster is detached from the source. It is also harmless if +you never plan to detach it. But if you have created standby replicas, too, fix the +credentials right away. WAL files will pile up on the standby leader if the +standby replica(s) cannot establish a connection to it. You can also edit the +secrets after their creation. Find them by: + +```bash +kubectl get secrets --all-namespaces | grep <standby cluster name> +``` + +### Promote the standby + +One big advantage of standby clusters is that they can be promoted to a proper +database cluster. This means it will stop replicating changes from the source +and start accepting writes itself. This mechanism makes it possible to move +databases from one place to another with minimal downtime. Currently, the +operator does not support promoting a standby cluster. It has to be done +manually using `patronictl edit-config` inside the postgres container of the +standby leader pod. Remove the following lines from the YAML structure and the +leader promotion happens immediately. Before doing so, make sure that the +standby is not behind the source database. + +```yaml +standby_cluster: + create_replica_methods: + - bootstrap_standby_with_wale + - basebackup_fast_xlog + restore_command: envdir "/home/postgres/etc/wal-e.d/env-standby" /scripts/restore_command.sh + "%f" "%p" +``` + +Finally, remove the `standby` section from the postgres cluster manifest. + +### Turn a normal cluster into a standby + +There is no way to transform a non-standby cluster to a standby cluster through +the operator. Adding the `standby` section to the manifest of a running +Postgres cluster will have no effect. But, as explained in the previous +paragraph, it can be done manually through `patronictl edit-config`, this time +by adding the `standby_cluster` section to the Patroni configuration. However, +the transformed standby cluster will not be doing any streaming. It will be in +standby mode and allow read-only transactions only.
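Both manual conversions above go through Patroni rather than the operator. For the promotion case, a session could look roughly like this sketch (the pod name `acid-standby-cluster-0` is a made-up example):

```bash
# open the Patroni configuration in an editor inside the standby leader pod
kubectl exec -it acid-standby-cluster-0 -- patronictl edit-config
# delete the standby_cluster block shown above, save and exit;
# the leader promotion happens immediately

# verify that the former standby leader now runs as a primary
kubectl exec -it acid-standby-cluster-0 -- patronictl list
```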
## Sidecar Support diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 12106601e..6760e815d 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase): k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml") k8s.wait_for_pod_start('spilo-role=master') + k8s.wait_for_pod_start('spilo-role=replica') @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_enable_load_balancer(self): @@ -107,141 +108,6 @@ class EndToEndTestCase(unittest.TestCase): self.assertEqual(repl_svc_type, 'ClusterIP', "Expected ClusterIP service type for replica, found {}".format(repl_svc_type)) - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_min_resource_limits(self): - ''' - Lower resource limits below configured minimum and let operator fix it - ''' - k8s = self.k8s - cluster_label = 'cluster-name=acid-minimal-cluster' - _, failover_targets = k8s.get_pg_nodes(cluster_label) - - # configure minimum boundaries for CPU and memory limits - minCPULimit = '500m' - minMemoryLimit = '500Mi' - patch_min_resource_limits = { - "data": { - "min_cpu_limit": minCPULimit, - "min_memory_limit": minMemoryLimit - } - } - k8s.update_config(patch_min_resource_limits) - - # lower resource limits below minimum - pg_patch_resources = { - "spec": { - "resources": { - "requests": { - "cpu": "10m", - "memory": "50Mi" - }, - "limits": { - "cpu": "200m", - "memory": "200Mi" - } - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) - k8s.wait_for_master_failover(failover_targets) - - pods = k8s.api.core_v1.list_namespaced_pod( - 'default', label_selector='spilo-role=master,' + cluster_label).items - self.assert_master_is_unique() - masterPod = pods[0] - - self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit, - "Expected CPU limit {}, found {}" - .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu'])) - self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit, - "Expected memory limit {}, found {}" - .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory'])) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_multi_namespace_support(self): - ''' - Create a customized Postgres cluster in a non-default namespace. - ''' - k8s = self.k8s - - with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: - pg_manifest = yaml.safe_load(f) - pg_manifest["metadata"]["namespace"] = self.namespace - yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) - - k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") - k8s.wait_for_pod_start("spilo-role=master", self.namespace) - self.assert_master_is_unique(self.namespace, "acid-test-cluster") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_scaling(self): - ''' - Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. - ''' - k8s = self.k8s - labels = "cluster-name=acid-minimal-cluster" - - k8s.wait_for_pg_to_scale(3) - self.assertEqual(3, k8s.count_pods_with_label(labels)) - self.assert_master_is_unique() - - k8s.wait_for_pg_to_scale(2) - self.assertEqual(2, k8s.count_pods_with_label(labels)) - self.assert_master_is_unique() - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_taint_based_eviction(self): - ''' - Add taint "postgres=:NoExecute" to node with master. This must cause a failover. 
- ''' - k8s = self.k8s - cluster_label = 'cluster-name=acid-minimal-cluster' - - # get nodes of master and replica(s) (expected target of new master) - current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label) - num_replicas = len(failover_targets) - - # if all pods live on the same node, failover will happen to other worker(s) - failover_targets = [x for x in failover_targets if x != current_master_node] - if len(failover_targets) == 0: - nodes = k8s.api.core_v1.list_node() - for n in nodes.items: - if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node: - failover_targets.append(n.metadata.name) - - # taint node with postgres=:NoExecute to force failover - body = { - "spec": { - "taints": [ - { - "effect": "NoExecute", - "key": "postgres" - } - ] - } - } - - # patch node and test if master is failing over to one of the expected nodes - k8s.api.core_v1.patch_node(current_master_node, body) - k8s.wait_for_master_failover(failover_targets) - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label) - self.assertNotEqual(current_master_node, new_master_node, - "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets)) - self.assertEqual(num_replicas, len(new_replica_nodes), - "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes))) - self.assert_master_is_unique() - - # undo the tainting - body = { - "spec": { - "taints": [] - } - } - k8s.api.core_v1.patch_node(new_master_node, body) - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_logical_backup_cron_job(self): ''' @@ -306,6 +172,133 @@ class EndToEndTestCase(unittest.TestCase): self.assertEqual(0, len(jobs), "Expected 0 logical backup jobs, found {}".format(len(jobs))) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_min_resource_limits(self): + ''' + Lower resource limits below configured minimum and let operator fix it + ''' + k8s = self.k8s + cluster_label = 'cluster-name=acid-minimal-cluster' + labels = 'spilo-role=master,' + cluster_label + _, failover_targets = k8s.get_pg_nodes(cluster_label) + + # configure minimum boundaries for CPU and memory limits + minCPULimit = '500m' + minMemoryLimit = '500Mi' + patch_min_resource_limits = { + "data": { + "min_cpu_limit": minCPULimit, + "min_memory_limit": minMemoryLimit + } + } + k8s.update_config(patch_min_resource_limits) + + # lower resource limits below minimum + pg_patch_resources = { + "spec": { + "resources": { + "requests": { + "cpu": "10m", + "memory": "50Mi" + }, + "limits": { + "cpu": "200m", + "memory": "200Mi" + } + } + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) + k8s.wait_for_pod_failover(failover_targets, labels) + k8s.wait_for_pod_start('spilo-role=replica') + + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector=labels).items + self.assert_master_is_unique() + masterPod = pods[0] + + self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit, + "Expected CPU limit {}, found {}" + .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu'])) + self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit, + "Expected memory limit {}, found {}" + .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory'])) + + 
@timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_multi_namespace_support(self): + ''' + Create a customized Postgres cluster in a non-default namespace. + ''' + k8s = self.k8s + + with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: + pg_manifest = yaml.safe_load(f) + pg_manifest["metadata"]["namespace"] = self.namespace + yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) + + k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") + k8s.wait_for_pod_start("spilo-role=master", self.namespace) + self.assert_master_is_unique(self.namespace, "acid-test-cluster") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_node_readiness_label(self): + ''' + Remove node readiness label from master node. This must cause a failover. + ''' + k8s = self.k8s + cluster_label = 'cluster-name=acid-minimal-cluster' + labels = 'spilo-role=master,' + cluster_label + readiness_label = 'lifecycle-status' + readiness_value = 'ready' + + # get nodes of master and replica(s) (expected target of new master) + current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) + num_replicas = len(current_replica_nodes) + failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) + + # add node_readiness_label to potential failover nodes + patch_readiness_label = { + "metadata": { + "labels": { + readiness_label: readiness_value + } + } + } + for failover_target in failover_targets: + k8s.api.core_v1.patch_node(failover_target, patch_readiness_label) + + # define node_readiness_label in config map which should trigger a failover of the master + patch_readiness_label_config = { + "data": { + "node_readiness_label": readiness_label + ':' + readiness_value, + } + } + k8s.update_config(patch_readiness_label_config) + new_master_node, new_replica_nodes = self.assert_failover( + current_master_node, num_replicas, failover_targets, cluster_label) + + # patch also node where master ran before + k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label) + # toggle pod anti affinity to move replica away from master node + self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_scaling(self): + ''' + Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. + ''' + k8s = self.k8s + labels = "cluster-name=acid-minimal-cluster" + + k8s.wait_for_pg_to_scale(3) + self.assertEqual(3, k8s.count_pods_with_label(labels)) + self.assert_master_is_unique() + + k8s.wait_for_pg_to_scale(2) + self.assertEqual(2, k8s.count_pods_with_label(labels)) + self.assert_master_is_unique() + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_service_annotations(self): ''' @@ -346,18 +339,116 @@ class EndToEndTestCase(unittest.TestCase): } k8s.update_config(unpatch_custom_service_annotations) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_taint_based_eviction(self): + ''' + Add taint "postgres=:NoExecute" to node with master. This must cause a failover. 
+ ''' + k8s = self.k8s + cluster_label = 'cluster-name=acid-minimal-cluster' + + # get nodes of master and replica(s) (expected target of new master) + current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) + num_replicas = len(current_replica_nodes) + failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) + + # taint node with postgres=:NoExecute to force failover + body = { + "spec": { + "taints": [ + { + "effect": "NoExecute", + "key": "postgres" + } + ] + } + } + + # patch node and test if master is failing over to one of the expected nodes + k8s.api.core_v1.patch_node(current_master_node, body) + new_master_node, new_replica_nodes = self.assert_failover( + current_master_node, num_replicas, failover_targets, cluster_label) + + # add toleration to pods + patch_toleration_config = { + "data": { + "toleration": "key:postgres,operator:Exists,effect:NoExecute" + } + } + k8s.update_config(patch_toleration_config) + + # toggle pod anti affinity to move replica away from master node + self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) + + def get_failover_targets(self, master_node, replica_nodes): + ''' + If all pods live on the same node, failover will happen to other worker(s) + ''' + k8s = self.k8s + + failover_targets = [x for x in replica_nodes if x != master_node] + if len(failover_targets) == 0: + nodes = k8s.api.core_v1.list_node() + for n in nodes.items: + if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node: + failover_targets.append(n.metadata.name) + + return failover_targets + + def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label): + ''' + Check if master is failing over. The replica should move first to be the switchover target. + ''' + k8s = self.k8s + k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label) + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + + new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label) + self.assertNotEqual(current_master_node, new_master_node, + "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets)) + self.assertEqual(num_replicas, len(new_replica_nodes), + "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes))) + self.assert_master_is_unique() + + return new_master_node, new_replica_nodes + def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"): ''' Check that there is a single pod in the k8s cluster with the label "spilo-role=master" To be called manually after operations that affect pods ''' - k8s = self.k8s labels = 'spilo-role=master,cluster-name=' + clusterName num_of_master_pods = k8s.count_pods_with_label(labels, namespace) self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods)) + def assert_distributed_pods(self, master_node, replica_nodes, cluster_label): + ''' + Other tests can lead to the situation that master and replica are on the same node. + Toggle pod anti affinity to distribute pods across nodes (replica in particular).
+ ''' + k8s = self.k8s + failover_targets = self.get_failover_targets(master_node, replica_nodes) + + # enable pod anti affinity in config map which should trigger movement of replica + patch_enable_antiaffinity = { + "data": { + "enable_pod_antiaffinity": "true" + } + } + k8s.update_config(patch_enable_antiaffinity) + self.assert_failover( + master_node, len(replica_nodes), failover_targets, cluster_label) + + # disable pod anti affinity again + patch_disable_antiaffinity = { + "data": { + "enable_pod_antiaffinity": "false" + } + } + k8s.update_config(patch_disable_antiaffinity) + class K8sApi: @@ -445,15 +536,14 @@ class K8s: def count_pods_with_label(self, labels, namespace='default'): return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items) - def wait_for_master_failover(self, expected_master_nodes, namespace='default'): + def wait_for_pod_failover(self, failover_targets, labels, namespace='default'): pod_phase = 'Failing over' - new_master_node = '' - labels = 'spilo-role=master,cluster-name=acid-minimal-cluster' + new_pod_node = '' - while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes): + while (pod_phase != 'Running') or (new_pod_node not in failover_targets): pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items if pods: - new_master_node = pods[0].spec.node_name + new_pod_node = pods[0].spec.node_name pod_phase = pods[0].status.phase time.sleep(self.RETRY_TIMEOUT_SEC) diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 206ab596b..3865e2e0f 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -7,7 +7,7 @@ metadata: # annotations: # "acid.zalan.do/controller": "second-operator" spec: - dockerImage: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16 + dockerImage: registry.opensource.zalan.do/acid/spilo-12:1.6-p2 teamId: "acid" volume: size: 1Gi diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 4289a134c..0300b5495 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -19,7 +19,7 @@ data: # default_cpu_request: 100m # default_memory_limit: 500Mi # default_memory_request: 100Mi - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16 + docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2 # enable_admin_role_for_users: "true" # enable_crd_validation: "true" # enable_database_access: "true" @@ -63,7 +63,9 @@ data: pod_label_wait_timeout: 10m pod_management_policy: "ordered_ready" pod_role_label: spilo-role + # pod_service_account_definition: "" pod_service_account_name: "postgres-pod" + # pod_service_account_role_binding_definition: "" pod_terminate_grace_period: 5m # postgres_superuser_teams: "postgres_superusers" # protected_role_names: "admin" diff --git a/manifests/operator-service-account-rbac.yaml b/manifests/operator-service-account-rbac.yaml index 80fcd89ef..e5bc49f83 100644 --- a/manifests/operator-service-account-rbac.yaml +++ b/manifests/operator-service-account-rbac.yaml @@ -64,9 +64,9 @@ rules: - secrets verbs: - create - - update - delete - get + - update # to check nodes for node readiness label - apiGroups: - "" @@ -103,9 +103,9 @@ rules: - delete - get - list - - watch - - update - patch + - update + - watch # to resize the filesystem in Spilo pods when increasing volume size - apiGroups: - "" diff --git a/manifests/postgres-operator.yaml b/manifests/postgres-operator.yaml index f67ef848f..4b254822c 100644 ---
a/manifests/postgres-operator.yaml +++ b/manifests/postgres-operator.yaml @@ -15,7 +15,7 @@ spec: serviceAccountName: postgres-operator containers: - name: postgres-operator - image: registry.opensource.zalan.do/acid/postgres-operator:v1.3.1 + image: registry.opensource.zalan.do/acid/postgres-operator:v1.4.0 imagePullPolicy: IfNotPresent resources: requests: diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 695a4e9c5..33838b2a9 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -5,7 +5,7 @@ metadata: configuration: # enable_crd_validation: true etcd_host: "" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16 + docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2 # enable_shm_volume: true max_instances: -1 min_instances: -1 @@ -110,7 +110,7 @@ configuration: log_statement: all # teams_api_url: "" logging_rest_api: - api_port: 8008 + api_port: 8080 cluster_history_entries: 1000 ring_log_lines: 100 scalyr: diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index 276bc94b8..453916b26 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -58,7 +58,7 @@ spec: s3_secret_access_key: type: string s3_force_path_style: - type: string + type: boolean s3_wal_path: type: string timestamp: diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 4cfc9a9e6..28dfa1566 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -160,7 +160,7 @@ var PostgresCRDResourceValidation = apiextv1beta1.CustomResourceValidation{ Type: "string", }, "s3_force_path_style": { - Type: "string", + Type: "boolean", }, "s3_wal_path": { Type: "string", diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 4468c8428..e2251a67c 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1048,11 +1048,13 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 { cur := spec.NumberOfInstances newcur := cur - /* Limit the max number of pods to one, if this is standby-cluster */ if spec.StandbyCluster != nil { - c.logger.Info("Standby cluster can have maximum of 1 pod") - min = 1 - max = 1 + if newcur == 1 { + min = newcur + max = newcur + } else { + c.logger.Warningf("operator only supports standby clusters with 1 pod") + } } if max >= 0 && newcur > max { newcur = max diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 7a776dec1..287e2bd30 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -227,7 +227,7 @@ func (c *Controller) initRoleBinding() { switch { case err != nil: - panic(fmt.Errorf("unable to parse the definition of the role binding for the pod service account definition from the operator configuration: %v", err)) + panic(fmt.Errorf("unable to parse the role binding definition from the operator configuration: %v", err)) case groupVersionKind.Kind != "RoleBinding": panic(fmt.Errorf("role binding definition in the operator configuration defines another type of resource: %v", groupVersionKind.Kind)) default: diff --git a/pkg/controller/node.go b/pkg/controller/node.go index 6f7befa27..8052458c3 100644 --- a/pkg/controller/node.go +++ b/pkg/controller/node.go @@ -5,7 +5,7 @@ import ( "time" "github.com/zalando/postgres-operator/pkg/util/retryutil" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" @@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) { } func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { - + // retry to move master until configured timeout is reached err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout, func() (bool, error) { err := c.attemptToMoveMasterPodsOffNode(node) if err != nil { - return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute") + return false, err } return true, nil }, ) if err != nil { - c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout) + c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err) } } diff --git a/pkg/controller/postgresql.go b/pkg/controller/postgresql.go index 346c08a5c..c5c2e6983 100644 --- a/pkg/controller/postgresql.go +++ b/pkg/controller/postgresql.go @@ -527,11 +527,11 @@ func (c *Controller) submitRBACCredentials(event ClusterEvent) error { namespace := event.NewSpec.GetNamespace() if err := c.createPodServiceAccount(namespace); err != nil { - return fmt.Errorf("could not create pod service account %v : %v", c.opConfig.PodServiceAccountName, err) + return fmt.Errorf("could not create pod service account %q : %v", c.opConfig.PodServiceAccountName, err) } if err := c.createRoleBindings(namespace); err != nil { - return fmt.Errorf("could not create role binding %v : %v", c.PodServiceAccountRoleBinding.Name, err) + return fmt.Errorf("could not create role binding %q : %v", c.PodServiceAccountRoleBinding.Name, err) } return nil } @@ -542,16 +542,16 @@ func (c *Controller) createPodServiceAccount(namespace string) error { _, err := c.KubeClient.ServiceAccounts(namespace).Get(podServiceAccountName, metav1.GetOptions{}) if k8sutil.ResourceNotFound(err) { - c.logger.Infof(fmt.Sprintf("creating pod service account in the namespace %v", namespace)) + c.logger.Infof(fmt.Sprintf("creating pod service account %q in the %q namespace", podServiceAccountName, namespace)) // get a separate copy of service account // to prevent a race condition when setting a namespace for many clusters sa := *c.PodServiceAccount if _, err = c.KubeClient.ServiceAccounts(namespace).Create(&sa); err != nil { - return fmt.Errorf("cannot deploy the pod service account %v defined in the config map to the %v namespace: %v", podServiceAccountName, namespace, err) + return fmt.Errorf("cannot deploy the pod service account %q defined in the configuration to the %q namespace: %v", podServiceAccountName, namespace, err) } - c.logger.Infof("successfully deployed the pod service account %v to the %v namespace", podServiceAccountName, namespace) + c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccountName, namespace) } else if k8sutil.ResourceAlreadyExists(err) { return nil } @@ -567,14 +567,14 @@ func (c *Controller) createRoleBindings(namespace string) error { _, err := c.KubeClient.RoleBindings(namespace).Get(podServiceAccountRoleBindingName, metav1.GetOptions{}) if k8sutil.ResourceNotFound(err) { - c.logger.Infof("Creating the role binding %v in the namespace %v", podServiceAccountRoleBindingName, namespace) + c.logger.Infof("Creating the role binding %q in the %q namespace", podServiceAccountRoleBindingName, namespace) // get a separate copy of role binding // to prevent a race condition when 
setting a namespace for many clusters rb := *c.PodServiceAccountRoleBinding _, err = c.KubeClient.RoleBindings(namespace).Create(&rb) if err != nil { - return fmt.Errorf("cannot bind the pod service account %q defined in the config map to the cluster role in the %q namespace: %v", podServiceAccountName, namespace, err) + return fmt.Errorf("cannot bind the pod service account %q defined in the configuration to the cluster role in the %q namespace: %v", podServiceAccountName, namespace, err) } c.logger.Infof("successfully deployed the role binding for the pod service account %q to the %q namespace", podServiceAccountName, namespace) diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index ec4af6427..fee65be81 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -91,12 +91,11 @@ type Config struct { Scalyr LogicalBackup - WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' - EtcdHost string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS - DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"` - Sidecars map[string]string `name:"sidecar_docker_images"` - // default name `operator` enables backward compatibility with the older ServiceAccountName field - PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"` + WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' + EtcdHost string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS + DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-12:1.6-p2"` + Sidecars map[string]string `name:"sidecar_docker_images"` + PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"` // value of this string must be valid JSON or YAML; see initPodServiceAccount PodServiceAccountDefinition string `name:"pod_service_account_definition" default:""` PodServiceAccountRoleBindingDefinition string `name:"pod_service_account_role_binding_definition" default:""` diff --git a/ui/Makefile b/ui/Makefile index f1cf16840..e7d5df674 100644 --- a/ui/Makefile +++ b/ui/Makefile @@ -5,9 +5,13 @@ VERSION ?= $(shell git describe --tags --always --dirty) TAG ?= $(VERSION) GITHEAD = $(shell git rev-parse --short HEAD) GITURL = $(shell git config --get remote.origin.url) -GITSTATU = $(shell git status --porcelain || echo 'no changes') +GITSTATUS = $(shell git status --porcelain || echo 'no changes') TTYFLAGS = $(shell test -t 0 && echo '-it') +ifdef CDP_PULL_REQUEST_NUMBER + CDP_TAG := -${CDP_BUILD_VERSION} +endif + default: docker clean: @@ -24,11 +28,12 @@ docker: appjs echo `(env)` echo "Tag ${TAG}" echo "Version ${VERSION}" + echo "CDP tag ${CDP_TAG}" echo "git describe $(shell git describe --tags --always --dirty)" - docker build --rm -t "$(IMAGE):$(TAG)" -f Dockerfile . + docker build --rm -t "$(IMAGE):$(TAG)$(CDP_TAG)" -f Dockerfile . -push: docker - docker push "$(IMAGE):$(TAG)" +push: + docker push "$(IMAGE):$(TAG)$(CDP_TAG)" mock: docker run -it -p 8080:8080 "$(IMAGE):$(TAG)" --mock
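For reference, with the `ui/Makefile` change above a PR build tags the UI image as `$(IMAGE):$(TAG)-$(CDP_BUILD_VERSION)`, while a regular build keeps `$(IMAGE):$(TAG)`. A hypothetical invocation (assuming `IMAGE` is set, as `delivery.yaml` now does via `export IMAGE`):

```bash
# PR build: the pipeline sets CDP_PULL_REQUEST_NUMBER, so CDP_TAG becomes "-$CDP_BUILD_VERSION"
CDP_PULL_REQUEST_NUMBER=123 CDP_BUILD_VERSION=456 make docker push
# -> builds and pushes $(IMAGE):<git-describe>-456

# master build: no PR number, so CDP_TAG stays empty
make docker push
# -> builds and pushes $(IMAGE):<git-describe>
```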