Merge branch 'master' into feature/connection-pooler

commit e6f1e8b7fc
@@ -1,7 +1,7 @@
 apiVersion: v1
 name: postgres-operator-ui
-version: 0.1.0
-appVersion: 1.3.0
+version: 1.4.0
+appVersion: 1.4.0
 home: https://github.com/zalando/postgres-operator
 description: Postgres Operator UI provides a graphical interface for a convenient database-as-a-service user experience
 keywords:
@@ -0,0 +1,29 @@
+apiVersion: v1
+entries:
+  postgres-operator-ui:
+  - apiVersion: v1
+    appVersion: 1.4.0
+    created: "2020-02-24T15:32:47.610967635+01:00"
+    description: Postgres Operator UI provides a graphical interface for a convenient
+      database-as-a-service user experience
+    digest: 00e0eff7056d56467cd5c975657fbb76c8d01accd25a4b7aca81bc42aeac961d
+    home: https://github.com/zalando/postgres-operator
+    keywords:
+    - postgres
+    - operator
+    - ui
+    - cloud-native
+    - patroni
+    - spilo
+    maintainers:
+    - email: opensource@zalando.de
+      name: Zalando
+    - email: sk@sik-net.de
+      name: siku4
+    name: postgres-operator-ui
+    sources:
+    - https://github.com/zalando/postgres-operator
+    urls:
+    - postgres-operator-ui-1.4.0.tgz
+    version: 1.4.0
+generated: "2020-02-24T15:32:47.610348278+01:00"
Binary file not shown.
@@ -8,7 +8,7 @@ replicaCount: 1
 image:
   registry: registry.opensource.zalan.do
   repository: acid/postgres-operator-ui
-  tag: v1.2.0
+  tag: v1.4.0
   pullPolicy: "IfNotPresent"
 
 rbac:
@@ -1,7 +1,7 @@
 apiVersion: v1
 name: postgres-operator
-version: 1.3.0
-appVersion: 1.3.0
+version: 1.4.0
+appVersion: 1.4.0
 home: https://github.com/zalando/postgres-operator
 description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes
 keywords:
@@ -94,7 +94,7 @@ spec:
 s3_secret_access_key:
   type: string
 s3_force_path_style:
-  type: string
+  type: boolean
 s3_wal_path:
   type: string
 timestamp:
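The CRD change above tightens validation of `s3_force_path_style` from string to boolean. For orientation, this is the clone-from-S3 shape that exercises the field, as documented later in this same diff; the endpoint and cluster name here are illustrative:

```yaml
spec:
  clone:
    cluster: "acid-batman"            # hypothetical source cluster
    s3_endpoint: https://s3.acme.org  # non-AWS S3 implementation
    s3_force_path_style: true         # now validated as a boolean
```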
@@ -1,9 +1,31 @@
 apiVersion: v1
 entries:
   postgres-operator:
+  - apiVersion: v1
+    appVersion: 1.4.0
+    created: "2020-02-20T17:39:25.443276193+01:00"
+    description: Postgres Operator creates and manages PostgreSQL clusters running
+      in Kubernetes
+    digest: b93ccde5581deb8ed0857136b8ce74ca3f1b7240438fa4415f705764a1300bed
+    home: https://github.com/zalando/postgres-operator
+    keywords:
+    - postgres
+    - operator
+    - cloud-native
+    - patroni
+    - spilo
+    maintainers:
+    - email: opensource@zalando.de
+      name: Zalando
+    name: postgres-operator
+    sources:
+    - https://github.com/zalando/postgres-operator
+    urls:
+    - postgres-operator-1.4.0.tgz
+    version: 1.4.0
   - apiVersion: v1
     appVersion: 1.3.0
-    created: "2019-12-17T12:58:49.477140129+01:00"
+    created: "2020-02-20T17:39:25.441532163+01:00"
     description: Postgres Operator creates and manages PostgreSQL clusters running
       in Kubernetes
     digest: 7e788fd37daec76a01f6d6f9fe5be5b54f5035e4eba0041e80a760d656537325
@@ -25,7 +47,7 @@ entries:
     version: 1.3.0
   - apiVersion: v1
     appVersion: 1.2.0
-    created: "2019-12-17T12:58:49.475844233+01:00"
+    created: "2020-02-20T17:39:25.440278302+01:00"
     description: Postgres Operator creates and manages PostgreSQL clusters running
       in Kubernetes
     digest: d10710c7cf19f4e266e7704f5d1e98dcfc61bee3919522326c35c22ca7d2f2bf
@@ -47,4 +69,4 @@ entries:
     urls:
     - postgres-operator-1.2.0.tgz
     version: 1.2.0
-generated: "2019-12-17T12:58:49.474719294+01:00"
+generated: "2020-02-20T17:39:25.439168098+01:00"
Binary file not shown.
@@ -63,9 +63,9 @@ rules:
   - secrets
   verbs:
   - create
-  - update
   - delete
   - get
+  - update
 # to check nodes for node readiness label
 - apiGroups:
   - ""
@@ -102,9 +102,9 @@ rules:
   - delete
   - get
   - list
-  - watch
-  - update
   - patch
+  - update
+  - watch
 # to resize the filesystem in Spilo pods when increasing volume size
 - apiGroups:
   - ""
@@ -1,7 +1,7 @@
 image:
   registry: registry.opensource.zalan.do
   repository: acid/postgres-operator
-  tag: v1.3.1
+  tag: v1.4.0
   pullPolicy: "IfNotPresent"
 
 # Optionally specify an array of imagePullSecrets.
@@ -24,7 +24,7 @@ configGeneral:
   # etcd connection string for Patroni. Empty uses K8s-native DCS.
   etcd_host: ""
   # Spilo docker image
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # max number of instances in Postgres cluster. -1 = no limit
   min_instances: -1
   # min number of instances in Postgres cluster. -1 = no limit
@@ -100,8 +100,14 @@ configKubernetes:
   pod_management_policy: "ordered_ready"
   # label assigned to the Postgres pods (and services/endpoints)
   pod_role_label: spilo-role
+  # service account definition as JSON/YAML string to be used by postgres cluster pods
+  # pod_service_account_definition: ""
+
   # name of service account to be used by postgres cluster pods
   pod_service_account_name: "postgres-pod"
+  # role binding definition as JSON/YAML string to be used by pod service account
+  # pod_service_account_role_binding_definition: ""
+
   # Postgres pods are terminated forcefully after this timeout
   pod_terminate_grace_period: 5m
   # template for database user secrets generated by the operator
@@ -1,7 +1,7 @@
 image:
   registry: registry.opensource.zalan.do
   repository: acid/postgres-operator
-  tag: v1.3.1
+  tag: v1.4.0
   pullPolicy: "IfNotPresent"
 
 # Optionally specify an array of imagePullSecrets.
@@ -24,7 +24,7 @@ configGeneral:
   # etcd connection string for Patroni. Empty uses K8s-native DCS.
   etcd_host: ""
   # Spilo docker image
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # max number of instances in Postgres cluster. -1 = no limit
   min_instances: "-1"
   # min number of instances in Postgres cluster. -1 = no limit
@@ -93,8 +93,14 @@ configKubernetes:
   pod_management_policy: "ordered_ready"
   # label assigned to the Postgres pods (and services/endpoints)
   pod_role_label: spilo-role
+  # service account definition as JSON/YAML string to be used by postgres cluster pods
+  # pod_service_account_definition: ""
+
   # name of service account to be used by postgres cluster pods
   pod_service_account_name: "postgres-pod"
+  # role binding definition as JSON/YAML string to be used by pod service account
+  # pod_service_account_role_binding_definition: ""
+
   # Postgres pods are terminated forcefully after this timeout
   pod_terminate_grace_period: 5m
   # template for database user secrets generated by the operator
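For context on the two commented options added above: each value must be a JSON/YAML string that the operator can parse into the corresponding Kubernetes object (the controller's init code rejects definitions of any other kind, as the `initRoleBinding` change later in this diff shows for role bindings). A minimal sketch, assuming the default `postgres-pod` account name; the exact body is hypothetical:

```yaml
configKubernetes:
  # hypothetical example; the string must parse to a ServiceAccount object
  pod_service_account_definition: |
    apiVersion: v1
    kind: ServiceAccount
    metadata:
      name: postgres-pod
```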
@@ -66,20 +66,13 @@ pipeline:
       - desc: 'Build and push Docker image'
         cmd: |
           cd ui
-          image_base='registry-write.opensource.zalan.do/acid/postgres-operator-ui'
-          if [[ "${CDP_TARGET_BRANCH}" == 'master' && -z "${CDP_PULL_REQUEST_NUMBER}" ]]
+          IS_PR_BUILD=${CDP_PULL_REQUEST_NUMBER+"true"}
+          if [[ ${CDP_TARGET_BRANCH} == "master" && ${IS_PR_BUILD} != "true" ]]
           then
-            image="${image_base}"
+            IMAGE=registry-write.opensource.zalan.do/acid/postgres-operator-ui
           else
-            image="${image_base}-test"
+            IMAGE=registry-write.opensource.zalan.do/acid/postgres-operator-ui-test
           fi
-          image_with_tag="${image}:c${CDP_BUILD_VERSION}"
-
-          if docker pull "${image}"
-          then
-            docker build --cache-from="${image}" -t "${image_with_tag}" .
-          else
-            docker build -t "${image_with_tag}" .
-          fi
-
-          docker push "${image_with_tag}"
+          export IMAGE
+          make docker
+          make push
@@ -11,11 +11,11 @@ switchover (planned failover) of the master to the Pod with new minor version.
 The switch should usually take less than 5 seconds, still clients have to
 reconnect.
 
-Major version upgrades are supported via [cloning](user.md#clone-directly). The
-new cluster manifest must have a higher `version` string than the source cluster
-and will be created from a basebackup. Depending of the cluster size, downtime
-in this case can be significant as writes to the database should be stopped and
-all WAL files should be archived first before cloning is started.
+Major version upgrades are supported via [cloning](user.md#how-to-clone-an-existing-postgresql-cluster).
+The new cluster manifest must have a higher `version` string than the source
+cluster and will be created from a basebackup. Depending of the cluster size,
+downtime in this case can be significant as writes to the database should be
+stopped and all WAL files should be archived first before cloning is started.
 
 Note, that simply changing the version string in the `postgresql` manifest does
 not work at present and leads to errors. Neither Patroni nor Postgres Operator
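As a sketch of the cloning-based upgrade described above, with hypothetical cluster names, sizes, and versions: the new manifest references the source cluster in its `clone` section and requests a higher `version`:

```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
  name: acid-upgraded-cluster
spec:
  teamId: "acid"
  numberOfInstances: 2
  volume:
    size: 5Gi
  postgresql:
    version: "12"                # must be higher than in the source cluster
  clone:
    cluster: "acid-old-cluster"  # created from a basebackup of this cluster
```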
@@ -110,8 +110,10 @@ Those are top-level keys, containing both leaf keys and groups.
 
 * **min_instances**
   operator will run at least the number of instances for any given Postgres
-  cluster equal to the value of this parameter. When `-1` is specified, no
-  limits are applied. The default is `-1`.
+  cluster equal to the value of this parameter. Standby clusters can still run
+  with `numberOfInstances: 1` as this is the [recommended setup](../user.md#setting-up-a-standby-cluster).
+  When `-1` is specified for `min_instances`, no limits are applied. The default
+  is `-1`.
 
 * **resync_period**
   period between consecutive sync requests. The default is `30m`.
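In the ConfigMap-based operator configuration the same knob is a string value; a minimal sketch with illustrative numbers:

```yaml
data:
  min_instances: "2"   # operator bumps smaller clusters up to 2 pods
  max_instances: "-1"  # -1 = no upper limit
```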
docs/user.md (137 changed lines)
@@ -254,29 +254,22 @@ spec:
 
 ## How to clone an existing PostgreSQL cluster
 
-You can spin up a new cluster as a clone of the existing one, using a clone
+You can spin up a new cluster as a clone of the existing one, using a `clone`
 section in the spec. There are two options here:
 
-* Clone directly from a source cluster using `pg_basebackup`
-* Clone from an S3 bucket
+* Clone from an S3 bucket (recommended)
+* Clone directly from a source cluster
 
-### Clone directly
-
-```yaml
-spec:
-  clone:
-    cluster: "acid-batman"
-```
-
-Here `cluster` is a name of a source cluster that is going to be cloned. The
-cluster to clone is assumed to be running and the clone procedure invokes
-`pg_basebackup` from it. The operator will setup the cluster to be cloned to
-connect to the service of the source cluster by name (if the cluster is called
-test, then the connection string will look like host=test port=5432), which
-means that you can clone only from clusters within the same namespace.
+Note, that cloning can also be used for [major version upgrades](administrator.md#minor-and-major-version-upgrade)
+of PostgreSQL.
 
 ### Clone from S3
 
+Cloning from S3 has the advantage that there is no impact on your production
+database. A new Postgres cluster is created by restoring the data of another
+source cluster. If you create it in the same Kubernetes environment, use a
+different name.
+
 ```yaml
 spec:
   clone:
@@ -287,7 +280,8 @@ spec:
 
 Here `cluster` is a name of a source cluster that is going to be cloned. A new
 cluster will be cloned from S3, using the latest backup before the `timestamp`.
-In this case, `uid` field is also mandatory - operator will use it to find a
+Note, that a time zone is required for `timestamp` in the format of +00:00 which
+is UTC. The `uid` field is also mandatory. The operator will use it to find a
 correct key inside an S3 bucket. You can find this field in the metadata of the
 source cluster:
 
@@ -299,9 +293,6 @@ metadata:
   uid: efd12e58-5786-11e8-b5a7-06148230260c
 ```
 
-Note that timezone is required for `timestamp`. Otherwise, offset is relative
-to UTC, see [RFC 3339 section 5.6](https://www.ietf.org/rfc/rfc3339.txt).
-
 For non AWS S3 following settings can be set to support cloning from other S3
 implementations:
 
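Combining the two requirements just stated (a time-zone-qualified `timestamp` plus the source cluster's `uid`), a complete S3 clone section would look roughly like this; the values are illustrative:

```yaml
spec:
  clone:
    cluster: "acid-batman"
    timestamp: "2020-02-04T12:49:03+00:00"     # time zone required (+00:00 is UTC)
    uid: efd12e58-5786-11e8-b5a7-06148230260c  # from the source cluster metadata
```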
@@ -317,14 +308,35 @@ spec:
     s3_force_path_style: true
 ```
 
+### Clone directly
+
+Another way to get a fresh copy of your source DB cluster is via basebackup. To
+use this feature simply leave out the timestamp field from the clone section.
+The operator will connect to the service of the source cluster by name. If the
+cluster is called test, then the connection string will look like host=test
+port=5432), which means that you can clone only from clusters within the same
+namespace.
+
+```yaml
+spec:
+  clone:
+    cluster: "acid-batman"
+```
+
+Be aware that on a busy source database this can result in an elevated load!
+
 ## Setting up a standby cluster
 
-Standby clusters are like normal cluster but they are streaming from a remote
-cluster. As the first version of this feature, the only scenario covered by
-operator is to stream from a WAL archive of the master. Following the more
-popular infrastructure of using Amazon's S3 buckets, it is mentioned as
-`s3_wal_path` here. To start a cluster as standby add the following `standby`
-section in the YAML file:
+Standby cluster is a [Patroni feature](https://github.com/zalando/patroni/blob/master/docs/replica_bootstrap.rst#standby-cluster)
+that first clones a database, and keeps replicating changes afterwards. As the
+replication is happening by the means of archived WAL files (stored on S3 or
+the equivalent of other cloud providers), the standby cluster can exist in a
+different location than its source database. Unlike cloning, the PostgreSQL
+version between source and target cluster has to be the same.
 
+To start a cluster as standby, add the following `standby` section in the YAML
+file and specify the S3 bucket path. An empty path will result in an error and
+no statefulset will be created.
+
 ```yaml
 spec:
@@ -332,20 +344,65 @@ spec:
     s3_wal_path: "s3 bucket path to the master"
 ```
 
-Things to note:
+At the moment, the operator only allows to stream from the WAL archive of the
+master. Thus, it is recommended to deploy standby clusters with only [one pod](../manifests/standby-manifest.yaml#L10).
+You can raise the instance count when detaching. Note, that the same pod role
+labels like for normal clusters are used: The standby leader is labeled as
+`master`.
 
-- An empty string in the `s3_wal_path` field of the standby cluster will result
-  in an error and no statefulset will be created.
-- Only one pod can be deployed for stand-by cluster.
-- To manually promote the standby_cluster, use `patronictl` and remove config
-  entry.
-- There is no way to transform a non-standby cluster to a standby cluster
-  through the operator. Adding the standby section to the manifest of a running
-  Postgres cluster will have no effect. However, it can be done through Patroni
-  by adding the [standby_cluster](https://github.com/zalando/patroni/blob/bd2c54581abb42a7d3a3da551edf0b8732eefd27/docs/replica_bootstrap.rst#standby-cluster)
-  section using `patronictl edit-config`. Note that the transformed standby
-  cluster will not be doing any streaming. It will be in standby mode and allow
-  read-only transactions only.
+### Providing credentials of source cluster
+
+A standby cluster is replicating the data (including users and passwords) from
+the source database and is read-only. The system and application users (like
+standby, postgres etc.) all have a password that does not match the credentials
+stored in secrets which are created by the operator. One solution is to create
+secrets beforehand and paste in the credentials of the source cluster.
+Otherwise, you will see errors in the Postgres logs saying users cannot log in
+and the operator logs will complain about not being able to sync resources.
+
+When you only run a standby leader, you can safely ignore this, as it will be
+sorted out once the cluster is detached from the source. It is also harmless if
+you don’t plan it. But, when you created a standby replica, too, fix the
+credentials right away. WAL files will pile up on the standby leader if no
+connection can be established between standby replica(s). You can also edit the
+secrets after their creation. Find them by:
+
+```bash
+kubectl get secrets --all-namespaces | grep <standby-cluster-name>
+```
+
+### Promote the standby
+
+One big advantage of standby clusters is that they can be promoted to a proper
+database cluster. This means it will stop replicating changes from the source,
+and start accept writes itself. This mechanism makes it possible to move
+databases from one place to another with minimal downtime. Currently, the
+operator does not support promoting a standby cluster. It has to be done
+manually using `patronictl edit-config` inside the postgres container of the
+standby leader pod. Remove the following lines from the YAML structure and the
+leader promotion happens immediately. Before doing so, make sure that the
+standby is not behind the source database.
+
+```yaml
+standby_cluster:
+  create_replica_methods:
+    - bootstrap_standby_with_wale
+    - basebackup_fast_xlog
+  restore_command: envdir "/home/postgres/etc/wal-e.d/env-standby" /scripts/restore_command.sh
+    "%f" "%p"
+```
+
+Finally, remove the `standby` section from the postgres cluster manifest.
+
+### Turn a normal cluster into a standby
+
+There is no way to transform a non-standby cluster to a standby cluster through
+the operator. Adding the `standby` section to the manifest of a running
+Postgres cluster will have no effect. But, as explained in the previous
+paragraph it can be done manually through `patronictl edit-config`. This time,
+by adding the `standby_cluster` section to the Patroni configuration. However,
+the transformed standby cluster will not be doing any streaming. It will be in
+standby mode and allow read-only transactions only.
 
 ## Sidecar Support
 
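A standby manifest along the lines of `manifests/standby-manifest.yaml` (referenced above) might look as follows; the S3 path is a placeholder and the Postgres version must match the source cluster:

```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
  name: acid-standby-cluster
spec:
  teamId: "acid"
  numberOfInstances: 1   # the recommended single-pod setup
  volume:
    size: 1Gi
  postgresql:
    version: "11"
  standby:
    s3_wal_path: "s3://<bucket>/spilo/<source-cluster>/<uid>/wal/"
```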
@@ -60,10 +60,11 @@ class EndToEndTestCase(unittest.TestCase):
             'default', label_selector='name=postgres-operator').items[0].spec.containers[0].image
         print("Tested operator image: {}".format(actual_operator_image))  # shows up after tests finish
 
-        result = k8s.create_with_kubectl('manifests/minimal-postgres-manifest.yaml')
+        result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         print('stdout: {}, stderr: {}'.format(result.stdout, result.stderr))
         try:
             k8s.wait_for_pod_start('spilo-role=master')
+            k8s.wait_for_pod_start('spilo-role=replica')
         except timeout_decorator.TimeoutError:
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
@@ -117,152 +118,6 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(repl_svc_type, 'ClusterIP',
                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type))
 
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
-        '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
-        _, failover_targets = k8s.get_pg_nodes(cluster_label)
-
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '500m'
-        minMemoryLimit = '500Mi'
-        patch_min_resource_limits = {
-            "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
-            }
-        }
-        k8s.update_config(patch_min_resource_limits)
-
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        k8s.wait_for_master_failover(failover_targets)
-
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector='spilo-role=master,' + cluster_label).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
-
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
-                         "Expected CPU limit {}, found {}"
-                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
-                         "Expected memory limit {}, found {}"
-                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
-        '''
-        k8s = self.k8s
-
-        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
-            pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
-            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
-
-        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_scaling(self):
-        '''
-        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
-        '''
-        k8s = self.k8s
-        labels = "application=spilo,cluster-name=acid-minimal-cluster"
-
-        try:
-            k8s.wait_for_pg_to_scale(3)
-            self.assertEqual(3, k8s.count_pods_with_label(labels))
-            self.assert_master_is_unique()
-
-            k8s.wait_for_pg_to_scale(2)
-            self.assertEqual(2, k8s.count_pods_with_label(labels))
-            self.assert_master_is_unique()
-        except timeout_decorator.TimeoutError:
-            print('Operator log: {}'.format(k8s.get_operator_log()))
-            pods = k8s.api.core_v1.list_namespaced_pod('default').items
-            for p in pods:
-                response = k8s.api.core_v1.read_namespaced_pod(
-                    name=p.metadata.name,
-                    namespace='default'
-                )
-                print('Pod: {}'.format(response))
-            raise
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_taint_based_eviction(self):
-        '''
-        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
-        '''
-        k8s = self.k8s
-        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
-
-        # taint node with postgres=:NoExecute to force failover
-        body = {
-            "spec": {
-                "taints": [
-                    {
-                        "effect": "NoExecute",
-                        "key": "postgres"
-                    }
-                ]
-            }
-        }
-
-        # patch node and test if master is failing over to one of the expected nodes
-        k8s.api.core_v1.patch_node(current_master_node, body)
-        k8s.wait_for_master_failover(failover_targets)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-
-        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(current_master_node, new_master_node,
-                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
-        self.assertEqual(num_replicas, len(new_replica_nodes),
-                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
-        self.assert_master_is_unique()
-
-        # undo the tainting
-        body = {
-            "spec": {
-                "taints": []
-            }
-        }
-        k8s.api.core_v1.patch_node(new_master_node, body)
-
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_logical_backup_cron_job(self):
         '''
@@ -327,6 +182,133 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(0, len(jobs),
                          "Expected 0 logical backup jobs, found {}".format(len(jobs)))
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_min_resource_limits(self):
+        '''
+        Lower resource limits below configured minimum and let operator fix it
+        '''
+        k8s = self.k8s
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+
+        # configure minimum boundaries for CPU and memory limits
+        minCPULimit = '500m'
+        minMemoryLimit = '500Mi'
+        patch_min_resource_limits = {
+            "data": {
+                "min_cpu_limit": minCPULimit,
+                "min_memory_limit": minMemoryLimit
+            }
+        }
+        k8s.update_config(patch_min_resource_limits)
+
+        # lower resource limits below minimum
+        pg_patch_resources = {
+            "spec": {
+                "resources": {
+                    "requests": {
+                        "cpu": "10m",
+                        "memory": "50Mi"
+                    },
+                    "limits": {
+                        "cpu": "200m",
+                        "memory": "200Mi"
+                    }
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
+        k8s.wait_for_pod_failover(failover_targets, labels)
+        k8s.wait_for_pod_start('spilo-role=replica')
+
+        pods = k8s.api.core_v1.list_namespaced_pod(
+            'default', label_selector=labels).items
+        self.assert_master_is_unique()
+        masterPod = pods[0]
+
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
+                         "Expected CPU limit {}, found {}"
+                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
+                         "Expected memory limit {}, found {}"
+                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_multi_namespace_support(self):
+        '''
+        Create a customized Postgres cluster in a non-default namespace.
+        '''
+        k8s = self.k8s
+
+        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
+            pg_manifest = yaml.safe_load(f)
+            pg_manifest["metadata"]["namespace"] = self.namespace
+            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
+
+        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
+        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
+        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+        Remove node readiness label from master node. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in config map which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # patch also node where master ran before
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_scaling(self):
+        '''
+        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
+        '''
+        k8s = self.k8s
+        labels = "cluster-name=acid-minimal-cluster"
+
+        k8s.wait_for_pg_to_scale(3)
+        self.assertEqual(3, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
+        k8s.wait_for_pg_to_scale(2)
+        self.assertEqual(2, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_service_annotations(self):
         '''
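The `test_node_readiness_label` case added above toggles the operator's `node_readiness_label` option; expressed directly in the operator ConfigMap, the patch it applies corresponds to:

```yaml
data:
  node_readiness_label: "lifecycle-status:ready"
```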
@@ -453,18 +435,117 @@ class EndToEndTestCase(unittest.TestCase):
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
 
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_taint_based_eviction(self):
+        '''
+        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # taint node with postgres=:NoExecute to force failover
+        body = {
+            "spec": {
+                "taints": [
+                    {
+                        "effect": "NoExecute",
+                        "key": "postgres"
+                    }
+                ]
+            }
+        }
+
+        # patch node and test if master is failing over to one of the expected nodes
+        k8s.api.core_v1.patch_node(current_master_node, body)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # add toleration to pods
+        patch_toleration_config = {
+            "data": {
+                "toleration": "key:postgres,operator:Exists,effect:NoExecute"
+            }
+        }
+        k8s.update_config(patch_toleration_config)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+        If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
+    def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label):
+        '''
+        Check if master is failing over. The replica should move first to be the switchover target
+        '''
+        k8s = self.k8s
+        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        return new_master_node, new_replica_nodes
+>>>>>>> master
 
     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
         Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
         To be called manually after operations that affect pods
         '''
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName
 
         num_of_master_pods = k8s.count_pods_with_label(labels, namespace)
         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))
+
+    def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
+        '''
+        Other tests can lead to the situation that master and replica are on the same node.
+        Toggle pod anti affinty to distribute pods accross nodes (replica in particular).
+        '''
+        k8s = self.k8s
+        failover_targets = self.get_failover_targets(master_node, replica_nodes)
+
+        # enable pod anti affintiy in config map which should trigger movement of replica
+        patch_enable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "true"
+            }
+        }
+        k8s.update_config(patch_enable_antiaffinity)
+        self.assert_failover(
+            master_node, len(replica_nodes), failover_targets, cluster_label)
+
+        # disable pod anti affintiy again
+        patch_disable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "false"
+            }
+        }
+        k8s.update_config(patch_disable_antiaffinity)
+
 
 class K8sApi:
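For clarity, the `toleration` string patched into the operator config in `test_taint_based_eviction` above ("key:postgres,operator:Exists,effect:NoExecute") describes the standard Kubernetes toleration the Postgres pods need to survive the taint, i.e.:

```yaml
tolerations:
- key: postgres
  operator: Exists
  effect: NoExecute
```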
@@ -596,15 +677,14 @@ class K8s:
     def count_pods_with_label(self, labels, namespace='default'):
         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items)
 
-    def wait_for_master_failover(self, expected_master_nodes, namespace='default'):
+    def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
-        new_master_node = ''
-        labels = 'spilo-role=master,cluster-name=acid-minimal-cluster'
+        new_pod_node = ''
 
-        while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
+        while (pod_phase != 'Running') or (new_pod_node not in failover_targets):
             pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
             if pods:
-                new_master_node = pods[0].spec.node_name
+                new_pod_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
             time.sleep(self.RETRY_TIMEOUT_SEC)
@@ -5,7 +5,7 @@ metadata:
 # labels:
 #   environment: demo
 spec:
-  dockerImage: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  dockerImage: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   teamId: "acid"
   volume:
     size: 1Gi
@@ -29,7 +29,7 @@ data:
   # default_cpu_request: 100m
   # default_memory_limit: 500Mi
   # default_memory_request: 100Mi
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # enable_admin_role_for_users: "true"
   # enable_crd_validation: "true"
   # enable_database_access: "true"
@@ -73,7 +73,9 @@ data:
   pod_label_wait_timeout: 10m
   pod_management_policy: "ordered_ready"
   pod_role_label: spilo-role
+  # pod_service_account_definition: ""
   pod_service_account_name: "postgres-pod"
+  # pod_service_account_role_binding_definition: ""
   pod_terminate_grace_period: 5m
   # postgres_superuser_teams: "postgres_superusers"
   # protected_role_names: "admin"
@@ -64,9 +64,9 @@ rules:
   - secrets
   verbs:
   - create
-  - update
   - delete
   - get
+  - update
 # to check nodes for node readiness label
 - apiGroups:
   - ""
@@ -103,9 +103,9 @@ rules:
   - delete
   - get
   - list
-  - watch
-  - update
   - patch
+  - update
+  - watch
 # to resize the filesystem in Spilo pods when increasing volume size
 - apiGroups:
   - ""
@@ -15,7 +15,7 @@ spec:
       serviceAccountName: postgres-operator
       containers:
       - name: postgres-operator
-        image: registry.opensource.zalan.do/acid/postgres-operator:v1.3.1
+        image: registry.opensource.zalan.do/acid/postgres-operator:v1.4.0
         imagePullPolicy: IfNotPresent
         resources:
           requests:
@@ -5,7 +5,7 @@ metadata:
 configuration:
   # enable_crd_validation: true
   etcd_host: ""
-  docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16
+  docker_image: registry.opensource.zalan.do/acid/spilo-12:1.6-p2
   # enable_shm_volume: true
   max_instances: -1
   min_instances: -1
@@ -110,7 +110,7 @@ configuration:
     log_statement: all
   # teams_api_url: ""
   logging_rest_api:
-    api_port: 8008
+    api_port: 8080
     cluster_history_entries: 1000
     ring_log_lines: 100
   scalyr:
@@ -58,7 +58,7 @@ spec:
 s3_secret_access_key:
   type: string
 s3_force_path_style:
-  type: string
+  type: boolean
 s3_wal_path:
   type: string
 timestamp:
@@ -160,7 +160,7 @@ var PostgresCRDResourceValidation = apiextv1beta1.CustomResourceValidation{
 				Type: "string",
 			},
 			"s3_force_path_style": {
-				Type: "string",
+				Type: "boolean",
 			},
 			"s3_wal_path": {
 				Type: "string",
@@ -1110,11 +1110,13 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
 	cur := spec.NumberOfInstances
 	newcur := cur
 
-	/* Limit the max number of pods to one, if this is standby-cluster */
 	if spec.StandbyCluster != nil {
-		c.logger.Info("Standby cluster can have maximum of 1 pod")
-		min = 1
-		max = 1
+		if newcur == 1 {
+			min = newcur
+			max = newcur
+		} else {
+			c.logger.Warningf("operator only supports standby clusters with 1 pod")
+		}
 	}
 	if max >= 0 && newcur > max {
 		newcur = max
@@ -224,7 +224,7 @@ func (c *Controller) initRoleBinding() {
 
 	switch {
 	case err != nil:
-		panic(fmt.Errorf("unable to parse the definition of the role binding for the pod service account definition from the operator configuration: %v", err))
+		panic(fmt.Errorf("unable to parse the role binding definition from the operator configuration: %v", err))
 	case groupVersionKind.Kind != "RoleBinding":
 		panic(fmt.Errorf("role binding definition in the operator configuration defines another type of resource: %v", groupVersionKind.Kind))
 	default:
@@ -5,7 +5,7 @@ import (
 	"time"
 
 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }
 
 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)
 
 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 
 }
@@ -505,11 +505,11 @@ func (c *Controller) submitRBACCredentials(event ClusterEvent) error {
 	namespace := event.NewSpec.GetNamespace()
 
 	if err := c.createPodServiceAccount(namespace); err != nil {
-		return fmt.Errorf("could not create pod service account %v : %v", c.opConfig.PodServiceAccountName, err)
+		return fmt.Errorf("could not create pod service account %q : %v", c.opConfig.PodServiceAccountName, err)
 	}
 
 	if err := c.createRoleBindings(namespace); err != nil {
-		return fmt.Errorf("could not create role binding %v : %v", c.PodServiceAccountRoleBinding.Name, err)
+		return fmt.Errorf("could not create role binding %q : %v", c.PodServiceAccountRoleBinding.Name, err)
 	}
 	return nil
 }
@@ -520,16 +520,16 @@ func (c *Controller) createPodServiceAccount(namespace string) error {
 	_, err := c.KubeClient.ServiceAccounts(namespace).Get(podServiceAccountName, metav1.GetOptions{})
 	if k8sutil.ResourceNotFound(err) {
 
-		c.logger.Infof(fmt.Sprintf("creating pod service account in the namespace %v", namespace))
+		c.logger.Infof(fmt.Sprintf("creating pod service account %q in the %q namespace", podServiceAccountName, namespace))
 
 		// get a separate copy of service account
 		// to prevent a race condition when setting a namespace for many clusters
 		sa := *c.PodServiceAccount
 		if _, err = c.KubeClient.ServiceAccounts(namespace).Create(&sa); err != nil {
-			return fmt.Errorf("cannot deploy the pod service account %v defined in the config map to the %v namespace: %v", podServiceAccountName, namespace, err)
+			return fmt.Errorf("cannot deploy the pod service account %q defined in the configuration to the %q namespace: %v", podServiceAccountName, namespace, err)
 		}
 
-		c.logger.Infof("successfully deployed the pod service account %v to the %v namespace", podServiceAccountName, namespace)
+		c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccountName, namespace)
 	} else if k8sutil.ResourceAlreadyExists(err) {
 		return nil
 	}
@@ -545,14 +545,14 @@ func (c *Controller) createRoleBindings(namespace string) error {
 	_, err := c.KubeClient.RoleBindings(namespace).Get(podServiceAccountRoleBindingName, metav1.GetOptions{})
 	if k8sutil.ResourceNotFound(err) {
 
-		c.logger.Infof("Creating the role binding %v in the namespace %v", podServiceAccountRoleBindingName, namespace)
+		c.logger.Infof("Creating the role binding %q in the %q namespace", podServiceAccountRoleBindingName, namespace)
 
 		// get a separate copy of role binding
 		// to prevent a race condition when setting a namespace for many clusters
 		rb := *c.PodServiceAccountRoleBinding
 		_, err = c.KubeClient.RoleBindings(namespace).Create(&rb)
 		if err != nil {
-			return fmt.Errorf("cannot bind the pod service account %q defined in the config map to the cluster role in the %q namespace: %v", podServiceAccountName, namespace, err)
+			return fmt.Errorf("cannot bind the pod service account %q defined in the configuration to the cluster role in the %q namespace: %v", podServiceAccountName, namespace, err)
 		}
 
 		c.logger.Infof("successfully deployed the role binding for the pod service account %q to the %q namespace", podServiceAccountName, namespace)
@@ -108,9 +108,8 @@ type Config struct {
 
 	WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to'
 	EtcdHost         string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS
-	DockerImage      string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"`
+	DockerImage      string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-12:1.6-p2"`
 	Sidecars         map[string]string `name:"sidecar_docker_images"`
-	// default name `operator` enables backward compatibility with the older ServiceAccountName field
 	PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"`
 	// value of this string must be valid JSON or YAML; see initPodServiceAccount
 	PodServiceAccountDefinition string `name:"pod_service_account_definition" default:""`
ui/Makefile (13 changed lines)
@@ -5,9 +5,13 @@ VERSION ?= $(shell git describe --tags --always --dirty)
 TAG ?= $(VERSION)
 GITHEAD = $(shell git rev-parse --short HEAD)
 GITURL = $(shell git config --get remote.origin.url)
-GITSTATU = $(shell git status --porcelain || echo 'no changes')
+GITSTATUS = $(shell git status --porcelain || echo 'no changes')
 TTYFLAGS = $(shell test -t 0 && echo '-it')
 
+ifdef CDP_PULL_REQUEST_NUMBER
+CDP_TAG := -${CDP_BUILD_VERSION}
+endif
+
 default: docker
 
 clean:
@@ -24,11 +28,12 @@ docker: appjs
 	echo `(env)`
 	echo "Tag ${TAG}"
 	echo "Version ${VERSION}"
+	echo "CDP tag ${CDP_TAG}"
 	echo "git describe $(shell git describe --tags --always --dirty)"
-	docker build --rm -t "$(IMAGE):$(TAG)" -f Dockerfile .
+	docker build --rm -t "$(IMAGE):$(TAG)$(CDP_TAG)" -f Dockerfile .
 
-push: docker
-	docker push "$(IMAGE):$(TAG)"
+push:
+	docker push "$(IMAGE):$(TAG)$(CDP_TAG)"
 
 mock:
 	docker run -it -p 8080:8080 "$(IMAGE):$(TAG)" --mock