Merge branch 'master' into fix/critical-op-pdb-on-demand

This commit is contained in:
Felix Kunde 2026-03-02 11:14:06 +01:00 committed by GitHub
commit 2ccbd66f73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
50 changed files with 4797 additions and 292 deletions

3
.gitignore vendored
View File

@ -106,6 +106,3 @@ mocks
ui/.npm/
.DS_Store
# temp build files
pkg/apis/acid.zalan.do/v1/postgresql.crd.yaml

View File

@ -13,7 +13,7 @@ LDFLAGS ?= -X=main.version=$(VERSION)
DOCKERDIR = docker
BASE_IMAGE ?= alpine:latest
IMAGE ?= $(BINARY)
IMAGE ?= ghcr.io/zalando/$(BINARY)
TAG ?= $(VERSION)
GITHEAD = $(shell git rev-parse --short HEAD)
GITURL = $(shell git config --get remote.origin.url)
@ -84,7 +84,7 @@ linux: ${SOURCES} $(GENERATED_CRDS)
macos: ${SOURCES} $(GENERATED_CRDS)
GOOS=darwin GOARCH=amd64 CGO_ENABLED=${CGO_ENABLED} go build -o build/macos/${BINARY} ${BUILD_FLAGS} -ldflags "$(LDFLAGS)" $(SOURCES)
docker: ${DOCKERDIR}/${DOCKERFILE}
docker: $(GENERATED_CRDS) ${DOCKERDIR}/${DOCKERFILE}
echo `(env)`
echo "Tag ${TAG}"
echo "Version ${VERSION}"

View File

@ -29,13 +29,13 @@ pipelines with no access to Kubernetes API directly, promoting infrastructure as
### PostgreSQL features
* Supports PostgreSQL 17, starting from 13+
* Supports PostgreSQL 18, starting from 14+
* Streaming replication cluster via Patroni
* Point-In-Time-Recovery with
[pg_basebackup](https://www.postgresql.org/docs/17/app-pgbasebackup.html) /
[pg_basebackup](https://www.postgresql.org/docs/18/app-pgbasebackup.html) /
[WAL-G](https://github.com/wal-g/wal-g) or [WAL-E](https://github.com/wal-e/wal-e) via [Spilo](https://github.com/zalando/spilo)
* Preload libraries: [bg_mon](https://github.com/CyberDem0n/bg_mon),
[pg_stat_statements](https://www.postgresql.org/docs/17/pgstatstatements.html),
[pg_stat_statements](https://www.postgresql.org/docs/18/pgstatstatements.html),
[pgextwlist](https://github.com/dimitri/pgextwlist),
[pg_auth_mon](https://github.com/RafiaSabih/pg_auth_mon)
* Incl. popular Postgres extensions such as

View File

@ -84,11 +84,11 @@ spec:
"limit_iops": 16000,
"limit_throughput": 1000,
"postgresql_versions": [
"18",
"17",
"16",
"15",
"14",
"13"
]
}
{{- if .Values.extraEnvs }}

View File

@ -68,7 +68,7 @@ spec:
type: string
docker_image:
type: string
default: "ghcr.io/zalando/spilo-17:4.0-p3"
default: "ghcr.io/zalando/spilo-18:4.1-p1"
enable_crd_registration:
type: boolean
default: true
@ -101,6 +101,11 @@ spec:
kubernetes_use_configmaps:
type: boolean
default: false
maintenance_windows:
items:
pattern: '^\ *((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))-((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))\ *$'
type: string
type: array
max_instances:
type: integer
description: "-1 = disabled"
@ -169,10 +174,10 @@ spec:
type: string
minimal_major_version:
type: string
default: "13"
default: "14"
target_major_version:
type: string
default: "17"
default: "18"
kubernetes:
type: object
properties:

View File

@ -374,11 +374,11 @@ spec:
version:
type: string
enum:
- "13"
- "14"
- "15"
- "16"
- "17"
- "18"
parameters:
type: object
additionalProperties:
@ -493,13 +493,19 @@ spec:
type: string
standby_port:
type: string
oneOf:
standby_primary_slot_name:
type: string
anyOf:
- required:
- s3_wal_path
- required:
- gs_wal_path
- required:
- standby_host
not:
required:
- s3_wal_path
- gs_wal_path
streams:
type: array
items:

View File

@ -31,14 +31,12 @@ configGeneral:
enable_pgversion_env_var: true
# start any new database pod without limitations on shm memory
enable_shm_volume: true
# enables backwards compatible path between Spilo 12 and Spilo 13+ images
enable_spilo_wal_path_compat: false
# operator will sync only clusters where name starts with teamId prefix
enable_team_id_clustername_prefix: false
# etcd connection string for Patroni. Empty uses K8s-native DCS.
etcd_host: ""
# Spilo docker image
docker_image: ghcr.io/zalando/spilo-17:4.0-p3
docker_image: ghcr.io/zalando/spilo-18:4.1-p1
# key name for annotation to ignore globally configured instance limits
# ignore_instance_limits_annotation_key: ""
@ -49,6 +47,10 @@ configGeneral:
# Select if setup uses endpoints (default), or configmaps to manage leader (DCS=k8s)
# kubernetes_use_configmaps: false
# maintenance windows applied to all Postgres clusters unless overridden in the manifest
# maintenance_windows:
# - "Sun:01:00-06:00"
# min number of instances in Postgres cluster. -1 = no limit
min_instances: -1
# max number of instances in Postgres cluster. -1 = no limit
@ -92,9 +94,9 @@ configMajorVersionUpgrade:
# - acid
# minimal Postgres major version that will not automatically be upgraded
minimal_major_version: "13"
minimal_major_version: "14"
# target Postgres major version when upgrading clusters automatically
target_major_version: "17"
target_major_version: "18"
configKubernetes:
# list of additional capabilities for postgres container

View File

@ -23,7 +23,10 @@ pipeline:
- desc: Run unit tests
cmd: |
make mocks test
if ! git diff --quiet; then
echo "Build resulted in files being changed, likely they were not checked in"
exit 1
fi
- desc: Build Docker image
cmd: |
if [ -z ${CDP_SOURCE_BRANCH} ]; then

View File

@ -1312,7 +1312,7 @@ aws_or_gcp:
If cluster members have to be (re)initialized restoring physical backups
happens automatically either from the backup location or by running
[pg_basebackup](https://www.postgresql.org/docs/17/app-pgbasebackup.html)
[pg_basebackup](https://www.postgresql.org/docs/18/app-pgbasebackup.html)
on one of the other running instances (preferably replicas if they do not lag
behind). You can test restoring backups by [cloning](user.md#how-to-clone-an-existing-postgresql-cluster)
clusters.
@ -1346,10 +1346,12 @@ If you are using [additional environment variables](#custom-pod-environment-vari
to access your backup location you have to copy those variables and prepend
the `STANDBY_` prefix for Spilo to find the backups and WAL files to stream.
Alternatively, standby clusters can also stream from a remote primary cluster.
Standby clusters can also stream from a remote primary cluster.
You have to specify the host address. Port is optional and defaults to 5432.
Note, that only one of the options (`s3_wal_path`, `gs_wal_path`,
`standby_host`) can be present under the `standby` top-level key.
You can combine `standby_host` with either `s3_wal_path` or `gs_wal_path`
for additional redundancy. Note that `s3_wal_path` and `gs_wal_path` are
mutually exclusive. At least one of `s3_wal_path`, `gs_wal_path`, or
`standby_host` must be specified under the `standby` top-level key.
## Logical backups

View File

@ -116,9 +116,9 @@ These parameters are grouped directly under the `spec` key in the manifest.
* **maintenanceWindows**
a list which defines specific time frames when certain maintenance operations
such as automatic major upgrades or master pod migration. Accepted formats
are "01:00-06:00" for daily maintenance windows or "Sat:00:00-04:00" for specific
days, with all times in UTC.
such as automatic major upgrades or master pod migration are allowed to happen.
Accepted formats are "01:00-06:00" for daily maintenance windows or
"Sat:00:00-04:00" for specific days, with all times in UTC.
* **users**
a map of usernames to user flags for the users that should be created in the
@ -457,22 +457,31 @@ under the `clone` top-level key and do not affect the already running cluster.
On startup, an existing `standby` top-level key creates a standby Postgres
cluster streaming from a remote location - either from a S3 or GCS WAL
archive or a remote primary. Only one of options is allowed and required
if the `standby` key is present.
archive, a remote primary, or a combination of both. At least one of
`s3_wal_path`, `gs_wal_path`, or `standby_host` must be specified.
Note that `s3_wal_path` and `gs_wal_path` are mutually exclusive.
* **s3_wal_path**
the url to S3 bucket containing the WAL archive of the remote primary.
Can be combined with `standby_host` for additional redundancy.
* **gs_wal_path**
the url to GS bucket containing the WAL archive of the remote primary.
Can be combined with `standby_host` for additional redundancy.
* **standby_host**
hostname or IP address of the primary to stream from.
Can be specified alone or combined with either `s3_wal_path` or `gs_wal_path`.
* **standby_port**
TCP port on which the primary is listening for connections. Patroni will
use `"5432"` if not set.
* **standby_primary_slot_name**
name of the replication slot to use on the primary server when streaming
from a remote primary. See the Patroni documentation
[here](https://patroni.readthedocs.io/en/latest/standby_cluster.html) for more details. Optional.
## Volume properties
Those parameters are grouped under the `volume` top-level key and define the
@ -638,7 +647,7 @@ the global configuration before adding the `tls` section'.
## Change data capture streams
This sections enables change data capture (CDC) streams via Postgres'
[logical decoding](https://www.postgresql.org/docs/17/logicaldecoding.html)
[logical decoding](https://www.postgresql.org/docs/18/logicaldecoding.html)
feature and `pgoutput` plugin. While the Postgres operator takes responsibility
for providing the setup to publish change events, it relies on external tools
to consume them. At Zalando, we are using a workflow based on
@ -671,7 +680,7 @@ can have the following properties:
The CDC operator is following the [outbox pattern](https://debezium.io/blog/2019/02/19/reliable-microservices-data-exchange-with-the-outbox-pattern/).
The application is responsible for putting events into a (JSON/B or VARCHAR)
payload column of the outbox table in the structure of the specified target
event type. The operator will create a [PUBLICATION](https://www.postgresql.org/docs/17/logical-replication-publication.html)
event type. The operator will create a [PUBLICATION](https://www.postgresql.org/docs/18/logical-replication-publication.html)
in Postgres for all tables specified for one `database` and `applicationId`.
The CDC operator will consume from it shortly after transactions are
committed to the outbox table. The `idColumn` will be used in telemetry for

View File

@ -173,6 +173,14 @@ Those are top-level keys, containing both leaf keys and groups.
the thresholds. The value must be `"true"` to be effective. The default is empty
which means the feature is disabled.
* **maintenance_windows**
a list which defines specific time frames when certain maintenance
operations such as automatic major upgrades or master pod migration are
allowed to happen for all database clusters. Accepted formats are
"01:00-06:00" for daily maintenance windows or "Sat:00:00-04:00" for
specific days, with all times in UTC. Locally defined maintenance
windows take precedence over globally configured ones.
* **resync_period**
period between consecutive sync requests. The default is `30m`.
@ -260,12 +268,12 @@ CRD-configuration, they are grouped under the `major_version_upgrade` key.
* **minimal_major_version**
The minimal Postgres major version that will not automatically be upgraded
when `major_version_upgrade_mode` is set to `"full"`. The default is `"13"`.
when `major_version_upgrade_mode` is set to `"full"`. The default is `"14"`.
* **target_major_version**
The target Postgres major version when upgrading clusters automatically
which violate the configured allowed `minimal_major_version` when
`major_version_upgrade_mode` is set to `"full"`. The default is `"17"`.
`major_version_upgrade_mode` is set to `"full"`. The default is `"18"`.
## Kubernetes resources

View File

@ -30,7 +30,7 @@ spec:
databases:
foo: zalando
postgresql:
version: "17"
version: "18"
```
Once you cloned the Postgres Operator [repository](https://github.com/zalando/postgres-operator)
@ -109,7 +109,7 @@ metadata:
spec:
[...]
postgresql:
version: "17"
version: "18"
parameters:
password_encryption: scram-sha-256
```
@ -517,7 +517,7 @@ Postgres Operator will create the following NOLOGIN roles:
The `<dbname>_owner` role is the database owner and should be used when creating
new database objects. All members of the `admin` role, e.g. teams API roles, can
become the owner with the `SET ROLE` command. [Default privileges](https://www.postgresql.org/docs/17/sql-alterdefaultprivileges.html)
become the owner with the `SET ROLE` command. [Default privileges](https://www.postgresql.org/docs/18/sql-alterdefaultprivileges.html)
are configured for the owner role so that the `<dbname>_reader` role
automatically gets read-access (SELECT) to new tables and sequences and the
`<dbname>_writer` receives write-access (INSERT, UPDATE, DELETE on tables,
@ -594,7 +594,7 @@ spec:
### Schema `search_path` for default roles
The schema [`search_path`](https://www.postgresql.org/docs/17/ddl-schemas.html#DDL-SCHEMAS-PATH)
The schema [`search_path`](https://www.postgresql.org/docs/18/ddl-schemas.html#DDL-SCHEMAS-PATH)
for each role will include the role name and the schemas, this role should have
access to. So `foo_bar_writer` does not have to schema-qualify tables from
schemas `foo_bar_writer, bar`, while `foo_writer` can look up `foo_writer` and
@ -695,7 +695,7 @@ handle it.
### HugePages support
The operator supports [HugePages](https://www.postgresql.org/docs/17/kernel-resources.html#LINUX-HUGEPAGES).
The operator supports [HugePages](https://www.postgresql.org/docs/18/kernel-resources.html#LINUX-HUGEPAGES).
To enable HugePages, set the matching resource requests and/or limits in the manifest:
```yaml
@ -757,7 +757,7 @@ If you need to define a `nodeAffinity` for all your Postgres clusters use the
## In-place major version upgrade
Starting with Spilo 13, operator supports in-place major version upgrade to a
Starting with Spilo 14, operator supports in-place major version upgrade to a
higher major version (e.g. from PG 14 to PG 16). To trigger the upgrade,
simply increase the version in the manifest. It is your responsibility to test
your applications against the new version before the upgrade; downgrading is
@ -792,7 +792,7 @@ spec:
clone:
uid: "efd12e58-5786-11e8-b5a7-06148230260c"
cluster: "acid-minimal-cluster"
timestamp: "2017-12-19T12:40:33+01:00"
timestamp: "2025-12-19T12:40:33+01:00"
```
Here `cluster` is a name of a source cluster that is going to be cloned. A new
@ -827,7 +827,7 @@ spec:
clone:
uid: "efd12e58-5786-11e8-b5a7-06148230260c"
cluster: "acid-minimal-cluster"
timestamp: "2017-12-19T12:40:33+01:00"
timestamp: "2025-12-19T12:40:33+01:00"
s3_wal_path: "s3://custom/path/to/bucket"
s3_endpoint: https://s3.acme.org
s3_access_key_id: 0123456789abcdef0123456789abcdef
@ -838,7 +838,7 @@ spec:
### Clone directly
Another way to get a fresh copy of your source DB cluster is via
[pg_basebackup](https://www.postgresql.org/docs/17/app-pgbasebackup.html). To
[pg_basebackup](https://www.postgresql.org/docs/18/app-pgbasebackup.html). To
use this feature simply leave out the timestamp field from the clone section.
The operator will connect to the service of the source cluster by name. If the
cluster is called test, then the connection string will look like host=test
@ -900,8 +900,9 @@ the PostgreSQL version between source and target cluster has to be the same.
To start a cluster as standby, add the following `standby` section in the YAML
file. You can stream changes from archived WAL files (AWS S3 or Google Cloud
Storage) or from a remote primary. Only one option can be specified in the
manifest:
Storage), from a remote primary, or combine a remote primary with a WAL archive.
At least one of `s3_wal_path`, `gs_wal_path`, or `standby_host` must be specified.
Note that `s3_wal_path` and `gs_wal_path` are mutually exclusive.
```yaml
spec:
@ -929,6 +930,16 @@ spec:
standby_port: "5433"
```
You can also combine a remote primary with a WAL archive for additional redundancy:
```yaml
spec:
standby:
standby_host: "acid-minimal-cluster.default"
standby_port: "5433"
s3_wal_path: "s3://<bucketname>/spilo/<source_db_cluster>/<UID>/wal/<PGVERSION>"
```
Note, that the pods and services use the same role labels like for normal clusters:
The standby leader is labeled as `master`. When using the `standby_host` option
you have to copy the credentials from the source cluster's secrets to successfully

View File

@ -7,8 +7,8 @@ set -o pipefail
IFS=$'\n\t'
readonly cluster_name="postgres-operator-e2e-tests"
readonly kubeconfig_path="/tmp/kind-config-${cluster_name}"
readonly spilo_image="registry.opensource.zalan.do/acid/spilo-17-e2e:0.3"
readonly kubeconfig_path="${HOME}/kind-config-${cluster_name}"
readonly spilo_image="ghcr.io/zalando/spilo-18:4.1-p1"
readonly e2e_test_runner_image="ghcr.io/zalando/postgres-operator-e2e-tests-runner:latest"
export GOPATH=${GOPATH-~/go}
@ -19,11 +19,17 @@ echo "Kubeconfig path: ${kubeconfig_path}"
function pull_images(){
operator_tag=$(git describe --tags --always --dirty)
if [[ -z $(docker images -q ghcr.io/zalando/postgres-operator:${operator_tag}) ]]
image_name="ghcr.io/zalando/postgres-operator:${operator_tag}"
if [[ -z $(docker images -q "${image_name}") ]]
then
docker pull ghcr.io/zalando/postgres-operator:latest
if ! docker pull "${image_name}"
then
echo "Failed to pull operator image: ${image_name}"
exit 1
fi
fi
operator_image=$(docker images --filter=reference="ghcr.io/zalando/postgres-operator" --format "{{.Repository}}:{{.Tag}}" | head -1)
operator_image="${image_name}"
echo "Using operator image: ${operator_image}"
}
function start_kind(){
@ -36,7 +42,10 @@ function start_kind(){
export KUBECONFIG="${kubeconfig_path}"
kind create cluster --name ${cluster_name} --config kind-cluster-postgres-operator-e2e-tests.yaml
docker pull "${spilo_image}"
# Pull all platforms to satisfy Kind's --all-platforms requirement
docker pull --platform linux/amd64 "${spilo_image}"
docker pull --platform linux/arm64 "${spilo_image}"
kind load docker-image "${spilo_image}" --name ${cluster_name}
}
@ -52,7 +61,7 @@ function set_kind_api_server_ip(){
# but update the IP address of the API server to the one from the Docker 'bridge' network
readonly local kind_api_server_port=6443 # well-known in the 'kind' codebase
readonly local kind_api_server=$(docker inspect --format "{{ .NetworkSettings.Networks.kind.IPAddress }}:${kind_api_server_port}" "${cluster_name}"-control-plane)
sed -i "s/server.*$/server: https:\/\/$kind_api_server/g" "${kubeconfig_path}"
sed "s/server.*$/server: https:\/\/$kind_api_server/g" "${kubeconfig_path}" > "${kubeconfig_path}".tmp && mv "${kubeconfig_path}".tmp "${kubeconfig_path}"
}
function generate_certificate(){

View File

@ -12,9 +12,9 @@ from kubernetes import client
from tests.k8s_api import K8s
from kubernetes.client.rest import ApiException
SPILO_CURRENT = "registry.opensource.zalan.do/acid/spilo-17-e2e:0.3"
SPILO_LAZY = "registry.opensource.zalan.do/acid/spilo-17-e2e:0.4"
SPILO_FULL_IMAGE = "ghcr.io/zalando/spilo-17:4.0-p3"
SPILO_CURRENT = "ghcr.io/zalando/spilo-e2e:dev-18.3"
SPILO_LAZY = "ghcr.io/zalando/spilo-e2e:dev-18.4"
SPILO_FULL_IMAGE = "ghcr.io/zalando/spilo-18:4.1-p1"
def to_selector(labels):
return ",".join(["=".join(lbl) for lbl in labels.items()])
@ -151,6 +151,7 @@ class EndToEndTestCase(unittest.TestCase):
'default', label_selector='name=postgres-operator').items[0].spec.containers[0].image
print("Tested operator image: {}".format(actual_operator_image)) # shows up after tests finish
# load minimal Postgres manifest and wait for cluster to be up and running
result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
print('stdout: {}, stderr: {}'.format(result.stdout, result.stderr))
try:
@ -1211,25 +1212,25 @@ class EndToEndTestCase(unittest.TestCase):
k8s.create_with_kubectl("manifests/minimal-postgres-lowest-version-manifest.yaml")
self.eventuallyEqual(lambda: k8s.count_running_pods(labels=cluster_label), 2, "No 2 pods running")
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
self.eventuallyEqual(check_version, 13, "Version is not correct")
self.eventuallyEqual(check_version, 14, "Version is not correct")
master_nodes, _ = k8s.get_cluster_nodes(cluster_labels=cluster_label)
# should upgrade immediately
pg_patch_version_14 = {
pg_patch_version_higher_version = {
"spec": {
"postgresql": {
"version": "14"
"version": "15"
}
}
}
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_14)
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_higher_version)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
self.eventuallyEqual(check_version, 14, "Version should be upgraded from 13 to 14")
self.eventuallyEqual(check_version, 15, "Version should be upgraded from 14 to 15")
# check if annotation for last upgrade's success is set
annotations = get_annotations()
@ -1238,10 +1239,10 @@ class EndToEndTestCase(unittest.TestCase):
# should not upgrade because current time is not in maintenanceWindow
current_time = datetime.now()
maintenance_window_future = f"{(current_time+timedelta(minutes=60)).strftime('%H:%M')}-{(current_time+timedelta(minutes=120)).strftime('%H:%M')}"
pg_patch_version_15_outside_mw = {
pg_patch_version_higher_version_outside_mw = {
"spec": {
"postgresql": {
"version": "15"
"version": "16"
},
"maintenanceWindows": [
maintenance_window_future
@ -1249,23 +1250,23 @@ class EndToEndTestCase(unittest.TestCase):
}
}
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15_outside_mw)
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_higher_version_outside_mw)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
# no pod replacement outside of the maintenance window
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
self.eventuallyEqual(check_version, 14, "Version should not be upgraded")
self.eventuallyEqual(check_version, 15, "Version should not be upgraded")
second_annotations = get_annotations()
self.assertIsNone(second_annotations.get("last-major-upgrade-failure"), "Annotation for last upgrade's failure should not be set")
# change maintenanceWindows to current
maintenance_window_current = f"{(current_time-timedelta(minutes=30)).strftime('%H:%M')}-{(current_time+timedelta(minutes=30)).strftime('%H:%M')}"
pg_patch_version_15_in_mw = {
pg_patch_version_higher_version_in_mw = {
"spec": {
"postgresql": {
"version": "15"
"version": "16"
},
"maintenanceWindows": [
maintenance_window_current
@ -1274,13 +1275,13 @@ class EndToEndTestCase(unittest.TestCase):
}
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15_in_mw)
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_higher_version_in_mw)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=master,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
self.eventuallyEqual(check_version, 15, "Version should be upgraded from 14 to 15")
self.eventuallyEqual(check_version, 16, "Version should be upgraded from 15 to 16")
# check if annotation for last upgrade's success is updated after second upgrade
third_annotations = get_annotations()
@ -1288,7 +1289,7 @@ class EndToEndTestCase(unittest.TestCase):
self.assertNotEqual(annotations.get("last-major-upgrade-success"), third_annotations.get("last-major-upgrade-success"), "Annotation for last upgrade's success is not updated")
# test upgrade with failed upgrade annotation
pg_patch_version_17 = {
pg_patch_version_highest_version = {
"metadata": {
"annotations": {
"last-major-upgrade-failure": "2024-01-02T15:04:05Z"
@ -1296,28 +1297,28 @@ class EndToEndTestCase(unittest.TestCase):
},
"spec": {
"postgresql": {
"version": "17"
"version": "18"
},
},
}
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_17)
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_highest_version)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
self.eventuallyEqual(check_version, 15, "Version should not be upgraded because annotation for last upgrade's failure is set")
self.eventuallyEqual(check_version, 16, "Version should not be upgraded because annotation for last upgrade's failure is set")
# change the version back to 15 and should remove failure annotation
# change the version back to 16 and should remove failure annotation
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15_in_mw)
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_higher_version_in_mw)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
self.eventuallyEqual(check_version, 15, "Version should not be upgraded from 15")
self.eventuallyEqual(check_version, 16, "Version should not be upgraded from 16")
fourth_annotations = get_annotations()
self.assertIsNone(fourth_annotations.get("last-major-upgrade-failure"), "Annotation for last upgrade's failure is not removed")

2
go.mod
View File

@ -18,6 +18,7 @@ require (
k8s.io/apiextensions-apiserver v0.32.9
k8s.io/apimachinery v0.32.9
k8s.io/client-go v0.32.9
sigs.k8s.io/yaml v1.4.0
)
require (
@ -77,7 +78,6 @@ require (
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
tool (

View File

@ -5,15 +5,17 @@
#
# Injections:
#
# * oneOf: for the standby field to enforce that only one of s3_wal_path, gs_wal_path or standby_host is set.
# * This can later be done with // +kubebuilder:validation:ExactlyOneOf marker, but this requires latest Kubernetes version. (Currently the operator depends on v1.32.9)
# * anyOf/not: for the standby field to enforce validation rules:
# - s3_wal_path and gs_wal_path are mutually exclusive
# - standby_host can be specified alone or with either s3_wal_path OR gs_wal_path
# - at least one of s3_wal_path, gs_wal_path, or standby_host must be set
# * type: string and pattern for the maintenanceWindows items.
file="${1:-"manifests/postgresql.crd.yaml"}"
sed -i '/^[[:space:]]*standby:$/{
# Capture the indentation
s/^\([[:space:]]*\)standby:$/\1standby:\n\1 oneOf:\n\1 - required:\n\1 - s3_wal_path\n\1 - required:\n\1 - gs_wal_path\n\1 - required:\n\1 - standby_host/
s/^\([[:space:]]*\)standby:$/\1standby:\n\1 anyOf:\n\1 - required:\n\1 - s3_wal_path\n\1 - required:\n\1 - gs_wal_path\n\1 - required:\n\1 - standby_host\n\1 not:\n\1 required:\n\1 - s3_wal_path\n\1 - gs_wal_path/
}' "$file"
sed -i '/^[[:space:]]*maintenanceWindows:$/{

View File

@ -25,11 +25,11 @@ RUN apt-get update \
&& curl --silent https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
postgresql-client-18 \
postgresql-client-17 \
postgresql-client-16 \
postgresql-client-15 \
postgresql-client-14 \
postgresql-client-13 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

View File

@ -10,7 +10,7 @@ metadata:
# "delete-date": "2020-08-31" # can only be deleted on that day if "delete-date "key is configured
# "delete-clustername": "acid-test-cluster" # can only be deleted when name matches if "delete-clustername" key is configured
spec:
dockerImage: ghcr.io/zalando/spilo-17:4.0-p3
dockerImage: ghcr.io/zalando/spilo-18:4.1-p1
teamId: "acid"
numberOfInstances: 2
users: # Application/Robot users
@ -48,7 +48,7 @@ spec:
defaultRoles: true
defaultUsers: false
postgresql:
version: "17"
version: "18"
parameters: # Expert section
shared_buffers: "32MB"
max_connections: "10"

View File

@ -34,7 +34,7 @@ data:
default_memory_request: 100Mi
# delete_annotation_date_key: delete-date
# delete_annotation_name_key: delete-clustername
docker_image: ghcr.io/zalando/spilo-17:4.0-p3
docker_image: ghcr.io/zalando/spilo-18:4.1-p1
# downscaler_annotations: "deployment-time,downscaler/*"
enable_admin_role_for_users: "true"
enable_crd_registration: "true"
@ -102,6 +102,7 @@ data:
logical_backup_s3_sse: "AES256"
logical_backup_s3_retention_time: ""
logical_backup_schedule: "30 00 * * *"
# maintenance_windows: "Sat:22:00-23:59,Sun:00:00-01:00"
major_version_upgrade_mode: "manual"
# major_version_upgrade_team_allow_list: ""
master_dns_name_format: "{cluster}.{namespace}.{hostedzone}"
@ -113,7 +114,7 @@ data:
min_cpu_limit: 250m
min_instances: "-1"
min_memory_limit: 250Mi
minimal_major_version: "13"
minimal_major_version: "14"
# node_readiness_label: "status:ready"
# node_readiness_label_merge: "OR"
oauth_token_secret_name: postgresql-operator
@ -163,7 +164,7 @@ data:
spilo_privileged: "false"
storage_resize_mode: "pvc"
super_username: postgres
target_major_version: "17"
target_major_version: "18"
team_admin_role: "admin"
team_api_role_configuration: "log_statement:all"
teams_api_url: http://fake-teams-api.default.svc.cluster.local

View File

@ -28,7 +28,7 @@ spec:
preparedDatabases:
bar: {}
postgresql:
version: "13"
version: "18"
sidecars:
- name: "exporter"
image: "quay.io/prometheuscommunity/postgres-exporter:v0.15.0"

View File

@ -17,4 +17,4 @@ spec:
preparedDatabases:
bar: {}
postgresql:
version: "13"
version: "14"

View File

@ -17,4 +17,4 @@ spec:
preparedDatabases:
bar: {}
postgresql:
version: "17"
version: "18"

View File

@ -66,7 +66,7 @@ spec:
type: string
docker_image:
type: string
default: "ghcr.io/zalando/spilo-17:4.0-p3"
default: "ghcr.io/zalando/spilo-18:4.1-p1"
enable_crd_registration:
type: boolean
default: true
@ -99,6 +99,11 @@ spec:
kubernetes_use_configmaps:
type: boolean
default: false
maintenance_windows:
items:
pattern: '^\ *((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))-((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\d):([0-5]?\d)|(2[0-3]|[01]?\d):([0-5]?\d))\ *$'
type: string
type: array
max_instances:
type: integer
description: "-1 = disabled"
@ -167,10 +172,10 @@ spec:
type: string
minimal_major_version:
type: string
default: "13"
default: "14"
target_major_version:
type: string
default: "17"
default: "18"
kubernetes:
type: object
properties:

View File

@ -3,7 +3,7 @@ kind: OperatorConfiguration
metadata:
name: postgresql-operator-default-configuration
configuration:
docker_image: ghcr.io/zalando/spilo-17:4.0-p3
docker_image: ghcr.io/zalando/spilo-18:4.1-p1
# enable_crd_registration: true
# crd_categories:
# - all
@ -16,6 +16,9 @@ configuration:
# ignore_instance_limits_annotation_key: ""
# ignore_resources_limits_annotation_key: ""
# kubernetes_use_configmaps: false
# maintenance_windows:
# - "Sat:22:00-23:59"
# - "Sun:00:00-01:00"
max_instances: -1
min_instances: -1
resync_period: 30m
@ -40,8 +43,8 @@ configuration:
major_version_upgrade_mode: "manual"
# major_version_upgrade_team_allow_list:
# - acid
minimal_major_version: "13"
target_major_version: "17"
minimal_major_version: "14"
target_major_version: "18"
kubernetes:
# additional_pod_capabilities:
# - "SYS_NICE"

View File

@ -3523,11 +3523,11 @@ spec:
type: object
version:
enum:
- "13"
- "14"
- "15"
- "16"
- "17"
- "18"
type: string
required:
- version
@ -3924,15 +3924,21 @@ spec:
format: int64
type: integer
standby:
oneOf:
anyOf:
- required:
- s3_wal_path
- required:
- gs_wal_path
- required:
- standby_host
description: StandbyDescription contains remote primary config or
s3/gs wal path
not:
required:
- s3_wal_path
- gs_wal_path
description: |-
StandbyDescription contains remote primary config and/or s3/gs wal path.
standby_host can be specified alone or together with either s3_wal_path OR gs_wal_path (mutually exclusive).
At least one field must be specified. s3_wal_path and gs_wal_path are mutually exclusive.
properties:
gs_wal_path:
type: string
@ -3942,6 +3948,8 @@ spec:
type: string
standby_port:
type: string
standby_primary_slot_name:
type: string
type: object
streams:
items:

View File

@ -8,8 +8,10 @@ spec:
size: 1Gi
numberOfInstances: 1
postgresql:
version: "17"
# Make this a standby cluster and provide either the s3 bucket path of source cluster or the remote primary host for continuous streaming.
version: "18"
# Make this a standby cluster. You can specify s3_wal_path or gs_wal_path for WAL archive,
# standby_host for remote primary streaming, or combine standby_host with either WAL path.
# Note: s3_wal_path and gs_wal_path are mutually exclusive.
standby:
# s3_wal_path: "s3://mybucket/spilo/acid-minimal-cluster/abcd1234-2a4b-4b2a-8c9c-c1234defg567/wal/14/"
standby_host: "acid-minimal-cluster.default"

View File

@ -127,6 +127,15 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
"kubernetes_use_configmaps": {
Type: "boolean",
},
"maintenance_windows": {
Type: "array",
Items: &apiextv1.JSONSchemaPropsOrArray{
Schema: &apiextv1.JSONSchemaProps{
Type: "string",
Pattern: "^\\ *((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\\d):([0-5]?\\d)|(2[0-3]|[01]?\\d):([0-5]?\\d))-((Mon|Tue|Wed|Thu|Fri|Sat|Sun):(2[0-3]|[01]?\\d):([0-5]?\\d)|(2[0-3]|[01]?\\d):([0-5]?\\d))\\ *$",
},
},
},
"max_instances": {
Type: "integer",
Description: "-1 = disabled",

View File

@ -31,7 +31,8 @@ func (m *MaintenanceWindow) UnmarshalJSON(data []byte) error {
err error
)
parts := strings.Split(string(data[1:len(data)-1]), "-")
dataStr := strings.Trim(string(data), "\"")
parts := strings.Split(dataStr, "-")
if len(parts) != 2 {
return fmt.Errorf("incorrect maintenance window format")
}

View File

@ -49,8 +49,8 @@ type PostgresUsersConfiguration struct {
// MajorVersionUpgradeConfiguration defines how the operator handles Postgres
// major version upgrades (mode, team allow list and the version boundaries).
type MajorVersionUpgradeConfiguration struct {
	MajorVersionUpgradeMode          string   `json:"major_version_upgrade_mode" default:"manual"` // off - no actions, manual - manifest triggers action, full - manifest and minimal version violation trigger upgrade
	MajorVersionUpgradeTeamAllowList []string `json:"major_version_upgrade_team_allow_list,omitempty"`
	MinimalMajorVersion              string   `json:"minimal_major_version" default:"14"`
	TargetMajorVersion               string   `json:"target_major_version" default:"18"`
}
// KubernetesMetaConfiguration defines k8s conf required for all Postgres clusters and the operator itself
@ -266,6 +266,7 @@ type OperatorConfigurationData struct {
Workers uint32 `json:"workers,omitempty"`
ResyncPeriod Duration `json:"resync_period,omitempty"`
RepairPeriod Duration `json:"repair_period,omitempty"`
MaintenanceWindows []MaintenanceWindow `json:"maintenance_windows,omitempty"`
SetMemoryRequestToLimit bool `json:"set_memory_request_to_limit,omitempty"`
ShmVolume *bool `json:"enable_shm_volume,omitempty"`
SidecarImages map[string]string `json:"sidecar_docker_images,omitempty"` // deprecated in favour of SidecarContainers
@ -285,10 +286,9 @@ type OperatorConfigurationData struct {
ConnectionPooler ConnectionPoolerConfiguration `json:"connection_pooler"`
Patroni PatroniConfiguration `json:"patroni"`
MinInstances int32 `json:"min_instances,omitempty"`
MaxInstances int32 `json:"max_instances,omitempty"`
IgnoreInstanceLimitsAnnotationKey string `json:"ignore_instance_limits_annotation_key,omitempty"`
MinInstances int32 `json:"min_instances,omitempty"`
MaxInstances int32 `json:"max_instances,omitempty"`
IgnoreInstanceLimitsAnnotationKey string `json:"ignore_instance_limits_annotation_key,omitempty"`
IgnoreResourcesLimitsAnnotationKey string `json:"ignore_resources_limits_annotation_key,omitempty"`
}

File diff suppressed because it is too large Load Diff

View File

@ -185,7 +185,7 @@ type AdditionalVolume struct {
// PostgresqlParam describes PostgreSQL version and pairs of configuration parameter name - values.
type PostgresqlParam struct {
// +kubebuilder:validation:Enum="13";"14";"15";"16";"17"
// +kubebuilder:validation:Enum="14";"15";"16";"17";"18"
PgVersion string `json:"version"`
Parameters map[string]string `json:"parameters,omitempty"`
}
@ -246,13 +246,15 @@ type Patroni struct {
FailsafeMode *bool `json:"failsafe_mode,omitempty"`
}
// StandbyDescription contains remote primary config and/or s3/gs wal path.
// standby_host can be specified alone or together with either s3_wal_path OR gs_wal_path (mutually exclusive).
// At least one field must be specified. s3_wal_path and gs_wal_path are mutually exclusive.
type StandbyDescription struct {
	S3WalPath              string `json:"s3_wal_path,omitempty"`
	GSWalPath              string `json:"gs_wal_path,omitempty"`
	StandbyHost            string `json:"standby_host,omitempty"`
	StandbyPort            string `json:"standby_port,omitempty"`
	StandbyPrimarySlotName string `json:"standby_primary_slot_name,omitempty"`
}
// TLSDescription specs TLS properties

View File

@ -91,6 +91,13 @@ var maintenanceWindows = []struct {
StartTime: mustParseTime("10:00"),
EndTime: mustParseTime("20:00"),
}, nil},
{"regular every day scenario",
[]byte(`"05:00-07:00"`),
MaintenanceWindow{
Everyday: true,
StartTime: mustParseTime("05:00"),
EndTime: mustParseTime("07:00"),
}, nil},
{"starts and ends at the same time",
[]byte(`"Mon:10:00-10:00"`),
MaintenanceWindow{
@ -219,7 +226,7 @@ var unmarshalCluster = []struct {
"127.0.0.1/32"
],
"postgresql": {
"version": "17",
"version": "18",
"parameters": {
"shared_buffers": "32MB",
"max_connections": "10",
@ -279,7 +286,7 @@ var unmarshalCluster = []struct {
},
Spec: PostgresSpec{
PostgresqlParam: PostgresqlParam{
PgVersion: "17",
PgVersion: "18",
Parameters: map[string]string{
"shared_buffers": "32MB",
"max_connections": "10",
@ -339,7 +346,7 @@ var unmarshalCluster = []struct {
},
Error: "",
},
marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"17","parameters":{"log_statement":"all","max_connections":"10","shared_buffers":"32MB"}},"pod_priority_class_name":"spilo-pod-priority","volume":{"size":"5Gi","storageClass":"SSD", "subPath": "subdir"},"enableShmVolume":false,"patroni":{"initdb":{"data-checksums":"true","encoding":"UTF8","locale":"en_US.UTF-8"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"],"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"slots":{"permanent_logical_1":{"database":"foo","plugin":"pgoutput","type":"logical"}}},"resources":{"requests":{"cpu":"10m","memory":"50Mi"},"limits":{"cpu":"300m","memory":"3000Mi"}},"teamId":"acid","allowedSourceRanges":["127.0.0.1/32"],"numberOfInstances":2,"users":{"zalando":["superuser","createdb"]},"maintenanceWindows":["Mon:01:00-06:00","Sat:00:00-04:00","05:00-05:15"],"clone":{"cluster":"acid-batman"}},"status":{"PostgresClusterStatus":""}}`),
marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"18","parameters":{"log_statement":"all","max_connections":"10","shared_buffers":"32MB"}},"pod_priority_class_name":"spilo-pod-priority","volume":{"size":"5Gi","storageClass":"SSD", "subPath": "subdir"},"enableShmVolume":false,"patroni":{"initdb":{"data-checksums":"true","encoding":"UTF8","locale":"en_US.UTF-8"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"],"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"slots":{"permanent_logical_1":{"database":"foo","plugin":"pgoutput","type":"logical"}}},"resources":{"requests":{"cpu":"10m","memory":"50Mi"},"limits":{"cpu":"300m","memory":"3000Mi"}},"teamId":"acid","allowedSourceRanges":["127.0.0.1/32"],"numberOfInstances":2,"users":{"zalando":["superuser","createdb"]},"maintenanceWindows":["Mon:01:00-06:00","Sat:00:00-04:00","05:00-05:15"],"clone":{"cluster":"acid-batman"}},"status":{"PostgresClusterStatus":""}}`),
err: nil},
{
about: "example with clone",
@ -404,7 +411,7 @@ var postgresqlList = []struct {
out PostgresqlList
err error
}{
{"expect success", []byte(`{"apiVersion":"v1","items":[{"apiVersion":"acid.zalan.do/v1","kind":"Postgresql","metadata":{"labels":{"team":"acid"},"name":"acid-testcluster42","namespace":"default","resourceVersion":"30446957","selfLink":"/apis/acid.zalan.do/v1/namespaces/default/postgresqls/acid-testcluster42","uid":"857cd208-33dc-11e7-b20a-0699041e4b03"},"spec":{"allowedSourceRanges":["185.85.220.0/22"],"numberOfInstances":1,"postgresql":{"version":"17"},"teamId":"acid","volume":{"size":"10Gi"}},"status":{"PostgresClusterStatus":"Running"}}],"kind":"List","metadata":{},"resourceVersion":"","selfLink":""}`),
{"expect success", []byte(`{"apiVersion":"v1","items":[{"apiVersion":"acid.zalan.do/v1","kind":"Postgresql","metadata":{"labels":{"team":"acid"},"name":"acid-testcluster42","namespace":"default","resourceVersion":"30446957","selfLink":"/apis/acid.zalan.do/v1/namespaces/default/postgresqls/acid-testcluster42","uid":"857cd208-33dc-11e7-b20a-0699041e4b03"},"spec":{"allowedSourceRanges":["185.85.220.0/22"],"numberOfInstances":1,"postgresql":{"version":"18"},"teamId":"acid","volume":{"size":"10Gi"}},"status":{"PostgresClusterStatus":"Running"}}],"kind":"List","metadata":{},"resourceVersion":"","selfLink":""}`),
PostgresqlList{
TypeMeta: metav1.TypeMeta{
Kind: "List",
@ -425,7 +432,7 @@ var postgresqlList = []struct {
},
Spec: PostgresSpec{
ClusterName: "testcluster42",
PostgresqlParam: PostgresqlParam{PgVersion: "17"},
PostgresqlParam: PostgresqlParam{PgVersion: "18"},
Volume: Volume{Size: "10Gi"},
TeamID: "acid",
AllowedSourceRanges: []string{"185.85.220.0/22"},

View File

@ -433,6 +433,13 @@ func (in *OperatorConfigurationData) DeepCopyInto(out *OperatorConfigurationData
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MaintenanceWindows != nil {
in, out := &in.MaintenanceWindows, &out.MaintenanceWindows
*out = make([]MaintenanceWindow, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ShmVolume != nil {
in, out := &in.ShmVolume, &out.ShmVolume
*out = new(bool)

View File

@ -32,6 +32,7 @@ import (
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/rest"
@ -271,26 +272,29 @@ func (c *Cluster) Create() (err error) {
)
defer func() {
var (
pgUpdatedStatus *acidv1.Postgresql
errStatus error
)
if err == nil {
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning) //TODO: are you sure it's running?
} else {
currentStatus := c.Status.DeepCopy()
pg := c.Postgresql.DeepCopy()
pg.Status.PostgresClusterStatus = acidv1.ClusterStatusRunning
if err != nil {
c.logger.Warningf("cluster created failed: %v", err)
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusAddFailed)
pg.Status.PostgresClusterStatus = acidv1.ClusterStatusAddFailed
}
if errStatus != nil {
c.logger.Warningf("could not set cluster status: %v", errStatus)
return
}
if pgUpdatedStatus != nil {
if !equality.Semantic.DeepEqual(currentStatus, pg.Status) {
pgUpdatedStatus, err := c.KubeClient.SetPostgresCRDStatus(c.clusterName(), pg)
if err != nil {
c.logger.Warningf("could not set cluster status: %v", err)
return
}
c.setSpec(pgUpdatedStatus)
}
}()
pgCreateStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusCreating)
pg := c.Postgresql.DeepCopy()
pg.Status.PostgresClusterStatus = acidv1.ClusterStatusCreating
pgCreateStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), pg)
if err != nil {
return fmt.Errorf("could not set cluster status: %v", err)
}
@ -381,7 +385,7 @@ func (c *Cluster) Create() (err error) {
// create database objects unless we are running without pods or disabled
// that feature explicitly
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0 || c.Spec.StandbyCluster != nil) {
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0 || isStandbyCluster(&c.Spec)) {
c.logger.Infof("Create roles")
if err = c.createRoles(); err != nil {
return fmt.Errorf("could not create users: %v", err)
@ -978,29 +982,33 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
c.mu.Lock()
defer c.mu.Unlock()
c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusUpdating)
newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusUpdating
if !isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
newSpec, err := c.KubeClient.SetPostgresCRDStatus(c.clusterName(), newSpec)
if err != nil {
return fmt.Errorf("could not set cluster status to updating: %w", err)
}
if !c.isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
// do not apply any major version related changes yet
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
}
c.setSpec(newSpec)
defer func() {
var (
pgUpdatedStatus *acidv1.Postgresql
err error
)
currentStatus := newSpec.Status.DeepCopy()
newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusRunning
if updateFailed {
pgUpdatedStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusUpdateFailed)
} else {
pgUpdatedStatus, err = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning)
newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusUpdateFailed
}
if err != nil {
c.logger.Warningf("could not set cluster status: %v", err)
return
}
if pgUpdatedStatus != nil {
if !equality.Semantic.DeepEqual(currentStatus, newSpec.Status) {
pgUpdatedStatus, err := c.KubeClient.SetPostgresCRDStatus(c.clusterName(), newSpec)
if err != nil {
c.logger.Warningf("could not set cluster status: %v", err)
return
}
c.setSpec(pgUpdatedStatus)
}
}()

View File

@ -1691,7 +1691,7 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
}
}
if spec.StandbyCluster != nil {
if isStandbyCluster(spec) {
if newcur == 1 {
min = newcur
max = newcur
@ -2207,23 +2207,29 @@ func (c *Cluster) generateStandbyEnvironment(description *acidv1.StandbyDescript
Value: description.StandbyPort,
})
}
} else {
c.logger.Info("standby cluster streaming from WAL location")
if description.S3WalPath != "" {
if description.StandbyPrimarySlotName != "" {
result = append(result, v1.EnvVar{
Name: "STANDBY_WALE_S3_PREFIX",
Value: description.S3WalPath,
Name: "STANDBY_PRIMARY_SLOT_NAME",
Value: description.StandbyPrimarySlotName,
})
} else if description.GSWalPath != "" {
result = append(result, v1.EnvVar{
Name: "STANDBY_WALE_GS_PREFIX",
Value: description.GSWalPath,
})
} else {
c.logger.Error("no WAL path specified in standby section")
return result
}
}
// WAL archive can be specified with or without standby_host
if description.S3WalPath != "" {
c.logger.Info("standby cluster using S3 WAL archive")
result = append(result, v1.EnvVar{
Name: "STANDBY_WALE_S3_PREFIX",
Value: description.S3WalPath,
})
result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"})
result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
} else if description.GSWalPath != "" {
c.logger.Info("standby cluster using GCS WAL archive")
result = append(result, v1.EnvVar{
Name: "STANDBY_WALE_GS_PREFIX",
Value: description.GSWalPath,
})
result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"})
result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
}

View File

@ -72,18 +72,18 @@ func TestGenerateSpiloJSONConfiguration(t *testing.T) {
}{
{
subtest: "Patroni default configuration",
pgParam: &acidv1.PostgresqlParam{PgVersion: "17"},
pgParam: &acidv1.PostgresqlParam{PgVersion: "18"},
patroni: &acidv1.Patroni{},
opConfig: &config.Config{
Auth: config.Auth{
PamRoleName: "zalandos",
},
},
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/17/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{}}}`,
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/18/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{}}}`,
},
{
subtest: "Patroni configured",
pgParam: &acidv1.PostgresqlParam{PgVersion: "17"},
pgParam: &acidv1.PostgresqlParam{PgVersion: "18"},
patroni: &acidv1.Patroni{
InitDB: map[string]string{
"encoding": "UTF8",
@ -102,38 +102,38 @@ func TestGenerateSpiloJSONConfiguration(t *testing.T) {
FailsafeMode: util.True(),
},
opConfig: &config.Config{},
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/17/bin","pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"synchronous_mode":true,"synchronous_mode_strict":true,"synchronous_node_count":1,"slots":{"permanent_logical_1":{"database":"foo","plugin":"pgoutput","type":"logical"}},"failsafe_mode":true}}}`,
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/18/bin","pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"]},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"},"data-checksums",{"encoding":"UTF8"},{"locale":"en_US.UTF-8"}],"dcs":{"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"synchronous_mode":true,"synchronous_mode_strict":true,"synchronous_node_count":1,"slots":{"permanent_logical_1":{"database":"foo","plugin":"pgoutput","type":"logical"}},"failsafe_mode":true}}}`,
},
{
subtest: "Patroni failsafe_mode configured globally",
pgParam: &acidv1.PostgresqlParam{PgVersion: "17"},
pgParam: &acidv1.PostgresqlParam{PgVersion: "18"},
patroni: &acidv1.Patroni{},
opConfig: &config.Config{
EnablePatroniFailsafeMode: util.True(),
},
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/17/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{"failsafe_mode":true}}}`,
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/18/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{"failsafe_mode":true}}}`,
},
{
subtest: "Patroni failsafe_mode configured globally, disabled for cluster",
pgParam: &acidv1.PostgresqlParam{PgVersion: "17"},
pgParam: &acidv1.PostgresqlParam{PgVersion: "18"},
patroni: &acidv1.Patroni{
FailsafeMode: util.False(),
},
opConfig: &config.Config{
EnablePatroniFailsafeMode: util.True(),
},
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/17/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{"failsafe_mode":false}}}`,
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/18/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{"failsafe_mode":false}}}`,
},
{
subtest: "Patroni failsafe_mode disabled globally, configured for cluster",
pgParam: &acidv1.PostgresqlParam{PgVersion: "17"},
pgParam: &acidv1.PostgresqlParam{PgVersion: "18"},
patroni: &acidv1.Patroni{
FailsafeMode: util.True(),
},
opConfig: &config.Config{
EnablePatroniFailsafeMode: util.False(),
},
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/17/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{"failsafe_mode":true}}}`,
result: `{"postgresql":{"bin_dir":"/usr/lib/postgresql/18/bin"},"bootstrap":{"initdb":[{"auth-host":"md5"},{"auth-local":"trust"}],"dcs":{"failsafe_mode":true}}}`,
},
}
for _, tt := range tests {
@ -164,15 +164,15 @@ func TestExtractPgVersionFromBinPath(t *testing.T) {
},
{
subTest: "test current bin path against hard coded template",
binPath: "/usr/lib/postgresql/17/bin",
binPath: "/usr/lib/postgresql/18/bin",
template: pgBinariesLocationTemplate,
expected: "17",
expected: "18",
},
{
subTest: "test alternative bin path against a matching template",
binPath: "/usr/pgsql-17/bin",
binPath: "/usr/pgsql-18/bin",
template: "/usr/pgsql-%v/bin",
expected: "17",
expected: "18",
},
}
@ -1370,7 +1370,33 @@ func TestStandbyEnv(t *testing.T) {
envLen: 2,
},
{
subTest: "from remote primary - ignore WAL path",
subTest: "from remote primary with S3 WAL path",
standbyOpts: &acidv1.StandbyDescription{
S3WalPath: "s3://some/path/",
StandbyHost: "remote-primary",
},
env: v1.EnvVar{
Name: "STANDBY_HOST",
Value: "remote-primary",
},
envPos: 0,
envLen: 4,
},
{
subTest: "verify S3 WAL env with standby host",
standbyOpts: &acidv1.StandbyDescription{
S3WalPath: "s3://some/path/",
StandbyHost: "remote-primary",
},
env: v1.EnvVar{
Name: "STANDBY_WALE_S3_PREFIX",
Value: "s3://some/path/",
},
envPos: 1,
envLen: 4,
},
{
subTest: "from remote primary with GCS WAL path",
standbyOpts: &acidv1.StandbyDescription{
GSWalPath: "gs://some/path/",
StandbyHost: "remote-primary",
@ -1380,7 +1406,20 @@ func TestStandbyEnv(t *testing.T) {
Value: "remote-primary",
},
envPos: 0,
envLen: 1,
envLen: 4,
},
{
subTest: "from remote primary with slot name",
standbyOpts: &acidv1.StandbyDescription{
StandbyHost: "remote-primary",
StandbyPrimarySlotName: "my_slot",
},
env: v1.EnvVar{
Name: "STANDBY_PRIMARY_SLOT_NAME",
Value: "my_slot",
},
envPos: 1,
envLen: 2,
},
}
@ -2149,7 +2188,7 @@ func TestSidecars(t *testing.T) {
spec = acidv1.PostgresSpec{
PostgresqlParam: acidv1.PostgresqlParam{
PgVersion: "17",
PgVersion: "18",
Parameters: map[string]string{
"max_connections": "100",
},
@ -2342,7 +2381,7 @@ func TestContainerValidation(t *testing.T) {
name: "init container without image",
spec: acidv1.PostgresSpec{
PostgresqlParam: acidv1.PostgresqlParam{
PgVersion: "17",
PgVersion: "18",
},
TeamID: "myapp",
NumberOfInstances: 1,
@ -2371,7 +2410,7 @@ func TestContainerValidation(t *testing.T) {
name: "sidecar without name",
spec: acidv1.PostgresSpec{
PostgresqlParam: acidv1.PostgresqlParam{
PgVersion: "17",
PgVersion: "18",
},
TeamID: "myapp",
NumberOfInstances: 1,
@ -2400,7 +2439,7 @@ func TestContainerValidation(t *testing.T) {
name: "sidecar without image",
spec: acidv1.PostgresSpec{
PostgresqlParam: acidv1.PostgresqlParam{
PgVersion: "17",
PgVersion: "18",
},
TeamID: "myapp",
NumberOfInstances: 1,
@ -2429,7 +2468,7 @@ func TestContainerValidation(t *testing.T) {
name: "valid containers pass validation",
spec: acidv1.PostgresSpec{
PostgresqlParam: acidv1.PostgresqlParam{
PgVersion: "17",
PgVersion: "18",
},
TeamID: "myapp",
NumberOfInstances: 1,

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"
"github.com/Masterminds/semver"
@ -14,15 +15,6 @@ import (
"k8s.io/apimachinery/pkg/types"
)
// VersionMap maps a Postgres major version string to its numeric
// server_version_num-style representation (e.g. "17" -> 170000).
// NOTE(review): nearby version comparisons now use strconv.Atoi directly;
// this map appears superseded — confirm remaining callers before removing.
var VersionMap = map[string]int{
	"13": 130000,
	"14": 140000,
	"15": 150000,
	"16": 160000,
	"17": 170000,
}
const (
majorVersionUpgradeSuccessAnnotation = "last-major-upgrade-success"
majorVersionUpgradeFailureAnnotation = "last-major-upgrade-failure"
@ -30,21 +22,22 @@ const (
// IsBiggerPostgresVersion reports whether the new Postgres major version is
// strictly greater than the old one. Versions are plain major-version strings
// such as "14" or "18".
func IsBiggerPostgresVersion(old string, new string) bool {
	// parse errors are deliberately ignored: a non-numeric version compares as 0
	oldN, _ := strconv.Atoi(old)
	newN, _ := strconv.Atoi(new)
	return newN > oldN
}
// GetDesiredMajorVersionAsInt converts the desired major version string to the
// comparable server_version_num-style integer (e.g. "18" -> 180000).
// An unparsable version yields 0.
func (c *Cluster) GetDesiredMajorVersionAsInt() int {
	version, _ := strconv.Atoi(c.GetDesiredMajorVersion())
	return version * 10000
}
// GetDesiredMajorVersion returns major version to use, incl. potential auto upgrade
func (c *Cluster) GetDesiredMajorVersion() string {
if c.Config.OpConfig.MajorVersionUpgradeMode == "full" {
// e.g. current is 13, minimal is 13 allowing 13 to 17 clusters, everything below is upgraded
// e.g. current is 14, minimal is 14 allowing 14 to 18 clusters, everything below is upgraded
if IsBiggerPostgresVersion(c.Spec.PgVersion, c.Config.OpConfig.MinimalMajorVersion) {
c.logger.Infof("overwriting configured major version %s to %s", c.Spec.PgVersion, c.Config.OpConfig.TargetMajorVersion)
return c.Config.OpConfig.TargetMajorVersion
@ -197,7 +190,7 @@ func (c *Cluster) majorVersionUpgrade() error {
return nil
}
if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
if !c.isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
c.logger.Infof("skipping major version upgrade, not in maintenance window")
return nil
}
@ -289,6 +282,10 @@ func (c *Cluster) majorVersionUpgrade() error {
if err != nil {
isUpgradeSuccess = false
c.annotatePostgresResource(isUpgradeSuccess)
c.logger.Errorf("upgrade action triggered but command failed: %v", err)
if strings.TrimSpace(scriptErrMsg) == "" {
scriptErrMsg = err.Error()
}
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, scriptErrMsg)
return fmt.Errorf("%s", scriptErrMsg)
}

View File

@ -280,7 +280,7 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
}
scheduleSwitchover := false
if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
if !c.isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
c.logger.Infof("postponing switchover, not in maintenance window")
scheduleSwitchover = true
}

View File

@ -20,6 +20,7 @@ import (
batchv1 "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)
@ -43,21 +44,19 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
c.setSpec(newSpec)
defer func() {
var (
pgUpdatedStatus *acidv1.Postgresql
errStatus error
)
if err != nil {
c.logger.Warningf("error while syncing cluster state: %v", err)
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusSyncFailed)
newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusSyncFailed
} else if !c.Status.Running() {
pgUpdatedStatus, errStatus = c.KubeClient.SetPostgresCRDStatus(c.clusterName(), acidv1.ClusterStatusRunning)
newSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusRunning
}
if errStatus != nil {
c.logger.Warningf("could not set cluster status: %v", errStatus)
return
}
if pgUpdatedStatus != nil {
if !equality.Semantic.DeepEqual(oldSpec.Status, newSpec.Status) {
pgUpdatedStatus, err := c.KubeClient.SetPostgresCRDStatus(c.clusterName(), newSpec)
if err != nil {
c.logger.Warningf("could not set cluster status: %v", err)
return
}
c.setSpec(pgUpdatedStatus)
}
}()
@ -98,7 +97,7 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
}
}
if !isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
if !c.isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
// do not apply any major version related changes yet
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
}
@ -1071,6 +1070,23 @@ func (c *Cluster) syncStandbyClusterConfiguration() error {
standbyOptionsToSet["create_replica_methods"] = []string{"bootstrap_standby_with_wale", "basebackup_fast_xlog"}
standbyOptionsToSet["restore_command"] = "envdir \"/run/etc/wal-e.d/env-standby\" /scripts/restore_command.sh \"%f\" \"%p\""
if c.Spec.StandbyCluster.StandbyHost != "" {
standbyOptionsToSet["host"] = c.Spec.StandbyCluster.StandbyHost
} else {
standbyOptionsToSet["host"] = nil
}
if c.Spec.StandbyCluster.StandbyPort != "" {
standbyOptionsToSet["port"] = c.Spec.StandbyCluster.StandbyPort
} else {
standbyOptionsToSet["port"] = nil
}
if c.Spec.StandbyCluster.StandbyPrimarySlotName != "" {
standbyOptionsToSet["primary_slot_name"] = c.Spec.StandbyCluster.StandbyPrimarySlotName
} else {
standbyOptionsToSet["primary_slot_name"] = nil
}
} else {
c.logger.Infof("promoting standby cluster and detach from source")
standbyOptionsToSet = nil
@ -1198,42 +1214,15 @@ func (c *Cluster) updateSecret(
pwdUser := userMap[userKey]
secretName := util.NameFromMeta(secret.ObjectMeta)
// if password rotation is enabled update password and username if rotation interval has been passed
// rotation can be enabled globally or via the manifest (excluding the Postgres superuser)
rotationEnabledInManifest := secretUsername != constants.SuperuserKeyName &&
(slices.Contains(c.Spec.UsersWithSecretRotation, secretUsername) ||
slices.Contains(c.Spec.UsersWithInPlaceSecretRotation, secretUsername))
// globally enabled rotation is only allowed for manifest and bootstrapped roles
allowedRoleTypes := []spec.RoleOrigin{spec.RoleOriginManifest, spec.RoleOriginBootstrap}
rotationAllowed := !pwdUser.IsDbOwner && slices.Contains(allowedRoleTypes, pwdUser.Origin) && c.Spec.StandbyCluster == nil
// users can ignore any kind of rotation
isIgnoringRotation := slices.Contains(c.Spec.UsersIgnoringSecretRotation, secretUsername)
if ((c.OpConfig.EnablePasswordRotation && rotationAllowed) || rotationEnabledInManifest) && !isIgnoringRotation {
updateSecretMsg, err = c.rotatePasswordInSecret(secret, secretUsername, pwdUser.Origin, currentTime, retentionUsers)
// do not perform any rotation of reset for standby clusters
if !isStandbyCluster(&c.Spec) {
updateSecretMsg, err = c.checkForPasswordRotation(secret, secretUsername, pwdUser, retentionUsers, currentTime)
if err != nil {
c.logger.Warnf("password rotation failed for user %s: %v", secretUsername, err)
return nil, fmt.Errorf("error while checking for password rotation: %v", err)
}
if updateSecretMsg != "" {
updateSecret = true
}
} else {
// username might not match if password rotation has been disabled again
usernameFromSecret := string(secret.Data["username"])
if secretUsername != usernameFromSecret {
// handle edge case when manifest user conflicts with a user from prepared databases
if strings.Replace(usernameFromSecret, "-", "_", -1) == strings.Replace(secretUsername, "-", "_", -1) {
return nil, fmt.Errorf("could not update secret because of user name mismatch: expected: %s, got: %s", secretUsername, usernameFromSecret)
}
*retentionUsers = append(*retentionUsers, secretUsername)
secret.Data["username"] = []byte(secretUsername)
secret.Data["password"] = []byte(util.RandomPassword(constants.PasswordLength))
secret.Data["nextRotation"] = []byte{}
updateSecret = true
updateSecretMsg = fmt.Sprintf("secret does not contain the role %s - updating username and resetting password", secretUsername)
}
}
// if this secret belongs to the infrastructure role and the password has changed - replace it in the secret
@ -1280,6 +1269,55 @@ func (c *Cluster) updateSecret(
return secret, nil
}
// checkForPasswordRotation decides whether the secret of the given user is due
// for a password (and possibly username) rotation and mutates the secret in
// place accordingly. It returns a human-readable message describing the change
// (empty when nothing changed). A failed rotation is only logged; an error is
// returned solely for the username-mismatch edge case with prepared-database users.
func (c *Cluster) checkForPasswordRotation(
	secret *v1.Secret,
	secretUsername string,
	pwdUser spec.PgUser,
	retentionUsers *[]string,
	currentTime time.Time) (string, error) {

	// rotation can be requested per manifest for any user except the Postgres superuser
	enabledInManifest := secretUsername != constants.SuperuserKeyName &&
		(slices.Contains(c.Spec.UsersWithSecretRotation, secretUsername) ||
			slices.Contains(c.Spec.UsersWithInPlaceSecretRotation, secretUsername))

	// globally enabled rotation only covers manifest and bootstrapped roles,
	// and never database owners
	globallyAllowed := !pwdUser.IsDbOwner &&
		slices.Contains([]spec.RoleOrigin{spec.RoleOriginManifest, spec.RoleOriginBootstrap}, pwdUser.Origin)

	// users can opt out of any kind of rotation
	optedOut := slices.Contains(c.Spec.UsersIgnoringSecretRotation, secretUsername)

	if ((c.OpConfig.EnablePasswordRotation && globallyAllowed) || enabledInManifest) && !optedOut {
		msg, rotationErr := c.rotatePasswordInSecret(secret, secretUsername, pwdUser.Origin, currentTime, retentionUsers)
		if rotationErr != nil {
			// best effort: log the failure but do not propagate it
			c.logger.Warnf("password rotation failed for user %s: %v", secretUsername, rotationErr)
		}
		return msg, nil
	}

	// rotation is off for this user; the stored username might be stale
	// if rotation has been disabled again
	usernameFromSecret := string(secret.Data["username"])
	if secretUsername == usernameFromSecret {
		return "", nil
	}

	// handle edge case when manifest user conflicts with a user from prepared databases
	if strings.Replace(usernameFromSecret, "-", "_", -1) == strings.Replace(secretUsername, "-", "_", -1) {
		return "", fmt.Errorf("could not update secret because of user name mismatch: expected: %s, got: %s", secretUsername, usernameFromSecret)
	}

	// restore the expected username, reset the password and remember the
	// stale rotation user for later cleanup
	*retentionUsers = append(*retentionUsers, secretUsername)
	secret.Data["username"] = []byte(secretUsername)
	secret.Data["password"] = []byte(util.RandomPassword(constants.PasswordLength))
	secret.Data["nextRotation"] = []byte{}
	return fmt.Sprintf("secret does not contain the role %s - updating username and resetting password", secretUsername), nil
}
func (c *Cluster) rotatePasswordInSecret(
secret *v1.Secret,
secretUsername string,

View File

@ -801,6 +801,41 @@ func TestSyncStandbyClusterConfiguration(t *testing.T) {
// this should update the Patroni config again
err = cluster.syncStandbyClusterConfiguration()
assert.NoError(t, err)
// test with standby_host, standby_port and standby_primary_slot_name
cluster.Spec.StandbyCluster = &acidv1.StandbyDescription{
StandbyHost: "remote-primary.example.com",
StandbyPort: "5433",
StandbyPrimarySlotName: "standby_slot",
}
cluster.syncStatefulSet()
updatedSts4 := cluster.Statefulset
// check that pods have all three STANDBY_* environment variables
assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_HOST", Value: "remote-primary.example.com"})
assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PORT", Value: "5433"})
assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PRIMARY_SLOT_NAME", Value: "standby_slot"})
// this should update the Patroni config with host, port and primary_slot_name
err = cluster.syncStandbyClusterConfiguration()
assert.NoError(t, err)
// test property deletion: remove standby_primary_slot_name
cluster.Spec.StandbyCluster = &acidv1.StandbyDescription{
StandbyHost: "remote-primary.example.com",
StandbyPort: "5433",
}
cluster.syncStatefulSet()
updatedSts5 := cluster.Statefulset
// check that STANDBY_PRIMARY_SLOT_NAME is not present
assert.Contains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_HOST", Value: "remote-primary.example.com"})
assert.Contains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PORT", Value: "5433"})
assert.NotContains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PRIMARY_SLOT_NAME", Value: "standby_slot"})
// this should update the Patroni config and set primary_slot_name to nil
err = cluster.syncStandbyClusterConfiguration()
assert.NoError(t, err)
}
func TestUpdateSecret(t *testing.T) {
@ -1015,7 +1050,7 @@ func TestUpdateSecretNameConflict(t *testing.T) {
assert.Error(t, err)
// the order of secrets to sync is not deterministic, check only first part of the error message
expectedError := fmt.Sprintf("syncing secret %s failed: could not update secret because of user name mismatch", "default/prepared-owner-user.acid-test-cluster.credentials")
expectedError := fmt.Sprintf("syncing secret %s failed: error while checking for password rotation: could not update secret because of user name mismatch", "default/prepared-owner-user.acid-test-cluster.credentials")
assert.Contains(t, err.Error(), expectedError)
}

View File

@ -8,6 +8,7 @@ import (
"fmt"
"net/http"
"reflect"
"regexp"
"sort"
"strings"
"time"
@ -663,15 +664,38 @@ func parseResourceRequirements(resourcesRequirement v1.ResourceRequirements) (ac
return resources, nil
}
func isInMaintenanceWindow(specMaintenanceWindows []acidv1.MaintenanceWindow) bool {
if len(specMaintenanceWindows) == 0 {
// standbyWaleEnvRegex matches the WAL archive prefix environment variables
// (S3, GS, GSC or SWIFT backends) that mark a cluster as replicating from a
// remote backup location. Compiled once at package level instead of on every
// call and every env entry; MustCompile also removes the previously ignored
// MatchString error.
var standbyWaleEnvRegex = regexp.MustCompile(`^STANDBY_WALE_(S3|GS|GSC|SWIFT)_PREFIX$`)

// isStandbyCluster reports whether the given Postgres spec describes a
// standby cluster: either a STANDBY_WALE_*_PREFIX environment variable is set
// to a non-empty value, or the manifest contains an explicit standby section.
func isStandbyCluster(spec *acidv1.PostgresSpec) bool {
	for _, env := range spec.Env {
		if env.Value != "" && standbyWaleEnvRegex.MatchString(env.Name) {
			return true
		}
	}
	return spec.StandbyCluster != nil
}
func (c *Cluster) isInMaintenanceWindow(specMaintenanceWindows []acidv1.MaintenanceWindow) bool {
if len(specMaintenanceWindows) == 0 && len(c.OpConfig.MaintenanceWindows) == 0 {
return true
}
now := time.Now()
currentDay := now.Weekday()
currentTime := now.Format("15:04")
for _, window := range specMaintenanceWindows {
maintenanceWindows := specMaintenanceWindows
if len(maintenanceWindows) == 0 {
maintenanceWindows = make([]acidv1.MaintenanceWindow, 0, len(c.OpConfig.MaintenanceWindows))
for _, windowStr := range c.OpConfig.MaintenanceWindows {
var window acidv1.MaintenanceWindow
if err := window.UnmarshalJSON([]byte(windowStr)); err != nil {
c.logger.Errorf("could not parse default maintenance window %q: %v", windowStr, err)
continue
}
maintenanceWindows = append(maintenanceWindows, window)
}
}
for _, window := range maintenanceWindows {
startTime := window.StartTime.Format("15:04")
endTime := window.EndTime.Format("15:04")

View File

@ -288,6 +288,12 @@ func newInheritedAnnotationsCluster(client k8sutil.KubernetesClient) (*Cluster,
},
}
// add postgresql cluster to fake client
_, err := client.PostgresqlsGetter.Postgresqls(namespace).Create(context.TODO(), &pg, metav1.CreateOptions{})
if err != nil {
return nil, err
}
cluster := New(
Config{
OpConfig: config.Config{
@ -321,7 +327,7 @@ func newInheritedAnnotationsCluster(client k8sutil.KubernetesClient) (*Cluster,
}, client, pg, logger, eventRecorder)
cluster.Name = clusterName
cluster.Namespace = namespace
_, err := cluster.createStatefulSet()
_, err = cluster.createStatefulSet()
if err != nil {
return nil, err
}
@ -651,6 +657,22 @@ func Test_trimCronjobName(t *testing.T) {
}
func TestIsInMaintenanceWindow(t *testing.T) {
cluster := New(
Config{
OpConfig: config.Config{
Resources: config.Resources{
ClusterLabels: map[string]string{"application": "spilo"},
ClusterNameLabel: "cluster-name",
DefaultCPURequest: "300m",
DefaultCPULimit: "300m",
DefaultMemoryRequest: "300Mi",
DefaultMemoryLimit: "300Mi",
},
},
}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
cluster.Name = clusterName
cluster.Namespace = namespace
now := time.Now()
futureTimeStart := now.Add(1 * time.Hour)
futureTimeStartFormatted := futureTimeStart.Format("15:04")
@ -658,14 +680,16 @@ func TestIsInMaintenanceWindow(t *testing.T) {
futureTimeEndFormatted := futureTimeEnd.Format("15:04")
tests := []struct {
name string
windows []acidv1.MaintenanceWindow
expected bool
name string
windows []acidv1.MaintenanceWindow
configWindows []string
expected bool
}{
{
name: "no maintenance windows",
windows: nil,
expected: true,
name: "no maintenance windows",
windows: nil,
configWindows: nil,
expected: true,
},
{
name: "maintenance windows with everyday",
@ -676,7 +700,8 @@ func TestIsInMaintenanceWindow(t *testing.T) {
EndTime: mustParseTime("23:59"),
},
},
expected: true,
configWindows: nil,
expected: true,
},
{
name: "maintenance windows with weekday",
@ -687,7 +712,8 @@ func TestIsInMaintenanceWindow(t *testing.T) {
EndTime: mustParseTime("23:59"),
},
},
expected: true,
configWindows: nil,
expected: true,
},
{
name: "maintenance windows with future interval time",
@ -700,12 +726,25 @@ func TestIsInMaintenanceWindow(t *testing.T) {
},
expected: false,
},
{
name: "global maintenance windows with future interval time",
windows: nil,
configWindows: []string{fmt.Sprintf("%s-%s", futureTimeStartFormatted, futureTimeEndFormatted)},
expected: false,
},
{
name: "global maintenance windows all day",
windows: nil,
configWindows: []string{"00:00-02:00", "02:00-23:59"},
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cluster.OpConfig.MaintenanceWindows = tt.configWindows
cluster.Spec.MaintenanceWindows = tt.windows
if isInMaintenanceWindow(cluster.Spec.MaintenanceWindows) != tt.expected {
if cluster.isInMaintenanceWindow(cluster.Spec.MaintenanceWindows) != tt.expected {
t.Errorf("Expected isInMaintenanceWindow to return %t", tt.expected)
}
})

View File

@ -347,9 +347,11 @@ func (c *Controller) initController() {
logMultiLineConfig(c.logger, c.opConfig.MustMarshal())
roleDefs := c.getInfrastructureRoleDefinitions()
if infraRoles, err := c.getInfrastructureRoles(roleDefs); err != nil {
c.logger.Warningf("could not get infrastructure roles: %v", err)
} else {
infraRoles, err := c.getInfrastructureRoles(roleDefs)
if err != nil {
c.logger.Warningf("could not get all infrastructure roles: %v", err)
}
if len(infraRoles) > 0 {
c.config.InfrastructureRoles = infraRoles
}

View File

@ -39,7 +39,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.EnableTeamIdClusternamePrefix = fromCRD.EnableTeamIdClusternamePrefix
result.EtcdHost = fromCRD.EtcdHost
result.KubernetesUseConfigMaps = fromCRD.KubernetesUseConfigMaps
result.DockerImage = util.Coalesce(fromCRD.DockerImage, "ghcr.io/zalando/spilo-17:4.0-p3")
result.DockerImage = util.Coalesce(fromCRD.DockerImage, "ghcr.io/zalando/spilo-18:4.1-p1")
result.Workers = util.CoalesceUInt32(fromCRD.Workers, 8)
result.MinInstances = fromCRD.MinInstances
result.MaxInstances = fromCRD.MaxInstances
@ -51,6 +51,16 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.ShmVolume = util.CoalesceBool(fromCRD.ShmVolume, util.True())
result.SidecarImages = fromCRD.SidecarImages
result.SidecarContainers = fromCRD.SidecarContainers
if len(fromCRD.MaintenanceWindows) > 0 {
result.MaintenanceWindows = make([]string, 0, len(fromCRD.MaintenanceWindows))
for _, window := range fromCRD.MaintenanceWindows {
w, err := window.MarshalJSON()
if err != nil {
panic(fmt.Errorf("could not marshal configured maintenance window: %v", err))
}
result.MaintenanceWindows = append(result.MaintenanceWindows, string(w))
}
}
// user config
result.SuperUsername = util.Coalesce(fromCRD.PostgresUsersConfiguration.SuperUsername, "postgres")
@ -63,8 +73,8 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
// major version upgrade config
result.MajorVersionUpgradeMode = util.Coalesce(fromCRD.MajorVersionUpgrade.MajorVersionUpgradeMode, "manual")
result.MajorVersionUpgradeTeamAllowList = fromCRD.MajorVersionUpgrade.MajorVersionUpgradeTeamAllowList
result.MinimalMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.MinimalMajorVersion, "13")
result.TargetMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.TargetMajorVersion, "17")
result.MinimalMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.MinimalMajorVersion, "14")
result.TargetMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.TargetMajorVersion, "18")
// kubernetes config
result.EnableOwnerReferences = util.CoalesceBool(fromCRD.Kubernetes.EnableOwnerReferences, util.False())

View File

@ -161,7 +161,8 @@ func (c *Controller) acquireInitialListOfClusters() error {
func (c *Controller) addCluster(lg *logrus.Entry, clusterName spec.NamespacedName, pgSpec *acidv1.Postgresql) (*cluster.Cluster, error) {
if c.opConfig.EnableTeamIdClusternamePrefix {
if _, err := acidv1.ExtractClusterName(clusterName.Name, pgSpec.Spec.TeamID); err != nil {
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusInvalid)
pgSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusInvalid
c.KubeClient.SetPostgresCRDStatus(clusterName, pgSpec)
return nil, err
}
}
@ -470,13 +471,25 @@ func (c *Controller) queueClusterEvent(informerOldSpec, informerNewSpec *acidv1.
switch eventType {
case EventAdd:
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusAddFailed)
informerNewSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusAddFailed
_, err := c.KubeClient.SetPostgresCRDStatus(clusterName, informerNewSpec)
if err != nil {
c.logger.WithField("cluster-name", clusterName).Errorf("could not set PostgresCRD status: %v", err)
}
c.eventRecorder.Eventf(c.GetReference(informerNewSpec), v1.EventTypeWarning, "Create", "%v", clusterError)
case EventUpdate:
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusUpdateFailed)
informerNewSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusUpdateFailed
_, err := c.KubeClient.SetPostgresCRDStatus(clusterName, informerNewSpec)
if err != nil {
c.logger.WithField("cluster-name", clusterName).Errorf("could not set PostgresCRD status: %v", err)
}
c.eventRecorder.Eventf(c.GetReference(informerNewSpec), v1.EventTypeWarning, "Update", "%v", clusterError)
default:
c.KubeClient.SetPostgresCRDStatus(clusterName, acidv1.ClusterStatusSyncFailed)
informerNewSpec.Status.PostgresClusterStatus = acidv1.ClusterStatusSyncFailed
_, err := c.KubeClient.SetPostgresCRDStatus(clusterName, informerNewSpec)
if err != nil {
c.logger.WithField("cluster-name", clusterName).Errorf("could not set PostgresCRD status: %v", err)
}
c.eventRecorder.Eventf(c.GetReference(informerNewSpec), v1.EventTypeWarning, "Sync", "%v", clusterError)
}

View File

@ -63,10 +63,9 @@ type Resources struct {
NodeReadinessLabelMerge string `name:"node_readiness_label_merge" default:"OR"`
ShmVolume *bool `name:"enable_shm_volume" default:"true"`
MaxInstances int32 `name:"max_instances" default:"-1"`
MinInstances int32 `name:"min_instances" default:"-1"`
IgnoreInstanceLimitsAnnotationKey string `name:"ignore_instance_limits_annotation_key"`
MaxInstances int32 `name:"max_instances" default:"-1"`
MinInstances int32 `name:"min_instances" default:"-1"`
IgnoreInstanceLimitsAnnotationKey string `name:"ignore_instance_limits_annotation_key"`
IgnoreResourcesLimitsAnnotationKey string `name:"ignore_resources_limits_annotation_key"`
}
@ -177,7 +176,8 @@ type Config struct {
WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to'
KubernetesUseConfigMaps bool `name:"kubernetes_use_configmaps" default:"false"`
EtcdHost string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS
DockerImage string `name:"docker_image" default:"ghcr.io/zalando/spilo-17:4.0-p3"`
MaintenanceWindows []string `name:"maintenance_windows"`
DockerImage string `name:"docker_image" default:"ghcr.io/zalando/spilo-18:4.1-p1"`
SidecarImages map[string]string `name:"sidecar_docker_images"` // deprecated in favour of SidecarContainers
SidecarContainers []v1.Container `name:"sidecars"`
PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"`
@ -248,8 +248,8 @@ type Config struct {
EnableTeamIdClusternamePrefix bool `name:"enable_team_id_clustername_prefix" default:"false"`
MajorVersionUpgradeMode string `name:"major_version_upgrade_mode" default:"manual"`
MajorVersionUpgradeTeamAllowList []string `name:"major_version_upgrade_team_allow_list" default:""`
MinimalMajorVersion string `name:"minimal_major_version" default:"13"`
TargetMajorVersion string `name:"target_major_version" default:"17"`
MinimalMajorVersion string `name:"minimal_major_version" default:"14"`
TargetMajorVersion string `name:"target_major_version" default:"18"`
PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"`
PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"`
EnablePatroniFailsafeMode *bool `name:"enable_patroni_failsafe_mode" default:"false"`

View File

@ -191,24 +191,8 @@ func NewFromConfig(cfg *rest.Config) (KubernetesClient, error) {
}
// SetPostgresCRDStatus of Postgres cluster
func (client *KubernetesClient) SetPostgresCRDStatus(clusterName spec.NamespacedName, status string) (*apiacidv1.Postgresql, error) {
var pg *apiacidv1.Postgresql
var pgStatus apiacidv1.PostgresStatus
pgStatus.PostgresClusterStatus = status
patch, err := json.Marshal(struct {
PgStatus interface{} `json:"status"`
}{&pgStatus})
if err != nil {
return pg, fmt.Errorf("could not marshal status: %v", err)
}
// we cannot do a full scale update here without fetching the previous manifest (as the resourceVersion may differ),
// however, we could do patch without it. In the future, once /status subresource is there (starting Kubernetes 1.11)
// we should take advantage of it.
pg, err = client.PostgresqlsGetter.Postgresqls(clusterName.Namespace).Patch(
context.TODO(), clusterName.Name, types.MergePatchType, patch, metav1.PatchOptions{}, "status")
func (client *KubernetesClient) SetPostgresCRDStatus(clusterName spec.NamespacedName, pg *apiacidv1.Postgresql) (*apiacidv1.Postgresql, error) {
pg, err := client.PostgresqlsGetter.Postgresqls(clusterName.Namespace).UpdateStatus(context.TODO(), pg, metav1.UpdateOptions{})
if err != nil {
return pg, fmt.Errorf("could not update status: %v", err)
}

View File

@ -73,11 +73,11 @@ spec:
"limit_iops": 16000,
"limit_throughput": 1000,
"postgresql_versions": [
"18",
"17",
"16",
"15",
"14",
"13"
]
}
# Example of settings to make the snapshot view work in the UI when using AWS

View File

@ -259,7 +259,7 @@ DEFAULT_UI_CONFIG = {
'users_visible': True,
'databases_visible': True,
'resources_visible': RESOURCES_VISIBLE,
'postgresql_versions': ['13', '14', '15', '16', '17'],
'postgresql_versions': ['14', '15', '16', '17', '18'],
'dns_format_string': '{0}.{1}',
'pgui_link': '',
'static_network_whitelist': {},

View File

@ -31,11 +31,11 @@ default_operator_ui_config='{
"limit_iops": 16000,
"limit_throughput": 1000,
"postgresql_versions": [
"18",
"17",
"16",
"15",
"14",
"13"
],
"static_network_whitelist": {
"localhost": ["172.0.0.1/32"]