diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index 667c58efa..cbf5c98ef 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -493,13 +493,19 @@ spec: type: string standby_port: type: string - oneOf: + standby_primary_slot_name: + type: string + anyOf: - required: - s3_wal_path - required: - gs_wal_path - required: - standby_host + not: + required: + - s3_wal_path + - gs_wal_path streams: type: array items: diff --git a/delivery.yaml b/delivery.yaml index 11e17c012..a8716399c 100644 --- a/delivery.yaml +++ b/delivery.yaml @@ -17,8 +17,8 @@ pipeline: image: cdp-runtime/go cache: paths: - - /go/pkg/mod # pkg cache for Go modules - - ~/.cache/go-build # Go build cache + - /go/pkg/mod # pkg cache for Go modules + - ~/.cache/go-build # Go build cache commands: - desc: Run unit tests cmd: | @@ -65,7 +65,7 @@ pipeline: else IMAGE=${MULTI_ARCH_REGISTRY}/postgres-operator-ui-test fi - + make appjs docker buildx create --config /etc/cdp-buildkitd.toml --driver-opt network=host --bootstrap --use docker buildx build --platform linux/amd64,linux/arm64 \ diff --git a/docs/administrator.md b/docs/administrator.md index d7cda8e8b..fe36e3744 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -1346,10 +1346,12 @@ If you are using [additional environment variables](#custom-pod-environment-vari to access your backup location you have to copy those variables and prepend the `STANDBY_` prefix for Spilo to find the backups and WAL files to stream. -Alternatively, standby clusters can also stream from a remote primary cluster. +Standby clusters can also stream from a remote primary cluster. You have to specify the host address. Port is optional and defaults to 5432. -Note, that only one of the options (`s3_wal_path`, `gs_wal_path`, -`standby_host`) can be present under the `standby` top-level key. 
+You can combine `standby_host` with either `s3_wal_path` or `gs_wal_path` +for additional redundancy. Note that `s3_wal_path` and `gs_wal_path` are +mutually exclusive. At least one of `s3_wal_path`, `gs_wal_path`, or +`standby_host` must be specified under the `standby` top-level key. ## Logical backups diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index ab0353202..7b4ef7ada 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -457,22 +457,31 @@ under the `clone` top-level key and do not affect the already running cluster. On startup, an existing `standby` top-level key creates a standby Postgres cluster streaming from a remote location - either from a S3 or GCS WAL -archive or a remote primary. Only one of options is allowed and required -if the `standby` key is present. +archive, a remote primary, or a combination of both. At least one of +`s3_wal_path`, `gs_wal_path`, or `standby_host` must be specified. +Note that `s3_wal_path` and `gs_wal_path` are mutually exclusive. * **s3_wal_path** the url to S3 bucket containing the WAL archive of the remote primary. + Can be combined with `standby_host` for additional redundancy. * **gs_wal_path** the url to GS bucket containing the WAL archive of the remote primary. + Can be combined with `standby_host` for additional redundancy. * **standby_host** hostname or IP address of the primary to stream from. + Can be specified alone or combined with either `s3_wal_path` or `gs_wal_path`. * **standby_port** TCP port on which the primary is listening for connections. Patroni will use `"5432"` if not set. +* **standby_primary_slot_name** + name of the replication slot to use on the primary server when streaming + from a remote primary. See the Patroni documentation + [here](https://patroni.readthedocs.io/en/latest/standby_cluster.html) for more details. Optional. 
+ ## Volume properties Those parameters are grouped under the `volume` top-level key and define the diff --git a/docs/user.md b/docs/user.md index c1a7c7d45..db33d0bd6 100644 --- a/docs/user.md +++ b/docs/user.md @@ -900,8 +900,9 @@ the PostgreSQL version between source and target cluster has to be the same. To start a cluster as standby, add the following `standby` section in the YAML file. You can stream changes from archived WAL files (AWS S3 or Google Cloud -Storage) or from a remote primary. Only one option can be specified in the -manifest: +Storage), from a remote primary, or combine a remote primary with a WAL archive. +At least one of `s3_wal_path`, `gs_wal_path`, or `standby_host` must be specified. +Note that `s3_wal_path` and `gs_wal_path` are mutually exclusive. ```yaml spec: @@ -929,6 +930,16 @@ spec: standby_port: "5433" ``` +You can also combine a remote primary with a WAL archive for additional redundancy: + +```yaml +spec: + standby: + standby_host: "acid-minimal-cluster.default" + standby_port: "5433" + s3_wal_path: "s3:///spilo///wal/" +``` + Note, that the pods and services use the same role labels like for normal clusters: The standby leader is labeled as `master`. When using the `standby_host` option you have to copy the credentials from the source cluster's secrets to successfully diff --git a/hack/adjust_postgresql_crd.sh b/hack/adjust_postgresql_crd.sh index cceb33f64..d06b74a2d 100755 --- a/hack/adjust_postgresql_crd.sh +++ b/hack/adjust_postgresql_crd.sh @@ -5,15 +5,17 @@ # # Injections: # -# * oneOf: for the standby field to enforce that only one of s3_wal_path, gs_wal_path or standby_host is set. -# * This can later be done with // +kubebuilder:validation:ExactlyOneOf marker, but this requires latest Kubernetes version. 
(Currently the operator depends on v1.32.9) +# * anyOf + not: for the standby field to enforce validation rules: +# - s3_wal_path and gs_wal_path are mutually exclusive +# - standby_host can be specified alone or with either s3_wal_path OR gs_wal_path +# - at least one of s3_wal_path, gs_wal_path, or standby_host must be set # * type: string and pattern for the maintenanceWindows items. file="${1:-"manifests/postgresql.crd.yaml"}" sed -i '/^[[:space:]]*standby:$/{ # Capture the indentation - s/^\([[:space:]]*\)standby:$/\1standby:\n\1 oneOf:\n\1 - required:\n\1 - s3_wal_path\n\1 - required:\n\1 - gs_wal_path\n\1 - required:\n\1 - standby_host/ + s/^\([[:space:]]*\)standby:$/\1standby:\n\1 anyOf:\n\1 - required:\n\1 - s3_wal_path\n\1 - required:\n\1 - gs_wal_path\n\1 - required:\n\1 - standby_host\n\1 not:\n\1 required:\n\1 - s3_wal_path\n\1 - gs_wal_path/ }' "$file" sed -i '/^[[:space:]]*maintenanceWindows:$/{ diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index 49fef43e5..808326444 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -3930,15 +3930,22 @@ spec: format: int64 type: integer standby: - oneOf: + anyOf: - required: - s3_wal_path - required: - gs_wal_path - required: - standby_host - description: StandbyDescription contains remote primary config or - s3/gs wal path + not: + required: + - s3_wal_path + - gs_wal_path + description: StandbyDescription contains remote primary config and/or + s3/gs wal path. standby_host can be specified alone or together with + either s3_wal_path OR gs_wal_path (mutually exclusive). At least + one field must be specified. s3_wal_path and gs_wal_path are mutually + exclusive. 
properties: gs_wal_path: type: string @@ -3948,6 +3955,8 @@ spec: type: string standby_port: type: string + standby_primary_slot_name: + type: string type: object streams: items: diff --git a/manifests/standby-manifest.yaml b/manifests/standby-manifest.yaml index eb90464a6..b06956a1b 100644 --- a/manifests/standby-manifest.yaml +++ b/manifests/standby-manifest.yaml @@ -9,7 +9,9 @@ spec: numberOfInstances: 1 postgresql: version: "17" - # Make this a standby cluster and provide either the s3 bucket path of source cluster or the remote primary host for continuous streaming. + # Make this a standby cluster. You can specify s3_wal_path or gs_wal_path for WAL archive, + # standby_host for remote primary streaming, or combine standby_host with either WAL path. + # Note: s3_wal_path and gs_wal_path are mutually exclusive. standby: # s3_wal_path: "s3://mybucket/spilo/acid-minimal-cluster/abcd1234-2a4b-4b2a-8c9c-c1234defg567/wal/14/" standby_host: "acid-minimal-cluster.default" diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index 99999232a..3dced435d 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -247,13 +247,15 @@ type Patroni struct { IgnoreSlots []map[string]string `json:"ignore_slots,omitempty"` } -// StandbyDescription contains remote primary config or s3/gs wal path -// +kubebuilder:validation:ExactlyOneOf=s3_wal_path;gs_wal_path;standby_host +// StandbyDescription contains remote primary config and/or s3/gs wal path. +// standby_host can be specified alone or together with either s3_wal_path OR gs_wal_path (mutually exclusive). +// At least one field must be specified. s3_wal_path and gs_wal_path are mutually exclusive. 
type StandbyDescription struct { - S3WalPath string `json:"s3_wal_path,omitempty"` - GSWalPath string `json:"gs_wal_path,omitempty"` - StandbyHost string `json:"standby_host,omitempty"` - StandbyPort string `json:"standby_port,omitempty"` + S3WalPath string `json:"s3_wal_path,omitempty"` + GSWalPath string `json:"gs_wal_path,omitempty"` + StandbyHost string `json:"standby_host,omitempty"` + StandbyPort string `json:"standby_port,omitempty"` + StandbyPrimarySlotName string `json:"standby_primary_slot_name,omitempty"` } // TLSDescription specs TLS properties diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index d48fa9e9a..5826a52d0 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -2212,23 +2212,29 @@ func (c *Cluster) generateStandbyEnvironment(description *acidv1.StandbyDescript Value: description.StandbyPort, }) } - } else { - c.logger.Info("standby cluster streaming from WAL location") - if description.S3WalPath != "" { + if description.StandbyPrimarySlotName != "" { result = append(result, v1.EnvVar{ - Name: "STANDBY_WALE_S3_PREFIX", - Value: description.S3WalPath, + Name: "STANDBY_PRIMARY_SLOT_NAME", + Value: description.StandbyPrimarySlotName, }) - } else if description.GSWalPath != "" { - result = append(result, v1.EnvVar{ - Name: "STANDBY_WALE_GS_PREFIX", - Value: description.GSWalPath, - }) - } else { - c.logger.Error("no WAL path specified in standby section") - return result } + } + // WAL archive can be specified with or without standby_host + if description.S3WalPath != "" { + c.logger.Info("standby cluster using S3 WAL archive") + result = append(result, v1.EnvVar{ + Name: "STANDBY_WALE_S3_PREFIX", + Value: description.S3WalPath, + }) + result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"}) + result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""}) + } else if description.GSWalPath != "" { + c.logger.Info("standby cluster using GCS WAL archive") + result = 
append(result, v1.EnvVar{ + Name: "STANDBY_WALE_GS_PREFIX", + Value: description.GSWalPath, + }) result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"}) result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""}) } diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 94f231564..16be25640 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1381,7 +1381,33 @@ func TestStandbyEnv(t *testing.T) { envLen: 2, }, { - subTest: "from remote primary - ignore WAL path", + subTest: "from remote primary with S3 WAL path", + standbyOpts: &acidv1.StandbyDescription{ + S3WalPath: "s3://some/path/", + StandbyHost: "remote-primary", + }, + env: v1.EnvVar{ + Name: "STANDBY_HOST", + Value: "remote-primary", + }, + envPos: 0, + envLen: 4, + }, + { + subTest: "verify S3 WAL env with standby host", + standbyOpts: &acidv1.StandbyDescription{ + S3WalPath: "s3://some/path/", + StandbyHost: "remote-primary", + }, + env: v1.EnvVar{ + Name: "STANDBY_WALE_S3_PREFIX", + Value: "s3://some/path/", + }, + envPos: 1, + envLen: 4, + }, + { + subTest: "from remote primary with GCS WAL path", standbyOpts: &acidv1.StandbyDescription{ GSWalPath: "gs://some/path/", StandbyHost: "remote-primary", @@ -1391,7 +1417,20 @@ func TestStandbyEnv(t *testing.T) { Value: "remote-primary", }, envPos: 0, - envLen: 1, + envLen: 4, + }, + { + subTest: "from remote primary with slot name", + standbyOpts: &acidv1.StandbyDescription{ + StandbyHost: "remote-primary", + StandbyPrimarySlotName: "my_slot", + }, + env: v1.EnvVar{ + Name: "STANDBY_PRIMARY_SLOT_NAME", + Value: "my_slot", + }, + envPos: 1, + envLen: 2, }, } diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 9142f33bb..908806dda 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -1031,6 +1031,23 @@ func (c *Cluster) syncStandbyClusterConfiguration() error { standbyOptionsToSet["create_replica_methods"] = 
[]string{"bootstrap_standby_with_wale", "basebackup_fast_xlog"} standbyOptionsToSet["restore_command"] = "envdir \"/run/etc/wal-e.d/env-standby\" /scripts/restore_command.sh \"%f\" \"%p\"" + if c.Spec.StandbyCluster.StandbyHost != "" { + standbyOptionsToSet["host"] = c.Spec.StandbyCluster.StandbyHost + } else { + standbyOptionsToSet["host"] = nil + } + + if c.Spec.StandbyCluster.StandbyPort != "" { + standbyOptionsToSet["port"] = c.Spec.StandbyCluster.StandbyPort + } else { + standbyOptionsToSet["port"] = nil + } + + if c.Spec.StandbyCluster.StandbyPrimarySlotName != "" { + standbyOptionsToSet["primary_slot_name"] = c.Spec.StandbyCluster.StandbyPrimarySlotName + } else { + standbyOptionsToSet["primary_slot_name"] = nil + } } else { c.logger.Infof("promoting standby cluster and detach from source") standbyOptionsToSet = nil diff --git a/pkg/cluster/sync_test.go b/pkg/cluster/sync_test.go index 87e9dc8a5..e2b242d9d 100644 --- a/pkg/cluster/sync_test.go +++ b/pkg/cluster/sync_test.go @@ -801,6 +801,41 @@ func TestSyncStandbyClusterConfiguration(t *testing.T) { // this should update the Patroni config again err = cluster.syncStandbyClusterConfiguration() assert.NoError(t, err) + + // test with standby_host, standby_port and standby_primary_slot_name + cluster.Spec.StandbyCluster = &acidv1.StandbyDescription{ + StandbyHost: "remote-primary.example.com", + StandbyPort: "5433", + StandbyPrimarySlotName: "standby_slot", + } + cluster.syncStatefulSet() + updatedSts4 := cluster.Statefulset + + // check that pods have all three STANDBY_* environment variables + assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_HOST", Value: "remote-primary.example.com"}) + assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PORT", Value: "5433"}) + assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PRIMARY_SLOT_NAME", Value: "standby_slot"}) + + // this should 
update the Patroni config with host, port and primary_slot_name + err = cluster.syncStandbyClusterConfiguration() + assert.NoError(t, err) + + // test property deletion: remove standby_primary_slot_name + cluster.Spec.StandbyCluster = &acidv1.StandbyDescription{ + StandbyHost: "remote-primary.example.com", + StandbyPort: "5433", + } + cluster.syncStatefulSet() + updatedSts5 := cluster.Statefulset + + // check that STANDBY_PRIMARY_SLOT_NAME is not present + assert.Contains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_HOST", Value: "remote-primary.example.com"}) + assert.Contains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PORT", Value: "5433"}) + assert.NotContains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PRIMARY_SLOT_NAME", Value: "standby_slot"}) + + // this should update the Patroni config and set primary_slot_name to nil + err = cluster.syncStandbyClusterConfiguration() + assert.NoError(t, err) } func TestUpdateSecret(t *testing.T) {