Standby section improvements (#3033)

- Allow standby_host to be specified together with wal_path
- Add standby_primary_slot_name
Polina Bungina 2026-01-19 13:54:27 +01:00 committed by GitHub
parent ad9ae4ec1b
commit b97de5d7f1
12 changed files with 176 additions and 36 deletions

@@ -493,13 +493,19 @@ spec:
               type: string
             standby_port:
               type: string
-          oneOf:
+            standby_primary_slot_name:
+              type: string
+          anyOf:
           - required:
             - s3_wal_path
           - required:
             - gs_wal_path
           - required:
             - standby_host
+          not:
+            required:
+            - s3_wal_path
+            - gs_wal_path
         streams:
           type: array
           items:

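Editor's note on the schema above (illustration, not part of the commit): the `anyOf` accepts a `standby` section containing at least one of the three fields, while the `not`/`required` clause rejects any section that contains both WAL paths at once. For example (placeholder values):

```yaml
# valid: standby_host alone
standby:
  standby_host: acid-minimal-cluster.default
---
# valid: standby_host plus exactly one WAL path
standby:
  standby_host: acid-minimal-cluster.default
  s3_wal_path: s3://mybucket/spilo/acid-minimal-cluster/<UID>/wal/16/
---
# invalid: both WAL paths present, so the not/required clause matches
standby:
  s3_wal_path: s3://mybucket/spilo/acid-minimal-cluster/<UID>/wal/16/
  gs_wal_path: gs://mybucket/spilo/acid-minimal-cluster/<UID>/wal/16/
```
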
@@ -1346,10 +1346,12 @@ If you are using [additional environment variables](#custom-pod-environment-variables)
 to access your backup location you have to copy those variables and prepend
 the `STANDBY_` prefix for Spilo to find the backups and WAL files to stream.
 
-Alternatively, standby clusters can also stream from a remote primary cluster.
+Standby clusters can also stream from a remote primary cluster.
 You have to specify the host address. Port is optional and defaults to 5432.
-Note, that only one of the options (`s3_wal_path`, `gs_wal_path`,
-`standby_host`) can be present under the `standby` top-level key.
+You can combine `standby_host` with either `s3_wal_path` or `gs_wal_path`
+for additional redundancy. Note that `s3_wal_path` and `gs_wal_path` are
+mutually exclusive. At least one of `s3_wal_path`, `gs_wal_path`, or
+`standby_host` must be specified under the `standby` top-level key.
 
 ## Logical backups

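An aside on the `STANDBY_` prefix rule in the hunk above (editor's sketch, not part of the commit; the ConfigMap name and the AWS variable names are assumed for illustration):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-pod-config            # hypothetical pod_environment_configmap
data:
  AWS_ACCESS_KEY_ID: <key-id>          # used by the running cluster itself
  AWS_SECRET_ACCESS_KEY: <secret-key>
  STANDBY_AWS_ACCESS_KEY_ID: <key-id>  # same values, STANDBY_-prefixed so Spilo
  STANDBY_AWS_SECRET_ACCESS_KEY: <secret-key>  # can read the standby's WAL archive
```
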
@@ -457,22 +457,31 @@ under the `clone` top-level key and do not affect the already running cluster.
 
 On startup, an existing `standby` top-level key creates a standby Postgres
 cluster streaming from a remote location - either from a S3 or GCS WAL
-archive or a remote primary. Only one of options is allowed and required
-if the `standby` key is present.
+archive, a remote primary, or a combination of both. At least one of
+`s3_wal_path`, `gs_wal_path`, or `standby_host` must be specified.
+Note that `s3_wal_path` and `gs_wal_path` are mutually exclusive.
 
 * **s3_wal_path**
   the url to S3 bucket containing the WAL archive of the remote primary.
+  Can be combined with `standby_host` for additional redundancy.
 
 * **gs_wal_path**
   the url to GS bucket containing the WAL archive of the remote primary.
+  Can be combined with `standby_host` for additional redundancy.
 
 * **standby_host**
   hostname or IP address of the primary to stream from.
+  Can be specified alone or combined with either `s3_wal_path` or `gs_wal_path`.
 
 * **standby_port**
   TCP port on which the primary is listening for connections. Patroni will
   use `"5432"` if not set.
 
+* **standby_primary_slot_name**
+  name of the replication slot to use on the primary server when streaming
+  from a remote primary. See the Patroni documentation
+  [here](https://patroni.readthedocs.io/en/latest/standby_cluster.html) for more details. Optional.
+
 ## Volume properties
 
 Those parameters are grouped under the `volume` top-level key and define the

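Putting the standby options above together, a section that combines a remote primary with an S3 archive and a dedicated replication slot could look like this (editor's sketch; values are placeholders):

```yaml
standby:
  standby_host: "acid-minimal-cluster.default"
  standby_port: "5433"
  s3_wal_path: "s3://<bucketname>/spilo/<source_db_cluster>/<UID>/wal/<PGVERSION>"
  standby_primary_slot_name: "standby_slot"
```
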
@@ -900,8 +900,9 @@ the PostgreSQL version between source and target cluster has to be the same.
 
 To start a cluster as standby, add the following `standby` section in the YAML
 file. You can stream changes from archived WAL files (AWS S3 or Google Cloud
-Storage) or from a remote primary. Only one option can be specified in the
-manifest:
+Storage), from a remote primary, or combine a remote primary with a WAL archive.
+At least one of `s3_wal_path`, `gs_wal_path`, or `standby_host` must be specified.
+Note that `s3_wal_path` and `gs_wal_path` are mutually exclusive.
 
 ```yaml
 spec:
@@ -929,6 +930,16 @@ spec:
     standby_port: "5433"
 ```
 
+You can also combine a remote primary with a WAL archive for additional redundancy:
+
+```yaml
+spec:
+  standby:
+    standby_host: "acid-minimal-cluster.default"
+    standby_port: "5433"
+    s3_wal_path: "s3://<bucketname>/spilo/<source_db_cluster>/<UID>/wal/<PGVERSION>"
+```
+
 Note, that the pods and services use the same role labels like for normal clusters:
 The standby leader is labeled as `master`. When using the `standby_host` option
 you have to copy the credentials from the source cluster's secrets to successfully

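For the combined example above, `generateStandbyEnvironment` (changed later in this commit) would emit roughly the following container environment (editor's sketch, derived from the code diff below):

```yaml
- name: STANDBY_HOST
  value: acid-minimal-cluster.default
- name: STANDBY_PORT
  value: "5433"
- name: STANDBY_WALE_S3_PREFIX
  value: s3://<bucketname>/spilo/<source_db_cluster>/<UID>/wal/<PGVERSION>
- name: STANDBY_METHOD
  value: STANDBY_WITH_WALE
- name: STANDBY_WAL_BUCKET_SCOPE_PREFIX
  value: ""
```
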
@@ -5,15 +5,17 @@
 #
 # Injections:
 #
-# * oneOf: for the standby field to enforce that only one of s3_wal_path, gs_wal_path or standby_host is set.
-#   * This can later be done with // +kubebuilder:validation:ExactlyOneOf marker, but this requires latest Kubernetes version. (Currently the operator depends on v1.32.9)
+# * anyOf/not: for the standby field to enforce validation rules:
+#   - s3_wal_path and gs_wal_path are mutually exclusive
+#   - standby_host can be specified alone or with either s3_wal_path OR gs_wal_path
+#   - at least one of s3_wal_path, gs_wal_path, or standby_host must be set
 # * type: string and pattern for the maintenanceWindows items.
 
 file="${1:-"manifests/postgresql.crd.yaml"}"
 
 sed -i '/^[[:space:]]*standby:$/{
   # Capture the indentation
-  s/^\([[:space:]]*\)standby:$/\1standby:\n\1  oneOf:\n\1  - required:\n\1    - s3_wal_path\n\1  - required:\n\1    - gs_wal_path\n\1  - required:\n\1    - standby_host/
+  s/^\([[:space:]]*\)standby:$/\1standby:\n\1  anyOf:\n\1  - required:\n\1    - s3_wal_path\n\1  - required:\n\1    - gs_wal_path\n\1  - required:\n\1    - standby_host\n\1  not:\n\1    required:\n\1    - s3_wal_path\n\1    - gs_wal_path/
 }' "$file"
 
 sed -i '/^[[:space:]]*maintenanceWindows:$/{

@@ -3924,15 +3924,22 @@ spec:
                 format: int64
                 type: integer
               standby:
-                oneOf:
+                anyOf:
                 - required:
                   - s3_wal_path
                 - required:
                   - gs_wal_path
                 - required:
                   - standby_host
-                description: StandbyDescription contains remote primary config or
-                  s3/gs wal path
+                not:
+                  required:
+                  - s3_wal_path
+                  - gs_wal_path
+                description: StandbyDescription contains remote primary config and/or
+                  s3/gs wal path. standby_host can be specified alone or together with
+                  either s3_wal_path OR gs_wal_path (mutually exclusive). At least
+                  one field must be specified. s3_wal_path and gs_wal_path are mutually
+                  exclusive.
                 properties:
                   gs_wal_path:
                     type: string
@@ -3942,6 +3949,8 @@ spec:
                     type: string
                   standby_port:
                     type: string
+                  standby_primary_slot_name:
+                    type: string
                 type: object
               streams:
                 items:

@@ -9,7 +9,9 @@ spec:
   numberOfInstances: 1
   postgresql:
     version: "17"
-  # Make this a standby cluster and provide either the s3 bucket path of source cluster or the remote primary host for continuous streaming.
+  # Make this a standby cluster. You can specify s3_wal_path or gs_wal_path for WAL archive,
+  # standby_host for remote primary streaming, or combine standby_host with either WAL path.
+  # Note: s3_wal_path and gs_wal_path are mutually exclusive.
   standby:
     # s3_wal_path: "s3://mybucket/spilo/acid-minimal-cluster/abcd1234-2a4b-4b2a-8c9c-c1234defg567/wal/14/"
     standby_host: "acid-minimal-cluster.default"

@@ -246,13 +246,15 @@ type Patroni struct {
     FailsafeMode *bool `json:"failsafe_mode,omitempty"`
 }
 
-// StandbyDescription contains remote primary config or s3/gs wal path
-// +kubebuilder:validation:ExactlyOneOf=s3_wal_path;gs_wal_path;standby_host
+// StandbyDescription contains remote primary config and/or s3/gs wal path.
+// standby_host can be specified alone or together with either s3_wal_path OR gs_wal_path (mutually exclusive).
+// At least one field must be specified. s3_wal_path and gs_wal_path are mutually exclusive.
 type StandbyDescription struct {
-    S3WalPath   string `json:"s3_wal_path,omitempty"`
-    GSWalPath   string `json:"gs_wal_path,omitempty"`
-    StandbyHost string `json:"standby_host,omitempty"`
-    StandbyPort string `json:"standby_port,omitempty"`
+    S3WalPath              string `json:"s3_wal_path,omitempty"`
+    GSWalPath              string `json:"gs_wal_path,omitempty"`
+    StandbyHost            string `json:"standby_host,omitempty"`
+    StandbyPort            string `json:"standby_port,omitempty"`
+    StandbyPrimarySlotName string `json:"standby_primary_slot_name,omitempty"`
 }
 
 // TLSDescription specs TLS properties

@@ -2207,23 +2207,29 @@ func (c *Cluster) generateStandbyEnvironment(description *acidv1.StandbyDescription)
                 Value: description.StandbyPort,
             })
         }
-    } else {
-        c.logger.Info("standby cluster streaming from WAL location")
-        if description.S3WalPath != "" {
-            result = append(result, v1.EnvVar{
-                Name:  "STANDBY_WALE_S3_PREFIX",
-                Value: description.S3WalPath,
-            })
-        } else if description.GSWalPath != "" {
-            result = append(result, v1.EnvVar{
-                Name:  "STANDBY_WALE_GS_PREFIX",
-                Value: description.GSWalPath,
-            })
-        } else {
-            c.logger.Error("no WAL path specified in standby section")
-            return result
-        }
-
-        result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"})
-        result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
-    }
+        if description.StandbyPrimarySlotName != "" {
+            result = append(result, v1.EnvVar{
+                Name:  "STANDBY_PRIMARY_SLOT_NAME",
+                Value: description.StandbyPrimarySlotName,
+            })
+        }
+    }
+
+    // WAL archive can be specified with or without standby_host
+    if description.S3WalPath != "" {
+        c.logger.Info("standby cluster using S3 WAL archive")
+        result = append(result, v1.EnvVar{
+            Name:  "STANDBY_WALE_S3_PREFIX",
+            Value: description.S3WalPath,
+        })
+        result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"})
+        result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
+    } else if description.GSWalPath != "" {
+        c.logger.Info("standby cluster using GCS WAL archive")
+        result = append(result, v1.EnvVar{
+            Name:  "STANDBY_WALE_GS_PREFIX",
+            Value: description.GSWalPath,
+        })
+        result = append(result, v1.EnvVar{Name: "STANDBY_METHOD", Value: "STANDBY_WITH_WALE"})
+        result = append(result, v1.EnvVar{Name: "STANDBY_WAL_BUCKET_SCOPE_PREFIX", Value: ""})
+    }

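To summarize the new control flow (editor's sketch of the variable names the function now emits, grouped by configuration):

```yaml
# standby_host set: STANDBY_HOST, plus conditionally
- STANDBY_PORT                # when standby_port is set
- STANDBY_PRIMARY_SLOT_NAME   # when standby_primary_slot_name is set
# s3_wal_path set (alone or together with standby_host):
- STANDBY_WALE_S3_PREFIX
- STANDBY_METHOD              # fixed value STANDBY_WITH_WALE
- STANDBY_WAL_BUCKET_SCOPE_PREFIX
# gs_wal_path set instead (else-if branch, never together with s3_wal_path):
- STANDBY_WALE_GS_PREFIX
- STANDBY_METHOD
- STANDBY_WAL_BUCKET_SCOPE_PREFIX
```
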
@@ -1370,7 +1370,33 @@ func TestStandbyEnv(t *testing.T) {
             envLen:  2,
         },
         {
-            subTest: "from remote primary - ignore WAL path",
+            subTest: "from remote primary with S3 WAL path",
+            standbyOpts: &acidv1.StandbyDescription{
+                S3WalPath:   "s3://some/path/",
+                StandbyHost: "remote-primary",
+            },
+            env: v1.EnvVar{
+                Name:  "STANDBY_HOST",
+                Value: "remote-primary",
+            },
+            envPos: 0,
+            envLen: 4,
+        },
+        {
+            subTest: "verify S3 WAL env with standby host",
+            standbyOpts: &acidv1.StandbyDescription{
+                S3WalPath:   "s3://some/path/",
+                StandbyHost: "remote-primary",
+            },
+            env: v1.EnvVar{
+                Name:  "STANDBY_WALE_S3_PREFIX",
+                Value: "s3://some/path/",
+            },
+            envPos: 1,
+            envLen: 4,
+        },
+        {
+            subTest: "from remote primary with GCS WAL path",
             standbyOpts: &acidv1.StandbyDescription{
                 GSWalPath:   "gs://some/path/",
                 StandbyHost: "remote-primary",
@@ -1380,7 +1406,20 @@ func TestStandbyEnv(t *testing.T) {
                 Value: "remote-primary",
             },
             envPos: 0,
-            envLen: 1,
+            envLen: 4,
         },
+        {
+            subTest: "from remote primary with slot name",
+            standbyOpts: &acidv1.StandbyDescription{
+                StandbyHost:            "remote-primary",
+                StandbyPrimarySlotName: "my_slot",
+            },
+            env: v1.EnvVar{
+                Name:  "STANDBY_PRIMARY_SLOT_NAME",
+                Value: "my_slot",
+            },
+            envPos: 1,
+            envLen: 2,
+        },
     }

@@ -1031,6 +1031,23 @@ func (c *Cluster) syncStandbyClusterConfiguration() error {
         standbyOptionsToSet["create_replica_methods"] = []string{"bootstrap_standby_with_wale", "basebackup_fast_xlog"}
         standbyOptionsToSet["restore_command"] = "envdir \"/run/etc/wal-e.d/env-standby\" /scripts/restore_command.sh \"%f\" \"%p\""
+        if c.Spec.StandbyCluster.StandbyHost != "" {
+            standbyOptionsToSet["host"] = c.Spec.StandbyCluster.StandbyHost
+        } else {
+            standbyOptionsToSet["host"] = nil
+        }
+        if c.Spec.StandbyCluster.StandbyPort != "" {
+            standbyOptionsToSet["port"] = c.Spec.StandbyCluster.StandbyPort
+        } else {
+            standbyOptionsToSet["port"] = nil
+        }
+        if c.Spec.StandbyCluster.StandbyPrimarySlotName != "" {
+            standbyOptionsToSet["primary_slot_name"] = c.Spec.StandbyCluster.StandbyPrimarySlotName
+        } else {
+            standbyOptionsToSet["primary_slot_name"] = nil
+        }
     } else {
         c.logger.Infof("promoting standby cluster and detach from source")
         standbyOptionsToSet = nil

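Sketch of the Patroni `standby_cluster` configuration this sync produces (editor's illustration; the key names come from the code above, while host/port/slot values are examples):

```yaml
standby_cluster:
  create_replica_methods:
  - bootstrap_standby_with_wale
  - basebackup_fast_xlog
  restore_command: envdir "/run/etc/wal-e.d/env-standby" /scripts/restore_command.sh "%f" "%p"
  host: remote-primary.example.com  # set to nil when standby_host is unset
  port: "5433"                      # set to nil when standby_port is unset
  primary_slot_name: standby_slot   # set to nil when the slot name is unset
```
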
@@ -801,6 +801,41 @@ func TestSyncStandbyClusterConfiguration(t *testing.T) {
     // this should update the Patroni config again
     err = cluster.syncStandbyClusterConfiguration()
     assert.NoError(t, err)
+
+    // test with standby_host, standby_port and standby_primary_slot_name
+    cluster.Spec.StandbyCluster = &acidv1.StandbyDescription{
+        StandbyHost:            "remote-primary.example.com",
+        StandbyPort:            "5433",
+        StandbyPrimarySlotName: "standby_slot",
+    }
+    cluster.syncStatefulSet()
+    updatedSts4 := cluster.Statefulset
+
+    // check that pods have all three STANDBY_* environment variables
+    assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_HOST", Value: "remote-primary.example.com"})
+    assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PORT", Value: "5433"})
+    assert.Contains(t, updatedSts4.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PRIMARY_SLOT_NAME", Value: "standby_slot"})
+
+    // this should update the Patroni config with host, port and primary_slot_name
+    err = cluster.syncStandbyClusterConfiguration()
+    assert.NoError(t, err)
+
+    // test property deletion: remove standby_primary_slot_name
+    cluster.Spec.StandbyCluster = &acidv1.StandbyDescription{
+        StandbyHost: "remote-primary.example.com",
+        StandbyPort: "5433",
+    }
+    cluster.syncStatefulSet()
+    updatedSts5 := cluster.Statefulset
+
+    // check that STANDBY_PRIMARY_SLOT_NAME is not present
+    assert.Contains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_HOST", Value: "remote-primary.example.com"})
+    assert.Contains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PORT", Value: "5433"})
+    assert.NotContains(t, updatedSts5.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "STANDBY_PRIMARY_SLOT_NAME", Value: "standby_slot"})
+
+    // this should update the Patroni config and set primary_slot_name to nil
+    err = cluster.syncStandbyClusterConfiguration()
+    assert.NoError(t, err)
 }
 
 func TestUpdateSecret(t *testing.T) {