Merge branch 'master' into fix/break-rolling-update-deadlock-on-non-running-pods

This commit is contained in:
Felix Kunde 2026-04-23 15:45:47 +02:00 committed by GitHub
commit 63485e807d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 95 additions and 26 deletions

View File

@ -78,6 +78,9 @@ $(GENERATED_CRDS): $(GENERATED)
# local: compile $(SOURCES) with `go build` into build/${BINARY}.
# Depends on the source files and on the generated CRDs. Uses
# LOCAL_BUILD_FLAGS (not BUILD_FLAGS) and sets no GOOS/GOARCH in the recipe,
# so the Go toolchain's defaults (or the caller's environment) pick the platform.
local: ${SOURCES} $(GENERATED_CRDS)
CGO_ENABLED=${CGO_ENABLED} go build -o build/${BINARY} $(LOCAL_BUILD_FLAGS) -ldflags "$(LDFLAGS)" $(SOURCES)
# wasm: compile $(SOURCES) for GOOS=wasip1 / GOARCH=wasm into
# build/${BINARY}.wasm. Depends on the source files and on the generated CRDs.
# Uses BUILD_FLAGS and the shared LDFLAGS; CGO_ENABLED is passed through as set.
wasm: ${SOURCES} $(GENERATED_CRDS)
GOOS=wasip1 GOARCH=wasm CGO_ENABLED=${CGO_ENABLED} go build -o build/${BINARY}.wasm ${BUILD_FLAGS} -ldflags "$(LDFLAGS)" $(SOURCES)
# linux: compile $(SOURCES) for GOOS=linux / GOARCH=amd64 into
# build/linux/${BINARY}. Depends on the source files and on the generated CRDs.
# Uses BUILD_FLAGS and the shared LDFLAGS.
linux: ${SOURCES} $(GENERATED_CRDS)
GOOS=linux GOARCH=amd64 CGO_ENABLED=${CGO_ENABLED} go build -o build/linux/${BINARY} ${BUILD_FLAGS} -ldflags "$(LDFLAGS)" $(SOURCES)

View File

@ -79,6 +79,9 @@ spec:
enable_lazy_spilo_upgrade:
type: boolean
default: false
enable_maintenance_windows:
type: boolean
default: true
enable_pgversion_env_var:
type: boolean
default: true

View File

@ -27,6 +27,8 @@ configGeneral:
- "all"
# update only the statefulsets without immediately doing the rolling update
enable_lazy_spilo_upgrade: false
# toggle to use maintenance windows feature
enable_maintenance_windows: true
# set the PGVERSION env var instead of providing the version via postgresql.bin_dir in SPILO_CONFIGURATION
enable_pgversion_env_var: true
# start any new database pod without limitations on shm memory

View File

@ -65,7 +65,10 @@ the `PGVERSION` environment variable is set for the database pods. Since
In-place major version upgrades can be configured to be executed by the
operator with the `major_version_upgrade_mode` option. By default, it is
enabled (mode: `manual`). In any case, altering the version in the manifest
will trigger a rolling update of pods to update the `PGVERSION` env variable.
will update the desired `PGVERSION`. If `maintenanceWindows` are configured,
major-version-related pod rotation is deferred until the next maintenance
window. Without maintenance windows, the operator will trigger a rolling
update of pods to apply the new `PGVERSION`.
Spilo's [`configure_spilo`](https://github.com/zalando/spilo/blob/master/postgres-appliance/scripts/configure_spilo.py)
script will notice the version mismatch but start the current version again.
@ -92,10 +95,11 @@ Thus, the `full` mode can create drift between desired and actual state.
### Upgrade during maintenance windows
When `maintenanceWindows` are defined in the Postgres manifest the operator
will trigger a major version upgrade only during these periods. Make sure they
are at least twice as long as your configured `resync_period` to guarantee
that operator actions can be triggered.
When `maintenanceWindows` are defined in the Postgres manifest or in the global
config, the operator will trigger major-version-related pod rotation and the
major version upgrade only during these periods. Make sure they are at least
twice as long as your configured `resync_period` to guarantee that operator
actions can be triggered.
### Upgrade annotations

View File

@ -118,7 +118,9 @@ These parameters are grouped directly under the `spec` key in the manifest.
a list which defines specific time frames when certain maintenance operations
such as automatic major upgrades or master pod migration are allowed to happen.
Accepted formats are "01:00-06:00" for daily maintenance windows or
"Sat:00:00-04:00" for specific days, with all times in UTC.
"Sat:00:00-04:00" for specific days, with all times in UTC. Note, when the
global config option `enable_maintenance_windows` is false, the specified
windows will be ignored.
* **users**
a map of usernames to user flags for the users that should be created in the

View File

@ -173,6 +173,9 @@ Those are top-level keys, containing both leaf keys and groups.
the thresholds. The value must be `"true"` to be effective. The default is empty
which means the feature is disabled.
* **enable_maintenance_windows**
toggle for using the maintenance windows feature. Default is `"true"`.
* **maintenance_windows**
a list which defines specific time frames when certain maintenance
operations such as automatic major upgrades or master pod migration are

4
go.mod
View File

@ -6,11 +6,11 @@ require (
github.com/Masterminds/semver v1.5.0
github.com/aws/aws-sdk-go v1.55.8
github.com/golang/mock v1.6.0
github.com/lib/pq v1.10.9
github.com/lib/pq v1.11.2
github.com/motomux/pretty v0.0.0-20161209205251-b2aad2c9a95d
github.com/pkg/errors v0.9.1
github.com/r3labs/diff v1.1.0
github.com/sirupsen/logrus v1.9.3
github.com/sirupsen/logrus v1.9.4
github.com/stretchr/testify v1.11.1
golang.org/x/crypto v0.45.0
gopkg.in/yaml.v2 v2.4.0

4
go.sum
View File

@ -73,6 +73,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lib/pq v1.11.2 h1:x6gxUeu39V0BHZiugWe8LXZYZ+Utk7hSJGThs8sdzfs=
github.com/lib/pq v1.11.2/go.mod h1:/p+8NSbOcwzAEI7wiMXFlgydTwcgTr3OSKMsD2BitpA=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
@ -113,6 +115,8 @@ github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Copyright 2017 The Kubernetes Authors.
#

View File

@ -46,6 +46,7 @@ data:
enable_ebs_gp3_migration_max_size: "1000"
enable_init_containers: "true"
enable_lazy_spilo_upgrade: "false"
enable_maintenance_windows: "true"
enable_master_load_balancer: "false"
enable_master_pooler_load_balancer: "false"
enable_password_rotation: "false"

View File

@ -77,6 +77,9 @@ spec:
enable_lazy_spilo_upgrade:
type: boolean
default: false
enable_maintenance_windows:
type: boolean
default: true
enable_pgversion_env_var:
type: boolean
default: true

View File

@ -8,6 +8,7 @@ configuration:
# crd_categories:
# - all
# enable_lazy_spilo_upgrade: false
enable_maintenance_windows: true
enable_pgversion_env_var: true
# enable_shm_volume: true
enable_spilo_wal_path_compat: false

View File

@ -105,6 +105,9 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
"enable_lazy_spilo_upgrade": {
Type: "boolean",
},
"enable_maintenance_windows": {
Type: "boolean",
},
"enable_shm_volume": {
Type: "boolean",
},

View File

@ -266,6 +266,7 @@ type OperatorConfigurationData struct {
Workers uint32 `json:"workers,omitempty"`
ResyncPeriod Duration `json:"resync_period,omitempty"`
RepairPeriod Duration `json:"repair_period,omitempty"`
EnableMaintenanceWindows *bool `json:"enable_maintenance_windows,omitempty"`
MaintenanceWindows []MaintenanceWindow `json:"maintenance_windows,omitempty"`
SetMemoryRequestToLimit bool `json:"set_memory_request_to_limit,omitempty"`
ShmVolume *bool `json:"enable_shm_volume,omitempty"`

View File

@ -433,6 +433,11 @@ func (in *OperatorConfigurationData) DeepCopyInto(out *OperatorConfigurationData
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.EnableMaintenanceWindows != nil {
in, out := &in.EnableMaintenanceWindows, &out.EnableMaintenanceWindows
*out = new(bool)
**out = **in
}
if in.MaintenanceWindows != nil {
in, out := &in.MaintenanceWindows, &out.MaintenanceWindows
*out = make([]MaintenanceWindow, len(*in))

View File

@ -41,6 +41,12 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
defer c.mu.Unlock()
oldSpec := c.Postgresql
if !c.isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
// do not apply any major version related changes yet
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
}
c.setSpec(newSpec)
defer func() {
@ -97,11 +103,6 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
}
}
if !c.isInMaintenanceWindow(newSpec.Spec.MaintenanceWindows) {
// do not apply any major version related changes yet
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
}
if err = c.syncStatefulSet(); err != nil {
if !k8sutil.ResourceAlreadyExists(err) {
err = fmt.Errorf("could not sync statefulsets: %v", err)

View File

@ -675,7 +675,9 @@ func isStandbyCluster(spec *acidv1.PostgresSpec) bool {
}
func (c *Cluster) isInMaintenanceWindow(specMaintenanceWindows []acidv1.MaintenanceWindow) bool {
if len(specMaintenanceWindows) == 0 && len(c.OpConfig.MaintenanceWindows) == 0 {
ignoreMaintenanceWindows := c.OpConfig.EnableMaintenanceWindows != nil && !*c.OpConfig.EnableMaintenanceWindows
noWindowsDefined := len(specMaintenanceWindows) == 0 && len(c.OpConfig.MaintenanceWindows) == 0
if noWindowsDefined || ignoreMaintenanceWindows {
return true
}
now := time.Now()

View File

@ -660,6 +660,7 @@ func TestIsInMaintenanceWindow(t *testing.T) {
cluster := New(
Config{
OpConfig: config.Config{
EnableMaintenanceWindows: util.True(),
Resources: config.Resources{
ClusterLabels: map[string]string{"application": "spilo"},
ClusterNameLabel: "cluster-name",
@ -683,12 +684,27 @@ func TestIsInMaintenanceWindow(t *testing.T) {
name string
windows []acidv1.MaintenanceWindow
configWindows []string
windowsFlag bool
expected bool
}{
{
name: "no maintenance windows",
windows: nil,
configWindows: nil,
windowsFlag: true,
expected: true,
},
{
name: "maintenance windows diabled",
windows: []acidv1.MaintenanceWindow{
{
Everyday: true,
StartTime: mustParseTime("00:00"),
EndTime: mustParseTime("23:59"),
},
},
configWindows: nil,
windowsFlag: false,
expected: true,
},
{
@ -701,6 +717,7 @@ func TestIsInMaintenanceWindow(t *testing.T) {
},
},
configWindows: nil,
windowsFlag: true,
expected: true,
},
{
@ -713,6 +730,7 @@ func TestIsInMaintenanceWindow(t *testing.T) {
},
},
configWindows: nil,
windowsFlag: true,
expected: true,
},
{
@ -724,24 +742,35 @@ func TestIsInMaintenanceWindow(t *testing.T) {
EndTime: mustParseTime(futureTimeEndFormatted),
},
},
expected: false,
windowsFlag: true,
expected: false,
},
{
name: "global maintenance windows with future interval time",
windows: nil,
configWindows: []string{fmt.Sprintf("%s-%s", futureTimeStartFormatted, futureTimeEndFormatted)},
windowsFlag: true,
expected: false,
},
{
name: "global maintenance windows all day",
windows: nil,
configWindows: []string{"00:00-02:00", "02:00-23:59"},
windowsFlag: true,
expected: true,
},
{
name: "global maintenance windows ignored",
windows: nil,
configWindows: []string{"00:00-02:00", "02:00-23:59"},
windowsFlag: false,
expected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cluster.OpConfig.EnableMaintenanceWindows = &tt.windowsFlag
cluster.OpConfig.MaintenanceWindows = tt.configWindows
cluster.Spec.MaintenanceWindows = tt.windows
if cluster.isInMaintenanceWindow(cluster.Spec.MaintenanceWindows) != tt.expected {

View File

@ -51,6 +51,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.ShmVolume = util.CoalesceBool(fromCRD.ShmVolume, util.True())
result.SidecarImages = fromCRD.SidecarImages
result.SidecarContainers = fromCRD.SidecarContainers
result.EnableMaintenanceWindows = util.CoalesceBool(fromCRD.EnableMaintenanceWindows, util.True())
if len(fromCRD.MaintenanceWindows) > 0 {
result.MaintenanceWindows = make([]string, 0, len(fromCRD.MaintenanceWindows))
for _, window := range fromCRD.MaintenanceWindows {

View File

@ -173,14 +173,15 @@ type Config struct {
LogicalBackup
ConnectionPooler
WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to'
KubernetesUseConfigMaps bool `name:"kubernetes_use_configmaps" default:"false"`
EtcdHost string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS
MaintenanceWindows []string `name:"maintenance_windows"`
DockerImage string `name:"docker_image" default:"ghcr.io/zalando/spilo-18:4.1-p1"`
SidecarImages map[string]string `name:"sidecar_docker_images"` // deprecated in favour of SidecarContainers
SidecarContainers []v1.Container `name:"sidecars"`
PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"`
WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to'
KubernetesUseConfigMaps bool `name:"kubernetes_use_configmaps" default:"false"`
EtcdHost string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS
EnableMaintenanceWindows *bool `name:"enable_maintenance_windows" default:"true"`
MaintenanceWindows []string `name:"maintenance_windows"`
DockerImage string `name:"docker_image" default:"ghcr.io/zalando/spilo-18:4.1-p1"`
SidecarImages map[string]string `name:"sidecar_docker_images"` // deprecated in favour of SidecarContainers
SidecarContainers []v1.Container `name:"sidecars"`
PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"`
// value of this string must be valid JSON or YAML; see initPodServiceAccount
PodServiceAccountDefinition string `name:"pod_service_account_definition" default:""`
PodServiceAccountRoleBindingDefinition string `name:"pod_service_account_role_binding_definition" default:""`

View File

@ -77,7 +77,7 @@ spec:
"17",
"16",
"15",
"14",
"14"
]
}
# Example of settings to make the snapshot view work in the UI when using AWS

View File

@ -11,4 +11,4 @@ kubernetes==11.0.0
python-json-logger==2.0.7
requests==2.32.4
stups-tokens>=1.1.19
werkzeug==3.1.5
werkzeug==3.1.6