diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 3906f9052..d5af19e2d 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -127,6 +127,18 @@ spec: super_username: type: string default: postgres + major_version_upgrade: + type: object + properties: + major_version_upgrade_mode: + type: string + default: "off" + minimal_major_version: + type: string + default: "9.5" + target_major_version: + type: string + default: "13" kubernetes: type: object properties: diff --git a/charts/postgres-operator/templates/configmap.yaml b/charts/postgres-operator/templates/configmap.yaml index 87fd752b1..7b762390c 100644 --- a/charts/postgres-operator/templates/configmap.yaml +++ b/charts/postgres-operator/templates/configmap.yaml @@ -15,6 +15,7 @@ data: pod_service_account_name: {{ include "postgres-pod.serviceAccountName" . }} {{ toYaml .Values.configGeneral | indent 2 }} {{ toYaml .Values.configUsers | indent 2 }} +{{ toYaml .Values.configMajorVersionUpgrade | indent 2 }} {{ toYaml .Values.configKubernetes | indent 2 }} {{ toYaml .Values.configTimeouts | indent 2 }} {{ toYaml .Values.configLoadBalancer | indent 2 }} diff --git a/charts/postgres-operator/templates/operatorconfiguration.yaml b/charts/postgres-operator/templates/operatorconfiguration.yaml index 0625e1327..be1608297 100644 --- a/charts/postgres-operator/templates/operatorconfiguration.yaml +++ b/charts/postgres-operator/templates/operatorconfiguration.yaml @@ -12,6 +12,8 @@ configuration: {{ toYaml .Values.configGeneral | indent 2 }} users: {{ toYaml .Values.configUsers | indent 4 }} + major_version_upgrade: +{{ toYaml .Values.configMajorVersionUpgrade | indent 4 }} kubernetes: {{- if .Values.podPriorityClassName }} pod_priority_class_name: {{ .Values.podPriorityClassName }} diff --git a/charts/postgres-operator/values-crd.yaml 
b/charts/postgres-operator/values-crd.yaml index a66911f96..f9b1000f9 100644 --- a/charts/postgres-operator/values-crd.yaml +++ b/charts/postgres-operator/values-crd.yaml @@ -58,6 +58,14 @@ configUsers: # postgres superuser name to be created by initdb super_username: postgres +configMajorVersionUpgrade: + # "off": no upgrade, "manual": manifest triggers action, "full": minimal version violation triggers too + major_version_upgrade_mode: "off" + # minimal Postgres major version that will not automatically be upgraded + minimal_major_version: "9.5" + # target Postgres major version when upgrading clusters automatically + target_major_version: "13" + configKubernetes: # list of additional capabilities for postgres container # additional_pod_capabilities: diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index e55da4b86..482df042a 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -60,6 +60,14 @@ configUsers: # postgres superuser name to be created by initdb super_username: postgres +configMajorVersionUpgrade: + # "off": no upgrade, "manual": manifest triggers action, "full": minimal version violation triggers too + major_version_upgrade_mode: "off" + # minimal Postgres major version that will not automatically be upgraded + minimal_major_version: "9.5" + # target Postgres major version when upgrading clusters automatically + target_major_version: "13" + configKubernetes: # list of additional capabilities for postgres container # additional_pod_capabilities: "SYS_NICE" diff --git a/docs/administrator.md b/docs/administrator.md index 715b8f74e..878e53fa2 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -11,30 +11,43 @@ switchover (planned failover) of the master to the Pod with new minor version. The switch should usually take less than 5 seconds, still clients have to reconnect. 
-Major version upgrades are supported either via [cloning](user.md#how-to-clone-an-existing-postgresql-cluster) -or in-place. +### Upgrade on cloning -With cloning, the new cluster manifest must have a higher `version` string than -the source cluster and will be created from a basebackup. Depending of the -cluster size, downtime in this case can be significant as writes to the database -should be stopped and all WAL files should be archived first before cloning is -started. +With [cloning](user.md#how-to-clone-an-existing-postgresql-cluster), the new +cluster manifest must have a higher `version` string than the source cluster +and will be created from a basebackup. Depending on the cluster size, downtime +in this case can be significant as writes to the database should be stopped +and all WAL files should be archived first before cloning is started. +Therefore, use cloning only to test major version upgrades and check for +compatibility of your app with a Postgres server of a higher version. -Starting with Spilo 13, Postgres Operator can do in-place major version upgrade, -which should be faster than cloning. However, it is not fully automatic yet. -First, you need to make sure, that setting the `PGVERSION` environment variable -is enabled in the configuration. Since `v1.6.0`, `enable_pgversion_env_var` is -enabled by default. +### In-place major version upgrade -To trigger the upgrade, increase the version in the cluster manifest. After -Pods are rotated `configure_spilo` will notice the version mismatch and start -the old version again. You can then exec into the Postgres container of the -master instance and call `python3 /scripts/inplace_upgrade.py N` where `N` -is the number of members of your cluster (see [`numberOfInstances`](https://github.com/zalando/postgres-operator/blob/50cb5898ea715a1db7e634de928b2d16dc8cd969/manifests/minimal-postgres-manifest.yaml#L10)). 
+Starting with Spilo 13, Postgres Operator can run an in-place major version +upgrade which is much faster than cloning. First, you need to make sure, that +the `PGVERSION` environment variable is set for the database pods. Since +`v1.6.0` the related option `enable_pgversion_env_var` is enabled by default. + +In-place major version upgrades can be configured to be executed by the +operator with the `major_version_upgrade_mode` option. By default it is set +to `off` which means the cluster version will not change when increased in +the manifest. Still, a rolling update would be triggered updating the +`PGVERSION` variable. But Spilo's [`configure_spilo`](https://github.com/zalando/spilo/blob/master/postgres-appliance/scripts/configure_spilo.py) +script will notice the version mismatch and start the old version again. + +In this scenario the major version upgrade could then be run by a user from within the +master pod. Exec into the container and run: +```bash +python3 /scripts/inplace_upgrade.py N +``` +where `N` is the number of members of your cluster (see [`numberOfInstances`](https://github.com/zalando/postgres-operator/blob/50cb5898ea715a1db7e634de928b2d16dc8cd969/manifests/minimal-postgres-manifest.yaml#L10)). The upgrade is usually fast, well under one minute for most DBs. Note, that changes become irrevertible once `pg_upgrade` is called. To understand the upgrade procedure, refer to the [corresponding PR in Spilo](https://github.com/zalando/spilo/pull/488). +When `major_version_upgrade_mode` is set to `manual` the operator will run +the upgrade script for you after the manifest is updated and pods are rotated. 
+ ## CRD Validation [CustomResourceDefinitions](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/#customresourcedefinitions) diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index eb1d855b7..13402d15f 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -170,6 +170,29 @@ under the `users` key. Postgres username used for replication between instances. The default is `standby`. +## Major version upgrades + +Parameters configuring automatic major version upgrades. In a +CRD-configuration, they are grouped under the `major_version_upgrade` key. + +* **major_version_upgrade_mode** + Postgres Operator supports [in-place major version upgrade](../administrator.md#in-place-major-version-upgrade) + with three different modes: + `"off"` = no upgrade by the operator, + `"manual"` = manifest triggers action, + `"full"` = manifest and minimal version violation trigger upgrade. + Note, that with all three modes increasing the version in the manifest will + trigger a rolling update of the pods. The default is `"off"`. + +* **minimal_major_version** + The minimal Postgres major version that will not automatically be upgraded + when `major_version_upgrade_mode` is set to `"full"`. The default is `"9.5"`. + +* **target_major_version** + The target Postgres major version when upgrading clusters automatically + which violate the configured allowed `minimal_major_version` when + `major_version_upgrade_mode` is set to `"full"`. The default is `"13"`. + ## Kubernetes resources Parameters to configure cluster-related Kubernetes objects created by the diff --git a/docs/user.md b/docs/user.md index 8ba649bdd..a5d2f1820 100644 --- a/docs/user.md +++ b/docs/user.md @@ -646,7 +646,13 @@ spec: ## In-place major version upgrade -Starting with Spilo 13, operator supports in-place major version upgrade to a higher major version (e.g. from PG 10 to PG 12). 
To trigger the upgrade, simply increase the version in the manifest. It is your responsibility to test your applications against the new version before the upgrade; downgrading is not supported. The easiest way to do so is to try the upgrade on the cloned cluster first. For details of how Spilo does the upgrade [see here](https://github.com/zalando/spilo/pull/488), operator implementation is described [in the admin docs](administrator.md#minor-and-major-version-upgrade). +Starting with Spilo 13, operator supports in-place major version upgrade to a +higher major version (e.g. from PG 10 to PG 12). To trigger the upgrade, +simply increase the version in the manifest. It is your responsibility to test +your applications against the new version before the upgrade; downgrading is +not supported. The easiest way to do so is to try the upgrade on the cloned +cluster first (see next chapter). More details can be found in the +[admin docs](administrator.md#minor-and-major-version-upgrade). ## How to clone an existing PostgreSQL cluster diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 8bb9b715b..02803123a 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -81,6 +81,7 @@ data: # min_instances: "-1" # min_cpu_limit: 250m # min_memory_limit: 250Mi + # minimal_major_version: "9.5" # node_readiness_label: "" # oauth_token_secret_name: postgresql-operator # pam_configuration: | @@ -119,6 +120,7 @@ data: spilo_privileged: "false" storage_resize_mode: "pvc" super_username: postgres + # target_major_version: "13" # team_admin_role: "admin" # team_api_role_configuration: "log_statement:all" # teams_api_url: http://fake-teams-api.default.svc.cluster.local diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 227ce6689..2c6d0ccac 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -123,6 +123,18 @@ spec: super_username: type: string default: 
postgres + major_version_upgrade: + type: object + properties: + major_version_upgrade_mode: + type: string + default: "off" + minimal_major_version: + type: string + default: "9.5" + target_major_version: + type: string + default: "13" kubernetes: type: object properties: diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 0c0daa924..535d9a4ea 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -26,6 +26,10 @@ configuration: users: replication_username: standby super_username: postgres + major_version_upgrade: + major_version_upgrade_mode: "off" + minimal_major_version: "9.5" + target_major_version: "13" kubernetes: # additional_pod_capabilities: # - "SYS_NICE" diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 19430e78d..ef376653d 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -965,6 +965,20 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "major_version_upgrade": { + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "major_version_upgrade_mode": { + Type: "string", + }, + "minimal_major_version": { + Type: "string", + }, + "target_major_version": { + Type: "string", + }, + }, + }, "kubernetes": { Type: "object", Properties: map[string]apiextv1.JSONSchemaProps{ diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index cddaa9dd4..5400e6f0e 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -41,6 +41,13 @@ type PostgresUsersConfiguration struct { ReplicationUsername string `json:"replication_username,omitempty"` } +// MajorVersionUpgradeConfiguration defines how to execute major version upgrades of 
Postgres. +type MajorVersionUpgradeConfiguration struct { + MajorVersionUpgradeMode string `json:"major_version_upgrade_mode" default:"off"` // off - no actions, manual - manifest triggers action, full - manifest and minimal version violation trigger upgrade + MinimalMajorVersion string `json:"minimal_major_version" default:"9.5"` + TargetMajorVersion string `json:"target_major_version" default:"13"` +} + // KubernetesMetaConfiguration defines k8s conf required for all Postgres clusters and the operator itself type KubernetesMetaConfiguration struct { PodServiceAccountName string `json:"pod_service_account_name,omitempty"` @@ -219,6 +226,7 @@ type OperatorConfigurationData struct { SidecarImages map[string]string `json:"sidecar_docker_images,omitempty"` // deprecated in favour of SidecarContainers SidecarContainers []v1.Container `json:"sidecars,omitempty"` PostgresUsersConfiguration PostgresUsersConfiguration `json:"users"` + MajorVersionUpgrade MajorVersionUpgradeConfiguration `json:"major_version_upgrade"` Kubernetes KubernetesMetaConfiguration `json:"kubernetes"` PostgresPodResources PostgresPodResourcesDefaults `json:"postgres_pod_resources"` Timeouts OperatorTimeouts `json:"timeouts"` diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 81f8a76b5..8da5eb6ba 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -309,6 +309,22 @@ func (in *MaintenanceWindow) DeepCopy() *MaintenanceWindow { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MajorVersionUpgradeConfiguration) DeepCopyInto(out *MajorVersionUpgradeConfiguration) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MajorVersionUpgradeConfiguration. 
+func (in *MajorVersionUpgradeConfiguration) DeepCopy() *MajorVersionUpgradeConfiguration { + if in == nil { + return nil + } + out := new(MajorVersionUpgradeConfiguration) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OperatorConfiguration) DeepCopyInto(out *OperatorConfiguration) { *out = *in @@ -364,6 +380,7 @@ func (in *OperatorConfigurationData) DeepCopyInto(out *OperatorConfigurationData } } out.PostgresUsersConfiguration = in.PostgresUsersConfiguration + out.MajorVersionUpgrade = in.MajorVersionUpgrade in.Kubernetes.DeepCopyInto(&out.Kubernetes) out.PostgresPodResources = in.PostgresPodResources out.Timeouts = in.Timeouts diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 986f747f9..424c8e89a 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -1310,6 +1310,7 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e } } else { err = fmt.Errorf("could not switch over from %q to %q: %v", curMaster.Name, candidate, err) + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switchover from %q to %q FAILED: %v", curMaster.Name, candidate, err) } // signal the role label waiting goroutine to close the shop and go home @@ -1320,9 +1321,7 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e // close the label waiting channel no sooner than the waiting goroutine terminates. 
close(podLabelErr) - c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Switchover from %q to %q FAILED: %v", curMaster.Name, candidate, err) return err - } // Lock locks the cluster diff --git a/pkg/cluster/majorversionupgrade.go b/pkg/cluster/majorversionupgrade.go index f34a19290..6ee703ca5 100644 --- a/pkg/cluster/majorversionupgrade.go +++ b/pkg/cluster/majorversionupgrade.go @@ -85,13 +85,17 @@ func (c *Cluster) majorVersionUpgrade() error { if c.currentMajorVersion < desiredVersion { podName := &spec.NamespacedName{Namespace: masterPod.Namespace, Name: masterPod.Name} c.logger.Infof("triggering major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods) + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "Starting major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods) upgradeCommand := fmt.Sprintf("/usr/bin/python3 /scripts/inplace_upgrade.py %d 2>&1 | tee last_upgrade.log", numberOfPods) + result, err := c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand) if err != nil { + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "Upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, err) return err } c.logger.Infof("upgrade action triggered and command completed: %s", result[:50]) + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "Upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion) } } diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index dbdd4afb4..85890c022 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -54,6 +54,11 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.SuperUsername = util.Coalesce(fromCRD.PostgresUsersConfiguration.SuperUsername, "postgres") result.ReplicationUsername = 
util.Coalesce(fromCRD.PostgresUsersConfiguration.ReplicationUsername, "standby") + // major version upgrade config + result.MajorVersionUpgradeMode = util.Coalesce(fromCRD.MajorVersionUpgrade.MajorVersionUpgradeMode, "off") + result.MinimalMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.MinimalMajorVersion, "9.5") + result.TargetMajorVersion = util.Coalesce(fromCRD.MajorVersionUpgrade.TargetMajorVersion, "13") + // kubernetes config result.CustomPodAnnotations = fromCRD.Kubernetes.CustomPodAnnotations result.PodServiceAccountName = util.Coalesce(fromCRD.Kubernetes.PodServiceAccountName, "postgres-pod") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 2969441c6..10e2b35c0 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -206,10 +206,9 @@ type Config struct { EnableLazySpiloUpgrade bool `name:"enable_lazy_spilo_upgrade" default:"false"` EnablePgVersionEnvVar bool `name:"enable_pgversion_env_var" default:"true"` EnableSpiloWalPathCompat bool `name:"enable_spilo_wal_path_compat" default:"false"` - MajorVersionUpgradeMode string `name:"major_version_upgrade_mode" default:"off"` // off - no actions, manual - manifest triggers action, full - manifest and minimal version violation trigger upgrade + MajorVersionUpgradeMode string `name:"major_version_upgrade_mode" default:"off"` MinimalMajorVersion string `name:"minimal_major_version" default:"9.5"` TargetMajorVersion string `name:"target_major_version" default:"13"` - AllowedMajorUpgradeVersions []string `name:"allowed_major_upgrade_versions" default:"12,13"` } // MustMarshal marshals the config or panics