Implement major upgrade result annotations (#2727)
Co-authored-by: Felix Kunde <felix-kunde@gmx.de> Co-authored-by: Polina Bungina <27892524+hughcapet@users.noreply.github.com>
This commit is contained in:
parent
a09b7655c9
commit
2e398120d2
|
|
@ -85,6 +85,12 @@ It is also possible to define `maintenanceWindows` in the Postgres manifest to
|
||||||
better control when such automated upgrades should take place after increasing
|
better control when such automated upgrades should take place after increasing
|
||||||
the version.
|
the version.
|
||||||
|
|
||||||
|
### Upgrade annotations
|
||||||
|
|
||||||
|
When an upgrade is executed, the operator sets an annotation in the PostgreSQL resource, either `last-major-upgrade-success` if the upgrade succeeds, or `last-major-upgrade-failure` if it fails. The value of the annotation is a timestamp indicating when the upgrade occurred.
|
||||||
|
|
||||||
|
If a PostgreSQL resource carries the failure annotation, the operator will not retry the upgrade during subsequent sync events. To clear the annotation, set the PostgreSQL version in the manifest back to the major version the cluster is currently running. On the next sync the operator sees that no upgrade is pending and removes the failure annotation, after which a new upgrade can be requested.
|
||||||
|
|
||||||
## Non-default cluster domain
|
## Non-default cluster domain
|
||||||
|
|
||||||
If your cluster uses a DNS domain other than the default `cluster.local`, this
|
If your cluster uses a DNS domain other than the default `cluster.local`, this
|
||||||
|
|
|
||||||
|
|
@ -1185,13 +1185,19 @@ class EndToEndTestCase(unittest.TestCase):
|
||||||
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
|
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
|
||||||
def test_major_version_upgrade(self):
|
def test_major_version_upgrade(self):
|
||||||
"""
|
"""
|
||||||
Test major version upgrade
|
Test major version upgrade: with full upgrade, maintenance window, and annotation
|
||||||
"""
|
"""
|
||||||
def check_version():
|
def check_version():
|
||||||
p = k8s.patroni_rest("acid-upgrade-test-0", "")
|
p = k8s.patroni_rest("acid-upgrade-test-0", "")
|
||||||
version = p.get("server_version", 0) // 10000
|
version = p.get("server_version", 0) // 10000
|
||||||
return version
|
return version
|
||||||
|
|
||||||
|
def get_annotations():
|
||||||
|
pg_manifest = k8s.api.custom_objects_api.get_namespaced_custom_object(
|
||||||
|
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test")
|
||||||
|
annotations = pg_manifest["metadata"]["annotations"]
|
||||||
|
return annotations
|
||||||
|
|
||||||
k8s = self.k8s
|
k8s = self.k8s
|
||||||
cluster_label = 'application=spilo,cluster-name=acid-upgrade-test'
|
cluster_label = 'application=spilo,cluster-name=acid-upgrade-test'
|
||||||
|
|
||||||
|
|
@ -1209,30 +1215,33 @@ class EndToEndTestCase(unittest.TestCase):
|
||||||
|
|
||||||
master_nodes, _ = k8s.get_cluster_nodes(cluster_labels=cluster_label)
|
master_nodes, _ = k8s.get_cluster_nodes(cluster_labels=cluster_label)
|
||||||
# should upgrade immediately
|
# should upgrade immediately
|
||||||
pg_patch_version_14 = {
|
pg_patch_version_13 = {
|
||||||
"spec": {
|
"spec": {
|
||||||
"postgresql": {
|
"postgresql": {
|
||||||
"version": "14"
|
"version": "13"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
||||||
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_14)
|
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_13)
|
||||||
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
||||||
|
|
||||||
# should have finish failover
|
|
||||||
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label)
|
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label)
|
||||||
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
||||||
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
||||||
self.eventuallyEqual(check_version, 14, "Version should be upgraded from 12 to 14")
|
self.eventuallyEqual(check_version, 13, "Version should be upgraded from 12 to 13")
|
||||||
|
|
||||||
|
# check if annotation for last upgrade's success is set
|
||||||
|
annotations = get_annotations()
|
||||||
|
self.assertIsNotNone(annotations.get("last-major-upgrade-success"), "Annotation for last upgrade's success is not set")
|
||||||
|
|
||||||
# should not upgrade because current time is not in maintenanceWindow
|
# should not upgrade because current time is not in maintenanceWindow
|
||||||
current_time = datetime.now()
|
current_time = datetime.now()
|
||||||
maintenance_window_future = f"{(current_time+timedelta(minutes=60)).strftime('%H:%M')}-{(current_time+timedelta(minutes=120)).strftime('%H:%M')}"
|
maintenance_window_future = f"{(current_time+timedelta(minutes=60)).strftime('%H:%M')}-{(current_time+timedelta(minutes=120)).strftime('%H:%M')}"
|
||||||
pg_patch_version_15 = {
|
pg_patch_version_14 = {
|
||||||
"spec": {
|
"spec": {
|
||||||
"postgresql": {
|
"postgresql": {
|
||||||
"version": "15"
|
"version": "14"
|
||||||
},
|
},
|
||||||
"maintenanceWindows": [
|
"maintenanceWindows": [
|
||||||
maintenance_window_future
|
maintenance_window_future
|
||||||
|
|
@ -1240,21 +1249,23 @@ class EndToEndTestCase(unittest.TestCase):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
||||||
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15)
|
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_14)
|
||||||
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
||||||
|
|
||||||
# should have finish failover
|
|
||||||
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=master,' + cluster_label)
|
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=master,' + cluster_label)
|
||||||
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
||||||
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
||||||
self.eventuallyEqual(check_version, 14, "Version should not be upgraded")
|
self.eventuallyEqual(check_version, 13, "Version should not be upgraded")
|
||||||
|
|
||||||
|
second_annotations = get_annotations()
|
||||||
|
self.assertIsNone(second_annotations.get("last-major-upgrade-failure"), "Annotation for last upgrade's failure should not be set")
|
||||||
|
|
||||||
# change the version again to trigger operator sync
|
# change the version again to trigger operator sync
|
||||||
maintenance_window_current = f"{(current_time-timedelta(minutes=30)).strftime('%H:%M')}-{(current_time+timedelta(minutes=30)).strftime('%H:%M')}"
|
maintenance_window_current = f"{(current_time-timedelta(minutes=30)).strftime('%H:%M')}-{(current_time+timedelta(minutes=30)).strftime('%H:%M')}"
|
||||||
pg_patch_version_16 = {
|
pg_patch_version_15 = {
|
||||||
"spec": {
|
"spec": {
|
||||||
"postgresql": {
|
"postgresql": {
|
||||||
"version": "16"
|
"version": "15"
|
||||||
},
|
},
|
||||||
"maintenanceWindows": [
|
"maintenanceWindows": [
|
||||||
maintenance_window_current
|
maintenance_window_current
|
||||||
|
|
@ -1263,14 +1274,52 @@ class EndToEndTestCase(unittest.TestCase):
|
||||||
}
|
}
|
||||||
|
|
||||||
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
||||||
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_16)
|
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15)
|
||||||
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
||||||
|
|
||||||
# should have finish failover
|
|
||||||
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label)
|
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label)
|
||||||
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
||||||
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
||||||
self.eventuallyEqual(check_version, 16, "Version should be upgraded from 14 to 16")
|
self.eventuallyEqual(check_version, 15, "Version should be upgraded from 13 to 15")
|
||||||
|
|
||||||
|
# check if annotation for last upgrade's success is updated after second upgrade
|
||||||
|
third_annotations = get_annotations()
|
||||||
|
self.assertIsNotNone(third_annotations.get("last-major-upgrade-success"), "Annotation for last upgrade's success is not set")
|
||||||
|
self.assertNotEqual(annotations.get("last-major-upgrade-success"), third_annotations.get("last-major-upgrade-success"), "Annotation for last upgrade's success is not updated")
|
||||||
|
|
||||||
|
# test upgrade with failed upgrade annotation
|
||||||
|
pg_patch_version_16 = {
|
||||||
|
"metadata": {
|
||||||
|
"annotations": {
|
||||||
|
"last-major-upgrade-failure": "2024-01-02T15:04:05Z"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"spec": {
|
||||||
|
"postgresql": {
|
||||||
|
"version": "16"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
||||||
|
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_16)
|
||||||
|
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
||||||
|
|
||||||
|
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=master,' + cluster_label)
|
||||||
|
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
||||||
|
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
||||||
|
self.eventuallyEqual(check_version, 15, "Version should not be upgraded because annotation for last upgrade's failure is set")
|
||||||
|
|
||||||
|
# change the version back to 15 and should remove failure annotation
|
||||||
|
k8s.api.custom_objects_api.patch_namespaced_custom_object(
|
||||||
|
"acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15)
|
||||||
|
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
|
||||||
|
|
||||||
|
k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label)
|
||||||
|
k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
|
||||||
|
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
||||||
|
|
||||||
|
fourth_annotations = get_annotations()
|
||||||
|
self.assertIsNone(fourth_annotations.get("last-major-upgrade-failure"), "Annotation for last upgrade's failure is not removed")
|
||||||
|
|
||||||
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
|
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
|
||||||
def test_persistent_volume_claim_retention_policy(self):
|
def test_persistent_volume_claim_retention_policy(self):
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,16 @@
|
||||||
package cluster
|
package cluster
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/zalando/postgres-operator/pkg/spec"
|
"github.com/zalando/postgres-operator/pkg/spec"
|
||||||
"github.com/zalando/postgres-operator/pkg/util"
|
"github.com/zalando/postgres-operator/pkg/util"
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
// VersionMap Map of version numbers
|
// VersionMap Map of version numbers
|
||||||
|
|
@ -18,6 +22,11 @@ var VersionMap = map[string]int{
|
||||||
"16": 160000,
|
"16": 160000,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Annotation keys written to the postgresql resource to record the outcome of
// the most recent major version upgrade attempt.
const (
|
||||||
|
// majorVersionUpgradeSuccessAnnotation is set by annotatePostgresResource when an upgrade succeeded; its value is a timestamp.
majorVersionUpgradeSuccessAnnotation = "last-major-upgrade-success"
|
||||||
|
// majorVersionUpgradeFailureAnnotation is set by annotatePostgresResource when an upgrade failed; its value is a timestamp.
majorVersionUpgradeFailureAnnotation = "last-major-upgrade-failure"
|
||||||
|
)
|
||||||
|
|
||||||
// IsBiggerPostgresVersion Compare two Postgres version numbers
|
// IsBiggerPostgresVersion Compare two Postgres version numbers
|
||||||
func IsBiggerPostgresVersion(old string, new string) bool {
|
func IsBiggerPostgresVersion(old string, new string) bool {
|
||||||
oldN := VersionMap[old]
|
oldN := VersionMap[old]
|
||||||
|
|
@ -54,6 +63,47 @@ func (c *Cluster) isUpgradeAllowedForTeam(owningTeam string) bool {
|
||||||
return util.SliceContains(allowedTeams, owningTeam)
|
return util.SliceContains(allowedTeams, owningTeam)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Cluster) annotatePostgresResource(isSuccess bool) error {
|
||||||
|
annotations := make(map[string]string)
|
||||||
|
currentTime := metav1.Now().Format("2006-01-02T15:04:05Z")
|
||||||
|
if isSuccess {
|
||||||
|
annotations[majorVersionUpgradeSuccessAnnotation] = currentTime
|
||||||
|
} else {
|
||||||
|
annotations[majorVersionUpgradeFailureAnnotation] = currentTime
|
||||||
|
}
|
||||||
|
patchData, err := metaAnnotationsPatch(annotations)
|
||||||
|
if err != nil {
|
||||||
|
c.logger.Errorf("could not form patch for %s postgresql resource: %v", c.Name, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.MergePatchType, patchData, metav1.PatchOptions{})
|
||||||
|
if err != nil {
|
||||||
|
c.logger.Errorf("failed to patch annotations to postgresql resource: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Cluster) removeFailuresAnnotation() error {
|
||||||
|
annotationToRemove := []map[string]string{
|
||||||
|
{
|
||||||
|
"op": "remove",
|
||||||
|
"path": fmt.Sprintf("/metadata/annotations/%s", majorVersionUpgradeFailureAnnotation),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
removePatch, err := json.Marshal(annotationToRemove)
|
||||||
|
if err != nil {
|
||||||
|
c.logger.Errorf("could not form removal patch for %s postgresql resource: %v", c.Name, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.JSONPatchType, removePatch, metav1.PatchOptions{})
|
||||||
|
if err != nil {
|
||||||
|
c.logger.Errorf("failed to remove annotations from postgresql resource: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Execute upgrade when mode is set to manual or full or when the owning team is allowed for upgrade (and mode is "off").
|
Execute upgrade when mode is set to manual or full or when the owning team is allowed for upgrade (and mode is "off").
|
||||||
|
|
||||||
|
|
@ -69,10 +119,19 @@ func (c *Cluster) majorVersionUpgrade() error {
|
||||||
desiredVersion := c.GetDesiredMajorVersionAsInt()
|
desiredVersion := c.GetDesiredMajorVersionAsInt()
|
||||||
|
|
||||||
if c.currentMajorVersion >= desiredVersion {
|
if c.currentMajorVersion >= desiredVersion {
|
||||||
|
if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it
|
||||||
|
c.removeFailuresAnnotation()
|
||||||
|
c.logger.Infof("removing failure annotation as the cluster is already up to date")
|
||||||
|
}
|
||||||
c.logger.Infof("cluster version up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion)
|
c.logger.Infof("cluster version up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists {
|
||||||
|
c.logger.Infof("last major upgrade failed, skipping upgrade")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
if !isInMainternanceWindow(c.Spec.MaintenanceWindows) {
|
if !isInMainternanceWindow(c.Spec.MaintenanceWindows) {
|
||||||
c.logger.Infof("skipping major version upgrade, not in maintenance window")
|
c.logger.Infof("skipping major version upgrade, not in maintenance window")
|
||||||
return nil
|
return nil
|
||||||
|
|
@ -107,6 +166,7 @@ func (c *Cluster) majorVersionUpgrade() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
isUpgradeSuccess := true
|
||||||
numberOfPods := len(pods)
|
numberOfPods := len(pods)
|
||||||
if allRunning && masterPod != nil {
|
if allRunning && masterPod != nil {
|
||||||
c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion)
|
c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion)
|
||||||
|
|
@ -132,11 +192,14 @@ func (c *Cluster) majorVersionUpgrade() error {
|
||||||
result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand)
|
result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
isUpgradeSuccess = false
|
||||||
|
c.annotatePostgresResource(isUpgradeSuccess)
|
||||||
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, err)
|
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
c.logger.Infof("upgrade action triggered and command completed: %s", result[:100])
|
|
||||||
|
|
||||||
|
c.annotatePostgresResource(isUpgradeSuccess)
|
||||||
|
c.logger.Infof("upgrade action triggered and command completed: %s", result[:100])
|
||||||
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion)
|
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue