Implement major upgrade result annotations (#2727)
Co-authored-by: Felix Kunde <felix-kunde@gmx.de> Co-authored-by: Polina Bungina <27892524+hughcapet@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									a09b7655c9
								
							
						
					
					
						commit
						2e398120d2
					
				|  | @ -85,6 +85,12 @@ It is also possible to define `maintenanceWindows` in the Postgres manifest to | |||
| better control when such automated upgrades should take place after increasing | ||||
| the version. | ||||
| 
 | ||||
| ### Upgrade annotations | ||||
| 
 | ||||
| When an upgrade is executed, the operator sets an annotation in the PostgreSQL resource, either `last-major-upgrade-success` if the upgrade succeeds, or `last-major-upgrade-failure` if it fails. The value of the annotation is a timestamp indicating when the upgrade occurred. | ||||
| 
 | ||||
| If a PostgreSQL resource contains a failure annotation, the operator will not attempt to retry the upgrade during a sync event. To remove the failure annotation, you can revert the PostgreSQL version back to the current version. This action will trigger the removal of the failure annotation. | ||||
| 
 | ||||
| ## Non-default cluster domain | ||||
| 
 | ||||
| If your cluster uses a DNS domain other than the default `cluster.local`, this | ||||
|  |  | |||
|  | @ -1185,13 +1185,19 @@ class EndToEndTestCase(unittest.TestCase): | |||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_major_version_upgrade(self): | ||||
|         """ | ||||
|         Test major version upgrade | ||||
|         Test major version upgrade: with full upgrade, maintenance window, and annotation | ||||
|         """ | ||||
|         def check_version(): | ||||
|             p = k8s.patroni_rest("acid-upgrade-test-0", "") | ||||
|             version = p.get("server_version", 0) // 10000 | ||||
|             return version | ||||
| 
 | ||||
|         def get_annotations(): | ||||
|             pg_manifest = k8s.api.custom_objects_api.get_namespaced_custom_object( | ||||
|                 "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test") | ||||
|             annotations = pg_manifest["metadata"]["annotations"] | ||||
|             return annotations | ||||
| 
 | ||||
|         k8s = self.k8s | ||||
|         cluster_label = 'application=spilo,cluster-name=acid-upgrade-test' | ||||
| 
 | ||||
|  | @ -1209,30 +1215,33 @@ class EndToEndTestCase(unittest.TestCase): | |||
| 
 | ||||
|         master_nodes, _ = k8s.get_cluster_nodes(cluster_labels=cluster_label) | ||||
|         # should upgrade immediately | ||||
|         pg_patch_version_14 = { | ||||
|         pg_patch_version_13 = { | ||||
|             "spec": { | ||||
|                 "postgresql": { | ||||
|                     "version": "14" | ||||
|                     "version": "13" | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_14) | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_13) | ||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||
| 
 | ||||
|         # should have finish failover | ||||
|         k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
|         self.eventuallyEqual(check_version, 14, "Version should be upgraded from 12 to 14") | ||||
|         self.eventuallyEqual(check_version, 13, "Version should be upgraded from 12 to 13") | ||||
| 
 | ||||
|         # check if annotation for last upgrade's success is set | ||||
|         annotations = get_annotations() | ||||
|         self.assertIsNotNone(annotations.get("last-major-upgrade-success"), "Annotation for last upgrade's success is not set") | ||||
| 
 | ||||
|         # should not upgrade because current time is not in maintenanceWindow | ||||
|         current_time = datetime.now() | ||||
|         maintenance_window_future = f"{(current_time+timedelta(minutes=60)).strftime('%H:%M')}-{(current_time+timedelta(minutes=120)).strftime('%H:%M')}" | ||||
|         pg_patch_version_15 = { | ||||
|         pg_patch_version_14 = { | ||||
|             "spec": { | ||||
|                 "postgresql": { | ||||
|                     "version": "15" | ||||
|                     "version": "14" | ||||
|                 }, | ||||
|                 "maintenanceWindows": [ | ||||
|                     maintenance_window_future | ||||
|  | @ -1240,21 +1249,23 @@ class EndToEndTestCase(unittest.TestCase): | |||
|             } | ||||
|         } | ||||
|         k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15) | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_14) | ||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||
| 
 | ||||
|         # should have finish failover | ||||
|         k8s.wait_for_pod_failover(master_nodes, 'spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
|         self.eventuallyEqual(check_version, 14, "Version should not be upgraded") | ||||
|         self.eventuallyEqual(check_version, 13, "Version should not be upgraded") | ||||
| 
 | ||||
|         second_annotations = get_annotations() | ||||
|         self.assertIsNone(second_annotations.get("last-major-upgrade-failure"), "Annotation for last upgrade's failure should not be set") | ||||
| 
 | ||||
|         # change the version again to trigger operator sync | ||||
|         maintenance_window_current = f"{(current_time-timedelta(minutes=30)).strftime('%H:%M')}-{(current_time+timedelta(minutes=30)).strftime('%H:%M')}" | ||||
|         pg_patch_version_16 = { | ||||
|         pg_patch_version_15 = { | ||||
|             "spec": { | ||||
|                 "postgresql": { | ||||
|                     "version": "16" | ||||
|                     "version": "15" | ||||
|                 }, | ||||
|                 "maintenanceWindows": [ | ||||
|                     maintenance_window_current | ||||
|  | @ -1263,14 +1274,52 @@ class EndToEndTestCase(unittest.TestCase): | |||
|         } | ||||
| 
 | ||||
|         k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_16) | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15) | ||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||
| 
 | ||||
|         # should have finish failover | ||||
|         k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
|         self.eventuallyEqual(check_version, 16, "Version should be upgraded from 14 to 16") | ||||
|         self.eventuallyEqual(check_version, 15, "Version should be upgraded from 13 to 15") | ||||
| 
 | ||||
|         # check if annotation for last upgrade's success is updated after second upgrade | ||||
|         third_annotations = get_annotations() | ||||
|         self.assertIsNotNone(third_annotations.get("last-major-upgrade-success"), "Annotation for last upgrade's success is not set") | ||||
|         self.assertNotEqual(annotations.get("last-major-upgrade-success"), third_annotations.get("last-major-upgrade-success"), "Annotation for last upgrade's success is not updated") | ||||
| 
 | ||||
|         # test upgrade with failed upgrade annotation | ||||
|         pg_patch_version_16 = { | ||||
|             "metadata": { | ||||
|                 "annotations": { | ||||
|                     "last-major-upgrade-failure": "2024-01-02T15:04:05Z" | ||||
|                 }, | ||||
|             }, | ||||
|             "spec": { | ||||
|                 "postgresql": { | ||||
|                     "version": "16" | ||||
|                 }, | ||||
|             }, | ||||
|         } | ||||
|         k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_16) | ||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||
| 
 | ||||
|         k8s.wait_for_pod_failover(master_nodes, 'spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
|         self.eventuallyEqual(check_version, 15, "Version should not be upgraded because annotation for last upgrade's failure is set") | ||||
| 
 | ||||
|         # change the version back to 15 and should remove failure annotation | ||||
|         k8s.api.custom_objects_api.patch_namespaced_custom_object( | ||||
|             "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version_15) | ||||
|         self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") | ||||
| 
 | ||||
|         k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
| 
 | ||||
|         fourth_annotations = get_annotations() | ||||
|         self.assertIsNone(fourth_annotations.get("last-major-upgrade-failure"), "Annotation for last upgrade's failure is not removed") | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_persistent_volume_claim_retention_policy(self): | ||||
|  |  | |||
|  | @ -1,12 +1,16 @@ | |||
| package cluster | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/zalando/postgres-operator/pkg/spec" | ||||
| 	"github.com/zalando/postgres-operator/pkg/util" | ||||
| 	v1 "k8s.io/api/core/v1" | ||||
| 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| ) | ||||
| 
 | ||||
| // VersionMap Map of version numbers
 | ||||
|  | @ -18,6 +22,11 @@ var VersionMap = map[string]int{ | |||
| 	"16": 160000, | ||||
| } | ||||
| 
 | ||||
| const ( | ||||
| 	majorVersionUpgradeSuccessAnnotation = "last-major-upgrade-success" | ||||
| 	majorVersionUpgradeFailureAnnotation = "last-major-upgrade-failure" | ||||
| ) | ||||
| 
 | ||||
| // IsBiggerPostgresVersion Compare two Postgres version numbers
 | ||||
| func IsBiggerPostgresVersion(old string, new string) bool { | ||||
| 	oldN := VersionMap[old] | ||||
|  | @ -54,6 +63,47 @@ func (c *Cluster) isUpgradeAllowedForTeam(owningTeam string) bool { | |||
| 	return util.SliceContains(allowedTeams, owningTeam) | ||||
| } | ||||
| 
 | ||||
| func (c *Cluster) annotatePostgresResource(isSuccess bool) error { | ||||
| 	annotations := make(map[string]string) | ||||
| 	currentTime := metav1.Now().Format("2006-01-02T15:04:05Z") | ||||
| 	if isSuccess { | ||||
| 		annotations[majorVersionUpgradeSuccessAnnotation] = currentTime | ||||
| 	} else { | ||||
| 		annotations[majorVersionUpgradeFailureAnnotation] = currentTime | ||||
| 	} | ||||
| 	patchData, err := metaAnnotationsPatch(annotations) | ||||
| 	if err != nil { | ||||
| 		c.logger.Errorf("could not form patch for %s postgresql resource: %v", c.Name, err) | ||||
| 		return err | ||||
| 	} | ||||
| 	_, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.MergePatchType, patchData, metav1.PatchOptions{}) | ||||
| 	if err != nil { | ||||
| 		c.logger.Errorf("failed to patch annotations to postgresql resource: %v", err) | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (c *Cluster) removeFailuresAnnotation() error { | ||||
| 	annotationToRemove := []map[string]string{ | ||||
| 		{ | ||||
| 			"op":   "remove", | ||||
| 			"path": fmt.Sprintf("/metadata/annotations/%s", majorVersionUpgradeFailureAnnotation), | ||||
| 		}, | ||||
| 	} | ||||
| 	removePatch, err := json.Marshal(annotationToRemove) | ||||
| 	if err != nil { | ||||
| 		c.logger.Errorf("could not form removal patch for %s postgresql resource: %v", c.Name, err) | ||||
| 		return err | ||||
| 	} | ||||
| 	_, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.JSONPatchType, removePatch, metav1.PatchOptions{}) | ||||
| 	if err != nil { | ||||
| 		c.logger.Errorf("failed to remove annotations from postgresql resource: %v", err) | ||||
| 		return err | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| /* | ||||
| Execute upgrade when mode is set to manual or full or when the owning team is allowed for upgrade (and mode is "off"). | ||||
| 
 | ||||
|  | @ -69,10 +119,19 @@ func (c *Cluster) majorVersionUpgrade() error { | |||
| 	desiredVersion := c.GetDesiredMajorVersionAsInt() | ||||
| 
 | ||||
| 	if c.currentMajorVersion >= desiredVersion { | ||||
| 		if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it
 | ||||
| 			c.removeFailuresAnnotation() | ||||
| 			c.logger.Infof("removing failure annotation as the cluster is already up to date") | ||||
| 		} | ||||
| 		c.logger.Infof("cluster version up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion) | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { | ||||
| 		c.logger.Infof("last major upgrade failed, skipping upgrade") | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	if !isInMainternanceWindow(c.Spec.MaintenanceWindows) { | ||||
| 		c.logger.Infof("skipping major version upgrade, not in maintenance window") | ||||
| 		return nil | ||||
|  | @ -107,6 +166,7 @@ func (c *Cluster) majorVersionUpgrade() error { | |||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	isUpgradeSuccess := true | ||||
| 	numberOfPods := len(pods) | ||||
| 	if allRunning && masterPod != nil { | ||||
| 		c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion) | ||||
|  | @ -132,11 +192,14 @@ func (c *Cluster) majorVersionUpgrade() error { | |||
| 				result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand) | ||||
| 			} | ||||
| 			if err != nil { | ||||
| 				isUpgradeSuccess = false | ||||
| 				c.annotatePostgresResource(isUpgradeSuccess) | ||||
| 				c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, err) | ||||
| 				return err | ||||
| 			} | ||||
| 			c.logger.Infof("upgrade action triggered and command completed: %s", result[:100]) | ||||
| 
 | ||||
| 			c.annotatePostgresResource(isUpgradeSuccess) | ||||
| 			c.logger.Infof("upgrade action triggered and command completed: %s", result[:100]) | ||||
| 			c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion) | ||||
| 		} | ||||
| 	} | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue