diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 59671dc19..28b8f28ca 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -26,22 +26,22 @@ spec: - name: Image type: string description: Spilo image to be used for Pods - JSONPath: .configuration.docker_image + jsonPath: .configuration.docker_image - name: Cluster-Label type: string description: Label for K8s resources created by operator - JSONPath: .configuration.kubernetes.cluster_name_label + jsonPath: .configuration.kubernetes.cluster_name_label - name: Service-Account type: string description: Name of service account to be used - JSONPath: .configuration.kubernetes.pod_service_account_name + jsonPath: .configuration.kubernetes.pod_service_account_name - name: Min-Instances type: integer description: Minimum number of instances per Postgres cluster - JSONPath: .configuration.min_instances + jsonPath: .configuration.min_instances - name: Age type: date - JSONPath: .metadata.creationTimestamp + jsonPath: .metadata.creationTimestamp schema: openAPIV3Schema: type: object @@ -49,15 +49,15 @@ spec: - kind - apiVersion - configuration - properties: - kind: - type: string - enum: - - OperatorConfiguration - apiVersion: - type: string - enum: - - acid.zalan.do/v1 + properties: + kind: + type: string + enum: + - OperatorConfiguration + apiVersion: + type: string + enum: + - acid.zalan.do/v1 configuration: type: object properties: diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index dc7fe0d05..74c8f74b8 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -26,34 +26,34 @@ spec: - name: Team type: string description: Team responsible for Postgres CLuster - JSONPath: .spec.teamId + jsonPath: .spec.teamId - name: Version type: string description: PostgreSQL version - JSONPath: .spec.postgresql.version + jsonPath: .spec.postgresql.version - name: Pods type: integer description: Number of Pods per Postgres cluster - JSONPath: .spec.numberOfInstances + jsonPath: .spec.numberOfInstances - name: Volume type: string description: Size of the bound volume - JSONPath: .spec.volume.size + jsonPath: .spec.volume.size - name: CPU-Request type: string description: Requested CPU for Postgres containers - JSONPath: .spec.resources.requests.cpu + jsonPath: .spec.resources.requests.cpu - name: Memory-Request type: string description: Requested memory for Postgres containers - JSONPath: .spec.resources.requests.memory + jsonPath: .spec.resources.requests.memory - name: Age type: date - JSONPath: .metadata.creationTimestamp + jsonPath: .metadata.creationTimestamp - name: Status type: string description: Current sync status of postgresql resource - JSONPath: .status.PostgresClusterStatus + jsonPath: .status.PostgresClusterStatus schema: openAPIV3Schema: type: object diff --git a/charts/postgres-operator/templates/clusterrole.yaml b/charts/postgres-operator/templates/clusterrole.yaml index 84da313d9..00ee776f5 100644 --- a/charts/postgres-operator/templates/clusterrole.yaml +++ b/charts/postgres-operator/templates/clusterrole.yaml @@ -105,6 +105,10 @@ rules: - delete - get - list +{{- if toString .Values.configKubernetes.storage_resize_mode | eq "pvc" }} + - patch + - update +{{- end }} # to read existing PVs. 
Creation should be done via dynamic provisioning - apiGroups: - "" @@ -113,7 +117,9 @@ rules: verbs: - get - list +{{- if toString .Values.configKubernetes.storage_resize_mode | eq "ebs" }} - update # only for resizing AWS volumes +{{- end }} # to watch Spilo pods and do rolling updates. Creation via StatefulSet - apiGroups: - "" diff --git a/charts/postgres-operator/values-crd.yaml b/charts/postgres-operator/values-crd.yaml index 6196c6fb2..71c2d5bb1 100644 --- a/charts/postgres-operator/values-crd.yaml +++ b/charts/postgres-operator/values-crd.yaml @@ -136,7 +136,7 @@ configKubernetes: # whether the Spilo container should run in privileged mode spilo_privileged: false # storage resize strategy, available options are: ebs, pvc, off - storage_resize_mode: ebs + storage_resize_mode: pvc # operator watches for postgres objects in the given namespace watched_namespace: "*" # listen to all namespaces diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 231b0b9ac..95865503d 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -130,7 +130,7 @@ configKubernetes: # whether the Spilo container should run in privileged mode spilo_privileged: "false" # storage resize strategy, available options are: ebs, pvc, off - storage_resize_mode: ebs + storage_resize_mode: pvc # operator watches for postgres objects in the given namespace watched_namespace: "*" # listen to all namespaces diff --git a/e2e/README.md b/e2e/README.md index ce8931f62..3bba6ccc3 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -56,12 +56,24 @@ NOCLEANUP=True ./run.sh main tests.test_e2e.EndToEndTestCase.test_lazy_spilo_upg ## Inspecting Kind -If you want to inspect Kind/Kubernetes cluster, use the following script to exec into the K8s setup and then use `kubectl` +If you want to inspect the Kind/Kubernetes cluster, switch the `kubeconfig` file and context +```bash +# save the old config in case you have it +export KUBECONFIG_SAVED=$KUBECONFIG + +# use the one created by e2e tests +export KUBECONFIG=/tmp/kind-config-postgres-operator-e2e-tests + +# this kubeconfig defines a single context +kubectl config use-context kind-postgres-operator-e2e-tests +``` + +or use the following script to exec into the K8s setup and then use `kubectl` ```bash ./exec_into_env.sh -# use kube ctl +# use kubectl kubectl get pods # watch relevant objects @@ -71,6 +83,14 @@ kubectl get pods ./scripts/get_logs.sh ``` +If you want to inspect the state of the `kind` cluster manually with a single command, add a `context` flag +```bash +kubectl get pods --context kind-postgres-operator-e2e-tests +``` +or set the context for a few commands at once with `kubectl config use-context kind-postgres-operator-e2e-tests` as shown above + + + ## Cleaning up Kind To cleanup kind and start fresh @@ -79,6 +99,12 @@ To cleanup kind and start fresh e2e/run.sh cleanup ``` +That also helps in case you see the +``` +ERROR: no nodes found for cluster "postgres-operator-e2e-tests" +``` +that happens when the `kind` cluster was deleted manually but its configuration file was not.
+ ## Covered use cases The current tests are all bundled in [`test_e2e.py`](tests/test_e2e.py): diff --git a/e2e/scripts/watch_objects.sh b/e2e/scripts/watch_objects.sh index 9dde54f5e..dbd98ffc6 100755 --- a/e2e/scripts/watch_objects.sh +++ b/e2e/scripts/watch_objects.sh @@ -1,17 +1,20 @@ #!/bin/bash watch -c " -kubectl get postgresql +kubectl get postgresql --all-namespaces echo echo -n 'Rolling upgrade pending: ' kubectl get statefulset -o jsonpath='{.items..metadata.annotations.zalando-postgres-operator-rolling-update-required}' echo echo -kubectl get pods -o wide +echo 'Pods' +kubectl get pods -l application=spilo -l name=postgres-operator -l application=db-connection-pooler -o wide --all-namespaces echo -kubectl get statefulsets +echo 'Statefulsets' +kubectl get statefulsets --all-namespaces echo -kubectl get deployments +echo 'Deployments' +kubectl get deployments --all-namespaces -l application=db-connection-pooler -l name=postgres-operator echo echo echo 'Step from operator deployment' diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py index b566308ba..fc76d14f0 100644 --- a/e2e/tests/k8s_api.py +++ b/e2e/tests/k8s_api.py @@ -11,9 +11,11 @@ from datetime import datetime from kubernetes import client, config from kubernetes.client.rest import ApiException + def to_selector(labels): return ",".join(["=".join(l) for l in labels.items()]) + class K8sApi: def __init__(self): @@ -181,10 +183,10 @@ class K8s: def count_pdbs_with_label(self, labels, namespace='default'): return len(self.api.policy_v1_beta1.list_namespaced_pod_disruption_budget( namespace, label_selector=labels).items) - + def count_running_pods(self, labels='application=spilo,cluster-name=acid-minimal-cluster', namespace='default'): pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items - return len(list(filter(lambda x: x.status.phase=='Running', pods))) + return len(list(filter(lambda x: x.status.phase == 'Running', pods))) def wait_for_pod_failover(self, failover_targets, labels, namespace='default'): pod_phase = 'Failing over' @@ -210,9 +212,9 @@ class K8s: def wait_for_logical_backup_job_creation(self): self.wait_for_logical_backup_job(expected_num_of_jobs=1) - def delete_operator_pod(self, step="Delete operator deplyment"): - operator_pod = self.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name - self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, time.time())}}}}}) + def delete_operator_pod(self, step="Delete operator pod"): + # patching the pod template in the deployment restarts the operator pod + self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, datetime.fromtimestamp(time.time()))}}}}}) self.wait_for_operator_pod_start() def update_config(self, config_map_patch, step="Updating operator deployment"): @@ -241,7 +243,7 @@ class K8s: def get_operator_state(self): pod = self.get_operator_pod() - if pod == None: + if pod is None: return None pod = pod.metadata.name @@ -251,7 +253,6 @@ class K8s: return json.loads(r.stdout.decode()) - def get_patroni_running_members(self, pod="acid-minimal-cluster-0"): result = self.get_patroni_state(pod) return list(filter(lambda x: "State" in x and x["State"] == "running", result)) @@ -260,9 +261,9 @@ class K8s: try: deployment = self.api.apps_v1.read_namespaced_deployment(name, namespace) 
return deployment.spec.replicas - except ApiException as e: + except ApiException: return None - + def get_statefulset_image(self, label_selector="application=spilo,cluster-name=acid-minimal-cluster", namespace='default'): ssets = self.api.apps_v1.list_namespaced_stateful_set(namespace, label_selector=label_selector, limit=1) if len(ssets.items) == 0: @@ -463,7 +464,6 @@ class K8sBase: self.wait_for_logical_backup_job(expected_num_of_jobs=1) def delete_operator_pod(self, step="Delete operator deplyment"): - operator_pod = self.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, time.time())}}}}}) self.wait_for_operator_pod_start() @@ -521,7 +521,7 @@ class K8sOperator(K8sBase): class K8sPostgres(K8sBase): def __init__(self, labels="cluster-name=acid-minimal-cluster", namespace="default"): super().__init__(labels, namespace) - + def get_pg_nodes(self): master_pod_node = '' replica_pod_nodes = [] @@ -532,4 +532,4 @@ class K8sPostgres(K8sBase): elif pod.metadata.labels.get('spilo-role') == 'replica': replica_pod_nodes.append(pod.spec.node_name) - return master_pod_node, replica_pod_nodes \ No newline at end of file + return master_pod_node, replica_pod_nodes diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index d10f0c353..362508cce 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -2,15 +2,14 @@ import json import unittest import time import timeout_decorator -import subprocess -import warnings import os import yaml from datetime import datetime -from kubernetes import client, config +from kubernetes import client from tests.k8s_api import K8s +from kubernetes.client.rest import ApiException SPILO_CURRENT = "registry.opensource.zalan.do/acid/spilo-12:1.6-p5" SPILO_LAZY = "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p114" @@ -89,17 +88,17 @@ class EndToEndTestCase(unittest.TestCase): # remove existing local storage class and create hostpath class try: k8s.api.storage_v1_api.delete_storage_class("standard") - except: - print("Storage class has already been remove") + except ApiException as e: + print("Failed to delete the 'standard' storage class: {0}".format(e)) # operator deploys pod service account there on start up # needed for test_multi_namespace_support() - cls.namespace = "test" + cls.test_namespace = "test" try: - v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.namespace)) + v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.test_namespace)) k8s.api.core_v1.create_namespace(v1_namespace) - except: - print("Namespace already present") + except ApiException as e: + print("Failed to create the '{0}' namespace: {1}".format(cls.test_namespace, e)) # submit the most recent operator image built on the Docker host with open("manifests/postgres-operator.yaml", 'r+') as f: @@ -135,10 +134,8 @@ class EndToEndTestCase(unittest.TestCase): # make sure we start a new operator on every new run, # this tackles the problem when kind is reused - # and the Docker image is infact changed (dirty one) + # and the Docker image is in fact changed (dirty one) - # patch resync period, this can catch some problems with hanging e2e tests - # k8s.update_config({"data": {"resync_period":"30s"}},step="TestSuite setup") k8s.update_config({}, step="TestSuite Startup") actual_operator_image = k8s.api.core_v1.list_namespaced_pod( @@ -170,9 
+167,6 @@ class EndToEndTestCase(unittest.TestCase): 'connection-pooler': 'acid-minimal-cluster-pooler', }) - pod_selector = to_selector(pod_labels) - service_selector = to_selector(service_labels) - # enable connection pooler k8s.api.custom_objects_api.patch_namespaced_custom_object( 'acid.zalan.do', 'v1', 'default', @@ -347,7 +341,7 @@ class EndToEndTestCase(unittest.TestCase): }, } k8s.update_config(patch_infrastructure_roles) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") try: # check that new roles are represented in the config by requesting the @@ -604,19 +598,25 @@ class EndToEndTestCase(unittest.TestCase): with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: pg_manifest = yaml.safe_load(f) - pg_manifest["metadata"]["namespace"] = self.namespace + pg_manifest["metadata"]["namespace"] = self.test_namespace yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) try: k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") - k8s.wait_for_pod_start("spilo-role=master", self.namespace) - self.assert_master_is_unique(self.namespace, "acid-test-cluster") + k8s.wait_for_pod_start("spilo-role=master", self.test_namespace) + self.assert_master_is_unique(self.test_namespace, "acid-test-cluster") except timeout_decorator.TimeoutError: print('Operator log: {}'.format(k8s.get_operator_log())) raise finally: - k8s.api.core_v1.delete_namespace(self.namespace) + # delete the new cluster so that k8s_api.get_operator_state works correctly in subsequent tests + # ideally we should delete the 'test' namespace here, but + # the pods inside the namespace get stuck in the Terminating state, making the test time out + k8s.api.custom_objects_api.delete_namespaced_custom_object( + "acid.zalan.do", "v1", self.test_namespace, "postgresqls", "acid-test-cluster") + time.sleep(5) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_zz_node_readiness_label(self): @@ -748,12 +748,12 @@ class EndToEndTestCase(unittest.TestCase): } k8s.api.custom_objects_api.patch_namespaced_custom_object( "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_crd_annotations) - + annotations = { "deployment-time": "2020-04-30 12:00:00", "downscaler/downtime_replicas": "0", } - + self.eventuallyTrue(lambda: k8s.check_statefulset_annotations(cluster_label, annotations), "Annotations missing") @@ -825,14 +825,14 @@ class EndToEndTestCase(unittest.TestCase): } } k8s.update_config(patch_delete_annotations) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") try: # this delete attempt should be omitted because of missing annotations k8s.api.custom_objects_api.delete_namespaced_custom_object( "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster") time.sleep(5) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") # check that pods and services are still there k8s.wait_for_running_pods(cluster_label, 2) @@ -843,7 +843,7 @@ class EndToEndTestCase(unittest.TestCase): # wait a little before proceeding time.sleep(10) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync") +
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") # add annotations to manifest delete_date = datetime.today().strftime('%Y-%m-%d') @@ -857,7 +857,7 @@ class EndToEndTestCase(unittest.TestCase): } k8s.api.custom_objects_api.patch_namespaced_custom_object( "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_delete_annotations) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") # wait a little before proceeding time.sleep(20) @@ -884,7 +884,7 @@ class EndToEndTestCase(unittest.TestCase): print('Operator log: {}'.format(k8s.get_operator_log())) raise - #reset configmap + # reset configmap patch_delete_annotations = { "data": { "delete_annotation_date_key": "", diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index e59bfcea0..59283fd6e 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -107,7 +107,7 @@ data: # spilo_runasgroup: 103 # spilo_fsgroup: 103 spilo_privileged: "false" - # storage_resize_mode: "off" + storage_resize_mode: "pvc" super_username: postgres # team_admin_role: "admin" # team_api_role_configuration: "log_statement:all" diff --git a/manifests/operator-service-account-rbac.yaml b/manifests/operator-service-account-rbac.yaml index 15ed7f53b..1ba5b4d23 100644 --- a/manifests/operator-service-account-rbac.yaml +++ b/manifests/operator-service-account-rbac.yaml @@ -106,6 +106,8 @@ rules: - delete - get - list + - patch + - update # to read existing PVs. Creation should be done via dynamic provisioning - apiGroups: - "" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 808e3acb0..0987467aa 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -22,22 +22,22 @@ spec: - name: Image type: string description: Spilo image to be used for Pods - JSONPath: .configuration.docker_image + jsonPath: .configuration.docker_image - name: Cluster-Label type: string description: Label for K8s resources created by operator - JSONPath: .configuration.kubernetes.cluster_name_label + jsonPath: .configuration.kubernetes.cluster_name_label - name: Service-Account type: string description: Name of service account to be used - JSONPath: .configuration.kubernetes.pod_service_account_name + jsonPath: .configuration.kubernetes.pod_service_account_name - name: Min-Instances type: integer description: Minimum number of instances per Postgres cluster - JSONPath: .configuration.min_instances + jsonPath: .configuration.min_instances - name: Age type: date - JSONPath: .metadata.creationTimestamp + jsonPath: .metadata.creationTimestamp schema: openAPIV3Schema: type: object @@ -45,15 +45,15 @@ spec: - kind - apiVersion - configuration - properties: - kind: - type: string - enum: - - OperatorConfiguration - apiVersion: - type: string - enum: - - acid.zalan.do/v1 + properties: + kind: + type: string + enum: + - OperatorConfiguration + apiVersion: + type: string + enum: + - acid.zalan.do/v1 configuration: type: object properties: diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 14acc4356..84537e06a 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -72,7 +72,7 @@ configuration: # 
spilo_runasgroup: 103 # spilo_fsgroup: 103 spilo_privileged: false - storage_resize_mode: ebs + storage_resize_mode: pvc # toleration: {} # watched_namespace: "" postgres_pod_resources: diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index ffcf49056..16bdac564 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -22,34 +22,34 @@ spec: - name: Team type: string description: Team responsible for Postgres CLuster - JSONPath: .spec.teamId + jsonPath: .spec.teamId - name: Version type: string description: PostgreSQL version - JSONPath: .spec.postgresql.version + jsonPath: .spec.postgresql.version - name: Pods type: integer description: Number of Pods per Postgres cluster - JSONPath: .spec.numberOfInstances + jsonPath: .spec.numberOfInstances - name: Volume type: string description: Size of the bound volume - JSONPath: .spec.volume.size + jsonPath: .spec.volume.size - name: CPU-Request type: string description: Requested CPU for Postgres containers - JSONPath: .spec.resources.requests.cpu + jsonPath: .spec.resources.requests.cpu - name: Memory-Request type: string description: Requested memory for Postgres containers - JSONPath: .spec.resources.requests.memory + jsonPath: .spec.resources.requests.memory - name: Age type: date - JSONPath: .metadata.creationTimestamp + jsonPath: .metadata.creationTimestamp - name: Status type: string description: Current sync status of postgresql resource - JSONPath: .status.PostgresClusterStatus + jsonPath: .status.PostgresClusterStatus schema: openAPIV3Schema: type: object diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 18778fd41..7ec7be176 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -140,7 +140,7 @@ func New(cfg Config, kubeClient k8sutil.KubernetesClient, pgSpec acidv1.Postgres Secrets: make(map[types.UID]*v1.Secret), Services: make(map[PostgresRole]*v1.Service), Endpoints: make(map[PostgresRole]*v1.Endpoints)}, - userSyncStrategy: users.DefaultUserSyncStrategy{password_encryption}, + userSyncStrategy: users.DefaultUserSyncStrategy{PasswordEncryption: password_encryption}, deleteOptions: metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy}, podEventsQueue: podEventsQueue, KubeClient: kubeClient, @@ -626,14 +626,14 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error { } }() - if oldSpec.Spec.PostgresqlParam.PgVersion >= newSpec.Spec.PostgresqlParam.PgVersion { + if oldSpec.Spec.PostgresqlParam.PgVersion > newSpec.Spec.PostgresqlParam.PgVersion { c.logger.Warningf("postgresql version change(%q -> %q) has no effect", oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion) c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "PostgreSQL", "postgresql version change(%q -> %q) has no effect", oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion) // we need that hack to generate statefulset with the old version newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion - } else { + } else if oldSpec.Spec.PostgresqlParam.PgVersion < newSpec.Spec.PostgresqlParam.PgVersion { c.logger.Infof("postgresql version increased (%q -> %q), major version upgrade can be done manually after StatefulSet Sync", oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion) } @@ -671,13 +671,21 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error { // Volume if oldSpec.Spec.Size != newSpec.Spec.Size { - c.logger.Debugf("syncing persistent 
volumes") c.logVolumeChanges(oldSpec.Spec.Volume, newSpec.Spec.Volume) - - if err := c.syncVolumes(); err != nil { - c.logger.Errorf("could not sync persistent volumes: %v", err) - updateFailed = true + c.logger.Debugf("syncing volumes using %q storage resize mode", c.OpConfig.StorageResizeMode) + if c.OpConfig.StorageResizeMode == "pvc" { + if err := c.syncVolumeClaims(); err != nil { + c.logger.Errorf("could not sync persistent volume claims: %v", err) + updateFailed = true + } + } else if c.OpConfig.StorageResizeMode == "ebs" { + if err := c.syncVolumes(); err != nil { + c.logger.Errorf("could not sync persistent volumes: %v", err) + updateFailed = true + } } + } else { + c.logger.Infof("Storage resize is disabled (storage_resize_mode is off). Skipping volume sync.") } // Statefulset diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index ee7672a5b..61be7919d 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -57,8 +57,8 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error { return err } + c.logger.Debugf("syncing volumes using %q storage resize mode", c.OpConfig.StorageResizeMode) if c.OpConfig.StorageResizeMode == "pvc" { - c.logger.Debugf("syncing persistent volume claims") if err = c.syncVolumeClaims(); err != nil { err = fmt.Errorf("could not sync persistent volume claims: %v", err) return err @@ -70,7 +70,6 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error { // TODO: handle the case of the cluster that is downsized and enlarged again // (there will be a volume from the old pod for which we can't act before the // the statefulset modification is concluded) - c.logger.Debugf("syncing persistent volumes") if err = c.syncVolumes(); err != nil { err = fmt.Errorf("could not sync persistent volumes: %v", err) return err diff --git a/pkg/cluster/volumes.go b/pkg/cluster/volumes.go index d5c08c2e2..44b85663f 100644 --- a/pkg/cluster/volumes.go +++ b/pkg/cluster/volumes.go @@ -62,7 +62,7 @@ func (c *Cluster) resizeVolumeClaims(newVolume acidv1.Volume) error { if err != nil { return fmt.Errorf("could not parse volume size: %v", err) } - _, newSize, err := c.listVolumesWithManifestSize(newVolume) + newSize := quantityToGigabyte(newQuantity) for _, pvc := range pvcs { volumeSize := quantityToGigabyte(pvc.Spec.Resources.Requests[v1.ResourceStorage]) if volumeSize >= newSize { diff --git a/pkg/cluster/volumes_test.go b/pkg/cluster/volumes_test.go new file mode 100644 index 000000000..49fbbd228 --- /dev/null +++ b/pkg/cluster/volumes_test.go @@ -0,0 +1,171 @@ +package cluster + +import ( + "testing" + + "context" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + + "github.com/stretchr/testify/assert" + acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1" + "github.com/zalando/postgres-operator/pkg/util/config" + "github.com/zalando/postgres-operator/pkg/util/constants" + "github.com/zalando/postgres-operator/pkg/util/k8sutil" + "k8s.io/client-go/kubernetes/fake" +) + +func NewFakeKubernetesClient() (k8sutil.KubernetesClient, *fake.Clientset) { + clientSet := fake.NewSimpleClientset() + + return k8sutil.KubernetesClient{ + PersistentVolumeClaimsGetter: clientSet.CoreV1(), + }, clientSet +} + +func TestResizeVolumeClaim(t *testing.T) { + testName := "test resizing of persistent volume claims" + client, _ := NewFakeKubernetesClient() + clusterName := "acid-test-cluster" + namespace := "default" + newVolumeSize := "2Gi" + + // new cluster with pvc 
storage resize mode and configured labels + var cluster = New( + Config{ + OpConfig: config.Config{ + Resources: config.Resources{ + ClusterLabels: map[string]string{"application": "spilo"}, + ClusterNameLabel: "cluster-name", + }, + StorageResizeMode: "pvc", + }, + }, client, acidv1.Postgresql{}, logger, eventRecorder) + + // set metadata, so that labels will get correct values + cluster.Name = clusterName + cluster.Namespace = namespace + filterLabels := cluster.labelsSet(false) + + // define and create PVCs for 1Gi volumes + storage1Gi, err := resource.ParseQuantity("1Gi") + assert.NoError(t, err) + + pvcList := &v1.PersistentVolumeClaimList{ + Items: []v1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: constants.DataVolumeName + "-" + clusterName + "-0", + Namespace: namespace, + Labels: filterLabels, + }, + Spec: v1.PersistentVolumeClaimSpec{ + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: storage1Gi, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: constants.DataVolumeName + "-" + clusterName + "-1", + Namespace: namespace, + Labels: filterLabels, + }, + Spec: v1.PersistentVolumeClaimSpec{ + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: storage1Gi, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: constants.DataVolumeName + "-" + clusterName + "-2-0", + Namespace: namespace, + Labels: labels.Set{}, + }, + Spec: v1.PersistentVolumeClaimSpec{ + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: storage1Gi, + }, + }, + }, + }, + }, + } + + for _, pvc := range pvcList.Items { + cluster.KubeClient.PersistentVolumeClaims(namespace).Create(context.TODO(), &pvc, metav1.CreateOptions{}) + } + + // test resizing + cluster.resizeVolumeClaims(acidv1.Volume{Size: newVolumeSize}) + + pvcs, err := cluster.listPersistentVolumeClaims() + assert.NoError(t, err) + + // check if listPersistentVolumeClaims returns only the PVCs matching the filter + if len(pvcs) != len(pvcList.Items)-1 { + t.Errorf("%s: could not find all PVCs, got %v, expected %v", testName, len(pvcs), len(pvcList.Items)-1) + } + + // check if PVCs were correctly resized + for _, pvc := range pvcs { + newStorageSize := quantityToGigabyte(pvc.Spec.Resources.Requests[v1.ResourceStorage]) + expectedQuantity, err := resource.ParseQuantity(newVolumeSize) + assert.NoError(t, err) + expectedSize := quantityToGigabyte(expectedQuantity) + if newStorageSize != expectedSize { + t.Errorf("%s: resizing failed, got %v, expected %v", testName, newStorageSize, expectedSize) + } + } + + // check if other PVC was not resized + pvc2, err := cluster.KubeClient.PersistentVolumeClaims(namespace).Get(context.TODO(), constants.DataVolumeName+"-"+clusterName+"-2-0", metav1.GetOptions{}) + assert.NoError(t, err) + unchangedSize := quantityToGigabyte(pvc2.Spec.Resources.Requests[v1.ResourceStorage]) + expectedSize := quantityToGigabyte(storage1Gi) + if unchangedSize != expectedSize { + t.Errorf("%s: volume size changed, got %v, expected %v", testName, unchangedSize, expectedSize) + } +} + +func TestQuantityToGigabyte(t *testing.T) { + tests := []struct { + name string + quantityStr string + expected int64 + }{ + { + "test with 1Gi", + "1Gi", + 1, + }, + { + "test with float", + "1.5Gi", + int64(1), + }, + { + "test with 1000Mi", + "1000Mi", + int64(0), + }, + } + + for _, tt := range tests { + quantity, err := resource.ParseQuantity(tt.quantityStr) + assert.NoError(t, err) + gigabyte := 
quantityToGigabyte(quantity) + if gigabyte != tt.expected { + t.Errorf("%s: got %v, expected %v", tt.name, gigabyte, tt.expected) + } + } +} diff --git a/pkg/controller/util.go b/pkg/controller/util.go index 2adc0bea1..7f87de97d 100644 --- a/pkg/controller/util.go +++ b/pkg/controller/util.go @@ -341,9 +341,7 @@ func (c *Controller) getInfrastructureRole( util.Coalesce(string(secretData[infraRole.RoleKey]), infraRole.DefaultRoleValue)) } - if roleDescr.Valid() { - roles = append(roles, *roleDescr) - } else { + if !roleDescr.Valid() { msg := "infrastructure role %q is not complete and ignored" c.logger.Warningf(msg, roleDescr)
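
Reviewer note on the `pvc` resize path: the hunks above show the call sites (`syncVolumeClaims` in `Sync`/`Update`, the gigabyte comparison in `resizeVolumeClaims`, the `patch`/`update` RBAC verbs on persistent volume claims), but not the update itself. The sketch below is a minimal, standalone approximation of what that path boils down to with plain client-go. `resizePVCs` and its `labelSelector` parameter are illustrative names, not operator APIs; the `quantityToGigabyte` body is an assumption inferred from the expectations in the new `TestQuantityToGigabyte` (`1Gi` -> 1, `1.5Gi` -> 1, `1000Mi` -> 0), and the skip-if-already-large check mirrors the `volumeSize >= newSize` guard in `resizeVolumeClaims`.

```go
package volumes

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// quantityToGigabyte truncates a quantity to whole gigabytes (2^30 bytes);
// this is an assumed implementation consistent with the unit test above:
// "1Gi" -> 1, "1.5Gi" -> 1, "1000Mi" -> 0.
func quantityToGigabyte(q resource.Quantity) int64 {
	return q.ScaledValue(0) / (1 << 30)
}

// resizePVCs (hypothetical helper) raises the storage request of every claim
// matching labelSelector to newSize. Claims that already request at least as
// many whole gigabytes are skipped, since Kubernetes cannot shrink a PVC.
func resizePVCs(client kubernetes.Interface, namespace, labelSelector, newSize string) error {
	newQuantity, err := resource.ParseQuantity(newSize)
	if err != nil {
		return fmt.Errorf("could not parse volume size: %v", err)
	}

	pvcs, err := client.CoreV1().PersistentVolumeClaims(namespace).List(
		context.TODO(), metav1.ListOptions{LabelSelector: labelSelector})
	if err != nil {
		return fmt.Errorf("could not list persistent volume claims: %v", err)
	}

	for i := range pvcs.Items {
		pvc := &pvcs.Items[i]
		current := pvc.Spec.Resources.Requests[v1.ResourceStorage]
		if quantityToGigabyte(current) >= quantityToGigabyte(newQuantity) {
			continue // already at least as large, nothing to update
		}
		// Raising spec.resources.requests.storage is what triggers volume
		// expansion; it requires the update/patch verbs on PVCs that the
		// clusterrole changes above add.
		pvc.Spec.Resources.Requests[v1.ResourceStorage] = newQuantity
		if _, err := client.CoreV1().PersistentVolumeClaims(namespace).Update(
			context.TODO(), pvc, metav1.UpdateOptions{}); err != nil {
			return fmt.Errorf("could not update persistent volume claim %q: %v", pvc.Name, err)
		}
	}
	return nil
}
```

Note that expansion only takes effect when the claim's storage class has `allowVolumeExpansion: true`; otherwise the API server rejects the larger request, which is also why the `standard` storage class is swapped out in the e2e setup.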