diff --git a/e2e/requirements.txt b/e2e/requirements.txt
index c46b334ad..562e34c3a 100644
--- a/e2e/requirements.txt
+++ b/e2e/requirements.txt
@@ -1,2 +1,3 @@
 kubernetes==9.0.0
 timeout_decorator==0.4.1
+pyyaml
diff --git a/e2e/tests/test_smoke.py b/e2e/tests/test_e2e.py
old mode 100755
new mode 100644
similarity index 83%
rename from e2e/tests/test_smoke.py
rename to e2e/tests/test_e2e.py
index 698f06529..b61d70933
--- a/e2e/tests/test_smoke.py
+++ b/e2e/tests/test_e2e.py
@@ -4,19 +4,18 @@ import timeout_decorator
 import subprocess
 import warnings
 import os
+import yaml
 
 from kubernetes import client, config, utils
 
 
-class SmokeTestCase(unittest.TestCase):
+class EndToEndTestCase(unittest.TestCase):
     '''
-    Test the most basic functions of the operator.
+    Test interaction of the operator with multiple k8s components.
     '''
 
     # `kind` pods may stuck in the `Terminating` phase for a few minutes; hence high test timeout
     TEST_TIMEOUT_SEC = 600
-    # labels may be assigned before a pod becomes fully operational; so wait a few seconds more
-    OPERATOR_POD_START_PERIOD_SEC = 5
 
     @classmethod
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
@@ -27,37 +26,24 @@ class SmokeTestCase(unittest.TestCase):
         /e2e/run.sh deletes the 'kind' cluster after successful run along with all operator-related entities.
         In the case of test failure the cluster will stay to enable manual examination;
-        next invocation of "make e2e" will re-create it.
+        next invocation of "make e2e-run" will re-create it.
         '''
 
         # set a single k8s wrapper for all tests
         k8s = cls.k8s = K8s()
 
+        # submit the most recent operator image built on the Docker host
+        with open("manifests/postgres-operator.yaml", 'r+') as f:
+            operator_deployment = yaml.load(f, Loader=yaml.Loader)
+            operator_deployment["spec"]["template"]["spec"]["containers"][0]["image"] = os.environ['OPERATOR_IMAGE']
+            yaml.dump(operator_deployment, f, Dumper=yaml.Dumper)
+
         for filename in ["operator-service-account-rbac.yaml", "configmap.yaml", "postgres-operator.yaml"]:
             k8s.create_with_kubectl("manifests/" + filename)
 
-        # submit the most recent operator image built on the Docker host
-        body = {
-            "spec": {
-                "template": {
-                    "spec": {
-                        "containers": [
-                            {
-                                "name": "postgres-operator",
-                                "image": os.environ['OPERATOR_IMAGE']
-                            }
-                        ]
-                    }
-                }
-            }
-        }
-        k8s.api.apps_v1.patch_namespaced_deployment("postgres-operator", "default", body)
-
-        k8s.wait_for_pod_start('name=postgres-operator')
-        # reason: CRD may take time to register
-        time.sleep(cls.OPERATOR_POD_START_PERIOD_SEC)
+        k8s.wait_for_operator_pod_start()
 
         actual_operator_image = k8s.api.core_v1.list_namespaced_pod('default', label_selector='name=postgres-operator').items[0].spec.containers[0].image
         print("Tested operator image: {}".format(actual_operator_image))  # shows up after tests finish
@@ -66,22 +52,10 @@ class SmokeTestCase(unittest.TestCase):
 
         k8s.wait_for_pod_start('spilo-role=master')
 
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_master_is_unique(self):
-        """
-        Check that there is a single pod in the k8s cluster with the label "spilo-role=master".
-        """
-
-        k8s = self.k8s
-        labels = 'spilo-role=master,version=acid-minimal-cluster'
-
-        num_of_master_pods = k8s.count_pods_with_label(labels)
-        self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))
-
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_scaling(self):
         """
-        Scale up from 2 to 3 pods and back to 2 by updating the Postgres manifest at runtime.
+        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
         """
 
         k8s = self.k8s
@@ -89,14 +63,16 @@ class SmokeTestCase(unittest.TestCase):
 
         k8s.wait_for_pg_to_scale(3)
         self.assertEqual(3, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
 
         k8s.wait_for_pg_to_scale(2)
         self.assertEqual(2, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_taint_based_eviction(self):
         """
-        Add taint "postgres=:NoExecute" to node with master.
+        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
         """
         k8s = self.k8s
         labels = 'version=acid-minimal-cluster'
@@ -135,6 +111,7 @@ class SmokeTestCase(unittest.TestCase):
                         "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
         self.assertTrue(num_replicas == len(new_replica_nodes),
                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
 
         # undo the tainting
         body = {
@@ -149,7 +126,7 @@ class SmokeTestCase(unittest.TestCase):
     def test_logical_backup_cron_job(self):
         """
         Ensure we can (a) create the cron job at user request for a specific PG cluster
-                      (b) update the cluster-wide image of the logical backup pod
+                      (b) update the cluster-wide image for the logical backup pod
                       (c) delete the job at user request
 
         Limitations:
@@ -191,9 +168,7 @@ class SmokeTestCase(unittest.TestCase):
 
         operator_pod = k8s.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name
         k8s.api.core_v1.delete_namespaced_pod(operator_pod, "default")  # restart reloads the conf
-        k8s.wait_for_pod_start('name=postgres-operator')
-        # reason: patch below is otherwise dropped during pod restart
-        time.sleep(self.OPERATOR_POD_START_PERIOD_SEC)
+        k8s.wait_for_operator_pod_start()
 
         jobs = k8s.get_logical_backup_job().items
         actual_image = jobs[0].spec.job_template.spec.template.spec.containers[0].image
@@ -212,6 +187,18 @@ class SmokeTestCase(unittest.TestCase):
         self.assertTrue(0 == len(jobs),
                         "Expected 0 logical backup jobs, found {}".format(len(jobs)))
 
+    def assert_master_is_unique(self):
+        """
+        Check that there is a single pod in the k8s cluster with the label "spilo-role=master".
+        To be called manually after operations that affect pods.
+        """
+
+        k8s = self.k8s
+        labels = 'spilo-role=master,version=acid-minimal-cluster'
+
+        num_of_master_pods = k8s.count_pods_with_label(labels)
+        self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))
+
 
 class K8sApi:
 
@@ -223,17 +210,16 @@ class K8sApi:
 
         self.config = config.load_kube_config()
         self.k8s_client = client.ApiClient()
-        self.crd = client.CustomObjectsApi()
 
         self.core_v1 = client.CoreV1Api()
         self.apps_v1 = client.AppsV1Api()
         self.batch_v1_beta1 = client.BatchV1beta1Api()
         self.custom_objects_api = client.CustomObjectsApi()
 
-
 class K8s:
-
-    RETRY_TIMEOUT_SEC = 5
+    '''
+    Wraps around K8s api client and helper methods.
+    '''
 
     def __init__(self):
         self.api = K8sApi()
@@ -250,13 +236,17 @@ class K8s:
 
         return master_pod_node, replica_pod_nodes
 
+    def wait_for_operator_pod_start(self):
+        self.wait_for_pod_start("name=postgres-operator")
+        # HACK operator must register CRD / add existing PG clusters after pod start up
+        time.sleep(10)
+
     def wait_for_pod_start(self, pod_labels):
         pod_phase = 'No pod running'
         while pod_phase != 'Running':
             pods = self.api.core_v1.list_namespaced_pod('default', label_selector=pod_labels).items
             if pods:
                 pod_phase = pods[0].status.phase
-            time.sleep(self.RETRY_TIMEOUT_SEC)
 
     def wait_for_pg_to_scale(self, number_of_instances):
 
         body = {
             "spec": {
                 "numberOfInstances": number_of_instances
             }
         }
-        _ = self.api.crd.patch_namespaced_custom_object("acid.zalan.do",
+        _ = self.api.custom_objects_api.patch_namespaced_custom_object("acid.zalan.do",
                                                         "v1", "default", "postgresqls", "acid-minimal-cluster", body)
 
         labels = 'version=acid-minimal-cluster'
         while self.count_pods_with_label(labels) != number_of_instances:
-            time.sleep(self.RETRY_TIMEOUT_SEC)
+            pass
 
     def count_pods_with_label(self, labels):
         return len(self.api.core_v1.list_namespaced_pod('default', label_selector=labels).items)
@@ -282,25 +272,23 @@ class K8s:
 
         while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
             pods = self.api.core_v1.list_namespaced_pod('default', label_selector=labels).items
-
             if pods:
                 new_master_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
-            time.sleep(self.RETRY_TIMEOUT_SEC)
 
     def get_logical_backup_job(self):
         return self.api.batch_v1_beta1.list_namespaced_cron_job("default", label_selector="application=spilo")
 
     def wait_for_logical_backup_job(self, expected_num_of_jobs):
-        while (len(self.api.get_logical_backup_job().items) != expected_num_of_jobs):
-            time.sleep(self.RETRY_TIMEOUT_SEC)
+        while (len(self.get_logical_backup_job().items) != expected_num_of_jobs):
+            pass
 
     def wait_for_logical_backup_job_deletion(self):
-        Utils.wait_for_logical_backup_job(expected_num_of_jobs = 0)
+        self.wait_for_logical_backup_job(expected_num_of_jobs = 0)
 
     def wait_for_logical_backup_job_creation(self):
-        Utils.wait_for_logical_backup_job(expected_num_of_jobs = 1)
-
+        self.wait_for_logical_backup_job(expected_num_of_jobs = 1)
+
     def create_with_kubectl(self, path):
         subprocess.run(["kubectl", "create", "-f", path])
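Note on the image override introduced in setUpClass above: the deployment manifest is rewritten on disk before kubectl submits it. Below is a minimal standalone sketch of that flow, not part of the patch; the helper name is hypothetical, and it assumes the same manifests/postgres-operator.yaml layout and OPERATOR_IMAGE environment variable used by the e2e runner. The sketch rewinds and truncates the file so the dumped YAML replaces the original document rather than being appended after it.

# Hypothetical sketch (not part of the patch): override the operator image in the
# deployment manifest before it is submitted with kubectl.
import os
import yaml


def override_operator_image(manifest_path="manifests/postgres-operator.yaml"):
    with open(manifest_path, 'r+') as f:
        deployment = yaml.safe_load(f)
        # the first container of the pod template is assumed to be the operator
        deployment["spec"]["template"]["spec"]["containers"][0]["image"] = os.environ['OPERATOR_IMAGE']
        # rewind before dumping and truncate afterwards, so the edited document
        # overwrites the file instead of landing after the original YAML
        f.seek(0)
        yaml.safe_dump(deployment, f, default_flow_style=False)
        f.truncate()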