Improving e2e more (#1185)

* Add curl to operator image.

* Wait for idle operator in delete.
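The second bullet is the behavioral change: tests now poll the operator's `/workers/all/status/` endpoint (served on port 8080 inside the operator pod, hence the curl install in the first bullet) until the worker queue reports idle. A minimal sketch of that polling loop, assuming `kubectl` access to the pod; the helper name, flags, and timeouts here are illustrative, not part of the commit:

```python
import json
import subprocess
import time

def wait_for_idle_operator(pod_name, timeout=60, interval=2):
    # Illustrative helper (not from this commit): retry until the
    # operator's only worker reports "idle", or give up after `timeout`s.
    deadline = time.time() + timeout
    while time.time() < deadline:
        r = subprocess.run(
            ["kubectl", "exec", pod_name, "--",
             "curl", "-s", "localhost:8080/workers/all/status/"],
            capture_output=True)
        out = r.stdout.decode()
        # Only parse output that plausibly is the JSON status map.
        if r.returncode == 0 and out.startswith("{"):
            if json.loads(out) == {"0": "idle"}:
                return True
        time.sleep(interval)
    return False
```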
Jan Mussler 2020-10-29 13:59:22 +01:00 committed by GitHub
parent c694a72352
commit 7f7beba66b
6 changed files with 46 additions and 3 deletions


@@ -2,6 +2,7 @@ FROM alpine
MAINTAINER Team ACID @ Zalando <team-acid@zalando.de>
# We need root certificates to deal with teams api over https
RUN apk --no-cache add curl
RUN apk --no-cache add ca-certificates
COPY build/* /


@@ -51,3 +51,6 @@ tools:
e2etest: tools copy clean
	./run.sh main

cleanup: clean
	./run.sh cleanup
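`run.sh` now takes a subcommand: `make e2etest` calls `./run.sh main`, while the new `cleanup` target runs the `clean` prerequisite and then `./run.sh cleanup` to tear down leftover test state without rerunning the tests.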


@@ -33,7 +33,7 @@ runtime.
In the e2e folder you can invoke tests either with `make test` or with:
```bash
./run.sh
./run.sh main
```
To run both the build and test step you can invoke `make e2e` from the parent
@@ -41,7 +41,7 @@ directory.
To run the end-to-end tests and keep the kind state, execute:
```bash
NOCLEANUP=True ./run.sh
NOCLEANUP=True ./run.sh main
```
## Run individual test


@@ -13,7 +13,15 @@ kubectl get statefulsets
echo
kubectl get deployments
echo
echo
echo 'Step from operator deployment'
kubectl get pods -l name=postgres-operator -o jsonpath='{.items..metadata.annotations.step}'
echo
echo
echo 'Spilo Image in statefulset'
kubectl get pods -l application=spilo -o jsonpath='{.items..spec.containers..image}'
"
echo
echo
echo 'Queue Status'
kubectl exec -it \$(kubectl get pods -l name=postgres-operator -o jsonpath='{.items..metadata.name}') -- curl localhost:8080/workers/all/status/
echo"


@@ -239,6 +239,19 @@ class K8s:
            return []
        return json.loads(r.stdout.decode())

    def get_operator_state(self):
        pod = self.get_operator_pod()
        if pod is None:
            return None
        pod = pod.metadata.name

        r = self.exec_with_kubectl(pod, "curl localhost:8080/workers/all/status/")
        if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{":
            return None

        return json.loads(r.stdout.decode())

    def get_patroni_running_members(self, pod="acid-minimal-cluster-0"):
        result = self.get_patroni_state(pod)
        return list(filter(lambda x: "State" in x and x["State"] == "running", result))
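The tests below gate on this helper with the suite's retrying `eventuallyEqual` assertion, comparing against `{"0":"idle"}`; the first-byte check on stdout keeps a non-JSON response (for instance while the endpoint is still coming up) from raising inside the retry loop.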


@@ -109,9 +109,18 @@ class EndToEndTestCase(unittest.TestCase):
        with open("manifests/postgres-operator.yaml", 'w') as f:
            yaml.dump(operator_deployment, f, Dumper=yaml.Dumper)

        with open("manifests/configmap.yaml", 'r+') as f:
            configmap = yaml.safe_load(f)
            configmap["data"]["workers"] = "1"

        with open("manifests/configmap.yaml", 'w') as f:
            yaml.dump(configmap, f, Dumper=yaml.Dumper)

        for filename in ["operator-service-account-rbac.yaml",
                         "postgresteam.crd.yaml",
                         "configmap.yaml",
                         "postgres-operator.yaml",
                         "api-service.yaml",
                         "infrastructure-roles.yaml",
                         "infrastructure-roles-new.yaml",
                         "e2e-storage-class.yaml"]:
@@ -338,6 +347,7 @@
            },
        }
        k8s.update_config(patch_infrastructure_roles)
        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")

        try:
            # check that new roles are represented in the config by requesting the
@@ -447,6 +457,7 @@
            # so we additionally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works
            self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), conf_image, "Rolling upgrade was not executed", 50, 3)
            self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), conf_image, "Rolling upgrade was not executed", 50, 3)
            self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), 2, "Postgres status did not enter running")

        except timeout_decorator.TimeoutError:
            print('Operator log: {}'.format(k8s.get_operator_log()))
@@ -519,6 +530,9 @@
            print('Operator log: {}'.format(k8s.get_operator_log()))
            raise

        # ensure cluster is healthy after tests
        self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running")

    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
    def test_min_resource_limits(self):
        '''
@@ -809,12 +823,14 @@
            }
        }
        k8s.update_config(patch_delete_annotations)
        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")

        try:
            # this delete attempt should be omitted because of missing annotations
            k8s.api.custom_objects_api.delete_namespaced_custom_object(
                "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster")
            time.sleep(5)
            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")

            # check that pods and services are still there
            k8s.wait_for_running_pods(cluster_label, 2)
@@ -825,6 +841,7 @@
            # wait a little before proceeding
            time.sleep(10)
            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")

            # add annotations to manifest
            delete_date = datetime.today().strftime('%Y-%m-%d')
@@ -838,6 +855,7 @@
            }
            k8s.api.custom_objects_api.patch_namespaced_custom_object(
                "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_delete_annotations)
            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")

            # wait a little before proceeding
            time.sleep(20)