* Patroni state function added to the K8s test helper class

* Lazy upgrade test now properly covered with eventuallyEqual checks and by waiting for pod start
* patching the config now updates the deployment by patching an annotation, which makes each change step traceable (see the sketch below)
* run.sh now takes NOCLEANUP to stop the kind cluster from being deleted
* if the kind config is present, run.sh will not install kind
* Fast local e2e execution is now possible once kind is up
Jan Mußler 2020-10-19 23:35:08 +02:00
parent c1ad71668b
commit 966575dd4b
4 changed files with 90 additions and 31 deletions
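A minimal sketch, not part of this commit, of the annotation-based tracing described above; it assumes a reachable cluster, the `kubernetes` Python client, and the `postgres-operator` deployment in the `default` namespace used throughout the diff:

```python
# Sketch only: patching a pod-template annotation makes the Deployment roll
# out a fresh operator pod, and the annotation value records which test step
# triggered the restart.
import time
from kubernetes import client, config

config.load_kube_config()  # assumes a kubeconfig pointing at the kind cluster
apps_v1 = client.AppsV1Api()

step = "patch image and lazy upgrade"  # illustrative step label
body = {"spec": {"template": {"metadata": {"annotations": {
    "step": "{}-{}".format(step, time.time())
}}}}}
apps_v1.patch_namespaced_deployment("postgres-operator", "default", body)
```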

e2e/README.md

@@ -35,6 +35,11 @@ In the e2e folder you can invoke tests either with `make test` or with:
To run both the build and test step you can invoke `make e2e` from the parent
directory.
To run the end-to-end tests and keep the kind state, execute:
```bash
NOCLEANUP=True ./run.sh
```
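With `NOCLEANUP` set, the kind cluster and its kubeconfig under `/tmp` survive the run; the next invocation finds the existing kubeconfig, skips cluster creation and goes straight to the tests.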
## Covered use cases
The current tests are all bundled in [`test_e2e.py`](tests/test_e2e.py):

e2e/run.sh

@@ -58,7 +58,6 @@ function run_tests(){
--mount type=bind,source="$(readlink -f tests)",target=/tests \
--mount type=bind,source="$(readlink -f exec.sh)",target=/exec.sh \
-e OPERATOR_IMAGE="${operator_image}" "${e2e_test_runner_image}"
}
function clean_up(){
@@ -70,11 +69,10 @@ function clean_up(){
function main(){
trap "clean_up" QUIT TERM EXIT
time pull_images
time start_kind
time set_kind_api_server_ip
[[ -z ${NOCLEANUP-} ]] && trap "clean_up" QUIT TERM EXIT
pull_images
[[ ! -f ${kubeconfig_path} ]] && start_kind
set_kind_api_server_ip
run_tests
exit 0
}
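Note the two guards in `main()`: the `clean_up` trap is registered only when `NOCLEANUP` is unset, and `start_kind` runs only when no kubeconfig from a previous run exists; together they keep the kind cluster alive for fast re-runs.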

e2e/run_tests_image.sh (new executable file, 13 additions)

@@ -0,0 +1,13 @@
#!/bin/bash
export cluster_name="postgres-operator-e2e-tests"
export kubeconfig_path="/tmp/kind-config-${cluster_name}"
export operator_image="registry.opensource.zalan.do/acid/postgres-operator:latest"
export e2e_test_runner_image="registry.opensource.zalan.do/acid/postgres-operator-e2e-tests-runner:latest"
docker run -it --entrypoint /bin/bash --network=host -e "TERM=xterm-256color" \
--mount type=bind,source="$(readlink -f ${kubeconfig_path})",target=/root/.kube/config \
--mount type=bind,source="$(readlink -f manifests)",target=/manifests \
--mount type=bind,source="$(readlink -f tests)",target=/tests \
--mount type=bind,source="$(readlink -f exec.sh)",target=/exec.sh \
-e OPERATOR_IMAGE="${operator_image}" "${e2e_test_runner_image}"
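The new script mirrors the `run_tests` step from `run.sh` but starts the runner image with an interactive shell, so tests can be executed repeatedly against the running kind cluster without recreating it.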

e2e/tests/test_e2e.py

@@ -10,6 +10,8 @@ import yaml
from datetime import datetime
from kubernetes import client, config
SPILO_CURRENT = "registry.opensource.zalan.do/acid/spilo-12:1.6-p5"
SPILO_LAZY = "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p114"
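# pinned Spilo images: SPILO_CURRENT is the baseline, SPILO_LAZY the newer target used by the lazy upgrade test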
def to_selector(labels):
return ",".join(["=".join(l) for l in labels.items()])
@@ -75,13 +77,19 @@ class EndToEndTestCase(unittest.TestCase):
k8s = cls.k8s = K8s()
# remove existing local storage class and create hostpath class
k8s.api.storage_v1_api.delete_storage_class("standard")
try:
k8s.api.storage_v1_api.delete_storage_class("standard")
except:
print("Storage class has already been remove")
# operator deploys pod service account there on start up
# needed for test_multi_namespace_support()
cls.namespace = "test"
v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.namespace))
k8s.api.core_v1.create_namespace(v1_namespace)
try:
v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.namespace))
k8s.api.core_v1.create_namespace(v1_namespace)
except:
print("Namespace already present")
# submit the most recent operator image built on the Docker host
with open("manifests/postgres-operator.yaml", 'r+') as f:
@@ -313,27 +321,47 @@ class EndToEndTestCase(unittest.TestCase):
k8s = self.k8s
pod0 = 'acid-minimal-cluster-0'
pod1 = 'acid-minimal-cluster-1'
self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "Expected 2 running pods")
self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), 2, "Postgres status did not enter running")
patch_lazy_spilo_upgrade = {
"data": {
"docker_image": SPILO_CURRENT,
"enable_lazy_spilo_upgrade": "false"
}
}
k8s.update_config(patch_lazy_spilo_upgrade, step="Init baseline image version")
self.eventuallyEqual(lambda: k8s.get_statefulset_image(), SPILO_CURRENT, "Statefulset not updated initially")
self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "Expected 2 running pods")
self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), 2, "Postgres status did not enter running")
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), SPILO_CURRENT, "Rolling upgrade was not executed")
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), SPILO_CURRENT, "Rolling upgrade was not executed")
# update docker image in config and enable the lazy upgrade
conf_image = "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p114"
conf_image = SPILO_LAZY
patch_lazy_spilo_upgrade = {
"data": {
"docker_image": conf_image,
"enable_lazy_spilo_upgrade": "true"
}
}
k8s.update_config(patch_lazy_spilo_upgrade)
pod0 = 'acid-minimal-cluster-0'
pod1 = 'acid-minimal-cluster-1'
k8s.update_config(patch_lazy_spilo_upgrade, step="patch image and lazy upgrade")
self.eventuallyEqual(lambda: k8s.get_statefulset_image(), conf_image, "Statefulset not updated to next Docker image")
try:
# restart the pod to get a container with the new image
k8s.api.core_v1.delete_namespaced_pod(pod0, 'default')
# verify only pod-0 which was deleted got new image from statefulset
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), conf_image, "Delete pod-0 did not get new spilo image")
old_image = k8s.get_effective_pod_image(pod1)
self.assertNotEqual(conf_image, old_image, "pod-1 should not have changed its Docker image to {}".format(old_image))
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), conf_image, "Delete pod-0 did not get new spilo image")
self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "Expected two running pods after lazy rolling upgrade")
self.assertNotEqual(k8s.get_effective_pod_image(pod1), conf_image, "pod-1 should not have changed its Docker image to {}".format(conf_image))
# clean up
unpatch_lazy_spilo_upgrade = {
@@ -341,13 +369,12 @@ class EndToEndTestCase(unittest.TestCase):
"enable_lazy_spilo_upgrade": "false",
}
}
k8s.update_config(unpatch_lazy_spilo_upgrade)
k8s.update_config(unpatch_lazy_spilo_upgrade, step="patch lazy upgrade")
# at this point operator will complete the normal rolling upgrade
# so we additionally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), conf_image, "Rolling upgrade was not executed")
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), conf_image, "Rolling upgrade was not executed")
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), conf_image, "Rolling upgrade was not executed", 50, 3)
self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), conf_image, "Rolling upgrade was not executed", 50, 3)
except timeout_decorator.TimeoutError:
print('Operator log: {}'.format(k8s.get_operator_log()))
@@ -379,7 +406,7 @@ class EndToEndTestCase(unittest.TestCase):
"acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_backup)
try:
self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job()), 1, "failed to create logical backup job")
self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 1, "failed to create logical backup job")
job = k8s.get_logical_backup_job().items[0]
self.assertEqual(job.metadata.name, "logical-backup-acid-minimal-cluster",
@@ -396,7 +423,7 @@ class EndToEndTestCase(unittest.TestCase):
"logical_backup_docker_image": image,
}
}
k8s.update_config(patch_logical_backup_image)
k8s.update_config(patch_logical_backup_image, step="patch logical backup image")
def get_docker_image():
jobs = k8s.get_logical_backup_job().items
@@ -414,7 +441,7 @@ class EndToEndTestCase(unittest.TestCase):
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_backup)
self.eventuallyEqual(lambda: len(self.get_logical_backup_job()), 0, "failed to create logical backup job")
self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 0, "failed to delete logical backup job")
except timeout_decorator.TimeoutError:
print('Operator log: {}'.format(k8s.get_operator_log()))
@@ -991,6 +1018,10 @@ class K8s:
def count_pdbs_with_label(self, labels, namespace='default'):
return len(self.api.policy_v1_beta1.list_namespaced_pod_disruption_budget(
namespace, label_selector=labels).items)
def count_running_pods(self, labels='application=spilo,cluster-name=acid-minimal-cluster', namespace='default'):
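# count only pods whose phase is 'Running', not pods merely scheduled or pending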
pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
return len(list(filter(lambda x: x.status.phase == 'Running', pods)))
def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
pod_phase = 'Failing over'
@@ -1016,19 +1047,18 @@ class K8s:
def wait_for_logical_backup_job_creation(self):
self.wait_for_logical_backup_job(expected_num_of_jobs=1)
def delete_operator_pod(self):
operator_pod = self.api.core_v1.list_namespaced_pod(
'default', label_selector="name=postgres-operator").items[0].metadata.name
self.api.core_v1.delete_namespaced_pod(operator_pod, "default") # restart reloads the conf
def delete_operator_pod(self, step="Delete operator deployment"):
operator_pod = self.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name
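# patching a pod-template annotation rolls out a fresh operator pod; the step value makes each restart traceable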
self.api.apps_v1.patch_namespaced_deployment("postgres-operator", "default", {"spec": {"template": {"metadata": {"annotations": {"step": "{}-{}".format(step, time.time())}}}}})
self.wait_for_operator_pod_start()
def update_config(self, config_map_patch):
def update_config(self, config_map_patch, step="Updating operator deployment"):
self.api.core_v1.patch_namespaced_config_map("postgres-operator", "default", config_map_patch)
self.delete_operator_pod()
self.delete_operator_pod(step=step)
def create_with_kubectl(self, path):
return subprocess.run(
["kubectl", "create", "-f", path],
["kubectl", "apply", "-f", path],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
@@ -1037,6 +1067,19 @@ class K8s:
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
def get_patroni_state(self, pod):
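# 'patronictl list -f json' inside the pod reports the cluster members as JSON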
return json.loads(self.exec_with_kubectl(pod, "patronictl list -f json").stdout)
def get_patroni_running_members(self, pod):
result = self.get_patroni_state(pod)
return list(filter(lambda x: x["State"] == "running", result))
def get_statefulset_image(self, label_selector="application=spilo,cluster-name=acid-minimal-cluster", namespace='default'):
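# image set in the statefulset's pod template; during a lazy upgrade pods may still run an older image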
ssets = self.api.apps_v1.list_namespaced_stateful_set(namespace, label_selector=label_selector, limit=1)
if len(ssets.items) == 0:
return None
return ssets.items[0].spec.template.spec.containers[0].image
def get_effective_pod_image(self, pod_name, namespace='default'):
'''
Get the Spilo image the pod currently uses. In case of lazy rolling updates