import json import unittest import time import timeout_decorator import subprocess import warnings import os import yaml from kubernetes import client, config def to_selector(labels): return ",".join(["=".join(l) for l in labels.items()]) class EndToEndTestCase(unittest.TestCase): ''' Test interaction of the operator with multiple K8s components. ''' # `kind` pods may stuck in the `Terminating` phase for a few minutes; hence high test timeout TEST_TIMEOUT_SEC = 600 @classmethod @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def setUpClass(cls): ''' Deploy operator to a "kind" cluster created by run.sh using examples from /manifests. This operator deployment is to be shared among all tests. run.sh deletes the 'kind' cluster after successful run along with all operator-related entities. In the case of test failure the cluster will stay to enable manual examination; next invocation of "make test" will re-create it. ''' # set a single K8s wrapper for all tests k8s = cls.k8s = K8s() # operator deploys pod service account there on start up # needed for test_multi_namespace_support() cls.namespace = "test" v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.namespace)) k8s.api.core_v1.create_namespace(v1_namespace) # submit the most recent operator image built on the Docker host with open("manifests/postgres-operator.yaml", 'r+') as f: operator_deployment = yaml.safe_load(f) operator_deployment["spec"]["template"]["spec"]["containers"][0]["image"] = os.environ['OPERATOR_IMAGE'] yaml.dump(operator_deployment, f, Dumper=yaml.Dumper) for filename in ["operator-service-account-rbac.yaml", "configmap.yaml", "postgres-operator.yaml", "infrastructure-roles-new.yaml"]: result = k8s.create_with_kubectl("manifests/" + filename) print("stdout: {}, stderr: {}".format(result.stdout, result.stderr)) k8s.wait_for_operator_pod_start() actual_operator_image = k8s.api.core_v1.list_namespaced_pod( 'default', label_selector='name=postgres-operator').items[0].spec.containers[0].image print("Tested operator image: {}".format(actual_operator_image)) # shows up after tests finish result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml") print('stdout: {}, stderr: {}'.format(result.stdout, result.stderr)) try: k8s.wait_for_pod_start('spilo-role=master') k8s.wait_for_pod_start('spilo-role=replica') except timeout_decorator.TimeoutError: print('Operator log: {}'.format(k8s.get_operator_log())) raise # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_enable_disable_connection_pooler(self): # ''' # For a database without connection pooler, then turns it on, scale up, # turn off and on again. Test with different ways of doing this (via # enableConnectionPooler or connectionPooler configuration section). At # the end turn connection pooler off to not interfere with other tests. # ''' # k8s = self.k8s # service_labels = { # 'cluster-name': 'acid-minimal-cluster', # } # pod_labels = dict({ # 'connection-pooler': 'acid-minimal-cluster-pooler', # }) # pod_selector = to_selector(pod_labels) # service_selector = to_selector(service_labels) # try: # # enable connection pooler # k8s.api.custom_objects_api.patch_namespaced_custom_object( # 'acid.zalan.do', 'v1', 'default', # 'postgresqls', 'acid-minimal-cluster', # { # 'spec': { # 'enableConnectionPooler': True, # } # }) # k8s.wait_for_pod_start(pod_selector) # pods = k8s.api.core_v1.list_namespaced_pod( # 'default', label_selector=pod_selector # ).items # self.assertTrue(pods, 'No connection pooler pods') # k8s.wait_for_service(service_selector) # services = k8s.api.core_v1.list_namespaced_service( # 'default', label_selector=service_selector # ).items # services = [ # s for s in services # if s.metadata.name.endswith('pooler') # ] # self.assertTrue(services, 'No connection pooler service') # # scale up connection pooler deployment # k8s.api.custom_objects_api.patch_namespaced_custom_object( # 'acid.zalan.do', 'v1', 'default', # 'postgresqls', 'acid-minimal-cluster', # { # 'spec': { # 'connectionPooler': { # 'numberOfInstances': 2, # }, # } # }) # k8s.wait_for_running_pods(pod_selector, 2) # # turn it off, keeping configuration section # k8s.api.custom_objects_api.patch_namespaced_custom_object( # 'acid.zalan.do', 'v1', 'default', # 'postgresqls', 'acid-minimal-cluster', # { # 'spec': { # 'enableConnectionPooler': False, # } # }) # k8s.wait_for_pods_to_stop(pod_selector) # except timeout_decorator.TimeoutError: # print('Operator log: {}'.format(k8s.get_operator_log())) # raise # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_enable_load_balancer(self): # ''' # Test if services are updated when enabling/disabling load balancers # ''' # k8s = self.k8s # cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' # # enable load balancer services # pg_patch_enable_lbs = { # "spec": { # "enableMasterLoadBalancer": True, # "enableReplicaLoadBalancer": True # } # } # k8s.api.custom_objects_api.patch_namespaced_custom_object( # "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_lbs) # # wait for service recreation # time.sleep(60) # master_svc_type = k8s.get_service_type(cluster_label + ',spilo-role=master') # self.assertEqual(master_svc_type, 'LoadBalancer', # "Expected LoadBalancer service type for master, found {}".format(master_svc_type)) # repl_svc_type = k8s.get_service_type(cluster_label + ',spilo-role=replica') # self.assertEqual(repl_svc_type, 'LoadBalancer', # "Expected LoadBalancer service type for replica, found {}".format(repl_svc_type)) # # disable load balancer services again # pg_patch_disable_lbs = { # "spec": { # "enableMasterLoadBalancer": False, # "enableReplicaLoadBalancer": False # } # } # k8s.api.custom_objects_api.patch_namespaced_custom_object( # "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_lbs) # # wait for service recreation # time.sleep(60) # master_svc_type = k8s.get_service_type(cluster_label + ',spilo-role=master') # self.assertEqual(master_svc_type, 'ClusterIP', # "Expected ClusterIP service type for master, found {}".format(master_svc_type)) # repl_svc_type = k8s.get_service_type(cluster_label + ',spilo-role=replica') # self.assertEqual(repl_svc_type, 'ClusterIP', # "Expected ClusterIP service type for replica, found {}".format(repl_svc_type)) # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_lazy_spilo_upgrade(self): # ''' # Test lazy upgrade for the Spilo image: operator changes a stateful set but lets pods run with the old image # until they are recreated for reasons other than operator's activity. That works because the operator configures # stateful sets to use "onDelete" pod update policy. # The test covers: # 1) enabling lazy upgrade in existing operator deployment # 2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod # ''' # k8s = self.k8s # # update docker image in config and enable the lazy upgrade # conf_image = "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p114" # patch_lazy_spilo_upgrade = { # "data": { # "docker_image": conf_image, # "enable_lazy_spilo_upgrade": "true" # } # } # k8s.update_config(patch_lazy_spilo_upgrade) # pod0 = 'acid-minimal-cluster-0' # pod1 = 'acid-minimal-cluster-1' # # restart the pod to get a container with the new image # k8s.api.core_v1.delete_namespaced_pod(pod0, 'default') # time.sleep(60) # # lazy update works if the restarted pod and older pods run different Spilo versions # new_image = k8s.get_effective_pod_image(pod0) # old_image = k8s.get_effective_pod_image(pod1) # self.assertNotEqual(new_image, old_image, "Lazy updated failed: pods have the same image {}".format(new_image)) # # sanity check # assert_msg = "Image {} of a new pod differs from {} in operator conf".format(new_image, conf_image) # self.assertEqual(new_image, conf_image, assert_msg) # # clean up # unpatch_lazy_spilo_upgrade = { # "data": { # "enable_lazy_spilo_upgrade": "false", # } # } # k8s.update_config(unpatch_lazy_spilo_upgrade) # # at this point operator will complete the normal rolling upgrade # # so we additonally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works # # XXX there is no easy way to wait until the end of Sync() # time.sleep(60) # image0 = k8s.get_effective_pod_image(pod0) # image1 = k8s.get_effective_pod_image(pod1) # assert_msg = "Disabling lazy upgrade failed: pods still have different images {} and {}".format(image0, image1) # self.assertEqual(image0, image1, assert_msg) # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_logical_backup_cron_job(self): # ''' # Ensure we can (a) create the cron job at user request for a specific PG cluster # (b) update the cluster-wide image for the logical backup pod # (c) delete the job at user request # Limitations: # (a) Does not run the actual batch job because there is no S3 mock to upload backups to # (b) Assumes 'acid-minimal-cluster' exists as defined in setUp # ''' # k8s = self.k8s # # create the cron job # schedule = "7 7 7 7 *" # pg_patch_enable_backup = { # "spec": { # "enableLogicalBackup": True, # "logicalBackupSchedule": schedule # } # } # k8s.api.custom_objects_api.patch_namespaced_custom_object( # "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_backup) # k8s.wait_for_logical_backup_job_creation() # jobs = k8s.get_logical_backup_job().items # self.assertEqual(1, len(jobs), "Expected 1 logical backup job, found {}".format(len(jobs))) # job = jobs[0] # self.assertEqual(job.metadata.name, "logical-backup-acid-minimal-cluster", # "Expected job name {}, found {}" # .format("logical-backup-acid-minimal-cluster", job.metadata.name)) # self.assertEqual(job.spec.schedule, schedule, # "Expected {} schedule, found {}" # .format(schedule, job.spec.schedule)) # # update the cluster-wide image of the logical backup pod # image = "test-image-name" # patch_logical_backup_image = { # "data": { # "logical_backup_docker_image": image, # } # } # k8s.update_config(patch_logical_backup_image) # jobs = k8s.get_logical_backup_job().items # actual_image = jobs[0].spec.job_template.spec.template.spec.containers[0].image # self.assertEqual(actual_image, image, # "Expected job image {}, found {}".format(image, actual_image)) # # delete the logical backup cron job # pg_patch_disable_backup = { # "spec": { # "enableLogicalBackup": False, # } # } # k8s.api.custom_objects_api.patch_namespaced_custom_object( # "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_backup) # k8s.wait_for_logical_backup_job_deletion() # jobs = k8s.get_logical_backup_job().items # self.assertEqual(0, len(jobs), # "Expected 0 logical backup jobs, found {}".format(len(jobs))) # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_min_resource_limits(self): # ''' # Lower resource limits below configured minimum and let operator fix it # ''' # k8s = self.k8s # cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' # labels = 'spilo-role=master,' + cluster_label # _, failover_targets = k8s.get_pg_nodes(cluster_label) # # configure minimum boundaries for CPU and memory limits # minCPULimit = '500m' # minMemoryLimit = '500Mi' # patch_min_resource_limits = { # "data": { # "min_cpu_limit": minCPULimit, # "min_memory_limit": minMemoryLimit # } # } # k8s.update_config(patch_min_resource_limits) # # lower resource limits below minimum # pg_patch_resources = { # "spec": { # "resources": { # "requests": { # "cpu": "10m", # "memory": "50Mi" # }, # "limits": { # "cpu": "200m", # "memory": "200Mi" # } # } # } # } # k8s.api.custom_objects_api.patch_namespaced_custom_object( # "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) # k8s.wait_for_pod_failover(failover_targets, labels) # k8s.wait_for_pod_start('spilo-role=replica') # pods = k8s.api.core_v1.list_namespaced_pod( # 'default', label_selector=labels).items # self.assert_master_is_unique() # masterPod = pods[0] # self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit, # "Expected CPU limit {}, found {}" # .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu'])) # self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit, # "Expected memory limit {}, found {}" # .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory'])) # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_multi_namespace_support(self): # ''' # Create a customized Postgres cluster in a non-default namespace. # ''' # k8s = self.k8s # with open("manifests/complete-postgres-manifest.yaml", 'r+') as f: # pg_manifest = yaml.safe_load(f) # pg_manifest["metadata"]["namespace"] = self.namespace # yaml.dump(pg_manifest, f, Dumper=yaml.Dumper) # k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml") # k8s.wait_for_pod_start("spilo-role=master", self.namespace) # self.assert_master_is_unique(self.namespace, "acid-test-cluster") # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_node_readiness_label(self): # ''' # Remove node readiness label from master node. This must cause a failover. # ''' # k8s = self.k8s # cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' # readiness_label = 'lifecycle-status' # readiness_value = 'ready' # # get nodes of master and replica(s) (expected target of new master) # current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) # num_replicas = len(current_replica_nodes) # failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) # # add node_readiness_label to potential failover nodes # patch_readiness_label = { # "metadata": { # "labels": { # readiness_label: readiness_value # } # } # } # for failover_target in failover_targets: # k8s.api.core_v1.patch_node(failover_target, patch_readiness_label) # # define node_readiness_label in config map which should trigger a failover of the master # patch_readiness_label_config = { # "data": { # "node_readiness_label": readiness_label + ':' + readiness_value, # } # } # k8s.update_config(patch_readiness_label_config) # new_master_node, new_replica_nodes = self.assert_failover( # current_master_node, num_replicas, failover_targets, cluster_label) # # patch also node where master ran before # k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label) # # wait a little before proceeding with the pod distribution test # time.sleep(30) # # toggle pod anti affinity to move replica away from master node # self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_scaling(self): # ''' # Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. # ''' # k8s = self.k8s # labels = "application=spilo,cluster-name=acid-minimal-cluster" # k8s.wait_for_pg_to_scale(3) # self.assertEqual(3, k8s.count_pods_with_label(labels)) # self.assert_master_is_unique() # k8s.wait_for_pg_to_scale(2) # self.assertEqual(2, k8s.count_pods_with_label(labels)) # self.assert_master_is_unique() # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_service_annotations(self): # ''' # Create a Postgres cluster with service annotations and check them. # ''' # k8s = self.k8s # patch_custom_service_annotations = { # "data": { # "custom_service_annotations": "foo:bar", # } # } # k8s.update_config(patch_custom_service_annotations) # pg_patch_custom_annotations = { # "spec": { # "serviceAnnotations": { # "annotation.key": "value", # "foo": "bar", # } # } # } # k8s.api.custom_objects_api.patch_namespaced_custom_object( # "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_custom_annotations) # # wait a little before proceeding # time.sleep(30) # annotations = { # "annotation.key": "value", # "foo": "bar", # } # self.assertTrue(k8s.check_service_annotations( # "cluster-name=acid-minimal-cluster,spilo-role=master", annotations)) # self.assertTrue(k8s.check_service_annotations( # "cluster-name=acid-minimal-cluster,spilo-role=replica", annotations)) # # clean up # unpatch_custom_service_annotations = { # "data": { # "custom_service_annotations": "", # } # } # k8s.update_config(unpatch_custom_service_annotations) # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_statefulset_annotation_propagation(self): # ''' # Inject annotation to Postgresql CRD and check it's propagation to stateful set # ''' # k8s = self.k8s # cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' # patch_sset_propagate_annotations = { # "data": { # "downscaler_annotations": "deployment-time,downscaler/*", # } # } # k8s.update_config(patch_sset_propagate_annotations) # pg_crd_annotations = { # "metadata": { # "annotations": { # "deployment-time": "2020-04-30 12:00:00", # "downscaler/downtime_replicas": "0", # }, # } # } # k8s.api.custom_objects_api.patch_namespaced_custom_object( # "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_crd_annotations) # # wait a little before proceeding # time.sleep(60) # annotations = { # "deployment-time": "2020-04-30 12:00:00", # "downscaler/downtime_replicas": "0", # } # self.assertTrue(k8s.check_statefulset_annotations(cluster_label, annotations)) # @timeout_decorator.timeout(TEST_TIMEOUT_SEC) # def test_taint_based_eviction(self): # ''' # Add taint "postgres=:NoExecute" to node with master. This must cause a failover. # ''' # k8s = self.k8s # cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' # # get nodes of master and replica(s) (expected target of new master) # current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label) # num_replicas = len(current_replica_nodes) # failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes) # # taint node with postgres=:NoExecute to force failover # body = { # "spec": { # "taints": [ # { # "effect": "NoExecute", # "key": "postgres" # } # ] # } # } # # patch node and test if master is failing over to one of the expected nodes # k8s.api.core_v1.patch_node(current_master_node, body) # new_master_node, new_replica_nodes = self.assert_failover( # current_master_node, num_replicas, failover_targets, cluster_label) # # add toleration to pods # patch_toleration_config = { # "data": { # "toleration": "key:postgres,operator:Exists,effect:NoExecute" # } # } # k8s.update_config(patch_toleration_config) # # wait a little before proceeding with the pod distribution test # time.sleep(30) # # toggle pod anti affinity to move replica away from master node # self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label) @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_infrastructure_roles(self): ''' Test using external secrets for infrastructure roles ''' k8s = self.k8s # update infrastructure roles description secret_name = "postgresql-infrastructure-roles-old" roles = "secretname: postgresql-infrastructure-roles-new, userkey: user, rolekey: role, passwordkey: password" patch_infrastructure_roles = { "data": { "infrastructure_roles_secret_name": secret_name, "infrastructure_roles_secrets": roles, }, } k8s.update_config(patch_infrastructure_roles) # wait a little before proceeding time.sleep(30) # check that new roles are represented in the config by requesting the # operator configuration via API operator_pod = k8s.get_operator_pod() get_config_cmd = "wget --quiet -O - localhost:8080/config" result = k8s.exec_with_kubectl(operator_pod.metadata.name, get_config_cmd) roles_dict = (json.loads(result.stdout) .get("controller", {}) .get("InfrastructureRoles")) self.assertTrue("robot_zmon_acid_monitoring_new" in roles_dict) role = roles_dict["robot_zmon_acid_monitoring_new"] role.pop("Password", None) self.assertDictEqual(role, { "Name": "robot_zmon_acid_monitoring_new", "Flags": None, "MemberOf": ["robot_zmon_new"], "Parameters": None, "AdminRole": "", "Origin": 2, }) def get_failover_targets(self, master_node, replica_nodes): ''' If all pods live on the same node, failover will happen to other worker(s) ''' k8s = self.k8s failover_targets = [x for x in replica_nodes if x != master_node] if len(failover_targets) == 0: nodes = k8s.api.core_v1.list_node() for n in nodes.items: if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node: failover_targets.append(n.metadata.name) return failover_targets def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label): ''' Check if master is failing over. The replica should move first to be the switchover target ''' k8s = self.k8s k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label) k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label) self.assertNotEqual(current_master_node, new_master_node, "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets)) self.assertEqual(num_replicas, len(new_replica_nodes), "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes))) self.assert_master_is_unique() return new_master_node, new_replica_nodes def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"): ''' Check that there is a single pod in the k8s cluster with the label "spilo-role=master" To be called manually after operations that affect pods ''' k8s = self.k8s labels = 'spilo-role=master,cluster-name=' + clusterName num_of_master_pods = k8s.count_pods_with_label(labels, namespace) self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods)) def assert_distributed_pods(self, master_node, replica_nodes, cluster_label): ''' Other tests can lead to the situation that master and replica are on the same node. Toggle pod anti affinty to distribute pods accross nodes (replica in particular). ''' k8s = self.k8s failover_targets = self.get_failover_targets(master_node, replica_nodes) # enable pod anti affintiy in config map which should trigger movement of replica patch_enable_antiaffinity = { "data": { "enable_pod_antiaffinity": "true" } } k8s.update_config(patch_enable_antiaffinity) self.assert_failover( master_node, len(replica_nodes), failover_targets, cluster_label) # now disable pod anti affintiy again which will cause yet another failover patch_disable_antiaffinity = { "data": { "enable_pod_antiaffinity": "false" } } k8s.update_config(patch_disable_antiaffinity) k8s.wait_for_pod_start('spilo-role=master') k8s.wait_for_pod_start('spilo-role=replica') class K8sApi: def __init__(self): # https://github.com/kubernetes-client/python/issues/309 warnings.simplefilter("ignore", ResourceWarning) self.config = config.load_kube_config() self.k8s_client = client.ApiClient() self.core_v1 = client.CoreV1Api() self.apps_v1 = client.AppsV1Api() self.batch_v1_beta1 = client.BatchV1beta1Api() self.custom_objects_api = client.CustomObjectsApi() class K8s: ''' Wraps around K8 api client and helper methods. ''' RETRY_TIMEOUT_SEC = 10 def __init__(self): self.api = K8sApi() def get_pg_nodes(self, pg_cluster_name, namespace='default'): master_pod_node = '' replica_pod_nodes = [] podsList = self.api.core_v1.list_namespaced_pod(namespace, label_selector=pg_cluster_name) for pod in podsList.items: if pod.metadata.labels.get('spilo-role') == 'master': master_pod_node = pod.spec.node_name elif pod.metadata.labels.get('spilo-role') == 'replica': replica_pod_nodes.append(pod.spec.node_name) return master_pod_node, replica_pod_nodes def wait_for_operator_pod_start(self): self. wait_for_pod_start("name=postgres-operator") # HACK operator must register CRD and/or Sync existing PG clusters after start up # for local execution ~ 10 seconds suffices time.sleep(60) def get_operator_pod(self): pods = self.api.core_v1.list_namespaced_pod( 'default', label_selector='name=postgres-operator' ).items if pods: return pods[0] return None def get_operator_log(self): operator_pod = self.get_operator_pod() pod_name = operator_pod.metadata.name return self.api.core_v1.read_namespaced_pod_log( name=pod_name, namespace='default' ) def wait_for_pod_start(self, pod_labels, namespace='default'): pod_phase = 'No pod running' while pod_phase != 'Running': pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=pod_labels).items if pods: pod_phase = pods[0].status.phase if pods and pod_phase != 'Running': pod_name = pods[0].metadata.name response = self.api.core_v1.read_namespaced_pod( name=pod_name, namespace=namespace ) print("Pod description {}".format(response)) time.sleep(self.RETRY_TIMEOUT_SEC) def get_service_type(self, svc_labels, namespace='default'): svc_type = '' svcs = self.api.core_v1.list_namespaced_service(namespace, label_selector=svc_labels, limit=1).items for svc in svcs: svc_type = svc.spec.type return svc_type def check_service_annotations(self, svc_labels, annotations, namespace='default'): svcs = self.api.core_v1.list_namespaced_service(namespace, label_selector=svc_labels, limit=1).items for svc in svcs: for key, value in annotations.items(): if key not in svc.metadata.annotations or svc.metadata.annotations[key] != value: print("Expected key {} not found in annotations {}".format(key, svc.metadata.annotation)) return False return True def check_statefulset_annotations(self, sset_labels, annotations, namespace='default'): ssets = self.api.apps_v1.list_namespaced_stateful_set(namespace, label_selector=sset_labels, limit=1).items for sset in ssets: for key, value in annotations.items(): if key not in sset.metadata.annotations or sset.metadata.annotations[key] != value: print("Expected key {} not found in annotations {}".format(key, sset.metadata.annotation)) return False return True def wait_for_pg_to_scale(self, number_of_instances, namespace='default'): body = { "spec": { "numberOfInstances": number_of_instances } } _ = self.api.custom_objects_api.patch_namespaced_custom_object( "acid.zalan.do", "v1", namespace, "postgresqls", "acid-minimal-cluster", body) labels = 'application=spilo,cluster-name=acid-minimal-cluster' while self.count_pods_with_label(labels) != number_of_instances: time.sleep(self.RETRY_TIMEOUT_SEC) def wait_for_running_pods(self, labels, number, namespace=''): while self.count_pods_with_label(labels) != number: time.sleep(self.RETRY_TIMEOUT_SEC) def wait_for_pods_to_stop(self, labels, namespace=''): while self.count_pods_with_label(labels) != 0: time.sleep(self.RETRY_TIMEOUT_SEC) def wait_for_service(self, labels, namespace='default'): def get_services(): return self.api.core_v1.list_namespaced_service( namespace, label_selector=labels ).items while not get_services(): time.sleep(self.RETRY_TIMEOUT_SEC) def count_pods_with_label(self, labels, namespace='default'): return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items) def wait_for_pod_failover(self, failover_targets, labels, namespace='default'): pod_phase = 'Failing over' new_pod_node = '' while (pod_phase != 'Running') or (new_pod_node not in failover_targets): pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items if pods: new_pod_node = pods[0].spec.node_name pod_phase = pods[0].status.phase time.sleep(self.RETRY_TIMEOUT_SEC) def get_logical_backup_job(self, namespace='default'): return self.api.batch_v1_beta1.list_namespaced_cron_job(namespace, label_selector="application=spilo") def wait_for_logical_backup_job(self, expected_num_of_jobs): while (len(self.get_logical_backup_job().items) != expected_num_of_jobs): time.sleep(self.RETRY_TIMEOUT_SEC) def wait_for_logical_backup_job_deletion(self): self.wait_for_logical_backup_job(expected_num_of_jobs=0) def wait_for_logical_backup_job_creation(self): self.wait_for_logical_backup_job(expected_num_of_jobs=1) def delete_operator_pod(self): operator_pod = self.api.core_v1.list_namespaced_pod( 'default', label_selector="name=postgres-operator").items[0].metadata.name self.api.core_v1.delete_namespaced_pod(operator_pod, "default") # restart reloads the conf self.wait_for_operator_pod_start() def update_config(self, config_map_patch): self.api.core_v1.patch_namespaced_config_map("postgres-operator", "default", config_map_patch) self.delete_operator_pod() def create_with_kubectl(self, path): return subprocess.run( ["kubectl", "create", "-f", path], stdout=subprocess.PIPE, stderr=subprocess.PIPE) def exec_with_kubectl(self, pod, cmd): return subprocess.run(["./exec.sh", pod, cmd], stdout=subprocess.PIPE, stderr=subprocess.PIPE) def get_effective_pod_image(self, pod_name, namespace='default'): ''' Get the Spilo image pod currently uses. In case of lazy rolling updates it may differ from the one specified in the stateful set. ''' pod = self.api.core_v1.list_namespaced_pod( namespace, label_selector="statefulset.kubernetes.io/pod-name=" + pod_name) return pod.items[0].spec.containers[0].image if __name__ == '__main__': unittest.main()