merge commit

Sergey Dudoladov committed on 2020-03-10 12:17:13 +01:00, commit dd10127e5d
2 changed files with 261 additions and 171 deletions

View File

@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase):
         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         k8s.wait_for_pod_start('spilo-role=master')
+        k8s.wait_for_pod_start('spilo-role=replica')

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_enable_load_balancer(self):
@ -108,139 +109,59 @@ class EndToEndTestCase(unittest.TestCase):
                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type))

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
-        '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-        _, failover_targets = k8s.get_pg_nodes(cluster_label)
-
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '500m'
-        minMemoryLimit = '500Mi'
-        patch_min_resource_limits = {
-            "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
-            }
-        }
-        k8s.update_config(patch_min_resource_limits)
-
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        k8s.wait_for_master_failover(failover_targets)
-
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector='spilo-role=master,' + cluster_label).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
-
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
-                         "Expected CPU limit {}, found {}"
-                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
-                         "Expected memory limit {}, found {}"
-                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
-        '''
-        k8s = self.k8s
-
-        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
-            pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
-            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
-
-        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_scaling(self):
-        '''
-        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
-        '''
-        k8s = self.k8s
-        labels = "cluster-name=acid-minimal-cluster"
-
-        k8s.wait_for_pg_to_scale(3)
-        self.assertEqual(3, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-        k8s.wait_for_pg_to_scale(2)
-        self.assertEqual(2, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_taint_based_eviction(self):
-        '''
-        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
-
-        # taint node with postgres=:NoExecute to force failover
-        body = {
-            "spec": {
-                "taints": [
-                    {
-                        "effect": "NoExecute",
-                        "key": "postgres"
-                    }
-                ]
-            }
-        }
-
-        # patch node and test if master is failing over to one of the expected nodes
-        k8s.api.core_v1.patch_node(current_master_node, body)
-        k8s.wait_for_master_failover(failover_targets)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-
-        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(current_master_node, new_master_node,
-                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
-        self.assertEqual(num_replicas, len(new_replica_nodes),
-                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
-        self.assert_master_is_unique()
-
-        # undo the tainting
-        body = {
-            "spec": {
-                "taints": []
-            }
-        }
-        k8s.api.core_v1.patch_node(new_master_node, body)
+    def test_lazy_image_update(self):
+        '''
+        Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
+        until they are recreated for reasons other than operator's activity. That works because the operator uses
+        "onDelete" pod update policy for stateful sets.
+
+        The test covers:
+        1) enabling lazy upgrade in existing operator deployment
+        2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
+        '''
+        k8s = self.k8s
+        pod0 = "acid-minimal-cluster-0"
+        pod1 = "acid-minimal-cluster-1"
+
+        # enable lazy update
+        patch_lazy_image_upgrade = {
+            "data": {
+                "enable_lazy_image_upgrade": "true",
+                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
+            }
+        }
+        k8s.update_config(patch_lazy_image_upgrade)
+
+        # wait for sts update
+        time.sleep(60)
+
+        # restart the pod to get a container with the new image
+        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
+        time.sleep(60)
+
+        # lazy update works if the restarted pod and older pods have different Spilo versions,
+        # i.e. the update did not immediately affect all pods
+        new_image = k8s.get_effective_pod_image(pod0)
+        old_image = k8s.get_effective_pod_image(pod1)
+        self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
+
+        # clean up
+        unpatch_lazy_image_upgrade = {
+            "data": {
+                "enable_lazy_image_upgrade": "false",
+            }
+        }
+        k8s.update_config(unpatch_lazy_image_upgrade)
+
+        # at this point the operator will complete the normal rolling update,
+        # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
+        time.sleep(60)
+
+        image0 = k8s.get_effective_pod_image(pod0)
+        image1 = k8s.get_effective_pod_image(pod1)
+        self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_logical_backup_cron_job(self):
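
The lazy update above only works because the operator configures its stateful sets with the "onDelete" update strategy (Kubernetes value: OnDelete), so a changed pod template is picked up only when a pod is deleted for some other reason. A minimal sketch of how a test could assert that precondition with the Kubernetes Python client; the stateful set name and namespace below are assumptions for illustration, not part of this commit:

from kubernetes import client, config

def assert_on_delete_update_strategy(sts_name="acid-minimal-cluster", namespace="default"):
    # sts_name and namespace are assumed defaults, not taken from this diff.
    # Read the stateful set the operator created for the cluster.
    config.load_kube_config()
    apps_v1 = client.AppsV1Api()
    sts = apps_v1.read_namespaced_stateful_set(sts_name, namespace)

    # With OnDelete the stateful set controller never restarts pods after a
    # template change; pods only get the new Spilo image once something deletes them.
    assert sts.spec.update_strategy.type == "OnDelete", \
        "expected OnDelete update strategy, found {}".format(sts.spec.update_strategy.type)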
@ -306,6 +227,133 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(0, len(jobs),
                          "Expected 0 logical backup jobs, found {}".format(len(jobs)))

+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_min_resource_limits(self):
+        '''
+        Lower resource limits below configured minimum and let operator fix it
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+
+        # configure minimum boundaries for CPU and memory limits
+        minCPULimit = '500m'
+        minMemoryLimit = '500Mi'
+        patch_min_resource_limits = {
+            "data": {
+                "min_cpu_limit": minCPULimit,
+                "min_memory_limit": minMemoryLimit
+            }
+        }
+        k8s.update_config(patch_min_resource_limits)
+
+        # lower resource limits below minimum
+        pg_patch_resources = {
+            "spec": {
+                "resources": {
+                    "requests": {
+                        "cpu": "10m",
+                        "memory": "50Mi"
+                    },
+                    "limits": {
+                        "cpu": "200m",
+                        "memory": "200Mi"
+                    }
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
+        k8s.wait_for_pod_failover(failover_targets, labels)
+        k8s.wait_for_pod_start('spilo-role=replica')
+
+        pods = k8s.api.core_v1.list_namespaced_pod(
+            'default', label_selector=labels).items
+        self.assert_master_is_unique()
+        masterPod = pods[0]
+
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
+                         "Expected CPU limit {}, found {}"
+                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
+                         "Expected memory limit {}, found {}"
+                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_multi_namespace_support(self):
+        '''
+        Create a customized Postgres cluster in a non-default namespace.
+        '''
+        k8s = self.k8s
+
+        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
+            pg_manifest = yaml.safe_load(f)
+            pg_manifest["metadata"]["namespace"] = self.namespace
+            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
+
+        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
+        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
+        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+        Remove node readiness label from master node. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in config map which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # patch also node where master ran before
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_scaling(self):
+        '''
+        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
+        '''
+        k8s = self.k8s
+        labels = "cluster-name=acid-minimal-cluster"
+
+        k8s.wait_for_pg_to_scale(3)
+        self.assertEqual(3, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
+        k8s.wait_for_pg_to_scale(2)
+        self.assertEqual(2, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_service_annotations(self):
         '''
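
Most of the tests added here steer the operator solely through its ConfigMap via k8s.update_config(). That helper is not part of this diff; a rough sketch of the idea, with the ConfigMap name "postgres-operator" and the label selector "name=postgres-operator" assumed purely for illustration:

import time
from kubernetes import client, config

def update_operator_config(patch, namespace="default"):
    # ConfigMap name and label selector below are assumptions, not from this diff.
    # Apply a partial update such as {"data": {"min_cpu_limit": "500m"}} to the
    # operator ConfigMap ...
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    core_v1.patch_namespaced_config_map("postgres-operator", namespace, patch)

    # ... and delete the operator pod so its deployment restarts it with the new settings.
    operator_pods = core_v1.list_namespaced_pod(
        namespace, label_selector="name=postgres-operator").items
    if operator_pods:
        core_v1.delete_namespaced_pod(operator_pods[0].metadata.name, namespace)
    time.sleep(5)  # give the deployment a moment to spawn a fresh operator pod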
@ -347,72 +395,115 @@ class EndToEndTestCase(unittest.TestCase):
         k8s.update_config(unpatch_custom_service_annotations)

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_lazy_image_update(self):
-        '''
-        Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
-        until they are recreated for reasons other than operator's activity. That works because the operator uses
-        "onDelete" pod update policy for stateful sets.
-
-        The test covers:
-        1) enabling lazy upgrade in existing operator deployment
-        2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
-        '''
-        k8s = self.k8s
-        pod0 = "acid-minimal-cluster-0"
-        pod1 = "acid-minimal-cluster-1"
-
-        # enable lazy update
-        patch_lazy_image_upgrade = {
-            "data": {
-                "enable_lazy_image_upgrade": "true",
-                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
-            }
-        }
-        k8s.update_config(patch_lazy_image_upgrade)
-
-        # wait for sts update
-        time.sleep(60)
-
-        # restart the pod to get a container with the new image
-        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
-        time.sleep(60)
-
-        # lazy update works if the restarted pod and older pods have different Spilo versions,
-        # i.e. the update did not immediately affect all pods
-        new_image = k8s.get_effective_pod_image(pod0)
-        old_image = k8s.get_effective_pod_image(pod1)
-        self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
-
-        # clean up
-        unpatch_lazy_image_upgrade = {
-            "data": {
-                "enable_lazy_image_upgrade": "false",
-            }
-        }
-        k8s.update_config(unpatch_lazy_image_upgrade)
-
-        # at this point the operator will complete the normal rolling update,
-        # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
-        time.sleep(60)
-
-        image0 = k8s.get_effective_pod_image(pod0)
-        image1 = k8s.get_effective_pod_image(pod1)
-        self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))
+    def test_taint_based_eviction(self):
+        '''
+        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # taint node with postgres=:NoExecute to force failover
+        body = {
+            "spec": {
+                "taints": [
+                    {
+                        "effect": "NoExecute",
+                        "key": "postgres"
+                    }
+                ]
+            }
+        }
+
+        # patch node and test if master is failing over to one of the expected nodes
+        k8s.api.core_v1.patch_node(current_master_node, body)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # add toleration to pods
+        patch_toleration_config = {
+            "data": {
+                "toleration": "key:postgres,operator:Exists,effect:NoExecute"
+            }
+        }
+        k8s.update_config(patch_toleration_config)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+        If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
+    def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label):
+        '''
+        Check if master is failing over. The replica should move first to be the switchover target
+        '''
+        k8s = self.k8s
+        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        return new_master_node, new_replica_nodes

     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
         Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
         To be called manually after operations that affect pods
         '''
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName

         num_of_master_pods = k8s.count_pods_with_label(labels, namespace)
         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))
+
+    def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
+        '''
+        Other tests can lead to the situation that master and replica are on the same node.
+        Toggle pod anti affinity to distribute pods across nodes (replica in particular).
+        '''
+        k8s = self.k8s
+        failover_targets = self.get_failover_targets(master_node, replica_nodes)
+
+        # enable pod anti affinity in config map which should trigger movement of replica
+        patch_enable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "true"
+            }
+        }
+        k8s.update_config(patch_enable_antiaffinity)
+        self.assert_failover(
+            master_node, len(replica_nodes), failover_targets, cluster_label)
+
+        # disable pod anti affinity again
+        patch_disable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "false"
+            }
+        }
+        k8s.update_config(patch_disable_antiaffinity)


 class K8sApi:
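
test_taint_based_eviction now compensates for the taint with a toleration passed through the operator ConfigMap as a single string. The operator, not this test, turns that string into a pod-spec toleration; a hypothetical helper showing the intended mapping (not part of the operator or the test suite):

def toleration_from_config(value):
    # Hypothetical illustration only:
    # "key:postgres,operator:Exists,effect:NoExecute" ->
    # {"key": "postgres", "operator": "Exists", "effect": "NoExecute"}
    toleration = {}
    for pair in value.split(','):
        name, _, setting = pair.partition(':')
        toleration[name.strip()] = setting.strip()
    return toleration

assert toleration_from_config("key:postgres,operator:Exists,effect:NoExecute") == {
    "key": "postgres", "operator": "Exists", "effect": "NoExecute"}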
@ -500,15 +591,14 @@ class K8s:
     def count_pods_with_label(self, labels, namespace='default'):
         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items)

-    def wait_for_master_failover(self, expected_master_nodes, namespace='default'):
+    def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
-        new_master_node = ''
-        labels = 'spilo-role=master,cluster-name=acid-minimal-cluster'
+        new_pod_node = ''

-        while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
+        while (pod_phase != 'Running') or (new_pod_node not in failover_targets):
             pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
             if pods:
-                new_master_node = pods[0].spec.node_name
+                new_pod_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
             time.sleep(self.RETRY_TIMEOUT_SEC)
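
wait_for_pod_failover polls until the labelled pod is Running on one of the expected nodes and relies on the surrounding timeout_decorator to bound the wait. A self-contained variant with its own deadline, offered only as a sketch of the same polling pattern:

import time
from kubernetes import client, config

def wait_for_pod_on_nodes(labels, expected_nodes, namespace="default",
                          timeout_sec=300, retry_sec=5):
    # Sketch only; the real helper relies on the test-level timeout instead.
    # Poll until a pod matching `labels` runs on one of `expected_nodes`,
    # or give up once `timeout_sec` has passed.
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        pods = core_v1.list_namespaced_pod(namespace, label_selector=labels).items
        if pods and pods[0].status.phase == 'Running' and pods[0].spec.node_name in expected_nodes:
            return pods[0]
        time.sleep(retry_sec)
    raise TimeoutError("no Running pod with labels {} on nodes {}".format(labels, expected_nodes))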

View File

@ -5,7 +5,7 @@ import (
 	"time"

 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }

 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)

 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 }
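
The Go change lets the error from attemptToMoveMasterPodsOffNode travel through the retry loop, so the final warning names the actual cause instead of a generic timeout message. The same retry-until-deadline pattern expressed in Python, purely as an illustration of the idea and not of the retryutil API:

import time

def retry(interval_sec, timeout_sec, attempt):
    # Illustration of the pattern only, not the Go retryutil library.
    # Call `attempt` until it succeeds or the deadline expires, keeping the last
    # error so the caller can report the real cause of the failure.
    deadline = time.time() + timeout_sec
    last_err = None
    while time.time() < deadline:
        try:
            return attempt()
        except Exception as err:
            last_err = err
            time.sleep(interval_sec)
    raise TimeoutError("retries exhausted: {}".format(last_err))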