merge commit

Sergey Dudoladov 2020-03-10 12:17:13 +01:00
commit dd10127e5d
2 changed files with 261 additions and 171 deletions


@@ -57,6 +57,7 @@ class EndToEndTestCase(unittest.TestCase):
         k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         k8s.wait_for_pod_start('spilo-role=master')
+        k8s.wait_for_pod_start('spilo-role=replica')

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_enable_load_balancer(self):
@@ -108,139 +109,59 @@ class EndToEndTestCase(unittest.TestCase):
                          "Expected ClusterIP service type for replica, found {}".format(repl_svc_type))

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_min_resource_limits(self):
-        '''
-        Lower resource limits below configured minimum and let operator fix it
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-        _, failover_targets = k8s.get_pg_nodes(cluster_label)
-
-        # configure minimum boundaries for CPU and memory limits
-        minCPULimit = '500m'
-        minMemoryLimit = '500Mi'
-        patch_min_resource_limits = {
-            "data": {
-                "min_cpu_limit": minCPULimit,
-                "min_memory_limit": minMemoryLimit
-            }
-        }
-        k8s.update_config(patch_min_resource_limits)
-
-        # lower resource limits below minimum
-        pg_patch_resources = {
-            "spec": {
-                "resources": {
-                    "requests": {
-                        "cpu": "10m",
-                        "memory": "50Mi"
-                    },
-                    "limits": {
-                        "cpu": "200m",
-                        "memory": "200Mi"
-                    }
-                }
-            }
-        }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
-        k8s.wait_for_master_failover(failover_targets)
-
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector='spilo-role=master,' + cluster_label).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
-
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
-                         "Expected CPU limit {}, found {}"
-                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
-        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
-                         "Expected memory limit {}, found {}"
-                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
-        '''
-        Create a customized Postgres cluster in a non-default namespace.
-        '''
-        k8s = self.k8s
-
-        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
-            pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
-            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
-
-        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_scaling(self):
-        '''
-        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
-        '''
-        k8s = self.k8s
-        labels = "cluster-name=acid-minimal-cluster"
-
-        k8s.wait_for_pg_to_scale(3)
-        self.assertEqual(3, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-        k8s.wait_for_pg_to_scale(2)
-        self.assertEqual(2, k8s.count_pods_with_label(labels))
-        self.assert_master_is_unique()
-
-    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_taint_based_eviction(self):
-        '''
-        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
-        '''
-        k8s = self.k8s
-        cluster_label = 'cluster-name=acid-minimal-cluster'
-
-        # get nodes of master and replica(s) (expected target of new master)
-        current_master_node, failover_targets = k8s.get_pg_nodes(cluster_label)
-        num_replicas = len(failover_targets)
-
-        # if all pods live on the same node, failover will happen to other worker(s)
-        failover_targets = [x for x in failover_targets if x != current_master_node]
-        if len(failover_targets) == 0:
-            nodes = k8s.api.core_v1.list_node()
-            for n in nodes.items:
-                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != current_master_node:
-                    failover_targets.append(n.metadata.name)
-
-        # taint node with postgres=:NoExecute to force failover
-        body = {
-            "spec": {
-                "taints": [
-                    {
-                        "effect": "NoExecute",
-                        "key": "postgres"
-                    }
-                ]
-            }
-        }
-
-        # patch node and test if master is failing over to one of the expected nodes
-        k8s.api.core_v1.patch_node(current_master_node, body)
-        k8s.wait_for_master_failover(failover_targets)
-        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
-
-        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
-        self.assertNotEqual(current_master_node, new_master_node,
-                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
-        self.assertEqual(num_replicas, len(new_replica_nodes),
-                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
-        self.assert_master_is_unique()
-
-        # undo the tainting
-        body = {
-            "spec": {
-                "taints": []
-            }
-        }
-        k8s.api.core_v1.patch_node(new_master_node, body)
+    def test_lazy_image_update(self):
+        '''
+        Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
+        until they are recreated for reasons other than operator's activity. That works because the operator uses
+        "onDelete" pod update policy for stateful sets.
+
+        The test covers:
+        1) enabling lazy upgrade in existing operator deployment
+        2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
+        '''
+        k8s = self.k8s
+        pod0 = "acid-minimal-cluster-0"
+        pod1 = "acid-minimal-cluster-1"
+
+        # enable lazy update
+        patch_lazy_image_upgrade = {
+            "data": {
+                "enable_lazy_image_upgrade": "true",
+                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
+            }
+        }
+        k8s.update_config(patch_lazy_image_upgrade)
+
+        # wait for sts update
+        time.sleep(60)
+
+        # restart the pod to get a container with the new image
+        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
+        time.sleep(60)
+
+        # lazy update works if the restarted pod and older pods have different Spilo versions
+        # i.e. the update did not immediately affect all pods
+        new_image = k8s.get_effective_pod_image(pod0)
+        old_image = k8s.get_effective_pod_image(pod1)
+        self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
+
+        # clean up
+        unpatch_lazy_image_upgrade = {
+            "data": {
+                "enable_lazy_image_upgrade": "false",
+            }
+        }
+        k8s.update_config(unpatch_lazy_image_upgrade)
+
+        # at this point operator will complete the normal rolling update
+        # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
+        time.sleep(60)
+
+        image0 = k8s.get_effective_pod_image(pod0)
+        image1 = k8s.get_effective_pod_image(pod1)
+        self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_logical_backup_cron_job(self):
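
The lazy-update test above relies on a get_effective_pod_image helper of the K8s wrapper class that this excerpt of the diff does not show. A minimal sketch of what such a helper could look like, assuming only that self.api.core_v1 is an initialized CoreV1Api client (the body below is an assumption, not the committed implementation):

    # Hypothetical sketch, not part of this diff: resolve the image a pod is actually running.
    def get_effective_pod_image(self, pod_name, namespace='default'):
        '''
        Return the image of the first (Spilo) container of the named pod,
        as reported by the Kubernetes API server.
        '''
        pod = self.api.core_v1.read_namespaced_pod(pod_name, namespace)
        return pod.spec.containers[0].image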
@@ -306,6 +227,133 @@ class EndToEndTestCase(unittest.TestCase):
         self.assertEqual(0, len(jobs),
                          "Expected 0 logical backup jobs, found {}".format(len(jobs)))

+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_min_resource_limits(self):
+        '''
+        Lower resource limits below configured minimum and let operator fix it
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        _, failover_targets = k8s.get_pg_nodes(cluster_label)
+
+        # configure minimum boundaries for CPU and memory limits
+        minCPULimit = '500m'
+        minMemoryLimit = '500Mi'
+        patch_min_resource_limits = {
+            "data": {
+                "min_cpu_limit": minCPULimit,
+                "min_memory_limit": minMemoryLimit
+            }
+        }
+        k8s.update_config(patch_min_resource_limits)
+
+        # lower resource limits below minimum
+        pg_patch_resources = {
+            "spec": {
+                "resources": {
+                    "requests": {
+                        "cpu": "10m",
+                        "memory": "50Mi"
+                    },
+                    "limits": {
+                        "cpu": "200m",
+                        "memory": "200Mi"
+                    }
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources)
+        k8s.wait_for_pod_failover(failover_targets, labels)
+        k8s.wait_for_pod_start('spilo-role=replica')
+
+        pods = k8s.api.core_v1.list_namespaced_pod(
+            'default', label_selector=labels).items
+        self.assert_master_is_unique()
+        masterPod = pods[0]
+
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit,
+                         "Expected CPU limit {}, found {}"
+                         .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu']))
+        self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit,
+                         "Expected memory limit {}, found {}"
+                         .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory']))
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_multi_namespace_support(self):
+        '''
+        Create a customized Postgres cluster in a non-default namespace.
+        '''
+        k8s = self.k8s
+
+        with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
+            pg_manifest = yaml.safe_load(f)
+            pg_manifest["metadata"]["namespace"] = self.namespace
+            yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
+
+        k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
+        k8s.wait_for_pod_start("spilo-role=master", self.namespace)
+        self.assert_master_is_unique(self.namespace, "acid-test-cluster")
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_node_readiness_label(self):
+        '''
+        Remove node readiness label from master node. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+        labels = 'spilo-role=master,' + cluster_label
+        readiness_label = 'lifecycle-status'
+        readiness_value = 'ready'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # add node_readiness_label to potential failover nodes
+        patch_readiness_label = {
+            "metadata": {
+                "labels": {
+                    readiness_label: readiness_value
+                }
+            }
+        }
+        for failover_target in failover_targets:
+            k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)
+
+        # define node_readiness_label in config map which should trigger a failover of the master
+        patch_readiness_label_config = {
+            "data": {
+                "node_readiness_label": readiness_label + ':' + readiness_value,
+            }
+        }
+        k8s.update_config(patch_readiness_label_config)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # patch also node where master ran before
+        k8s.api.core_v1.patch_node(current_master_node, patch_readiness_label)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_scaling(self):
+        '''
+        Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime.
+        '''
+        k8s = self.k8s
+        labels = "cluster-name=acid-minimal-cluster"
+
+        k8s.wait_for_pg_to_scale(3)
+        self.assertEqual(3, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
+        k8s.wait_for_pg_to_scale(2)
+        self.assertEqual(2, k8s.count_pods_with_label(labels))
+        self.assert_master_is_unique()
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_service_annotations(self):
         '''
@@ -347,72 +395,115 @@ class EndToEndTestCase(unittest.TestCase):
         k8s.update_config(unpatch_custom_service_annotations)

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_lazy_image_update(self):
-        '''
-        Test lazy update for the Spilo image: operator changes a stateful set but lets pods run with the old image
-        until they are recreated for reasons other than operator's activity. That works because the operator uses
-        "onDelete" pod update policy for stateful sets.
-
-        The test covers:
-        1) enabling lazy upgrade in existing operator deployment
-        2) forcing the normal rolling upgrade by changing the operator configmap and restarting its pod
-        '''
-        k8s = self.k8s
-        pod0 = "acid-minimal-cluster-0"
-        pod1 = "acid-minimal-cluster-1"
-
-        # enable lazy update
-        patch_lazy_image_upgrade = {
-            "data": {
-                "enable_lazy_image_upgrade": "true",
-                "docker_image": "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16"
-            }
-        }
-        k8s.update_config(patch_lazy_image_upgrade)
-
-        # wait for sts update
-        time.sleep(60)
-
-        # restart the pod to get a container with the new image
-        k8s.api.core_v1.delete_namespaced_pod(pod0, "default")
-        time.sleep(60)
-
-        # lazy update works if the restarted pod and older pods have different Spilo versions
-        # i.e. the update did not immediately affect all pods
-        new_image = k8s.get_effective_pod_image(pod0)
-        old_image = k8s.get_effective_pod_image(pod1)
-        self.assertNotEqual(old_image, new_image, "Lazy update failed: pods have the same image {}".format(new_image))
-
-        # clean up
-        unpatch_lazy_image_upgrade = {
-            "data": {
-                "enable_lazy_image_upgrade": "false",
-            }
-        }
-        k8s.update_config(unpatch_lazy_image_upgrade)
-
-        # at this point operator will complete the normal rolling update
-        # so we additionally test if disabling the lazy update (forcing the normal rolling update) works
-        time.sleep(60)
-
-        image0 = k8s.get_effective_pod_image(pod0)
-        image1 = k8s.get_effective_pod_image(pod1)
-        self.assertEqual(image0, image1, "Disabling lazy update failed: pods still have different images {} and {}".format(image0, image1))
+    def test_taint_based_eviction(self):
+        '''
+        Add taint "postgres=:NoExecute" to node with master. This must cause a failover.
+        '''
+        k8s = self.k8s
+        cluster_label = 'cluster-name=acid-minimal-cluster'
+
+        # get nodes of master and replica(s) (expected target of new master)
+        current_master_node, current_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        num_replicas = len(current_replica_nodes)
+        failover_targets = self.get_failover_targets(current_master_node, current_replica_nodes)
+
+        # taint node with postgres=:NoExecute to force failover
+        body = {
+            "spec": {
+                "taints": [
+                    {
+                        "effect": "NoExecute",
+                        "key": "postgres"
+                    }
+                ]
+            }
+        }
+
+        # patch node and test if master is failing over to one of the expected nodes
+        k8s.api.core_v1.patch_node(current_master_node, body)
+        new_master_node, new_replica_nodes = self.assert_failover(
+            current_master_node, num_replicas, failover_targets, cluster_label)
+
+        # add toleration to pods
+        patch_toleration_config = {
+            "data": {
+                "toleration": "key:postgres,operator:Exists,effect:NoExecute"
+            }
+        }
+        k8s.update_config(patch_toleration_config)
+
+        # toggle pod anti affinity to move replica away from master node
+        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
+
+    def get_failover_targets(self, master_node, replica_nodes):
+        '''
+        If all pods live on the same node, failover will happen to other worker(s)
+        '''
+        k8s = self.k8s
+
+        failover_targets = [x for x in replica_nodes if x != master_node]
+        if len(failover_targets) == 0:
+            nodes = k8s.api.core_v1.list_node()
+            for n in nodes.items:
+                if "node-role.kubernetes.io/master" not in n.metadata.labels and n.metadata.name != master_node:
+                    failover_targets.append(n.metadata.name)
+
+        return failover_targets
+
+    def assert_failover(self, current_master_node, num_replicas, failover_targets, cluster_label):
+        '''
+        Check if master is failing over. The replica should move first to be the switchover target
+        '''
+        k8s = self.k8s
+        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+        new_master_node, new_replica_nodes = k8s.get_pg_nodes(cluster_label)
+        self.assertNotEqual(current_master_node, new_master_node,
+                            "Master on {} did not fail over to one of {}".format(current_master_node, failover_targets))
+        self.assertEqual(num_replicas, len(new_replica_nodes),
+                         "Expected {} replicas, found {}".format(num_replicas, len(new_replica_nodes)))
+        self.assert_master_is_unique()
+
+        return new_master_node, new_replica_nodes

     def assert_master_is_unique(self, namespace='default', clusterName="acid-minimal-cluster"):
         '''
         Check that there is a single pod in the k8s cluster with the label "spilo-role=master"
         To be called manually after operations that affect pods
         '''
         k8s = self.k8s
         labels = 'spilo-role=master,cluster-name=' + clusterName

         num_of_master_pods = k8s.count_pods_with_label(labels, namespace)
         self.assertEqual(num_of_master_pods, 1, "Expected 1 master pod, found {}".format(num_of_master_pods))

+    def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
+        '''
+        Other tests can lead to the situation that master and replica are on the same node.
+        Toggle pod anti affinity to distribute pods across nodes (replica in particular).
+        '''
+        k8s = self.k8s
+        failover_targets = self.get_failover_targets(master_node, replica_nodes)
+
+        # enable pod anti affinity in config map which should trigger movement of replica
+        patch_enable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "true"
+            }
+        }
+        k8s.update_config(patch_enable_antiaffinity)
+        self.assert_failover(
+            master_node, len(replica_nodes), failover_targets, cluster_label)
+
+        # disable pod anti affinity again
+        patch_disable_antiaffinity = {
+            "data": {
+                "enable_pod_antiaffinity": "false"
+            }
+        }
+        k8s.update_config(patch_disable_antiaffinity)
+

 class K8sApi:
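
Taken together, the new helpers factor the failover plumbing out of the individual tests. A hypothetical test body (not part of this commit) would compose them roughly as follows, with the disruptive change itself left as a placeholder; the signatures match those shown in the hunk above:

        # Hypothetical composition of the new helpers; the disruptive action is elided.
        k8s = self.k8s
        cluster_label = 'cluster-name=acid-minimal-cluster'

        master_node, replica_nodes = k8s.get_pg_nodes(cluster_label)
        failover_targets = self.get_failover_targets(master_node, replica_nodes)

        # ... apply a change that drives the master off its current node ...

        new_master_node, new_replica_nodes = self.assert_failover(
            master_node, len(replica_nodes), failover_targets, cluster_label)
        self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)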
@@ -500,15 +591,14 @@ class K8s:
     def count_pods_with_label(self, labels, namespace='default'):
         return len(self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items)

-    def wait_for_master_failover(self, expected_master_nodes, namespace='default'):
+    def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
-        new_master_node = ''
-        labels = 'spilo-role=master,cluster-name=acid-minimal-cluster'
+        new_pod_node = ''

-        while (pod_phase != 'Running') or (new_master_node not in expected_master_nodes):
+        while (pod_phase != 'Running') or (new_pod_node not in failover_targets):
             pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
             if pods:
-                new_master_node = pods[0].spec.node_name
+                new_pod_node = pods[0].spec.node_name
                 pod_phase = pods[0].status.phase
             time.sleep(self.RETRY_TIMEOUT_SEC)
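
Because the label selector is now an argument instead of a hardcoded master selector, the renamed helper is no longer tied to master pods. Hypothetical calls illustrating this; only the master variant actually appears in this commit (in assert_failover):

        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)
        k8s.wait_for_pod_failover(failover_targets, 'spilo-role=replica,' + cluster_label)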


@@ -5,7 +5,7 @@ import (
 	"time"

 	"github.com/zalando/postgres-operator/pkg/util/retryutil"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -172,19 +172,19 @@ func (c *Controller) nodeDelete(obj interface{}) {
 }

 func (c *Controller) moveMasterPodsOffNode(node *v1.Node) {
+	// retry to move master until configured timeout is reached
 	err := retryutil.Retry(1*time.Minute, c.opConfig.MasterPodMoveTimeout,
 		func() (bool, error) {
 			err := c.attemptToMoveMasterPodsOffNode(node)
 			if err != nil {
-				return false, fmt.Errorf("unable to move master pods off the unschedulable node; will retry after delay of 1 minute")
+				return false, err
 			}
 			return true, nil
 		},
 	)

 	if err != nil {
-		c.logger.Warningf("failed to move master pods from the node %q: timeout of %v minutes expired", node.Name, c.opConfig.MasterPodMoveTimeout)
+		c.logger.Warningf("failed to move master pods from the node %q: %v", node.Name, err)
 	}
 }