change merging nodeAffinity expression
commit 541a484264 (parent ced0eae14a)
@@ -343,8 +343,13 @@ configuration they are grouped under the `kubernetes` key.
   considered `ready`. The operator uses values of those labels to detect the
   start of the Kubernetes cluster upgrade procedure and move master pods off
   the nodes to be decommissioned. When the set is not empty, the operator also
-  assigns the `Affinity` clause to the Postgres pods to be scheduled only on
-  `ready` nodes. The default is empty.
+  assigns the `nodeAffinity` clause to the Postgres pods to be scheduled only
+  on `ready` nodes. If a `nodeAffinity` is specified in the postgres cluster
+  manifest as well, the `nodeSelectorTerms` will get merged. If the
+  `nodeAffinity` of the manifest contains only one `matchExpressions` slice,
+  the node readiness label expressions will be moved there (AND condition).
+  When multiple selector expressions are defined in the manifest, an extra
+  `matchExpressions` section is appended (OR condition). The default is empty.

 * **toleration**
   a dictionary that should contain `key`, `operator`, `value` and
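As a minimal sketch of the AND case described in the new paragraph (the `lifecycle-status:ready` readiness label and the `environment` expression are illustrative values, not defaults), a manifest with a single `matchExpressions` slice would have the readiness expression merged into that slice, so both conditions must hold:

```yaml
# Assumed operator setting (example values): node_readiness_label: "lifecycle-status:ready"
# Assumed cluster manifest nodeAffinity: one nodeSelectorTerm requiring environment=production.
#
# Affinity the operator would then set on the Postgres pods:
requiredDuringSchedulingIgnoredDuringExecution:
  nodeSelectorTerms:
  - matchExpressions:
    - key: environment          # expression from the cluster manifest
      operator: In
      values: [production]
    - key: lifecycle-status     # readiness label expression merged in (AND)
      operator: In
      values: [ready]
```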
@@ -703,6 +703,10 @@ spec:
           - pci
 ```

+If you need to define a `nodeAffinity` for all your Postgres clusters, use the
+`node_readiness_label` configuration option, which allows you to define a list
+of key-value pairs.
+
 ## In-place major version upgrade

 Starting with Spilo 13, operator supports in-place major version upgrade to a
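A hedged sketch of that setup, assuming the option is set through the operator's ConfigMap (the ConfigMap name and the label key/value are examples; the `key:value` string format matches the `readiness_label + ':' + readiness_value` value used in the e2e tests changed in this commit):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-operator        # assumed name of the operator's ConfigMap
data:
  # every Postgres pod then gets a nodeAffinity requiring this node label
  node_readiness_label: "lifecycle-status:ready"
```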
@@ -880,11 +880,9 @@ class EndToEndTestCase(unittest.TestCase):

         # verify we are in good state from potential previous tests
         self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running")
-        self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running")

         # get nodes of master and replica(s)
         master_nodes, replica_nodes = k8s.get_cluster_nodes()
-
         self.assertNotEqual(master_nodes, [])
         self.assertNotEqual(replica_nodes, [])

@@ -975,6 +973,9 @@ class EndToEndTestCase(unittest.TestCase):
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise

+        # toggle pod anti affinity to make sure replica and master run on separate nodes
+        self.assert_distributed_pods(replica_nodes)
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_node_readiness_label(self):
         '''
@@ -987,7 +988,6 @@ class EndToEndTestCase(unittest.TestCase):

         # verify we are in good state from potential previous tests
         self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running")
-        self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running")

         # get nodes of master and replica(s) (expected target of new master)
         master_nodes, replica_nodes = k8s.get_cluster_nodes()
@@ -1009,7 +1009,7 @@ class EndToEndTestCase(unittest.TestCase):
             for failover_target in failover_targets:
                 k8s.api.core_v1.patch_node(failover_target, patch_readiness_label)

-            # define node_readiness_label in config map which should trigger a failover of the master
+            # define node_readiness_label in config map which should trigger a rolling update
             patch_readiness_label_config = {
                 "data": {
                     "node_readiness_label": readiness_label + ':' + readiness_value,
@@ -1018,17 +1018,19 @@ class EndToEndTestCase(unittest.TestCase):
             k8s.update_config(patch_readiness_label_config, "setting readiness label")
             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

-            # node affinity change should cause replica to relocate from replica node to master node due to node affinity requirement
+            # first replica will be replaced and get the new affinity
+            k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+            # next switchover of the master
             k8s.wait_for_pod_failover(failover_targets, 'spilo-role=master,' + cluster_label)

-            # the replica however will not start due to a volume node affinity conflict
+            # the old master is replaced. However it might not start due to a volume node affinity conflict
             # only if the pvc and pod are deleted it can be scheduled
             replica = k8s.get_cluster_replica_pod()
             if replica.status.phase == 'Pending':
                 k8s.api.core_v1.delete_namespaced_persistent_volume_claim('pgdata-' + replica.metadata.name, 'default')
                 k8s.api.core_v1.delete_namespaced_pod(replica.metadata.name, 'default')
-
-            k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+                k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)

             # patch also node where master ran before
             k8s.api.core_v1.patch_node(master_nodes[0], patch_readiness_label)
@@ -1038,7 +1040,7 @@ class EndToEndTestCase(unittest.TestCase):
             raise

         # toggle pod anti affinity to move replica away from master node
-        self.eventuallyTrue(lambda: self.assert_distributed_pods(master_nodes), "Pods are redistributed")
+        self.assert_distributed_pods(master_nodes)


     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
@@ -1481,7 +1483,7 @@ class EndToEndTestCase(unittest.TestCase):
             raise

         # toggle pod anti affinity to move replica away from master node
-        self.assert_distributed_pods(replica_nodes)
+        self.assert_distributed_pods(master_nodes)

     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_zz_cluster_deletion(self):
@@ -1602,6 +1604,16 @@ class EndToEndTestCase(unittest.TestCase):
            Toggle pod anti affinty to distribute pods accross nodes (replica in particular).
         '''
         k8s = self.k8s
+        cluster_labels = 'application=spilo,cluster-name=acid-minimal-cluster'
+
+        # get nodes of master and replica(s)
+        master_nodes, replica_nodes = k8s.get_cluster_nodes()
+        self.assertNotEqual(master_nodes, [])
+        self.assertNotEqual(replica_nodes, [])
+
+        # if nodes are different we can quit here
+        if master_nodes[0] not in replica_nodes:
+            return True

         # enable pod anti affintiy in config map which should trigger movement of replica
         patch_enable_antiaffinity = {
@@ -1614,8 +1626,8 @@ class EndToEndTestCase(unittest.TestCase):
             k8s.update_config(patch_enable_antiaffinity, "enable antiaffinity")
             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

-            k8s.wait_for_pod_failover(target_nodes, 'spilo-role=replica,' + cluster_labels)
+            k8s.wait_for_pod_start('spilo-role=replica,' + cluster_labels)
             k8s.wait_for_running_pods(cluster_labels, 2)

             # now disable pod anti affintiy again which will cause yet another failover
             patch_disable_antiaffinity = {
@@ -1626,8 +1638,18 @@ class EndToEndTestCase(unittest.TestCase):
             k8s.update_config(patch_disable_antiaffinity, "disable antiaffinity")
             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

+            k8s.wait_for_pod_start('spilo-role=master,' + cluster_labels)
             k8s.wait_for_pod_start('spilo-role=replica,' + cluster_labels)
             k8s.wait_for_running_pods(cluster_labels, 2)

+            master_nodes, replica_nodes = k8s.get_cluster_nodes()
+            self.assertNotEqual(master_nodes, [])
+            self.assertNotEqual(replica_nodes, [])
+
+            # if nodes are different we can quit here
+            for target_node in target_nodes:
+                if (target_node not in master_nodes or target_node not in replica_nodes) and master_nodes[0] in replica_nodes:
+                    print('Pods run on the same node')
+                    return False
+
         except timeout_decorator.TimeoutError:
             print('Operator log: {}'.format(k8s.get_operator_log()))
@@ -352,8 +352,22 @@ func nodeAffinity(nodeReadinessLabel map[string]string, nodeAffinity *v1.NodeAff
 				},
 			}
 		} else {
-			nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{
-				NodeSelectorTerms: append(nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, nodeReadinessSelectorTerm),
+			// if there are multiple node selector terms specified, append the node readiness label expressions (OR condition)
+			if len(nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) > 1 {
+				manifestTerms := nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
+				manifestTerms = append(manifestTerms, nodeReadinessSelectorTerm)
+				nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{
+					NodeSelectorTerms: manifestTerms,
+				}
+				// if there's just one term defined merge it with the readiness label term (AND condition)
+			} else {
+				manifestExpressions := nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions
+				manifestExpressions = append(manifestExpressions, matchExpressions...)
+				nodeAffinityCopy.RequiredDuringSchedulingIgnoredDuringExecution = &v1.NodeSelector{
+					NodeSelectorTerms: []v1.NodeSelectorTerm{
+						v1.NodeSelectorTerm{MatchExpressions: manifestExpressions},
+					},
+				}
 			}
 		}
 	}
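To complement the single-term (AND) example earlier, here is a hedged sketch of what the multi-term branch above would render, assuming a manifest that already defines two `nodeSelectorTerms` (`environment` and `storage` are made-up keys, `lifecycle-status=ready` an example readiness label). The readiness term is appended as an additional term, so a node matching any one term is schedulable (OR):

```yaml
requiredDuringSchedulingIgnoredDuringExecution:
  nodeSelectorTerms:
  - matchExpressions:               # first term from the cluster manifest
    - key: environment
      operator: In
      values: [production]
  - matchExpressions:               # second term from the cluster manifest
    - key: storage
      operator: In
      values: [ssd]
  - matchExpressions:               # readiness label term appended by the operator (OR)
    - key: lifecycle-status
      operator: In
      values: [ready]
```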