Set maximum CPU and Memory requests on K8s (#1959)
* Set maximum CPU and Memory requests on K8s
This commit is contained in:
		
							parent
							
								
									1c80ac0acd
								
							
						
					
					
						commit
						b41daf4f76
					
				|  | @ -350,6 +350,12 @@ spec: | |||
|                     type: string | ||||
|                     pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' | ||||
|                     default: "100Mi" | ||||
|                   max_cpu_request: | ||||
|                     type: string | ||||
|                     pattern: '^(\d+m|\d+(\.\d{1,3})?)$' | ||||
|                   max_memory_request: | ||||
|                     type: string | ||||
|                     pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' | ||||
|                   min_cpu_limit: | ||||
|                     type: string | ||||
|                     pattern: '^(\d+m|\d+(\.\d{1,3})?)$' | ||||
|  |  | |||
|  | @ -217,6 +217,12 @@ configPostgresPodResources: | |||
|   default_memory_limit: 500Mi | ||||
|   # memory request value for the postgres containers | ||||
|   default_memory_request: 100Mi | ||||
|   # optional upper boundary for CPU request | ||||
|   # max_cpu_request: "1" | ||||
| 
 | ||||
|   # optional upper boundary for memory request | ||||
|   # max_memory_request: 4Gi | ||||
| 
 | ||||
|   # hard CPU minimum required to properly run a Postgres cluster | ||||
|   min_cpu_limit: 250m | ||||
|   # hard memory minimum required to properly run a Postgres cluster | ||||
|  |  | |||
|  | @ -161,11 +161,12 @@ Those are top-level keys, containing both leaf keys and groups. | |||
| 
 | ||||
| * **set_memory_request_to_limit** | ||||
|   Set `memory_request` to `memory_limit` for all Postgres clusters (the default | ||||
|   value is also increased). This prevents certain cases of memory overcommitment | ||||
|   at the cost of overprovisioning memory and potential scheduling problems for | ||||
|   containers with high memory limits due to the lack of memory on Kubernetes | ||||
|   cluster nodes. This affects all containers created by the operator (Postgres, | ||||
|   connection pooler, logical backup, scalyr sidecar, and other sidecars except  | ||||
|   value is also increased but configured `max_memory_request` can not be | ||||
|   bypassed). This prevents certain cases of memory overcommitment at the cost | ||||
|   of overprovisioning memory and potential scheduling problems for containers | ||||
|   with high memory limits due to the lack of memory on Kubernetes cluster | ||||
|   nodes. This affects all containers created by the operator (Postgres, | ||||
|   connection pooler, logical backup, scalyr sidecar, and other sidecars except | ||||
|   **sidecars** defined in the operator configuration); to set resources for the | ||||
|   operator's own container, change the [operator deployment manually](https://github.com/zalando/postgres-operator/blob/master/manifests/postgres-operator.yaml#L20). | ||||
|   The default is `false`. | ||||
|  | @ -514,6 +515,12 @@ CRD-based configuration. | |||
|   memory limits for the Postgres containers, unless overridden by cluster-specific | ||||
|   settings. The default is `500Mi`. | ||||
| 
 | ||||
| * **max_cpu_request** | ||||
|   optional upper boundary for CPU request | ||||
| 
 | ||||
| * **max_memory_request** | ||||
|   optional upper boundary for memory request | ||||
| 
 | ||||
| * **min_cpu_limit** | ||||
|   hard CPU minimum what we consider to be required to properly run Postgres | ||||
|   clusters with Patroni on Kubernetes. The default is `250m`. | ||||
|  |  | |||
|  | @ -1012,9 +1012,10 @@ class EndToEndTestCase(unittest.TestCase): | |||
|         self.evantuallyEqual(check_version_14, "14", "Version was not upgrade to 14") | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_min_resource_limits(self): | ||||
|     def test_resource_generation(self): | ||||
|         ''' | ||||
|         Lower resource limits below configured minimum and let operator fix it | ||||
|         Lower resource limits below configured minimum and let operator fix it. | ||||
|         It will try to raise requests to limits which is capped with max_memory_request. | ||||
|         ''' | ||||
|         k8s = self.k8s | ||||
|         cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' | ||||
|  | @ -1023,17 +1024,20 @@ class EndToEndTestCase(unittest.TestCase): | |||
|         _, replica_nodes = k8s.get_pg_nodes(cluster_label) | ||||
|         self.assertNotEqual(replica_nodes, []) | ||||
| 
 | ||||
|         # configure minimum boundaries for CPU and memory limits | ||||
|         # configure maximum memory request and minimum boundaries for CPU and memory limits | ||||
|         maxMemoryRequest = '300Mi' | ||||
|         minCPULimit = '503m' | ||||
|         minMemoryLimit = '502Mi' | ||||
| 
 | ||||
|         patch_min_resource_limits = { | ||||
|         patch_pod_resources = { | ||||
|             "data": { | ||||
|                 "max_memory_request": maxMemoryRequest, | ||||
|                 "min_cpu_limit": minCPULimit, | ||||
|                 "min_memory_limit": minMemoryLimit | ||||
|                 "min_memory_limit": minMemoryLimit, | ||||
|                 "set_memory_request_to_limit": "true" | ||||
|             } | ||||
|         } | ||||
|         k8s.update_config(patch_min_resource_limits, "Minimum resource test") | ||||
|         k8s.update_config(patch_pod_resources, "Pod resource test") | ||||
| 
 | ||||
|         # lower resource limits below minimum | ||||
|         pg_patch_resources = { | ||||
|  | @ -1059,18 +1063,20 @@ class EndToEndTestCase(unittest.TestCase): | |||
|         k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) | ||||
|         k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) | ||||
| 
 | ||||
|         def verify_pod_limits(): | ||||
|         def verify_pod_resources(): | ||||
|             pods = k8s.api.core_v1.list_namespaced_pod('default', label_selector="cluster-name=acid-minimal-cluster,application=spilo").items | ||||
|             if len(pods) < 2: | ||||
|                 return False | ||||
| 
 | ||||
|             r = pods[0].spec.containers[0].resources.limits['memory'] == minMemoryLimit | ||||
|             r = pods[0].spec.containers[0].resources.requests['memory'] == maxMemoryRequest | ||||
|             r = r and pods[0].spec.containers[0].resources.limits['memory'] == minMemoryLimit | ||||
|             r = r and pods[0].spec.containers[0].resources.limits['cpu'] == minCPULimit | ||||
|             r = r and pods[1].spec.containers[0].resources.requests['memory'] == maxMemoryRequest | ||||
|             r = r and pods[1].spec.containers[0].resources.limits['memory'] == minMemoryLimit | ||||
|             r = r and pods[1].spec.containers[0].resources.limits['cpu'] == minCPULimit | ||||
|             return r | ||||
| 
 | ||||
|         self.eventuallyTrue(verify_pod_limits, "Pod limits where not adjusted") | ||||
|         self.eventuallyTrue(verify_pod_resources, "Pod resources where not adjusted") | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     def test_multi_namespace_support(self): | ||||
|  | @ -1209,6 +1215,7 @@ class EndToEndTestCase(unittest.TestCase): | |||
|         self.assert_distributed_pods(master_nodes) | ||||
| 
 | ||||
|     @timeout_decorator.timeout(TEST_TIMEOUT_SEC) | ||||
|     @unittest.skip("Skipping this test until fixed") | ||||
|     def test_node_readiness_label(self): | ||||
|         ''' | ||||
|            Remove node readiness label from master node. This must cause a failover. | ||||
|  |  | |||
|  | @ -90,6 +90,8 @@ data: | |||
|   # master_pod_move_timeout: 20m | ||||
|   # max_instances: "-1" | ||||
|   # min_instances: "-1" | ||||
|   # max_cpu_request: "1" | ||||
|   # max_memory_request: 4Gi | ||||
|   # min_cpu_limit: 250m | ||||
|   # min_memory_limit: 250Mi | ||||
|   # minimal_major_version: "9.6" | ||||
|  |  | |||
|  | @ -348,6 +348,12 @@ spec: | |||
|                     type: string | ||||
|                     pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' | ||||
|                     default: "100Mi" | ||||
|                   max_cpu_request: | ||||
|                     type: string | ||||
|                     pattern: '^(\d+m|\d+(\.\d{1,3})?)$' | ||||
|                   max_memory_request: | ||||
|                     type: string | ||||
|                     pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' | ||||
|                   min_cpu_limit: | ||||
|                     type: string | ||||
|                     pattern: '^(\d+m|\d+(\.\d{1,3})?)$' | ||||
|  |  | |||
|  | @ -109,6 +109,8 @@ configuration: | |||
|     default_cpu_request: 100m | ||||
|     default_memory_limit: 500Mi | ||||
|     default_memory_request: 100Mi | ||||
|     # max_cpu_request: "1" | ||||
|     # max_memory_request: 4Gi | ||||
|     # min_cpu_limit: 250m | ||||
|     # min_memory_limit: 250Mi | ||||
|   timeouts: | ||||
|  |  | |||
|  | @ -1471,6 +1471,14 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ | |||
| 								Type:    "string", | ||||
| 								Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$", | ||||
| 							}, | ||||
| 							"max_cpu_request": { | ||||
| 								Type:    "string", | ||||
| 								Pattern: "^(\\d+m|\\d+(\\.\\d{1,3})?)$", | ||||
| 							}, | ||||
| 							"max_memory_request": { | ||||
| 								Type:    "string", | ||||
| 								Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$", | ||||
| 							}, | ||||
| 							"min_cpu_limit": { | ||||
| 								Type:    "string", | ||||
| 								Pattern: "^(\\d+m|\\d+(\\.\\d{1,3})?)$", | ||||
|  |  | |||
|  | @ -109,6 +109,8 @@ type PostgresPodResourcesDefaults struct { | |||
| 	DefaultMemoryLimit   string `json:"default_memory_limit,omitempty"` | ||||
| 	MinCPULimit          string `json:"min_cpu_limit,omitempty"` | ||||
| 	MinMemoryLimit       string `json:"min_memory_limit,omitempty"` | ||||
| 	MaxCPURequest        string `json:"max_cpu_request,omitempty"` | ||||
| 	MaxMemoryRequest     string `json:"max_memory_request,omitempty"` | ||||
| } | ||||
| 
 | ||||
| // OperatorTimeouts defines the timeout of ResourceCheck, PodWait, ReadyWait
 | ||||
|  |  | |||
|  | @ -183,6 +183,32 @@ func (c *Cluster) enforceMinResourceLimits(resources *v1.ResourceRequirements) e | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (c *Cluster) enforceMaxResourceRequests(resources *v1.ResourceRequirements) error { | ||||
| 	var ( | ||||
| 		err error | ||||
| 	) | ||||
| 
 | ||||
| 	cpuRequest := resources.Requests[v1.ResourceCPU] | ||||
| 	maxCPURequest := c.OpConfig.MaxCPURequest | ||||
| 	maxCPU, err := util.MinResource(maxCPURequest, cpuRequest.String()) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("could not compare defined CPU request %s for %q container with configured maximum value %s: %v", | ||||
| 			cpuRequest.String(), constants.PostgresContainerName, maxCPURequest, err) | ||||
| 	} | ||||
| 	resources.Requests[v1.ResourceCPU] = maxCPU | ||||
| 
 | ||||
| 	memoryRequest := resources.Requests[v1.ResourceMemory] | ||||
| 	maxMemoryRequest := c.OpConfig.MaxMemoryRequest | ||||
| 	maxMemory, err := util.MinResource(maxMemoryRequest, memoryRequest.String()) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("could not compare defined memory request %s for %q container with configured maximum value %s: %v", | ||||
| 			memoryRequest.String(), constants.PostgresContainerName, maxMemoryRequest, err) | ||||
| 	} | ||||
| 	resources.Requests[v1.ResourceMemory] = maxMemory | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func setMemoryRequestToLimit(resources *v1.ResourceRequirements, containerName string, logger *logrus.Entry) { | ||||
| 
 | ||||
| 	requests := resources.Requests[v1.ResourceMemory] | ||||
|  | @ -260,6 +286,13 @@ func (c *Cluster) generateResourceRequirements( | |||
| 		setMemoryRequestToLimit(&result, containerName, c.logger) | ||||
| 	} | ||||
| 
 | ||||
| 	// enforce maximum cpu and memory requests for Postgres containers only
 | ||||
| 	if containerName == constants.PostgresContainerName { | ||||
| 		if err = c.enforceMaxResourceRequests(&result); err != nil { | ||||
| 			return nil, fmt.Errorf("could not enforce maximum resource requests: %v", err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return &result, nil | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -1841,8 +1841,10 @@ func TestSidecars(t *testing.T) { | |||
| 				}, | ||||
| 				Resources: config.Resources{ | ||||
| 					DefaultCPURequest:    "200m", | ||||
| 					MaxCPURequest:        "300m", | ||||
| 					DefaultCPULimit:      "500m", | ||||
| 					DefaultMemoryRequest: "0.7Gi", | ||||
| 					MaxMemoryRequest:     "1.0Gi", | ||||
| 					DefaultMemoryLimit:   "1.3Gi", | ||||
| 				}, | ||||
| 				SidecarImages: map[string]string{ | ||||
|  | @ -2128,8 +2130,10 @@ func TestGenerateService(t *testing.T) { | |||
| 				}, | ||||
| 				Resources: config.Resources{ | ||||
| 					DefaultCPURequest:    "200m", | ||||
| 					MaxCPURequest:        "300m", | ||||
| 					DefaultCPULimit:      "500m", | ||||
| 					DefaultMemoryRequest: "0.7Gi", | ||||
| 					MaxMemoryRequest:     "1.0Gi", | ||||
| 					DefaultMemoryLimit:   "1.3Gi", | ||||
| 				}, | ||||
| 				SidecarImages: map[string]string{ | ||||
|  | @ -2415,18 +2419,21 @@ func TestGenerateResourceRequirements(t *testing.T) { | |||
| 	roleLabel := "spilo-role" | ||||
| 	sidecarName := "postgres-exporter" | ||||
| 
 | ||||
| 	// two test cases will call enforceMinResourceLimits which emits 2 events per call
 | ||||
| 	// hence bufferSize of 4 is required
 | ||||
| 	newEventRecorder := record.NewFakeRecorder(4) | ||||
| 	// enforceMinResourceLimits will be called 2 twice emitting 4 events (2x cpu, 2x memory raise)
 | ||||
| 	// enforceMaxResourceRequests will be called 4 times emitting 6 events (2x cpu, 4x memory cap)
 | ||||
| 	// hence event bufferSize of 10 is required
 | ||||
| 	newEventRecorder := record.NewFakeRecorder(10) | ||||
| 
 | ||||
| 	configResources := config.Resources{ | ||||
| 		ClusterLabels:        map[string]string{"application": "spilo"}, | ||||
| 		ClusterNameLabel:     clusterNameLabel, | ||||
| 		DefaultCPURequest:    "100m", | ||||
| 		DefaultCPULimit:      "1", | ||||
| 		MaxCPURequest:        "500m", | ||||
| 		MinCPULimit:          "250m", | ||||
| 		DefaultMemoryRequest: "100Mi", | ||||
| 		DefaultMemoryLimit:   "500Mi", | ||||
| 		MinCPULimit:          "250m", | ||||
| 		MaxMemoryRequest:     "1Gi", | ||||
| 		MinMemoryLimit:       "250Mi", | ||||
| 		PodRoleLabel:         roleLabel, | ||||
| 	} | ||||
|  | @ -2558,6 +2565,10 @@ func TestGenerateResourceRequirements(t *testing.T) { | |||
| 					Namespace: namespace, | ||||
| 				}, | ||||
| 				Spec: acidv1.PostgresSpec{ | ||||
| 					Resources: &acidv1.Resources{ | ||||
| 						ResourceRequests: acidv1.ResourceDescription{Memory: "200Mi"}, | ||||
| 						ResourceLimits:   acidv1.ResourceDescription{Memory: "300Mi"}, | ||||
| 					}, | ||||
| 					TeamID: "acid", | ||||
| 					Volume: acidv1.Volume{ | ||||
| 						Size: "1G", | ||||
|  | @ -2565,8 +2576,8 @@ func TestGenerateResourceRequirements(t *testing.T) { | |||
| 				}, | ||||
| 			}, | ||||
| 			expectedResources: acidv1.Resources{ | ||||
| 				ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "500Mi"}, | ||||
| 				ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "500Mi"}, | ||||
| 				ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "300Mi"}, | ||||
| 				ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "300Mi"}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
|  | @ -2691,6 +2702,62 @@ func TestGenerateResourceRequirements(t *testing.T) { | |||
| 				ResourceLimits:   acidv1.ResourceDescription{CPU: "100m", Memory: "100Mi"}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			subTest: "test enforcing max cpu and memory requests", | ||||
| 			config: config.Config{ | ||||
| 				Resources:               configResources, | ||||
| 				PodManagementPolicy:     "ordered_ready", | ||||
| 				SetMemoryRequestToLimit: false, | ||||
| 			}, | ||||
| 			pgSpec: acidv1.Postgresql{ | ||||
| 				ObjectMeta: metav1.ObjectMeta{ | ||||
| 					Name:      clusterName, | ||||
| 					Namespace: namespace, | ||||
| 				}, | ||||
| 				Spec: acidv1.PostgresSpec{ | ||||
| 					Resources: &acidv1.Resources{ | ||||
| 						ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "2Gi"}, | ||||
| 						ResourceLimits:   acidv1.ResourceDescription{CPU: "2", Memory: "4Gi"}, | ||||
| 					}, | ||||
| 					TeamID: "acid", | ||||
| 					Volume: acidv1.Volume{ | ||||
| 						Size: "1G", | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			expectedResources: acidv1.Resources{ | ||||
| 				ResourceRequests: acidv1.ResourceDescription{CPU: "500m", Memory: "1Gi"}, | ||||
| 				ResourceLimits:   acidv1.ResourceDescription{CPU: "2", Memory: "4Gi"}, | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			subTest: "test SetMemoryRequestToLimit flag but raise only until max memory request", | ||||
| 			config: config.Config{ | ||||
| 				Resources:               configResources, | ||||
| 				PodManagementPolicy:     "ordered_ready", | ||||
| 				SetMemoryRequestToLimit: true, | ||||
| 			}, | ||||
| 			pgSpec: acidv1.Postgresql{ | ||||
| 				ObjectMeta: metav1.ObjectMeta{ | ||||
| 					Name:      clusterName, | ||||
| 					Namespace: namespace, | ||||
| 				}, | ||||
| 				Spec: acidv1.PostgresSpec{ | ||||
| 					Resources: &acidv1.Resources{ | ||||
| 						ResourceRequests: acidv1.ResourceDescription{Memory: "500Mi"}, | ||||
| 						ResourceLimits:   acidv1.ResourceDescription{Memory: "2Gi"}, | ||||
| 					}, | ||||
| 					TeamID: "acid", | ||||
| 					Volume: acidv1.Volume{ | ||||
| 						Size: "1G", | ||||
| 					}, | ||||
| 				}, | ||||
| 			}, | ||||
| 			expectedResources: acidv1.Resources{ | ||||
| 				ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "1Gi"}, | ||||
| 				ResourceLimits:   acidv1.ResourceDescription{CPU: "1", Memory: "2Gi"}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tt := range tests { | ||||
|  |  | |||
|  | @ -129,6 +129,8 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur | |||
| 	result.DefaultMemoryLimit = util.Coalesce(fromCRD.PostgresPodResources.DefaultMemoryLimit, "500Mi") | ||||
| 	result.MinCPULimit = util.Coalesce(fromCRD.PostgresPodResources.MinCPULimit, "250m") | ||||
| 	result.MinMemoryLimit = util.Coalesce(fromCRD.PostgresPodResources.MinMemoryLimit, "250Mi") | ||||
| 	result.MaxCPURequest = fromCRD.PostgresPodResources.MaxCPURequest | ||||
| 	result.MaxMemoryRequest = fromCRD.PostgresPodResources.MaxMemoryRequest | ||||
| 
 | ||||
| 	// timeout config
 | ||||
| 	result.ResourceCheckInterval = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.ResourceCheckInterval), "3s") | ||||
|  |  | |||
|  | @ -54,6 +54,8 @@ type Resources struct { | |||
| 	DefaultMemoryLimit            string              `name:"default_memory_limit" default:"500Mi"` | ||||
| 	MinCPULimit                   string              `name:"min_cpu_limit" default:"250m"` | ||||
| 	MinMemoryLimit                string              `name:"min_memory_limit" default:"250Mi"` | ||||
| 	MaxCPURequest                 string              `name:"max_cpu_request"` | ||||
| 	MaxMemoryRequest              string              `name:"max_memory_request"` | ||||
| 	PodEnvironmentConfigMap       spec.NamespacedName `name:"pod_environment_configmap"` | ||||
| 	PodEnvironmentSecret          string              `name:"pod_environment_secret"` | ||||
| 	NodeReadinessLabel            map[string]string   `name:"node_readiness_label" default:""` | ||||
|  |  | |||
|  | @ -367,3 +367,21 @@ func IsSmallerQuantity(requestStr, limitStr string) (bool, error) { | |||
| 
 | ||||
| 	return request.Cmp(limit) == -1, nil | ||||
| } | ||||
| 
 | ||||
| func MinResource(maxRequestStr, requestStr string) (resource.Quantity, error) { | ||||
| 
 | ||||
| 	isSmaller, err := IsSmallerQuantity(maxRequestStr, requestStr) | ||||
| 	if isSmaller && err == nil { | ||||
| 		maxRequest, err := resource.ParseQuantity(maxRequestStr) | ||||
| 		if err != nil { | ||||
| 			return maxRequest, err | ||||
| 		} | ||||
| 		return maxRequest, nil | ||||
| 	} | ||||
| 
 | ||||
| 	request, err := resource.ParseQuantity(requestStr) | ||||
| 	if err != nil { | ||||
| 		return request, err | ||||
| 	} | ||||
| 	return request, nil | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue