diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 3949fde6c..b5fea1c73 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -350,6 +350,12 @@ spec: type: string pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' default: "100Mi" + max_cpu_request: + type: string + pattern: '^(\d+m|\d+(\.\d{1,3})?)$' + max_memory_request: + type: string + pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' min_cpu_limit: type: string pattern: '^(\d+m|\d+(\.\d{1,3})?)$' diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 15b693870..f125beebf 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -217,6 +217,12 @@ configPostgresPodResources: default_memory_limit: 500Mi # memory request value for the postgres containers default_memory_request: 100Mi + # optional upper boundary for CPU request + # max_cpu_request: "1" + + # optional upper boundary for memory request + # max_memory_request: 4Gi + # hard CPU minimum required to properly run a Postgres cluster min_cpu_limit: 250m # hard memory minimum required to properly run a Postgres cluster diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 104d574f6..4f17ec213 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -161,11 +161,12 @@ Those are top-level keys, containing both leaf keys and groups. * **set_memory_request_to_limit** Set `memory_request` to `memory_limit` for all Postgres clusters (the default - value is also increased). This prevents certain cases of memory overcommitment - at the cost of overprovisioning memory and potential scheduling problems for - containers with high memory limits due to the lack of memory on Kubernetes - cluster nodes. This affects all containers created by the operator (Postgres, - connection pooler, logical backup, scalyr sidecar, and other sidecars except + value is also increased but configured `max_memory_request` can not be + bypassed). This prevents certain cases of memory overcommitment at the cost + of overprovisioning memory and potential scheduling problems for containers + with high memory limits due to the lack of memory on Kubernetes cluster + nodes. This affects all containers created by the operator (Postgres, + connection pooler, logical backup, scalyr sidecar, and other sidecars except **sidecars** defined in the operator configuration); to set resources for the operator's own container, change the [operator deployment manually](https://github.com/zalando/postgres-operator/blob/master/manifests/postgres-operator.yaml#L20). The default is `false`. @@ -514,6 +515,12 @@ CRD-based configuration. memory limits for the Postgres containers, unless overridden by cluster-specific settings. The default is `500Mi`. +* **max_cpu_request** + optional upper boundary for CPU request + +* **max_memory_request** + optional upper boundary for memory request + * **min_cpu_limit** hard CPU minimum what we consider to be required to properly run Postgres clusters with Patroni on Kubernetes. The default is `250m`. diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 9e990e013..cc8941925 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -1012,9 +1012,10 @@ class EndToEndTestCase(unittest.TestCase): self.evantuallyEqual(check_version_14, "14", "Version was not upgrade to 14") @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_min_resource_limits(self): + def test_resource_generation(self): ''' - Lower resource limits below configured minimum and let operator fix it + Lower resource limits below configured minimum and let operator fix it. + It will try to raise requests to limits which is capped with max_memory_request. ''' k8s = self.k8s cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' @@ -1023,17 +1024,20 @@ class EndToEndTestCase(unittest.TestCase): _, replica_nodes = k8s.get_pg_nodes(cluster_label) self.assertNotEqual(replica_nodes, []) - # configure minimum boundaries for CPU and memory limits + # configure maximum memory request and minimum boundaries for CPU and memory limits + maxMemoryRequest = '300Mi' minCPULimit = '503m' minMemoryLimit = '502Mi' - patch_min_resource_limits = { + patch_pod_resources = { "data": { + "max_memory_request": maxMemoryRequest, "min_cpu_limit": minCPULimit, - "min_memory_limit": minMemoryLimit + "min_memory_limit": minMemoryLimit, + "set_memory_request_to_limit": "true" } } - k8s.update_config(patch_min_resource_limits, "Minimum resource test") + k8s.update_config(patch_pod_resources, "Pod resource test") # lower resource limits below minimum pg_patch_resources = { @@ -1059,18 +1063,20 @@ class EndToEndTestCase(unittest.TestCase): k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - def verify_pod_limits(): + def verify_pod_resources(): pods = k8s.api.core_v1.list_namespaced_pod('default', label_selector="cluster-name=acid-minimal-cluster,application=spilo").items if len(pods) < 2: return False - r = pods[0].spec.containers[0].resources.limits['memory'] == minMemoryLimit + r = pods[0].spec.containers[0].resources.requests['memory'] == maxMemoryRequest + r = r and pods[0].spec.containers[0].resources.limits['memory'] == minMemoryLimit r = r and pods[0].spec.containers[0].resources.limits['cpu'] == minCPULimit + r = r and pods[1].spec.containers[0].resources.requests['memory'] == maxMemoryRequest r = r and pods[1].spec.containers[0].resources.limits['memory'] == minMemoryLimit r = r and pods[1].spec.containers[0].resources.limits['cpu'] == minCPULimit return r - self.eventuallyTrue(verify_pod_limits, "Pod limits where not adjusted") + self.eventuallyTrue(verify_pod_resources, "Pod resources where not adjusted") @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_multi_namespace_support(self): @@ -1209,6 +1215,7 @@ class EndToEndTestCase(unittest.TestCase): self.assert_distributed_pods(master_nodes) @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + @unittest.skip("Skipping this test until fixed") def test_node_readiness_label(self): ''' Remove node readiness label from master node. This must cause a failover. diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 93e278047..ba5f5e2a9 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -90,6 +90,8 @@ data: # master_pod_move_timeout: 20m # max_instances: "-1" # min_instances: "-1" + # max_cpu_request: "1" + # max_memory_request: 4Gi # min_cpu_limit: 250m # min_memory_limit: 250Mi # minimal_major_version: "9.6" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 24ea04748..5a759d3f5 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -348,6 +348,12 @@ spec: type: string pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' default: "100Mi" + max_cpu_request: + type: string + pattern: '^(\d+m|\d+(\.\d{1,3})?)$' + max_memory_request: + type: string + pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' min_cpu_limit: type: string pattern: '^(\d+m|\d+(\.\d{1,3})?)$' diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index b18855fcf..df7ef3ecf 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -109,6 +109,8 @@ configuration: default_cpu_request: 100m default_memory_limit: 500Mi default_memory_request: 100Mi + # max_cpu_request: "1" + # max_memory_request: 4Gi # min_cpu_limit: 250m # min_memory_limit: 250Mi timeouts: diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index e361843be..db11771e4 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -1471,6 +1471,14 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ Type: "string", Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$", }, + "max_cpu_request": { + Type: "string", + Pattern: "^(\\d+m|\\d+(\\.\\d{1,3})?)$", + }, + "max_memory_request": { + Type: "string", + Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$", + }, "min_cpu_limit": { Type: "string", Pattern: "^(\\d+m|\\d+(\\.\\d{1,3})?)$", diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 2211b9ed3..2864de2a2 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -109,6 +109,8 @@ type PostgresPodResourcesDefaults struct { DefaultMemoryLimit string `json:"default_memory_limit,omitempty"` MinCPULimit string `json:"min_cpu_limit,omitempty"` MinMemoryLimit string `json:"min_memory_limit,omitempty"` + MaxCPURequest string `json:"max_cpu_request,omitempty"` + MaxMemoryRequest string `json:"max_memory_request,omitempty"` } // OperatorTimeouts defines the timeout of ResourceCheck, PodWait, ReadyWait diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index e84303b1e..bd9b93aad 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -183,6 +183,32 @@ func (c *Cluster) enforceMinResourceLimits(resources *v1.ResourceRequirements) e return nil } +func (c *Cluster) enforceMaxResourceRequests(resources *v1.ResourceRequirements) error { + var ( + err error + ) + + cpuRequest := resources.Requests[v1.ResourceCPU] + maxCPURequest := c.OpConfig.MaxCPURequest + maxCPU, err := util.MinResource(maxCPURequest, cpuRequest.String()) + if err != nil { + return fmt.Errorf("could not compare defined CPU request %s for %q container with configured maximum value %s: %v", + cpuRequest.String(), constants.PostgresContainerName, maxCPURequest, err) + } + resources.Requests[v1.ResourceCPU] = maxCPU + + memoryRequest := resources.Requests[v1.ResourceMemory] + maxMemoryRequest := c.OpConfig.MaxMemoryRequest + maxMemory, err := util.MinResource(maxMemoryRequest, memoryRequest.String()) + if err != nil { + return fmt.Errorf("could not compare defined memory request %s for %q container with configured maximum value %s: %v", + memoryRequest.String(), constants.PostgresContainerName, maxMemoryRequest, err) + } + resources.Requests[v1.ResourceMemory] = maxMemory + + return nil +} + func setMemoryRequestToLimit(resources *v1.ResourceRequirements, containerName string, logger *logrus.Entry) { requests := resources.Requests[v1.ResourceMemory] @@ -260,6 +286,13 @@ func (c *Cluster) generateResourceRequirements( setMemoryRequestToLimit(&result, containerName, c.logger) } + // enforce maximum cpu and memory requests for Postgres containers only + if containerName == constants.PostgresContainerName { + if err = c.enforceMaxResourceRequests(&result); err != nil { + return nil, fmt.Errorf("could not enforce maximum resource requests: %v", err) + } + } + return &result, nil } diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 36f23999c..46f2db86c 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1841,8 +1841,10 @@ func TestSidecars(t *testing.T) { }, Resources: config.Resources{ DefaultCPURequest: "200m", + MaxCPURequest: "300m", DefaultCPULimit: "500m", DefaultMemoryRequest: "0.7Gi", + MaxMemoryRequest: "1.0Gi", DefaultMemoryLimit: "1.3Gi", }, SidecarImages: map[string]string{ @@ -2128,8 +2130,10 @@ func TestGenerateService(t *testing.T) { }, Resources: config.Resources{ DefaultCPURequest: "200m", + MaxCPURequest: "300m", DefaultCPULimit: "500m", DefaultMemoryRequest: "0.7Gi", + MaxMemoryRequest: "1.0Gi", DefaultMemoryLimit: "1.3Gi", }, SidecarImages: map[string]string{ @@ -2415,18 +2419,21 @@ func TestGenerateResourceRequirements(t *testing.T) { roleLabel := "spilo-role" sidecarName := "postgres-exporter" - // two test cases will call enforceMinResourceLimits which emits 2 events per call - // hence bufferSize of 4 is required - newEventRecorder := record.NewFakeRecorder(4) + // enforceMinResourceLimits will be called 2 twice emitting 4 events (2x cpu, 2x memory raise) + // enforceMaxResourceRequests will be called 4 times emitting 6 events (2x cpu, 4x memory cap) + // hence event bufferSize of 10 is required + newEventRecorder := record.NewFakeRecorder(10) configResources := config.Resources{ ClusterLabels: map[string]string{"application": "spilo"}, ClusterNameLabel: clusterNameLabel, DefaultCPURequest: "100m", DefaultCPULimit: "1", + MaxCPURequest: "500m", + MinCPULimit: "250m", DefaultMemoryRequest: "100Mi", DefaultMemoryLimit: "500Mi", - MinCPULimit: "250m", + MaxMemoryRequest: "1Gi", MinMemoryLimit: "250Mi", PodRoleLabel: roleLabel, } @@ -2558,6 +2565,10 @@ func TestGenerateResourceRequirements(t *testing.T) { Namespace: namespace, }, Spec: acidv1.PostgresSpec{ + Resources: &acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{Memory: "200Mi"}, + ResourceLimits: acidv1.ResourceDescription{Memory: "300Mi"}, + }, TeamID: "acid", Volume: acidv1.Volume{ Size: "1G", @@ -2565,8 +2576,8 @@ func TestGenerateResourceRequirements(t *testing.T) { }, }, expectedResources: acidv1.Resources{ - ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "500Mi"}, - ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "500Mi"}, + ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "300Mi"}, + ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "300Mi"}, }, }, { @@ -2691,6 +2702,62 @@ func TestGenerateResourceRequirements(t *testing.T) { ResourceLimits: acidv1.ResourceDescription{CPU: "100m", Memory: "100Mi"}, }, }, + { + subTest: "test enforcing max cpu and memory requests", + config: config.Config{ + Resources: configResources, + PodManagementPolicy: "ordered_ready", + SetMemoryRequestToLimit: false, + }, + pgSpec: acidv1.Postgresql{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: namespace, + }, + Spec: acidv1.PostgresSpec{ + Resources: &acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "2Gi"}, + ResourceLimits: acidv1.ResourceDescription{CPU: "2", Memory: "4Gi"}, + }, + TeamID: "acid", + Volume: acidv1.Volume{ + Size: "1G", + }, + }, + }, + expectedResources: acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{CPU: "500m", Memory: "1Gi"}, + ResourceLimits: acidv1.ResourceDescription{CPU: "2", Memory: "4Gi"}, + }, + }, + { + subTest: "test SetMemoryRequestToLimit flag but raise only until max memory request", + config: config.Config{ + Resources: configResources, + PodManagementPolicy: "ordered_ready", + SetMemoryRequestToLimit: true, + }, + pgSpec: acidv1.Postgresql{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: namespace, + }, + Spec: acidv1.PostgresSpec{ + Resources: &acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{Memory: "500Mi"}, + ResourceLimits: acidv1.ResourceDescription{Memory: "2Gi"}, + }, + TeamID: "acid", + Volume: acidv1.Volume{ + Size: "1G", + }, + }, + }, + expectedResources: acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "1Gi"}, + ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "2Gi"}, + }, + }, } for _, tt := range tests { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 9e1d40648..11a15c89a 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -129,6 +129,8 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.DefaultMemoryLimit = util.Coalesce(fromCRD.PostgresPodResources.DefaultMemoryLimit, "500Mi") result.MinCPULimit = util.Coalesce(fromCRD.PostgresPodResources.MinCPULimit, "250m") result.MinMemoryLimit = util.Coalesce(fromCRD.PostgresPodResources.MinMemoryLimit, "250Mi") + result.MaxCPURequest = fromCRD.PostgresPodResources.MaxCPURequest + result.MaxMemoryRequest = fromCRD.PostgresPodResources.MaxMemoryRequest // timeout config result.ResourceCheckInterval = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.ResourceCheckInterval), "3s") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 21948cc0f..5d1fff851 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -54,6 +54,8 @@ type Resources struct { DefaultMemoryLimit string `name:"default_memory_limit" default:"500Mi"` MinCPULimit string `name:"min_cpu_limit" default:"250m"` MinMemoryLimit string `name:"min_memory_limit" default:"250Mi"` + MaxCPURequest string `name:"max_cpu_request"` + MaxMemoryRequest string `name:"max_memory_request"` PodEnvironmentConfigMap spec.NamespacedName `name:"pod_environment_configmap"` PodEnvironmentSecret string `name:"pod_environment_secret"` NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` diff --git a/pkg/util/util.go b/pkg/util/util.go index 3eb9c5301..504455f47 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -367,3 +367,21 @@ func IsSmallerQuantity(requestStr, limitStr string) (bool, error) { return request.Cmp(limit) == -1, nil } + +func MinResource(maxRequestStr, requestStr string) (resource.Quantity, error) { + + isSmaller, err := IsSmallerQuantity(maxRequestStr, requestStr) + if isSmaller && err == nil { + maxRequest, err := resource.ParseQuantity(maxRequestStr) + if err != nil { + return maxRequest, err + } + return maxRequest, nil + } + + request, err := resource.ParseQuantity(requestStr) + if err != nil { + return request, err + } + return request, nil +}