Set maximum CPU and Memory requests on K8s (#1959)
* Set maximum CPU and Memory requests on K8s
This commit is contained in:
parent
1c80ac0acd
commit
b41daf4f76
|
|
@ -350,6 +350,12 @@ spec:
|
|||
type: string
|
||||
pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$'
|
||||
default: "100Mi"
|
||||
max_cpu_request:
|
||||
type: string
|
||||
pattern: '^(\d+m|\d+(\.\d{1,3})?)$'
|
||||
max_memory_request:
|
||||
type: string
|
||||
pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$'
|
||||
min_cpu_limit:
|
||||
type: string
|
||||
pattern: '^(\d+m|\d+(\.\d{1,3})?)$'
|
||||
|
|
|
|||
|
|
@ -217,6 +217,12 @@ configPostgresPodResources:
|
|||
default_memory_limit: 500Mi
|
||||
# memory request value for the postgres containers
|
||||
default_memory_request: 100Mi
|
||||
# optional upper boundary for CPU request
|
||||
# max_cpu_request: "1"
|
||||
|
||||
# optional upper boundary for memory request
|
||||
# max_memory_request: 4Gi
|
||||
|
||||
# hard CPU minimum required to properly run a Postgres cluster
|
||||
min_cpu_limit: 250m
|
||||
# hard memory minimum required to properly run a Postgres cluster
|
||||
|
|
|
|||
|
|
@ -161,11 +161,12 @@ Those are top-level keys, containing both leaf keys and groups.
|
|||
|
||||
* **set_memory_request_to_limit**
|
||||
Set `memory_request` to `memory_limit` for all Postgres clusters (the default
|
||||
value is also increased). This prevents certain cases of memory overcommitment
|
||||
at the cost of overprovisioning memory and potential scheduling problems for
|
||||
containers with high memory limits due to the lack of memory on Kubernetes
|
||||
cluster nodes. This affects all containers created by the operator (Postgres,
|
||||
connection pooler, logical backup, scalyr sidecar, and other sidecars except
|
||||
value is also increased but configured `max_memory_request` can not be
|
||||
bypassed). This prevents certain cases of memory overcommitment at the cost
|
||||
of overprovisioning memory and potential scheduling problems for containers
|
||||
with high memory limits due to the lack of memory on Kubernetes cluster
|
||||
nodes. This affects all containers created by the operator (Postgres,
|
||||
connection pooler, logical backup, scalyr sidecar, and other sidecars except
|
||||
**sidecars** defined in the operator configuration); to set resources for the
|
||||
operator's own container, change the [operator deployment manually](https://github.com/zalando/postgres-operator/blob/master/manifests/postgres-operator.yaml#L20).
|
||||
The default is `false`.
|
||||
|
|
@ -514,6 +515,12 @@ CRD-based configuration.
|
|||
memory limits for the Postgres containers, unless overridden by cluster-specific
|
||||
settings. The default is `500Mi`.
|
||||
|
||||
* **max_cpu_request**
|
||||
optional upper boundary for CPU request
|
||||
|
||||
* **max_memory_request**
|
||||
optional upper boundary for memory request
|
||||
|
||||
* **min_cpu_limit**
|
||||
hard CPU minimum what we consider to be required to properly run Postgres
|
||||
clusters with Patroni on Kubernetes. The default is `250m`.
|
||||
|
|
|
|||
|
|
@ -1012,9 +1012,10 @@ class EndToEndTestCase(unittest.TestCase):
|
|||
self.evantuallyEqual(check_version_14, "14", "Version was not upgrade to 14")
|
||||
|
||||
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
|
||||
def test_min_resource_limits(self):
|
||||
def test_resource_generation(self):
|
||||
'''
|
||||
Lower resource limits below configured minimum and let operator fix it
|
||||
Lower resource limits below configured minimum and let operator fix it.
|
||||
It will try to raise requests to limits which is capped with max_memory_request.
|
||||
'''
|
||||
k8s = self.k8s
|
||||
cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
|
||||
|
|
@ -1023,17 +1024,20 @@ class EndToEndTestCase(unittest.TestCase):
|
|||
_, replica_nodes = k8s.get_pg_nodes(cluster_label)
|
||||
self.assertNotEqual(replica_nodes, [])
|
||||
|
||||
# configure minimum boundaries for CPU and memory limits
|
||||
# configure maximum memory request and minimum boundaries for CPU and memory limits
|
||||
maxMemoryRequest = '300Mi'
|
||||
minCPULimit = '503m'
|
||||
minMemoryLimit = '502Mi'
|
||||
|
||||
patch_min_resource_limits = {
|
||||
patch_pod_resources = {
|
||||
"data": {
|
||||
"max_memory_request": maxMemoryRequest,
|
||||
"min_cpu_limit": minCPULimit,
|
||||
"min_memory_limit": minMemoryLimit
|
||||
"min_memory_limit": minMemoryLimit,
|
||||
"set_memory_request_to_limit": "true"
|
||||
}
|
||||
}
|
||||
k8s.update_config(patch_min_resource_limits, "Minimum resource test")
|
||||
k8s.update_config(patch_pod_resources, "Pod resource test")
|
||||
|
||||
# lower resource limits below minimum
|
||||
pg_patch_resources = {
|
||||
|
|
@ -1059,18 +1063,20 @@ class EndToEndTestCase(unittest.TestCase):
|
|||
k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label)
|
||||
k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
|
||||
|
||||
def verify_pod_limits():
|
||||
def verify_pod_resources():
|
||||
pods = k8s.api.core_v1.list_namespaced_pod('default', label_selector="cluster-name=acid-minimal-cluster,application=spilo").items
|
||||
if len(pods) < 2:
|
||||
return False
|
||||
|
||||
r = pods[0].spec.containers[0].resources.limits['memory'] == minMemoryLimit
|
||||
r = pods[0].spec.containers[0].resources.requests['memory'] == maxMemoryRequest
|
||||
r = r and pods[0].spec.containers[0].resources.limits['memory'] == minMemoryLimit
|
||||
r = r and pods[0].spec.containers[0].resources.limits['cpu'] == minCPULimit
|
||||
r = r and pods[1].spec.containers[0].resources.requests['memory'] == maxMemoryRequest
|
||||
r = r and pods[1].spec.containers[0].resources.limits['memory'] == minMemoryLimit
|
||||
r = r and pods[1].spec.containers[0].resources.limits['cpu'] == minCPULimit
|
||||
return r
|
||||
|
||||
self.eventuallyTrue(verify_pod_limits, "Pod limits where not adjusted")
|
||||
self.eventuallyTrue(verify_pod_resources, "Pod resources where not adjusted")
|
||||
|
||||
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
|
||||
def test_multi_namespace_support(self):
|
||||
|
|
@ -1209,6 +1215,7 @@ class EndToEndTestCase(unittest.TestCase):
|
|||
self.assert_distributed_pods(master_nodes)
|
||||
|
||||
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
|
||||
@unittest.skip("Skipping this test until fixed")
|
||||
def test_node_readiness_label(self):
|
||||
'''
|
||||
Remove node readiness label from master node. This must cause a failover.
|
||||
|
|
|
|||
|
|
@ -90,6 +90,8 @@ data:
|
|||
# master_pod_move_timeout: 20m
|
||||
# max_instances: "-1"
|
||||
# min_instances: "-1"
|
||||
# max_cpu_request: "1"
|
||||
# max_memory_request: 4Gi
|
||||
# min_cpu_limit: 250m
|
||||
# min_memory_limit: 250Mi
|
||||
# minimal_major_version: "9.6"
|
||||
|
|
|
|||
|
|
@ -348,6 +348,12 @@ spec:
|
|||
type: string
|
||||
pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$'
|
||||
default: "100Mi"
|
||||
max_cpu_request:
|
||||
type: string
|
||||
pattern: '^(\d+m|\d+(\.\d{1,3})?)$'
|
||||
max_memory_request:
|
||||
type: string
|
||||
pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$'
|
||||
min_cpu_limit:
|
||||
type: string
|
||||
pattern: '^(\d+m|\d+(\.\d{1,3})?)$'
|
||||
|
|
|
|||
|
|
@ -109,6 +109,8 @@ configuration:
|
|||
default_cpu_request: 100m
|
||||
default_memory_limit: 500Mi
|
||||
default_memory_request: 100Mi
|
||||
# max_cpu_request: "1"
|
||||
# max_memory_request: 4Gi
|
||||
# min_cpu_limit: 250m
|
||||
# min_memory_limit: 250Mi
|
||||
timeouts:
|
||||
|
|
|
|||
|
|
@ -1471,6 +1471,14 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
|
|||
Type: "string",
|
||||
Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$",
|
||||
},
|
||||
"max_cpu_request": {
|
||||
Type: "string",
|
||||
Pattern: "^(\\d+m|\\d+(\\.\\d{1,3})?)$",
|
||||
},
|
||||
"max_memory_request": {
|
||||
Type: "string",
|
||||
Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$",
|
||||
},
|
||||
"min_cpu_limit": {
|
||||
Type: "string",
|
||||
Pattern: "^(\\d+m|\\d+(\\.\\d{1,3})?)$",
|
||||
|
|
|
|||
|
|
@ -109,6 +109,8 @@ type PostgresPodResourcesDefaults struct {
|
|||
DefaultMemoryLimit string `json:"default_memory_limit,omitempty"`
|
||||
MinCPULimit string `json:"min_cpu_limit,omitempty"`
|
||||
MinMemoryLimit string `json:"min_memory_limit,omitempty"`
|
||||
MaxCPURequest string `json:"max_cpu_request,omitempty"`
|
||||
MaxMemoryRequest string `json:"max_memory_request,omitempty"`
|
||||
}
|
||||
|
||||
// OperatorTimeouts defines the timeout of ResourceCheck, PodWait, ReadyWait
|
||||
|
|
|
|||
|
|
@ -183,6 +183,32 @@ func (c *Cluster) enforceMinResourceLimits(resources *v1.ResourceRequirements) e
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *Cluster) enforceMaxResourceRequests(resources *v1.ResourceRequirements) error {
|
||||
var (
|
||||
err error
|
||||
)
|
||||
|
||||
cpuRequest := resources.Requests[v1.ResourceCPU]
|
||||
maxCPURequest := c.OpConfig.MaxCPURequest
|
||||
maxCPU, err := util.MinResource(maxCPURequest, cpuRequest.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not compare defined CPU request %s for %q container with configured maximum value %s: %v",
|
||||
cpuRequest.String(), constants.PostgresContainerName, maxCPURequest, err)
|
||||
}
|
||||
resources.Requests[v1.ResourceCPU] = maxCPU
|
||||
|
||||
memoryRequest := resources.Requests[v1.ResourceMemory]
|
||||
maxMemoryRequest := c.OpConfig.MaxMemoryRequest
|
||||
maxMemory, err := util.MinResource(maxMemoryRequest, memoryRequest.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not compare defined memory request %s for %q container with configured maximum value %s: %v",
|
||||
memoryRequest.String(), constants.PostgresContainerName, maxMemoryRequest, err)
|
||||
}
|
||||
resources.Requests[v1.ResourceMemory] = maxMemory
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMemoryRequestToLimit(resources *v1.ResourceRequirements, containerName string, logger *logrus.Entry) {
|
||||
|
||||
requests := resources.Requests[v1.ResourceMemory]
|
||||
|
|
@ -260,6 +286,13 @@ func (c *Cluster) generateResourceRequirements(
|
|||
setMemoryRequestToLimit(&result, containerName, c.logger)
|
||||
}
|
||||
|
||||
// enforce maximum cpu and memory requests for Postgres containers only
|
||||
if containerName == constants.PostgresContainerName {
|
||||
if err = c.enforceMaxResourceRequests(&result); err != nil {
|
||||
return nil, fmt.Errorf("could not enforce maximum resource requests: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1841,8 +1841,10 @@ func TestSidecars(t *testing.T) {
|
|||
},
|
||||
Resources: config.Resources{
|
||||
DefaultCPURequest: "200m",
|
||||
MaxCPURequest: "300m",
|
||||
DefaultCPULimit: "500m",
|
||||
DefaultMemoryRequest: "0.7Gi",
|
||||
MaxMemoryRequest: "1.0Gi",
|
||||
DefaultMemoryLimit: "1.3Gi",
|
||||
},
|
||||
SidecarImages: map[string]string{
|
||||
|
|
@ -2128,8 +2130,10 @@ func TestGenerateService(t *testing.T) {
|
|||
},
|
||||
Resources: config.Resources{
|
||||
DefaultCPURequest: "200m",
|
||||
MaxCPURequest: "300m",
|
||||
DefaultCPULimit: "500m",
|
||||
DefaultMemoryRequest: "0.7Gi",
|
||||
MaxMemoryRequest: "1.0Gi",
|
||||
DefaultMemoryLimit: "1.3Gi",
|
||||
},
|
||||
SidecarImages: map[string]string{
|
||||
|
|
@ -2415,18 +2419,21 @@ func TestGenerateResourceRequirements(t *testing.T) {
|
|||
roleLabel := "spilo-role"
|
||||
sidecarName := "postgres-exporter"
|
||||
|
||||
// two test cases will call enforceMinResourceLimits which emits 2 events per call
|
||||
// hence bufferSize of 4 is required
|
||||
newEventRecorder := record.NewFakeRecorder(4)
|
||||
// enforceMinResourceLimits will be called 2 twice emitting 4 events (2x cpu, 2x memory raise)
|
||||
// enforceMaxResourceRequests will be called 4 times emitting 6 events (2x cpu, 4x memory cap)
|
||||
// hence event bufferSize of 10 is required
|
||||
newEventRecorder := record.NewFakeRecorder(10)
|
||||
|
||||
configResources := config.Resources{
|
||||
ClusterLabels: map[string]string{"application": "spilo"},
|
||||
ClusterNameLabel: clusterNameLabel,
|
||||
DefaultCPURequest: "100m",
|
||||
DefaultCPULimit: "1",
|
||||
MaxCPURequest: "500m",
|
||||
MinCPULimit: "250m",
|
||||
DefaultMemoryRequest: "100Mi",
|
||||
DefaultMemoryLimit: "500Mi",
|
||||
MinCPULimit: "250m",
|
||||
MaxMemoryRequest: "1Gi",
|
||||
MinMemoryLimit: "250Mi",
|
||||
PodRoleLabel: roleLabel,
|
||||
}
|
||||
|
|
@ -2558,6 +2565,10 @@ func TestGenerateResourceRequirements(t *testing.T) {
|
|||
Namespace: namespace,
|
||||
},
|
||||
Spec: acidv1.PostgresSpec{
|
||||
Resources: &acidv1.Resources{
|
||||
ResourceRequests: acidv1.ResourceDescription{Memory: "200Mi"},
|
||||
ResourceLimits: acidv1.ResourceDescription{Memory: "300Mi"},
|
||||
},
|
||||
TeamID: "acid",
|
||||
Volume: acidv1.Volume{
|
||||
Size: "1G",
|
||||
|
|
@ -2565,8 +2576,8 @@ func TestGenerateResourceRequirements(t *testing.T) {
|
|||
},
|
||||
},
|
||||
expectedResources: acidv1.Resources{
|
||||
ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "500Mi"},
|
||||
ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "500Mi"},
|
||||
ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "300Mi"},
|
||||
ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "300Mi"},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
@ -2691,6 +2702,62 @@ func TestGenerateResourceRequirements(t *testing.T) {
|
|||
ResourceLimits: acidv1.ResourceDescription{CPU: "100m", Memory: "100Mi"},
|
||||
},
|
||||
},
|
||||
{
|
||||
subTest: "test enforcing max cpu and memory requests",
|
||||
config: config.Config{
|
||||
Resources: configResources,
|
||||
PodManagementPolicy: "ordered_ready",
|
||||
SetMemoryRequestToLimit: false,
|
||||
},
|
||||
pgSpec: acidv1.Postgresql{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: clusterName,
|
||||
Namespace: namespace,
|
||||
},
|
||||
Spec: acidv1.PostgresSpec{
|
||||
Resources: &acidv1.Resources{
|
||||
ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "2Gi"},
|
||||
ResourceLimits: acidv1.ResourceDescription{CPU: "2", Memory: "4Gi"},
|
||||
},
|
||||
TeamID: "acid",
|
||||
Volume: acidv1.Volume{
|
||||
Size: "1G",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedResources: acidv1.Resources{
|
||||
ResourceRequests: acidv1.ResourceDescription{CPU: "500m", Memory: "1Gi"},
|
||||
ResourceLimits: acidv1.ResourceDescription{CPU: "2", Memory: "4Gi"},
|
||||
},
|
||||
},
|
||||
{
|
||||
subTest: "test SetMemoryRequestToLimit flag but raise only until max memory request",
|
||||
config: config.Config{
|
||||
Resources: configResources,
|
||||
PodManagementPolicy: "ordered_ready",
|
||||
SetMemoryRequestToLimit: true,
|
||||
},
|
||||
pgSpec: acidv1.Postgresql{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: clusterName,
|
||||
Namespace: namespace,
|
||||
},
|
||||
Spec: acidv1.PostgresSpec{
|
||||
Resources: &acidv1.Resources{
|
||||
ResourceRequests: acidv1.ResourceDescription{Memory: "500Mi"},
|
||||
ResourceLimits: acidv1.ResourceDescription{Memory: "2Gi"},
|
||||
},
|
||||
TeamID: "acid",
|
||||
Volume: acidv1.Volume{
|
||||
Size: "1G",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedResources: acidv1.Resources{
|
||||
ResourceRequests: acidv1.ResourceDescription{CPU: "100m", Memory: "1Gi"},
|
||||
ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "2Gi"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
|
|
|||
|
|
@ -129,6 +129,8 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
|
|||
result.DefaultMemoryLimit = util.Coalesce(fromCRD.PostgresPodResources.DefaultMemoryLimit, "500Mi")
|
||||
result.MinCPULimit = util.Coalesce(fromCRD.PostgresPodResources.MinCPULimit, "250m")
|
||||
result.MinMemoryLimit = util.Coalesce(fromCRD.PostgresPodResources.MinMemoryLimit, "250Mi")
|
||||
result.MaxCPURequest = fromCRD.PostgresPodResources.MaxCPURequest
|
||||
result.MaxMemoryRequest = fromCRD.PostgresPodResources.MaxMemoryRequest
|
||||
|
||||
// timeout config
|
||||
result.ResourceCheckInterval = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.ResourceCheckInterval), "3s")
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ type Resources struct {
|
|||
DefaultMemoryLimit string `name:"default_memory_limit" default:"500Mi"`
|
||||
MinCPULimit string `name:"min_cpu_limit" default:"250m"`
|
||||
MinMemoryLimit string `name:"min_memory_limit" default:"250Mi"`
|
||||
MaxCPURequest string `name:"max_cpu_request"`
|
||||
MaxMemoryRequest string `name:"max_memory_request"`
|
||||
PodEnvironmentConfigMap spec.NamespacedName `name:"pod_environment_configmap"`
|
||||
PodEnvironmentSecret string `name:"pod_environment_secret"`
|
||||
NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""`
|
||||
|
|
|
|||
|
|
@ -367,3 +367,21 @@ func IsSmallerQuantity(requestStr, limitStr string) (bool, error) {
|
|||
|
||||
return request.Cmp(limit) == -1, nil
|
||||
}
|
||||
|
||||
func MinResource(maxRequestStr, requestStr string) (resource.Quantity, error) {
|
||||
|
||||
isSmaller, err := IsSmallerQuantity(maxRequestStr, requestStr)
|
||||
if isSmaller && err == nil {
|
||||
maxRequest, err := resource.ParseQuantity(maxRequestStr)
|
||||
if err != nil {
|
||||
return maxRequest, err
|
||||
}
|
||||
return maxRequest, nil
|
||||
}
|
||||
|
||||
request, err := resource.ParseQuantity(requestStr)
|
||||
if err != nil {
|
||||
return request, err
|
||||
}
|
||||
return request, nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue