Enforce minimum cpu and memory limits (#731)

* add validation for PG resources and volume size
* check resource requests also on UPDATE and SYNC + update docs
* if cluster was running don't error on sync
This commit is contained in:
Felix Kunde 2019-12-12 16:43:55 +01:00 committed by GitHub
parent 0628439256
commit cd110aabf4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 117 additions and 18 deletions

View File

@ -30,7 +30,7 @@ spec:
databases:
foo: zalando
postgresql:
version: "10"
version: "11"
```
Once you cloned the Postgres Operator [repository](https://github.com/zalando/postgres-operator)
@ -40,6 +40,9 @@ you can find this example also in the manifests folder:
kubectl create -f manifests/minimal-postgres-manifest.yaml
```
Note, that the minimum volume size to run the `postgresql` resource on Elastic
Block Storage (EBS) is `1Gi`.
## Watch pods being created
```bash
@ -182,6 +185,32 @@ See [infrastructure roles secret](../manifests/infrastructure-roles.yaml)
and [infrastructure roles configmap](../manifests/infrastructure-roles-configmap.yaml)
for the examples.
## Resource definition
The compute resources to be used for the Postgres containers in the pods can be
specified in the postgresql cluster manifest.
```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
name: acid-minimal-cluster
spec:
resources:
requests:
cpu: 10m
memory: 100Mi
limits:
cpu: 300m
memory: 300Mi
```
The minimum limit to properly run the `postgresql` resource is `256m` for `cpu`
and `256Mi` for `memory`. If a lower value is set in the manifest the operator
will cancel ADD or UPDATE events on this resource with an error. If no
resources are defined in the manifest the operator will obtain the configured
[default requests](reference/operator_parameters.md#kubernetes-resource-requests).
## Use taints and tolerations for dedicated PostgreSQL nodes
To ensure Postgres pods are running on nodes without any other application pods,
@ -384,7 +413,7 @@ specified but globally disabled in the configuration. The
## Increase volume size
PostgreSQL operator supports statefulset volume resize if you're using the
Postgres operator supports statefulset volume resize if you're using the
operator on top of AWS. For that you need to change the size field of the
volume description in the cluster manifest and apply the change:

View File

@ -9,7 +9,7 @@ spec:
size: 1Gi
numberOfInstances: 1
postgresql:
version: "10"
version: "11"
# Make this a standby cluster and provide the s3 bucket path of source cluster for continuous streaming.
standby:
s3_wal_path: "s3://path/to/bucket/containing/wal/of/source/cluster/"

View File

@ -227,6 +227,10 @@ func (c *Cluster) Create() error {
c.setStatus(acidv1.ClusterStatusCreating)
if err = c.validateResources(&c.Spec); err != nil {
return fmt.Errorf("insufficient resource limits specified: %v", err)
}
for _, role := range []PostgresRole{Master, Replica} {
if c.Endpoints[role] != nil {
@ -491,6 +495,44 @@ func compareResourcesAssumeFirstNotNil(a *v1.ResourceRequirements, b *v1.Resourc
}
func (c *Cluster) validateResources(spec *acidv1.PostgresSpec) error {
// setting limits too low can cause unnecessary evictions / OOM kills
const (
cpuMinLimit = "256m"
memoryMinLimit = "256Mi"
)
var (
isSmaller bool
err error
)
cpuLimit := spec.Resources.ResourceLimits.CPU
if cpuLimit != "" {
isSmaller, err = util.IsSmallerQuantity(cpuLimit, cpuMinLimit)
if err != nil {
return fmt.Errorf("error validating CPU limit: %v", err)
}
if isSmaller {
return fmt.Errorf("defined CPU limit %s is below required minimum %s to properly run postgresql resource", cpuLimit, cpuMinLimit)
}
}
memoryLimit := spec.Resources.ResourceLimits.Memory
if memoryLimit != "" {
isSmaller, err = util.IsSmallerQuantity(memoryLimit, memoryMinLimit)
if err != nil {
return fmt.Errorf("error validating memory limit: %v", err)
}
if isSmaller {
return fmt.Errorf("defined memory limit %s is below required minimum %s to properly run postgresql resource", memoryLimit, memoryMinLimit)
}
}
return nil
}
// Update changes Kubernetes objects according to the new specification. Unlike the sync case, the missing object
// (i.e. service) is treated as an error
// logical backup cron jobs are an exception: a user-initiated Update can enable a logical backup job
@ -501,6 +543,7 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
c.mu.Lock()
defer c.mu.Unlock()
oldStatus := c.Status
c.setStatus(acidv1.ClusterStatusUpdating)
c.setSpec(newSpec)
@ -512,6 +555,22 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
}
}()
if err := c.validateResources(&newSpec.Spec); err != nil {
err = fmt.Errorf("insufficient resource limits specified: %v", err)
// cancel update only when (already too low) pod resources were edited
// if cluster was successfully running before the update, continue but log a warning
isCPULimitSmaller, err2 := util.IsSmallerQuantity(newSpec.Spec.Resources.ResourceLimits.CPU, oldSpec.Spec.Resources.ResourceLimits.CPU)
isMemoryLimitSmaller, err3 := util.IsSmallerQuantity(newSpec.Spec.Resources.ResourceLimits.Memory, oldSpec.Spec.Resources.ResourceLimits.Memory)
if oldStatus.Running() && !isCPULimitSmaller && !isMemoryLimitSmaller && err2 == nil && err3 == nil {
c.logger.Warning(err)
} else {
updateFailed = true
return err
}
}
if oldSpec.Spec.PgVersion != newSpec.Spec.PgVersion { // PG versions comparison
c.logger.Warningf("postgresql version change(%q -> %q) has no effect", oldSpec.Spec.PgVersion, newSpec.Spec.PgVersion)
//we need that hack to generate statefulset with the old version

View File

@ -741,7 +741,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
limit = c.OpConfig.DefaultMemoryLimit
}
isSmaller, err := util.RequestIsSmallerThanLimit(request, limit)
isSmaller, err := util.IsSmallerQuantity(request, limit)
if err != nil {
return nil, err
}
@ -768,7 +768,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
limit = c.OpConfig.DefaultMemoryLimit
}
isSmaller, err := util.RequestIsSmallerThanLimit(sidecarRequest, sidecarLimit)
isSmaller, err := util.IsSmallerQuantity(sidecarRequest, sidecarLimit)
if err != nil {
return nil, err
}

View File

@ -23,6 +23,7 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
c.mu.Lock()
defer c.mu.Unlock()
oldStatus := c.Status
c.setSpec(newSpec)
defer func() {
@ -34,6 +35,16 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
}
}()
if err = c.validateResources(&c.Spec); err != nil {
err = fmt.Errorf("insufficient resource limits specified: %v", err)
if oldStatus.Running() {
c.logger.Warning(err)
err = nil
} else {
return err
}
}
if err = c.initUsers(); err != nil {
err = fmt.Errorf("could not init users: %v", err)
return err

View File

@ -111,7 +111,7 @@ func (c *Controller) initOperatorConfig() {
if c.opConfig.SetMemoryRequestToLimit {
isSmaller, err := util.RequestIsSmallerThanLimit(c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit)
isSmaller, err := util.IsSmallerQuantity(c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit)
if err != nil {
panic(err)
}
@ -120,7 +120,7 @@ func (c *Controller) initOperatorConfig() {
c.opConfig.DefaultMemoryRequest = c.opConfig.DefaultMemoryLimit
}
isSmaller, err = util.RequestIsSmallerThanLimit(c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit)
isSmaller, err = util.IsSmallerQuantity(c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit)
if err != nil {
panic(err)
}

View File

@ -141,17 +141,17 @@ func Coalesce(val, defaultVal string) string {
return val
}
// RequestIsSmallerThanLimit : ...
func RequestIsSmallerThanLimit(requestStr, limitStr string) (bool, error) {
// IsSmallerQuantity : checks if first resource is of a smaller quantity than the second
func IsSmallerQuantity(requestStr, limitStr string) (bool, error) {
request, err := resource.ParseQuantity(requestStr)
if err != nil {
return false, fmt.Errorf("could not parse memory request %v : %v", requestStr, err)
return false, fmt.Errorf("could not parse request %v : %v", requestStr, err)
}
limit, err2 := resource.ParseQuantity(limitStr)
if err2 != nil {
return false, fmt.Errorf("could not parse memory limit %v : %v", limitStr, err2)
return false, fmt.Errorf("could not parse limit %v : %v", limitStr, err2)
}
return request.Cmp(limit) == -1, nil

View File

@ -69,7 +69,7 @@ var substringMatch = []struct {
{regexp.MustCompile(`aaaa (\d+) bbbb`), "aaaa 123 bbbb", nil},
}
var requestIsSmallerThanLimitTests = []struct {
var requestIsSmallerQuantityTests = []struct {
request string
limit string
out bool
@ -155,14 +155,14 @@ func TestMapContains(t *testing.T) {
}
}
func TestRequestIsSmallerThanLimit(t *testing.T) {
for _, tt := range requestIsSmallerThanLimitTests {
res, err := RequestIsSmallerThanLimit(tt.request, tt.limit)
func TestIsSmallerQuantity(t *testing.T) {
for _, tt := range requestIsSmallerQuantityTests {
res, err := IsSmallerQuantity(tt.request, tt.limit)
if err != nil {
t.Errorf("RequestIsSmallerThanLimit returned unexpected error: %#v", err)
t.Errorf("IsSmallerQuantity returned unexpected error: %#v", err)
}
if res != tt.out {
t.Errorf("RequestIsSmallerThanLimit expected: %#v, got: %#v", tt.out, res)
t.Errorf("IsSmallerQuantity expected: %#v, got: %#v", tt.out, res)
}
}
}