From e6ce00050f84dd2d894da1e7718e54e80f9945f4 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Mon, 13 Jan 2020 18:23:04 +0100
Subject: [PATCH 1/5] reduce tracing of dumps and include Pg12 (#791)

---
 docker/logical-backup/Dockerfile |  3 ++-
 docker/logical-backup/dump.sh    | 13 ++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/docker/logical-backup/Dockerfile b/docker/logical-backup/Dockerfile
index 1da6f7386..94c524381 100644
--- a/docker/logical-backup/Dockerfile
+++ b/docker/logical-backup/Dockerfile
@@ -19,6 +19,7 @@ RUN apt-get update \
     && curl --silent https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
     && apt-get update \
     && apt-get install --no-install-recommends -y \
+        postgresql-client-12 \
         postgresql-client-11 \
         postgresql-client-10 \
         postgresql-client-9.6 \
@@ -28,6 +29,6 @@ RUN apt-get update \

 COPY dump.sh ./

-ENV PG_DIR=/usr/lib/postgresql/
+ENV PG_DIR=/usr/lib/postgresql

 ENTRYPOINT ["/dump.sh"]
diff --git a/docker/logical-backup/dump.sh b/docker/logical-backup/dump.sh
index 78217322b..673f09038 100755
--- a/docker/logical-backup/dump.sh
+++ b/docker/logical-backup/dump.sh
@@ -6,12 +6,10 @@ set -o nounset
 set -o pipefail
 IFS=$'\n\t'

-# make script trace visible via `kubectl logs`
-set -o xtrace
-
 ALL_DB_SIZE_QUERY="select sum(pg_database_size(datname)::numeric) from pg_database;"
 PG_BIN=$PG_DIR/$PG_VERSION/bin
 DUMP_SIZE_COEFF=5
+ERRORCOUNT=0

 TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
 K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1
@@ -42,9 +40,9 @@ function aws_upload {
     [[ ! -z "$EXPECTED_SIZE" ]] && args+=("--expected-size=$EXPECTED_SIZE")
     [[ ! -z "$LOGICAL_BACKUP_S3_ENDPOINT" ]] && args+=("--endpoint-url=$LOGICAL_BACKUP_S3_ENDPOINT")
-    [[ ! "$LOGICAL_BACKUP_S3_SSE" == "" ]] && args+=("--sse=$LOGICAL_BACKUP_S3_SSE")
+    [[ ! -z "$LOGICAL_BACKUP_S3_SSE" ]] && args+=("--sse=$LOGICAL_BACKUP_S3_SSE")

-    aws s3 cp - "$PATH_TO_BACKUP" "${args[@]//\'/}" --debug
+    aws s3 cp - "$PATH_TO_BACKUP" "${args[@]//\'/}"
 }

 function get_pods {
@@ -93,4 +91,9 @@ for search in "${search_strategy[@]}"; do

 done

+set -x
 dump | compress | aws_upload $(($(estimate_size) / DUMP_SIZE_COEFF))
+[[ ${PIPESTATUS[0]} != 0 || ${PIPESTATUS[1]} != 0 || ${PIPESTATUS[2]} != 0 ]] && (( ERRORCOUNT += 1 ))
+set +x
+
+exit $ERRORCOUNT

From 7fb163252c0f7477d1d432ceaabbc075c619236c Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 16 Jan 2020 10:47:34 +0100
Subject: [PATCH 2/5] standby clusters can only have 1 pod for now (#797)

---
 pkg/cluster/k8sres.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go
index c69c7a076..aed0c6e83 100644
--- a/pkg/cluster/k8sres.go
+++ b/pkg/cluster/k8sres.go
@@ -1051,6 +1051,7 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
 	/* Limit the max number of pods to one, if this is standby-cluster */
 	if spec.StandbyCluster != nil {
 		c.logger.Info("Standby cluster can have maximum of 1 pod")
+		min = 1
 		max = 1
 	}
 	if max >= 0 && newcur > max {

From fddaf0fb73731a6234b223596a4cb87eb33e5299 Mon Sep 17 00:00:00 2001
From: Jonathan Juares Beber
Date: Mon, 27 Jan 2020 14:43:32 +0100
Subject: [PATCH 3/5] Change error computation on JSON Unmarshal and create subtests on table test scenarios (#801)

* Change error computation on JSON Unmarshal

The [Unmarshal function][1] of the default encoding/json library returns
different errors for different Go versions.
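A minimal, self-contained sketch of the idea (with a hypothetical `Spec` type standing in for the operator's `PostgresSpec`): instead of hardcoding the expected message, compute it at test time with the same `json.Unmarshal` call the test exercises, so the expectation always matches the running Go version:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Spec is a hypothetical stand-in for the operator's PostgresSpec.
type Spec struct {
	TeamID string `json:"teamId"`
}

func main() {
	// teamId carries a number where a string is expected, so Unmarshal
	// fails; the exact wording of the error depends on the Go version.
	expected := json.Unmarshal([]byte(`{"teamId": 100}`), &Spec{}).Error()
	fmt.Println(expected)
}
```

Whatever message the running toolchain prints here is exactly what the test should expect.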
On Go 1.12, the version currently used on the CI system, it returns
`json: cannot unmarshal number into Go struct field PostgresSpec.teamId
of type string`. On Go 1.13.5 it returns `json: cannot unmarshal number
into Go struct field PostgresSpec.spec.teamId of type string`. The newer
version includes more details of the whole structure being unmarshalled.
This commit introduces the same error computation, but one level deeper
in the JSON structure, creating consistency across Go versions.

[1]: https://godoc.org/encoding/json#Unmarshal

* Create subtests on table test scenarios

The Run method of testing.T allows defining subtests, creating
hierarchical tests. It provides better visibility of tests in case of
failure. More details on https://golang.org/pkg/testing/.

This commit converts each test scenario in
pkg/apis/acid.zalan.do/v1/util_test.go to a subtest, providing better
visibility and an easier debugging experience when working with tests.
The following snippet shows an error during test execution with
subtests:

```
--- FAIL: TestUnmarshalMaintenanceWindow (0.00s)
    --- FAIL: TestUnmarshalMaintenanceWindow/expect_error_as_'From'_is_later_than_'To' (0.00s)
```

It also adds an `about` field to the test scenarios, describing each
test's purpose and/or its expected output. Where a description was
previously given as a comment, it was moved to the `about` field.
---
 pkg/apis/acid.zalan.do/v1/util_test.go | 479 ++++++++++++++-----------
 1 file changed, 263 insertions(+), 216 deletions(-)

diff --git a/pkg/apis/acid.zalan.do/v1/util_test.go b/pkg/apis/acid.zalan.do/v1/util_test.go
index fc068b322..a1e01825f 100644
--- a/pkg/apis/acid.zalan.do/v1/util_test.go
+++ b/pkg/apis/acid.zalan.do/v1/util_test.go
@@ -13,127 +13,139 @@ import (
 )

 var parseTimeTests = []struct {
-	in  string
-	out metav1.Time
-	err error
+	about string
+	in    string
+	out   metav1.Time
+	err   error
 }{
-	{"16:08", mustParseTime("16:08"), nil},
-	{"11:00", mustParseTime("11:00"), nil},
-	{"23:59", mustParseTime("23:59"), nil},
+	{"parse common time with minutes", "16:08", mustParseTime("16:08"), nil},
+	{"parse time with zeroed minutes", "11:00", mustParseTime("11:00"), nil},
+	{"parse corner case last minute of the day", "23:59", mustParseTime("23:59"), nil},

-	{"26:09", metav1.Now(), errors.New(`parsing time "26:09": hour out of range`)},
-	{"23:69", metav1.Now(), errors.New(`parsing time "23:69": minute out of range`)},
+	{"expect error as hour is out of range", "26:09", metav1.Now(), errors.New(`parsing time "26:09": hour out of range`)},
+	{"expect error as minute is out of range", "23:69", metav1.Now(), errors.New(`parsing time "23:69": minute out of range`)},
 }

 var parseWeekdayTests = []struct {
-	in  string
-	out time.Weekday
-	err error
+	about string
+	in    string
+	out   time.Weekday
+	err   error
 }{
-	{"Wed", time.Wednesday, nil},
-	{"Sunday", time.Weekday(0), errors.New("incorrect weekday")},
-	{"", time.Weekday(0), errors.New("incorrect weekday")},
+	{"parse common weekday", "Wed", time.Wednesday, nil},
+	{"expect error as weekday is invalid", "Sunday", time.Weekday(0), errors.New("incorrect weekday")},
+	{"expect error as weekday is empty", "", time.Weekday(0), errors.New("incorrect weekday")},
 }

 var clusterNames = []struct {
+	about       string
 	in          string
 	inTeam      string
 	clusterName string
 	err         error
 }{
-	{"acid-test", "acid", "test", nil},
-	{"test-my-name", "test", "my-name", nil},
-	{"my-team-another-test", "my-team", "another-test", nil},
-	{"------strange-team-cluster", "-----", "strange-team-cluster",
+	{"common team and cluster name", "acid-test", "acid", 
"test", nil}, + {"cluster name with hyphen", "test-my-name", "test", "my-name", nil}, + {"cluster and team name with hyphen", "my-team-another-test", "my-team", "another-test", nil}, + {"expect error as cluster name is just hyphens", "------strange-team-cluster", "-----", "strange-team-cluster", errors.New(`name must confirm to DNS-1035, regex used for validation is "^[a-z]([-a-z0-9]*[a-z0-9])?$"`)}, - {"fooobar-fooobarfooobarfooobarfooobarfooobarfooobarfooobarfooobar", "fooobar", "", + {"expect error as cluster name is too long", "fooobar-fooobarfooobarfooobarfooobarfooobarfooobarfooobarfooobar", "fooobar", "", errors.New("name cannot be longer than 58 characters")}, - {"acid-test", "test", "", errors.New("name must match {TEAM}-{NAME} format")}, - {"-test", "", "", errors.New("team name is empty")}, - {"-test", "-", "", errors.New("name must match {TEAM}-{NAME} format")}, - {"", "-", "", errors.New("cluster name must match {TEAM}-{NAME} format. Got cluster name '', team name '-'")}, - {"-", "-", "", errors.New("cluster name must match {TEAM}-{NAME} format. Got cluster name '-', team name '-'")}, + {"expect error as cluster name does not match {TEAM}-{NAME} format", "acid-test", "test", "", errors.New("name must match {TEAM}-{NAME} format")}, + {"expect error as team and cluster name are empty", "-test", "", "", errors.New("team name is empty")}, + {"expect error as cluster name is empty and team name is a hyphen", "-test", "-", "", errors.New("name must match {TEAM}-{NAME} format")}, + {"expect error as cluster name is empty, team name is a hyphen and cluster name is empty", "", "-", "", errors.New("cluster name must match {TEAM}-{NAME} format. Got cluster name '', team name '-'")}, + {"expect error as cluster and team name are hyphens", "-", "-", "", errors.New("cluster name must match {TEAM}-{NAME} format. Got cluster name '-', team name '-'")}, // user may specify the team part of the full cluster name differently from the team name returned by the Teams API // in the case the actual Teams API name is long enough, this will fail the check - {"foo-bar", "qwerty", "", errors.New("cluster name must match {TEAM}-{NAME} format. Got cluster name 'foo-bar', team name 'qwerty'")}, + {"expect error as team name does not match", "foo-bar", "qwerty", "", errors.New("cluster name must match {TEAM}-{NAME} format. 
Got cluster name 'foo-bar', team name 'qwerty'")},
 }

 var cloneClusterDescriptions = []struct {
-	in  *CloneDescription
-	err error
+	about string
+	in    *CloneDescription
+	err   error
 }{
-	{&CloneDescription{"foo+bar", "", "NotEmpty", "", "", "", "", nil}, nil},
-	{&CloneDescription{"foo+bar", "", "", "", "", "", "", nil},
+	{"cluster name invalid but EndTimestamp is not empty", &CloneDescription{"foo+bar", "", "NotEmpty", "", "", "", "", nil}, nil},
+	{"expect error as cluster name does not match DNS-1035", &CloneDescription{"foo+bar", "", "", "", "", "", "", nil},
 		errors.New(`clone cluster name must confirm to DNS-1035, regex used for validation is "^[a-z]([-a-z0-9]*[a-z0-9])?$"`)},
-	{&CloneDescription{"foobar123456789012345678901234567890123456789012345678901234567890", "", "", "", "", "", "", nil},
+	{"expect error as cluster name is too long", &CloneDescription{"foobar123456789012345678901234567890123456789012345678901234567890", "", "", "", "", "", "", nil},
 		errors.New("clone cluster name must be no longer than 63 characters")},
-	{&CloneDescription{"foobar", "", "", "", "", "", "", nil}, nil},
+	{"common cluster name", &CloneDescription{"foobar", "", "", "", "", "", "", nil}, nil},
 }

 var maintenanceWindows = []struct {
-	in  []byte
-	out MaintenanceWindow
-	err error
-}{{[]byte(`"Tue:10:00-20:00"`),
+	about string
+	in    []byte
+	out   MaintenanceWindow
+	err   error
+}{{"regular scenario",
+	[]byte(`"Tue:10:00-20:00"`),
 	MaintenanceWindow{
 		Everyday:  false,
 		Weekday:   time.Tuesday,
 		StartTime: mustParseTime("10:00"),
 		EndTime:   mustParseTime("20:00"),
 	}, nil},
-	{[]byte(`"Mon:10:00-10:00"`),
+	{"starts and ends at the same time",
+		[]byte(`"Mon:10:00-10:00"`),
 		MaintenanceWindow{
 			Everyday:  false,
 			Weekday:   time.Monday,
 			StartTime: mustParseTime("10:00"),
 			EndTime:   mustParseTime("10:00"),
 		}, nil},
-	{[]byte(`"Sun:00:00-00:00"`),
+	{"starts and ends at 00:00 on Sunday",
+		[]byte(`"Sun:00:00-00:00"`),
 		MaintenanceWindow{
 			Everyday:  false,
 			Weekday:   time.Sunday,
 			StartTime: mustParseTime("00:00"),
 			EndTime:   mustParseTime("00:00"),
 		}, nil},
-	{[]byte(`"01:00-10:00"`),
+	{"without day indication should default to everyday",
+		[]byte(`"01:00-10:00"`),
 		MaintenanceWindow{
 			Everyday:  true,
 			Weekday:   time.Sunday,
 			StartTime: mustParseTime("01:00"),
 			EndTime:   mustParseTime("10:00"),
 		}, nil},
-	{[]byte(`"Mon:12:00-11:00"`), MaintenanceWindow{}, errors.New(`'From' time must be prior to the 'To' time`)},
-	{[]byte(`"Wed:33:00-00:00"`), MaintenanceWindow{}, errors.New(`could not parse start time: parsing time "33:00": hour out of range`)},
-	{[]byte(`"Wed:00:00-26:00"`), MaintenanceWindow{}, errors.New(`could not parse end time: parsing time "26:00": hour out of range`)},
-	{[]byte(`"Sunday:00:00-00:00"`), MaintenanceWindow{}, errors.New(`could not parse weekday: incorrect weekday`)},
-	{[]byte(`":00:00-10:00"`), MaintenanceWindow{}, errors.New(`could not parse weekday: incorrect weekday`)},
-	{[]byte(`"Mon:10:00-00:00"`), MaintenanceWindow{}, errors.New(`'From' time must be prior to the 'To' time`)},
-	{[]byte(`"Mon:00:00:00-10:00:00"`), MaintenanceWindow{}, errors.New(`incorrect maintenance window format`)},
-	{[]byte(`"Mon:00:00"`), MaintenanceWindow{}, errors.New("incorrect maintenance window format")},
-	{[]byte(`"Mon:00:00-00:00:00"`), MaintenanceWindow{}, errors.New("could not parse end time: incorrect time format")}}
+	{"expect error as 'From' is later than 'To'", []byte(`"Mon:12:00-11:00"`), MaintenanceWindow{}, errors.New(`'From' time must be prior to the 'To' time`)},
+	{"expect error as 'From' is later than 'To' with 00:00 
corner case", []byte(`"Mon:10:00-00:00"`), MaintenanceWindow{}, errors.New(`'From' time must be prior to the 'To' time`)}, + {"expect error as 'From' time is not valid", []byte(`"Wed:33:00-00:00"`), MaintenanceWindow{}, errors.New(`could not parse start time: parsing time "33:00": hour out of range`)}, + {"expect error as 'To' time is not valid", []byte(`"Wed:00:00-26:00"`), MaintenanceWindow{}, errors.New(`could not parse end time: parsing time "26:00": hour out of range`)}, + {"expect error as weekday is not valid", []byte(`"Sunday:00:00-00:00"`), MaintenanceWindow{}, errors.New(`could not parse weekday: incorrect weekday`)}, + {"expect error as weekday is empty", []byte(`":00:00-10:00"`), MaintenanceWindow{}, errors.New(`could not parse weekday: incorrect weekday`)}, + {"expect error as maintenance window set seconds", []byte(`"Mon:00:00:00-10:00:00"`), MaintenanceWindow{}, errors.New(`incorrect maintenance window format`)}, + {"expect error as 'To' time set seconds", []byte(`"Mon:00:00-00:00:00"`), MaintenanceWindow{}, errors.New("could not parse end time: incorrect time format")}, + {"expect error as 'To' time is missing", []byte(`"Mon:00:00"`), MaintenanceWindow{}, errors.New("incorrect maintenance window format")}} var postgresStatus = []struct { - in []byte - out PostgresStatus - err error + about string + in []byte + out PostgresStatus + err error }{ - {[]byte(`{"PostgresClusterStatus":"Running"}`), + {"cluster running", []byte(`{"PostgresClusterStatus":"Running"}`), PostgresStatus{PostgresClusterStatus: ClusterStatusRunning}, nil}, - {[]byte(`{"PostgresClusterStatus":""}`), + {"cluster status undefined", []byte(`{"PostgresClusterStatus":""}`), PostgresStatus{PostgresClusterStatus: ClusterStatusUnknown}, nil}, - {[]byte(`"Running"`), + {"cluster running without full JSON format", []byte(`"Running"`), PostgresStatus{PostgresClusterStatus: ClusterStatusRunning}, nil}, - {[]byte(`""`), + {"cluster status empty", []byte(`""`), PostgresStatus{PostgresClusterStatus: ClusterStatusUnknown}, nil}} +var tmp postgresqlCopy var unmarshalCluster = []struct { + about string in []byte out Postgresql marshal []byte err error }{ - // example with simple status field { + about: "example with simple status field", in: []byte(`{ "kind": "Postgresql","apiVersion": "acid.zalan.do/v1", "metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": 100}}`), @@ -147,12 +159,14 @@ var unmarshalCluster = []struct { }, Status: PostgresStatus{PostgresClusterStatus: ClusterStatusInvalid}, // This error message can vary between Go versions, so compute it for the current version. 
- Error: json.Unmarshal([]byte(`{"teamId": 0}`), &PostgresSpec{}).Error(), + Error: json.Unmarshal([]byte(`{ + "kind": "Postgresql","apiVersion": "acid.zalan.do/v1", + "metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": 100}}`), &tmp).Error(), }, marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), err: nil}, - // example with /status subresource { + about: "example with /status subresource", in: []byte(`{ "kind": "Postgresql","apiVersion": "acid.zalan.do/v1", "metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": 100}}`), @@ -166,13 +180,14 @@ var unmarshalCluster = []struct { }, Status: PostgresStatus{PostgresClusterStatus: ClusterStatusInvalid}, // This error message can vary between Go versions, so compute it for the current version. - Error: json.Unmarshal([]byte(`{"teamId": 0}`), &PostgresSpec{}).Error(), + Error: json.Unmarshal([]byte(`{ + "kind": "Postgresql","apiVersion": "acid.zalan.do/v1", + "metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": 100}}`), &tmp).Error(), }, marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":{"PostgresClusterStatus":"Invalid"}}`), err: nil}, - // example with detailed input manifest - // and deprecated pod_priority_class_name -> podPriorityClassName { + about: "example with detailed input manifest and deprecated pod_priority_class_name -> podPriorityClassName", in: []byte(`{ "kind": "Postgresql", "apiVersion": "acid.zalan.do/v1", @@ -321,9 +336,9 @@ var unmarshalCluster = []struct { }, marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"9.6","parameters":{"log_statement":"all","max_connections":"10","shared_buffers":"32MB"}},"pod_priority_class_name":"spilo-pod-priority","volume":{"size":"5Gi","storageClass":"SSD", "subPath": "subdir"},"enableShmVolume":false,"patroni":{"initdb":{"data-checksums":"true","encoding":"UTF8","locale":"en_US.UTF-8"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"],"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432,"slots":{"permanent_logical_1":{"database":"foo","plugin":"pgoutput","type":"logical"}}},"resources":{"requests":{"cpu":"10m","memory":"50Mi"},"limits":{"cpu":"300m","memory":"3000Mi"}},"teamId":"acid","allowedSourceRanges":["127.0.0.1/32"],"numberOfInstances":2,"users":{"zalando":["superuser","createdb"]},"maintenanceWindows":["Mon:01:00-06:00","Sat:00:00-04:00","05:00-05:15"],"clone":{"cluster":"acid-batman"}},"status":{"PostgresClusterStatus":""}}`), err: nil}, - // example with 
teamId set in input { - in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "teapot-testcluster1"}, "spec": {"teamId": "acid"}}`), + about: "example with teamId set in input", + in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "teapot-testcluster1"}, "spec": {"teamId": "acid"}}`), out: Postgresql{ TypeMeta: metav1.TypeMeta{ Kind: "Postgresql", @@ -338,9 +353,9 @@ var unmarshalCluster = []struct { }, marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"teapot-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null} ,"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":{"PostgresClusterStatus":"Invalid"}}`), err: nil}, - // clone example { - in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": "acid", "clone": {"cluster": "team-batman"}}}`), + about: "example with clone", + in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": "acid", "clone": {"cluster": "team-batman"}}}`), out: Postgresql{ TypeMeta: metav1.TypeMeta{ Kind: "Postgresql", @@ -360,9 +375,9 @@ var unmarshalCluster = []struct { }, marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{"cluster":"team-batman"}},"status":{"PostgresClusterStatus":""}}`), err: nil}, - // standby example { - in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": "acid", "standby": {"s3_wal_path": "s3://custom/path/to/bucket/"}}}`), + about: "standby example", + in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": "acid", "standby": {"s3_wal_path": "s3://custom/path/to/bucket/"}}}`), out: Postgresql{ TypeMeta: metav1.TypeMeta{ Kind: "Postgresql", @@ -382,24 +397,28 @@ var unmarshalCluster = []struct { }, marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"standby":{"s3_wal_path":"s3://custom/path/to/bucket/"}},"status":{"PostgresClusterStatus":""}}`), err: nil}, - // erroneous examples { + about: "expect error on malformatted JSON", in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1"`), out: Postgresql{}, marshal: 
[]byte{}, err: errors.New("unexpected end of JSON input")}, { + about: "expect error on JSON with field's value malformatted", in: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster","creationTimestamp":qaz},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0,"slots":null},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":{"PostgresClusterStatus":"Invalid"}}`), out: Postgresql{}, marshal: []byte{}, - err: errors.New("invalid character 'q' looking for beginning of value")}} + err: errors.New("invalid character 'q' looking for beginning of value"), + }, +} var postgresqlList = []struct { - in []byte - out PostgresqlList - err error + about string + in []byte + out PostgresqlList + err error }{ - {[]byte(`{"apiVersion":"v1","items":[{"apiVersion":"acid.zalan.do/v1","kind":"Postgresql","metadata":{"labels":{"team":"acid"},"name":"acid-testcluster42","namespace":"default","resourceVersion":"30446957","selfLink":"/apis/acid.zalan.do/v1/namespaces/default/postgresqls/acid-testcluster42","uid":"857cd208-33dc-11e7-b20a-0699041e4b03"},"spec":{"allowedSourceRanges":["185.85.220.0/22"],"numberOfInstances":1,"postgresql":{"version":"9.6"},"teamId":"acid","volume":{"size":"10Gi"}},"status":{"PostgresClusterStatus":"Running"}}],"kind":"List","metadata":{},"resourceVersion":"","selfLink":""}`), + {"expect success", []byte(`{"apiVersion":"v1","items":[{"apiVersion":"acid.zalan.do/v1","kind":"Postgresql","metadata":{"labels":{"team":"acid"},"name":"acid-testcluster42","namespace":"default","resourceVersion":"30446957","selfLink":"/apis/acid.zalan.do/v1/namespaces/default/postgresqls/acid-testcluster42","uid":"857cd208-33dc-11e7-b20a-0699041e4b03"},"spec":{"allowedSourceRanges":["185.85.220.0/22"],"numberOfInstances":1,"postgresql":{"version":"9.6"},"teamId":"acid","volume":{"size":"10Gi"}},"status":{"PostgresClusterStatus":"Running"}}],"kind":"List","metadata":{},"resourceVersion":"","selfLink":""}`), PostgresqlList{ TypeMeta: metav1.TypeMeta{ Kind: "List", @@ -433,15 +452,17 @@ var postgresqlList = []struct { }}, }, nil}, - {[]byte(`{"apiVersion":"v1","items":[{"apiVersion":"acid.zalan.do/v1","kind":"Postgresql","metadata":{"labels":{"team":"acid"},"name":"acid-testcluster42","namespace"`), + {"expect error on malformatted JSON", []byte(`{"apiVersion":"v1","items":[{"apiVersion":"acid.zalan.do/v1","kind":"Postgresql","metadata":{"labels":{"team":"acid"},"name":"acid-testcluster42","namespace"`), PostgresqlList{}, errors.New("unexpected end of JSON input")}} var annotations = []struct { + about string in []byte annotations map[string]string err error }{{ + about: "common annotations", in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"podAnnotations": {"foo": "bar"},"teamId": "acid", "clone": {"cluster": "team-batman"}}}`), annotations: map[string]string{"foo": "bar"}, err: nil}, @@ -458,230 +479,256 @@ func mustParseTime(s string) metav1.Time { func TestParseTime(t *testing.T) { for _, tt := range parseTimeTests { - aTime, err := parseTime(tt.in) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("ParseTime expected error: %v, got: %v", tt.err, err) + t.Run(tt.about, func(t *testing.T) { + 
aTime, err := parseTime(tt.in) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("ParseTime expected error: %v, got: %v", tt.err, err) + } + return + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } - continue - } else if tt.err != nil { - t.Errorf("Expected error: %v", tt.err) - } - if aTime != tt.out { - t.Errorf("Expected time: %v, got: %v", tt.out, aTime) - } + if aTime != tt.out { + t.Errorf("Expected time: %v, got: %v", tt.out, aTime) + } + }) } } func TestWeekdayTime(t *testing.T) { for _, tt := range parseWeekdayTests { - aTime, err := parseWeekday(tt.in) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("ParseWeekday expected error: %v, got: %v", tt.err, err) + t.Run(tt.about, func(t *testing.T) { + aTime, err := parseWeekday(tt.in) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("ParseWeekday expected error: %v, got: %v", tt.err, err) + } + return + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } - continue - } else if tt.err != nil { - t.Errorf("Expected error: %v", tt.err) - } - if aTime != tt.out { - t.Errorf("Expected weekday: %v, got: %v", tt.out, aTime) - } + if aTime != tt.out { + t.Errorf("Expected weekday: %v, got: %v", tt.out, aTime) + } + }) } } func TestClusterAnnotations(t *testing.T) { for _, tt := range annotations { - var cluster Postgresql - err := cluster.UnmarshalJSON(tt.in) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("Unable to marshal cluster with annotations: expected %v got %v", tt.err, err) + t.Run(tt.about, func(t *testing.T) { + var cluster Postgresql + err := cluster.UnmarshalJSON(tt.in) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("Unable to marshal cluster with annotations: expected %v got %v", tt.err, err) + } + return } - continue - } - for k, v := range cluster.Spec.PodAnnotations { - found, expected := v, tt.annotations[k] - if found != expected { - t.Errorf("Didn't find correct value for key %v in for podAnnotations: Expected %v found %v", k, expected, found) + for k, v := range cluster.Spec.PodAnnotations { + found, expected := v, tt.annotations[k] + if found != expected { + t.Errorf("Didn't find correct value for key %v in for podAnnotations: Expected %v found %v", k, expected, found) + } } - } + }) } } func TestClusterName(t *testing.T) { for _, tt := range clusterNames { - name, err := extractClusterName(tt.in, tt.inTeam) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("extractClusterName expected error: %v, got: %v", tt.err, err) + t.Run(tt.about, func(t *testing.T) { + name, err := extractClusterName(tt.in, tt.inTeam) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("extractClusterName expected error: %v, got: %v", tt.err, err) + } + return + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } - continue - } else if tt.err != nil { - t.Errorf("Expected error: %v", tt.err) - } - if name != tt.clusterName { - t.Errorf("Expected cluserName: %q, got: %q", tt.clusterName, name) - } + if name != tt.clusterName { + t.Errorf("Expected cluserName: %q, got: %q", tt.clusterName, name) + } + }) } } func TestCloneClusterDescription(t *testing.T) { for _, tt := range cloneClusterDescriptions { - if err := validateCloneClusterDescription(tt.in); err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("testCloneClusterDescription 
expected error: %v, got: %v", tt.err, err) + t.Run(tt.about, func(t *testing.T) { + if err := validateCloneClusterDescription(tt.in); err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("testCloneClusterDescription expected error: %v, got: %v", tt.err, err) + } + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } - } else if tt.err != nil { - t.Errorf("Expected error: %v", tt.err) - } + }) } } func TestUnmarshalMaintenanceWindow(t *testing.T) { for _, tt := range maintenanceWindows { - var m MaintenanceWindow - err := m.UnmarshalJSON(tt.in) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("MaintenanceWindow unmarshal expected error: %v, got %v", tt.err, err) + t.Run(tt.about, func(t *testing.T) { + var m MaintenanceWindow + err := m.UnmarshalJSON(tt.in) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("MaintenanceWindow unmarshal expected error: %v, got %v", tt.err, err) + } + return + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } - continue - } else if tt.err != nil { - t.Errorf("Expected error: %v", tt.err) - } - if !reflect.DeepEqual(m, tt.out) { - t.Errorf("Expected maintenance window: %#v, got: %#v", tt.out, m) - } + if !reflect.DeepEqual(m, tt.out) { + t.Errorf("Expected maintenance window: %#v, got: %#v", tt.out, m) + } + }) } } func TestMarshalMaintenanceWindow(t *testing.T) { for _, tt := range maintenanceWindows { - if tt.err != nil { - continue - } + t.Run(tt.about, func(t *testing.T) { + if tt.err != nil { + return + } - s, err := tt.out.MarshalJSON() - if err != nil { - t.Errorf("Marshal Error: %v", err) - } + s, err := tt.out.MarshalJSON() + if err != nil { + t.Errorf("Marshal Error: %v", err) + } - if !bytes.Equal(s, tt.in) { - t.Errorf("Expected Marshal: %q, got: %q", string(tt.in), string(s)) - } + if !bytes.Equal(s, tt.in) { + t.Errorf("Expected Marshal: %q, got: %q", string(tt.in), string(s)) + } + }) } } func TestUnmarshalPostgresStatus(t *testing.T) { for _, tt := range postgresStatus { - var ps PostgresStatus - err := ps.UnmarshalJSON(tt.in) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("CR status unmarshal expected error: %v, got %v", tt.err, err) - } - continue - //} else if tt.err != nil { - //t.Errorf("Expected error: %v", tt.err) - } + t.Run(tt.about, func(t *testing.T) { - if !reflect.DeepEqual(ps, tt.out) { - t.Errorf("Expected status: %#v, got: %#v", tt.out, ps) - } + var ps PostgresStatus + err := ps.UnmarshalJSON(tt.in) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("CR status unmarshal expected error: %v, got %v", tt.err, err) + } + return + } + + if !reflect.DeepEqual(ps, tt.out) { + t.Errorf("Expected status: %#v, got: %#v", tt.out, ps) + } + }) } } func TestPostgresUnmarshal(t *testing.T) { for _, tt := range unmarshalCluster { - var cluster Postgresql - err := cluster.UnmarshalJSON(tt.in) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("Unmarshal expected error: %v, got: %v", tt.err, err) + t.Run(tt.about, func(t *testing.T) { + var cluster Postgresql + err := cluster.UnmarshalJSON(tt.in) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("Unmarshal expected error: %v, got: %v", tt.err, err) + } + return + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } - continue - } else if tt.err != nil { - t.Errorf("Expected error: %v", tt.err) - } - if 
!reflect.DeepEqual(cluster, tt.out) { - t.Errorf("Expected Postgresql: %#v, got %#v", tt.out, cluster) - } + if !reflect.DeepEqual(cluster, tt.out) { + t.Errorf("Expected Postgresql: %#v, got %#v", tt.out, cluster) + } + }) } } func TestMarshal(t *testing.T) { for _, tt := range unmarshalCluster { - if tt.err != nil { - continue - } + t.Run(tt.about, func(t *testing.T) { - // Unmarshal and marshal example to capture api changes - var cluster Postgresql - err := cluster.UnmarshalJSON(tt.marshal) - if err != nil { - if tt.err == nil || err.Error() != tt.err.Error() { - t.Errorf("Backwards compatibility unmarshal expected error: %v, got: %v", tt.err, err) + if tt.err != nil { + return } - continue - } - expected, err := json.Marshal(cluster) - if err != nil { - t.Errorf("Backwards compatibility marshal error: %v", err) - } - m, err := json.Marshal(tt.out) - if err != nil { - t.Errorf("Marshal error: %v", err) - } - if !bytes.Equal(m, expected) { - t.Errorf("Marshal Postgresql \nexpected: %q, \ngot: %q", string(expected), string(m)) - } + // Unmarshal and marshal example to capture api changes + var cluster Postgresql + err := cluster.UnmarshalJSON(tt.marshal) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("Backwards compatibility unmarshal expected error: %v, got: %v", tt.err, err) + } + return + } + expected, err := json.Marshal(cluster) + if err != nil { + t.Errorf("Backwards compatibility marshal error: %v", err) + } + + m, err := json.Marshal(tt.out) + if err != nil { + t.Errorf("Marshal error: %v", err) + } + if !bytes.Equal(m, expected) { + t.Errorf("Marshal Postgresql \nexpected: %q, \ngot: %q", string(expected), string(m)) + } + }) } } func TestPostgresMeta(t *testing.T) { for _, tt := range unmarshalCluster { - if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { - t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot: %v", tt.out.TypeMeta, a) - } + t.Run(tt.about, func(t *testing.T) { - if a := tt.out.GetObjectMeta(); reflect.DeepEqual(a, tt.out.ObjectMeta) { - t.Errorf("GetObjectMeta \nexpected: %v, \ngot: %v", tt.out.ObjectMeta, a) - } + if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { + t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot: %v", tt.out.TypeMeta, a) + } + + if a := tt.out.GetObjectMeta(); reflect.DeepEqual(a, tt.out.ObjectMeta) { + t.Errorf("GetObjectMeta \nexpected: %v, \ngot: %v", tt.out.ObjectMeta, a) + } + }) } } func TestPostgresListMeta(t *testing.T) { for _, tt := range postgresqlList { - if tt.err != nil { - continue - } + t.Run(tt.about, func(t *testing.T) { + if tt.err != nil { + return + } - if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { - t.Errorf("GetObjectKindMeta expected: %v, got: %v", tt.out.TypeMeta, a) - } + if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { + t.Errorf("GetObjectKindMeta expected: %v, got: %v", tt.out.TypeMeta, a) + } - if a := tt.out.GetListMeta(); reflect.DeepEqual(a, tt.out.ListMeta) { - t.Errorf("GetObjectMeta expected: %v, got: %v", tt.out.ListMeta, a) - } + if a := tt.out.GetListMeta(); reflect.DeepEqual(a, tt.out.ListMeta) { + t.Errorf("GetObjectMeta expected: %v, got: %v", tt.out.ListMeta, a) + } - return + return + }) } } func TestPostgresqlClone(t *testing.T) { for _, tt := range unmarshalCluster { - cp := &tt.out - cp.Error = "" - clone := cp.Clone() - if !reflect.DeepEqual(clone, cp) { - t.Errorf("TestPostgresqlClone expected: \n%#v\n, got \n%#v", cp, clone) - } - + t.Run(tt.about, func(t *testing.T) { + cp := &tt.out + cp.Error = "" + clone := cp.Clone() + if 
!reflect.DeepEqual(clone, cp) { + t.Errorf("TestPostgresqlClone expected: \n%#v\n, got \n%#v", cp, clone) + } + }) } } From 1f0312a01451b544231089dd6b703b93e68bd2c5 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Mon, 3 Feb 2020 11:43:18 +0100 Subject: [PATCH 4/5] make minimum limits boundaries configurable (#808) * make minimum limits boundaries configurable * add e2e test --- .../crds/operatorconfigurations.yaml | 6 ++ charts/postgres-operator/values-crd.yaml | 26 ++++--- charts/postgres-operator/values.yaml | 24 ++++--- docs/reference/operator_parameters.md | 14 +++- docs/user.md | 10 +-- e2e/tests/test_e2e.py | 68 ++++++++++++++++--- manifests/complete-postgres-manifest.yaml | 4 +- manifests/configmap.yaml | 6 +- manifests/operatorconfiguration.crd.yaml | 6 ++ manifests/postgres-operator.yaml | 4 +- ...gresql-operator-default-configuration.yaml | 8 ++- pkg/apis/acid.zalan.do/v1/crds.go | 8 +++ .../v1/operator_configuration_type.go | 2 + pkg/cluster/cluster.go | 56 +++++++-------- pkg/cluster/sync.go | 16 ++--- pkg/controller/operator_config.go | 2 + pkg/util/config/config.go | 8 ++- 17 files changed, 175 insertions(+), 93 deletions(-) diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index c97e246ab..52d03df9c 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -179,6 +179,12 @@ spec: default_memory_request: type: string pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' + min_cpu_limit: + type: string + pattern: '^(\d+m|\d+(\.\d{1,3})?)$' + min_memory_limit: + type: string + pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' timeouts: type: object properties: diff --git a/charts/postgres-operator/values-crd.yaml b/charts/postgres-operator/values-crd.yaml index 5b67258fa..61cab3d06 100644 --- a/charts/postgres-operator/values-crd.yaml +++ b/charts/postgres-operator/values-crd.yaml @@ -115,13 +115,17 @@ configKubernetes: # configure resource requests for the Postgres pods configPostgresPodResources: # CPU limits for the postgres containers - default_cpu_limit: "3" - # cpu request value for the postgres containers + default_cpu_limit: "1" + # CPU request value for the postgres containers default_cpu_request: 100m # memory limits for the postgres containers - default_memory_limit: 1Gi + default_memory_limit: 500Mi # memory request value for the postgres containers default_memory_request: 100Mi + # hard CPU minimum required to properly run a Postgres cluster + min_cpu_limit: 250m + # hard memory minimum required to properly run a Postgres cluster + min_memory_limit: 250Mi # timeouts related to some operator actions configTimeouts: @@ -251,7 +255,7 @@ configScalyr: # CPU rquest value for the Scalyr sidecar scalyr_cpu_request: 100m # Memory limit value for the Scalyr sidecar - scalyr_memory_limit: 1Gi + scalyr_memory_limit: 500Mi # Memory request value for the Scalyr sidecar scalyr_memory_request: 50Mi @@ -272,13 +276,13 @@ serviceAccount: priorityClassName: "" -resources: {} - # limits: - # cpu: 100m - # memory: 300Mi - # requests: - # cpu: 100m - # memory: 300Mi +resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 100m + memory: 250Mi # Affinity for pod assignment # Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 60190f49a..deb506329 100644 --- 
a/charts/postgres-operator/values.yaml
+++ b/charts/postgres-operator/values.yaml
@@ -108,13 +108,17 @@ configKubernetes:
 # configure resource requests for the Postgres pods
 configPostgresPodResources:
   # CPU limits for the postgres containers
-  default_cpu_limit: "3"
-  # cpu request value for the postgres containers
+  default_cpu_limit: "1"
+  # CPU request value for the postgres containers
   default_cpu_request: 100m
   # memory limits for the postgres containers
-  default_memory_limit: 1Gi
+  default_memory_limit: 500Mi
   # memory request value for the postgres containers
   default_memory_request: 100Mi
+  # hard CPU minimum required to properly run a Postgres cluster
+  min_cpu_limit: 250m
+  # hard memory minimum required to properly run a Postgres cluster
+  min_memory_limit: 250Mi

 # timeouts related to some operator actions
 configTimeouts:
@@ -248,13 +252,13 @@ serviceAccount:

 priorityClassName: ""

-resources: {}
-  # limits:
-  #   cpu: 100m
-  #   memory: 300Mi
-  # requests:
-  #   cpu: 100m
-  #   memory: 300Mi
+resources:
+  limits:
+    cpu: 500m
+    memory: 500Mi
+  requests:
+    cpu: 100m
+    memory: 250Mi

 # Affinity for pod assignment
 # Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md
index 1055d89b6..d6dde8c0e 100644
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@@ -318,11 +318,19 @@ CRD-based configuration.

 * **default_cpu_limit**
   CPU limits for the Postgres containers, unless overridden by cluster-specific
-  settings. The default is `3`.
+  settings. The default is `1`.

 * **default_memory_limit**
   memory limits for the Postgres containers, unless overridden by cluster-specific
-  settings. The default is `1Gi`.
+  settings. The default is `500Mi`.
+
+* **min_cpu_limit**
+  hard CPU minimum that we consider required to properly run Postgres
+  clusters with Patroni on Kubernetes. The default is `250m`.
+
+* **min_memory_limit**
+  hard memory minimum that we consider required to properly run Postgres
+  clusters with Patroni on Kubernetes. The default is `250Mi`.

 ## Operator timeouts
@@ -579,4 +587,4 @@ scalyr sidecar. In the CRD-based configuration they are grouped under the
   CPU limit value for the Scalyr sidecar. The default is `1`.

 * **scalyr_memory_limit**
-  Memory limit value for the Scalyr sidecar. The default is `1Gi`.
+  Memory limit value for the Scalyr sidecar. The default is `500Mi`.
diff --git a/docs/user.md b/docs/user.md
index 45f345c87..f81e11ede 100644
--- a/docs/user.md
+++ b/docs/user.md
@@ -232,11 +232,11 @@ spec:
       memory: 300Mi
 ```

-The minimum limit to properly run the `postgresql` resource is `256m` for `cpu`
-and `256Mi` for `memory`. If a lower value is set in the manifest the operator
-will cancel ADD or UPDATE events on this resource with an error. If no
-resources are defined in the manifest the operator will obtain the configured
-[default requests](reference/operator_parameters.md#kubernetes-resource-requests).
+The minimum limits to properly run the `postgresql` resource are configured to
+`250m` for `cpu` and `250Mi` for `memory`. If a lower value is set in the
+manifest the operator will raise the limits to the configured minimum values.
+If no resources are defined in the manifest they will be obtained from the
+configured [default requests](reference/operator_parameters.md#kubernetes-resource-requests).
## Use taints and tolerations for dedicated PostgreSQL nodes diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 88a7f1f34..fc87c9887 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -58,6 +58,57 @@ class EndToEndTestCase(unittest.TestCase): k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml") k8s.wait_for_pod_start('spilo-role=master') + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_min_resource_limits(self): + ''' + Lower resource limits below configured minimum and let operator fix it + ''' + k8s = self.k8s + cluster_label = 'version=acid-minimal-cluster' + _, failover_targets = k8s.get_pg_nodes(cluster_label) + + # configure minimum boundaries for CPU and memory limits + minCPULimit = '250m' + minMemoryLimit = '250Mi' + patch_min_resource_limits = { + "data": { + "min_cpu_limit": minCPULimit, + "min_memory_limit": minMemoryLimit + } + } + k8s.update_config(patch_min_resource_limits) + + # lower resource limits below minimum + pg_patch_resources = { + "spec": { + "resources": { + "requests": { + "cpu": "10m", + "memory": "50Mi" + }, + "limits": { + "cpu": "200m", + "memory": "200Mi" + } + } + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) + k8s.wait_for_master_failover(failover_targets) + + pods = k8s.api.core_v1.list_namespaced_pod( + 'default', label_selector='spilo-role=master,' + cluster_label).items + self.assert_master_is_unique() + masterPod = pods[0] + + self.assertEqual(masterPod.spec.containers[0].resources.limits['cpu'], minCPULimit, + "Expected CPU limit {}, found {}" + .format(minCPULimit, masterPod.spec.containers[0].resources.limits['cpu'])) + self.assertEqual(masterPod.spec.containers[0].resources.limits['memory'], minMemoryLimit, + "Expected memory limit {}, found {}" + .format(minMemoryLimit, masterPod.spec.containers[0].resources.limits['memory'])) + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_multi_namespace_support(self): ''' @@ -76,10 +127,9 @@ class EndToEndTestCase(unittest.TestCase): @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_scaling(self): - """ + ''' Scale up from 2 to 3 and back to 2 pods by updating the Postgres manifest at runtime. - """ - + ''' k8s = self.k8s labels = "version=acid-minimal-cluster" @@ -93,9 +143,9 @@ class EndToEndTestCase(unittest.TestCase): @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_taint_based_eviction(self): - """ + ''' Add taint "postgres=:NoExecute" to node with master. This must cause a failover. 
- """ + ''' k8s = self.k8s cluster_label = 'version=acid-minimal-cluster' @@ -145,7 +195,7 @@ class EndToEndTestCase(unittest.TestCase): @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_logical_backup_cron_job(self): - """ + ''' Ensure we can (a) create the cron job at user request for a specific PG cluster (b) update the cluster-wide image for the logical backup pod (c) delete the job at user request @@ -153,7 +203,7 @@ class EndToEndTestCase(unittest.TestCase): Limitations: (a) Does not run the actual batch job because there is no S3 mock to upload backups to (b) Assumes 'acid-minimal-cluster' exists as defined in setUp - """ + ''' k8s = self.k8s @@ -208,10 +258,10 @@ class EndToEndTestCase(unittest.TestCase): "Expected 0 logical backup jobs, found {}".format(len(jobs))) def assert_master_is_unique(self, namespace='default', version="acid-minimal-cluster"): - """ + ''' Check that there is a single pod in the k8s cluster with the label "spilo-role=master" To be called manually after operations that affect pods - """ + ''' k8s = self.k8s labels = 'spilo-role=master,version=' + version diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index cf450ef94..2478156d6 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -42,8 +42,8 @@ spec: cpu: 10m memory: 100Mi limits: - cpu: 300m - memory: 300Mi + cpu: 500m + memory: 500Mi patroni: initdb: encoding: "UTF8" diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index afb5957da..7d11198da 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -15,9 +15,9 @@ data: # custom_pod_annotations: "keya:valuea,keyb:valueb" db_hosted_zone: db.example.com debug_logging: "true" - # default_cpu_limit: "3" + # default_cpu_limit: "1" # default_cpu_request: 100m - # default_memory_limit: 1Gi + # default_memory_limit: 500Mi # default_memory_request: 100Mi docker_image: registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p16 # enable_admin_role_for_users: "true" @@ -48,6 +48,8 @@ data: # master_pod_move_timeout: 10m # max_instances: "-1" # min_instances: "-1" + # min_cpu_limit: 250m + # min_memory_limit: 250Mi # node_readiness_label: "" # oauth_token_secret_name: postgresql-operator # pam_configuration: | diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 810624bc4..509d9aefc 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -155,6 +155,12 @@ spec: default_memory_request: type: string pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' + min_cpu_limit: + type: string + pattern: '^(\d+m|\d+(\.\d{1,3})?)$' + min_memory_limit: + type: string + pattern: '^(\d+(e\d+)?|\d+(\.\d+)?(e\d+)?[EPTGMK]i?)$' timeouts: type: object properties: diff --git a/manifests/postgres-operator.yaml b/manifests/postgres-operator.yaml index fa8682809..a06abfc68 100644 --- a/manifests/postgres-operator.yaml +++ b/manifests/postgres-operator.yaml @@ -19,10 +19,10 @@ spec: imagePullPolicy: IfNotPresent resources: requests: - cpu: 500m + cpu: 100m memory: 250Mi limits: - cpu: 2000m + cpu: 500m memory: 500Mi securityContext: runAsUser: 1000 diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 5e10ff66e..f13a1eed9 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -54,10 +54,12 @@ configuration: 
# toleration: {} # watched_namespace: "" postgres_pod_resources: - default_cpu_limit: "3" + default_cpu_limit: "1" default_cpu_request: 100m - default_memory_limit: 1Gi + default_memory_limit: 500Mi default_memory_request: 100Mi + # min_cpu_limit: 250m + # min_memory_limit: 250Mi timeouts: pod_label_wait_timeout: 10m pod_deletion_wait_timeout: 10m @@ -115,6 +117,6 @@ configuration: scalyr_cpu_limit: "1" scalyr_cpu_request: 100m # scalyr_image: "" - scalyr_memory_limit: 1Gi + scalyr_memory_limit: 500Mi scalyr_memory_request: 50Mi # scalyr_server_url: "" diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 20fa37138..4d5a6f024 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -810,6 +810,14 @@ var OperatorConfigCRDResourceValidation = apiextv1beta1.CustomResourceValidation Type: "string", Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$", }, + "min_cpu_limit": { + Type: "string", + Pattern: "^(\\d+m|\\d+(\\.\\d{1,3})?)$", + }, + "min_memory_limit": { + Type: "string", + Pattern: "^(\\d+(e\\d+)?|\\d+(\\.\\d+)?(e\\d+)?[EPTGMK]i?)$", + }, }, }, "timeouts": { diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 948c7cbbf..1e6a3b459 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -79,6 +79,8 @@ type PostgresPodResourcesDefaults struct { DefaultMemoryRequest string `json:"default_memory_request,omitempty"` DefaultCPULimit string `json:"default_cpu_limit,omitempty"` DefaultMemoryLimit string `json:"default_memory_limit,omitempty"` + MinCPULimit string `json:"min_cpu_limit,omitempty"` + MinMemoryLimit string `json:"min_memory_limit,omitempty"` } // OperatorTimeouts defines the timeout of ResourceCheck, PodWait, ReadyWait diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 0a7377389..c560c4cdf 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -227,8 +227,8 @@ func (c *Cluster) Create() error { c.setStatus(acidv1.ClusterStatusCreating) - if err = c.validateResources(&c.Spec); err != nil { - return fmt.Errorf("insufficient resource limits specified: %v", err) + if err = c.enforceMinResourceLimits(&c.Spec); err != nil { + return fmt.Errorf("could not enforce minimum resource limits: %v", err) } for _, role := range []PostgresRole{Master, Replica} { @@ -495,38 +495,38 @@ func compareResourcesAssumeFirstNotNil(a *v1.ResourceRequirements, b *v1.Resourc } -func (c *Cluster) validateResources(spec *acidv1.PostgresSpec) error { - - // setting limits too low can cause unnecessary evictions / OOM kills - const ( - cpuMinLimit = "256m" - memoryMinLimit = "256Mi" - ) +func (c *Cluster) enforceMinResourceLimits(spec *acidv1.PostgresSpec) error { var ( isSmaller bool err error ) + // setting limits too low can cause unnecessary evictions / OOM kills + minCPULimit := c.OpConfig.MinCPULimit + minMemoryLimit := c.OpConfig.MinMemoryLimit + cpuLimit := spec.Resources.ResourceLimits.CPU if cpuLimit != "" { - isSmaller, err = util.IsSmallerQuantity(cpuLimit, cpuMinLimit) + isSmaller, err = util.IsSmallerQuantity(cpuLimit, minCPULimit) if err != nil { - return fmt.Errorf("error validating CPU limit: %v", err) + return fmt.Errorf("could not compare defined CPU limit %s with configured minimum value %s: %v", cpuLimit, minCPULimit, err) } if isSmaller { - return fmt.Errorf("defined CPU limit %s is below required minimum %s to properly run 
postgresql resource", cpuLimit, cpuMinLimit) + c.logger.Warningf("defined CPU limit %s is below required minimum %s and will be set to it", cpuLimit, minCPULimit) + spec.Resources.ResourceLimits.CPU = minCPULimit } } memoryLimit := spec.Resources.ResourceLimits.Memory if memoryLimit != "" { - isSmaller, err = util.IsSmallerQuantity(memoryLimit, memoryMinLimit) + isSmaller, err = util.IsSmallerQuantity(memoryLimit, minMemoryLimit) if err != nil { - return fmt.Errorf("error validating memory limit: %v", err) + return fmt.Errorf("could not compare defined memory limit %s with configured minimum value %s: %v", memoryLimit, minMemoryLimit, err) } if isSmaller { - return fmt.Errorf("defined memory limit %s is below required minimum %s to properly run postgresql resource", memoryLimit, memoryMinLimit) + c.logger.Warningf("defined memory limit %s is below required minimum %s and will be set to it", memoryLimit, minMemoryLimit) + spec.Resources.ResourceLimits.Memory = minMemoryLimit } } @@ -543,7 +543,6 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error { c.mu.Lock() defer c.mu.Unlock() - oldStatus := c.Status c.setStatus(acidv1.ClusterStatusUpdating) c.setSpec(newSpec) @@ -555,22 +554,6 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error { } }() - if err := c.validateResources(&newSpec.Spec); err != nil { - err = fmt.Errorf("insufficient resource limits specified: %v", err) - - // cancel update only when (already too low) pod resources were edited - // if cluster was successfully running before the update, continue but log a warning - isCPULimitSmaller, err2 := util.IsSmallerQuantity(newSpec.Spec.Resources.ResourceLimits.CPU, oldSpec.Spec.Resources.ResourceLimits.CPU) - isMemoryLimitSmaller, err3 := util.IsSmallerQuantity(newSpec.Spec.Resources.ResourceLimits.Memory, oldSpec.Spec.Resources.ResourceLimits.Memory) - - if oldStatus.Running() && !isCPULimitSmaller && !isMemoryLimitSmaller && err2 == nil && err3 == nil { - c.logger.Warning(err) - } else { - updateFailed = true - return err - } - } - if oldSpec.Spec.PgVersion != newSpec.Spec.PgVersion { // PG versions comparison c.logger.Warningf("postgresql version change(%q -> %q) has no effect", oldSpec.Spec.PgVersion, newSpec.Spec.PgVersion) //we need that hack to generate statefulset with the old version @@ -616,6 +599,12 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error { // Statefulset func() { + if err := c.enforceMinResourceLimits(&c.Spec); err != nil { + c.logger.Errorf("could not sync resources: %v", err) + updateFailed = true + return + } + oldSs, err := c.generateStatefulSet(&oldSpec.Spec) if err != nil { c.logger.Errorf("could not generate old statefulset spec: %v", err) @@ -623,6 +612,9 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error { return } + // update newSpec to for latter comparison with oldSpec + c.enforceMinResourceLimits(&newSpec.Spec) + newSs, err := c.generateStatefulSet(&newSpec.Spec) if err != nil { c.logger.Errorf("could not generate new statefulset spec: %v", err) diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index abe579fb5..fa4fc9ec1 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -23,7 +23,6 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error { c.mu.Lock() defer c.mu.Unlock() - oldStatus := c.Status c.setSpec(newSpec) defer func() { @@ -35,16 +34,6 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error { } }() - if err = c.validateResources(&c.Spec); err != nil { - err = fmt.Errorf("insufficient resource 
limits specified: %v", err) - if oldStatus.Running() { - c.logger.Warning(err) - err = nil - } else { - return err - } - } - if err = c.initUsers(); err != nil { err = fmt.Errorf("could not init users: %v", err) return err @@ -76,6 +65,11 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error { return err } + if err = c.enforceMinResourceLimits(&c.Spec); err != nil { + err = fmt.Errorf("could not enforce minimum resource limits: %v", err) + return err + } + c.logger.Debugf("syncing statefulsets") if err = c.syncStatefulSet(); err != nil { if !k8sutil.ResourceAlreadyExists(err) { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 56ba91d02..d0357d222 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -75,6 +75,8 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit result.DefaultMemoryLimit = fromCRD.PostgresPodResources.DefaultMemoryLimit + result.MinCPULimit = fromCRD.PostgresPodResources.MinCPULimit + result.MinMemoryLimit = fromCRD.PostgresPodResources.MinMemoryLimit // timeout config result.ResourceCheckInterval = time.Duration(fromCRD.Timeouts.ResourceCheckInterval) diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 224691120..339f06ce0 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -37,8 +37,10 @@ type Resources struct { PodToleration map[string]string `name:"toleration" default:""` DefaultCPURequest string `name:"default_cpu_request" default:"100m"` DefaultMemoryRequest string `name:"default_memory_request" default:"100Mi"` - DefaultCPULimit string `name:"default_cpu_limit" default:"3"` - DefaultMemoryLimit string `name:"default_memory_limit" default:"1Gi"` + DefaultCPULimit string `name:"default_cpu_limit" default:"1"` + DefaultMemoryLimit string `name:"default_memory_limit" default:"500Mi"` + MinCPULimit string `name:"min_cpu_limit" default:"250m"` + MinMemoryLimit string `name:"min_memory_limit" default:"250Mi"` PodEnvironmentConfigMap string `name:"pod_environment_configmap" default:""` NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` @@ -66,7 +68,7 @@ type Scalyr struct { ScalyrCPURequest string `name:"scalyr_cpu_request" default:"100m"` ScalyrMemoryRequest string `name:"scalyr_memory_request" default:"50Mi"` ScalyrCPULimit string `name:"scalyr_cpu_limit" default:"1"` - ScalyrMemoryLimit string `name:"scalyr_memory_limit" default:"1Gi"` + ScalyrMemoryLimit string `name:"scalyr_memory_limit" default:"500Mi"` } // LogicalBackup defines configuration for logical backup From 8794e4f9acad87c8acbb0e4dd13b952fcd38f89e Mon Sep 17 00:00:00 2001 From: siku4 <44839490+siku4@users.noreply.github.com> Date: Mon, 3 Feb 2020 15:30:26 +0100 Subject: [PATCH 5/5] add service for exposing operator api (#794) --- .../postgres-operator/templates/service.yaml | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 charts/postgres-operator/templates/service.yaml diff --git a/charts/postgres-operator/templates/service.yaml b/charts/postgres-operator/templates/service.yaml new file mode 100644 index 000000000..52990c5d4 --- /dev/null +++ b/charts/postgres-operator/templates/service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: {{ template 
"postgres-operator.name" . }} + helm.sh/chart: {{ template "postgres-operator.chart" . }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/instance: {{ .Release.Name }} + name: {{ template "postgres-operator.fullname" . }} +spec: + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: {{ template "postgres-operator.name" . }} + sessionAffinity: None + type: ClusterIP +status: + loadBalancer: {} \ No newline at end of file