From 45c89b3da4f72a0ebb1a683fec5a8ffdd76ce20e Mon Sep 17 00:00:00 2001 From: zerg-junior Date: Thu, 15 Nov 2018 14:00:08 +0100 Subject: [PATCH 01/11] [WIP] Add set_memory_request_to_limit option (#406) * Add set_memory_request_to_limit option --- README.md | 2 + delivery.yaml | 2 +- docs/administrator.md | 6 +- docs/reference/operator_parameters.md | 3 + manifests/complete-postgres-manifest.yaml | 4 +- manifests/configmap.yaml | 3 +- .../v1/operator_configuration_type.go | 1 + pkg/apis/acid.zalan.do/v1/postgresql_type.go | 4 +- pkg/apis/acid.zalan.do/v1/util_test.go | 4 +- .../acid.zalan.do/v1/zz_generated.deepcopy.go | 2 +- pkg/cluster/k8sres.go | 66 +++++++++++++++++-- pkg/controller/controller.go | 23 +++++++ pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + pkg/util/util.go | 18 +++++ pkg/util/util_test.go | 23 +++++++ 16 files changed, 145 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 18ea97538..13adae24d 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,8 @@ cd postgres-operator ./run_operator_locally.sh ``` +Note we provide the `/manifests` directory as an example only; you should consider adjusting the manifests to your particular setting. + ## Running and testing the operator The best way to test the operator is to run it locally in [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). See developer docs(`docs/developer.yaml`) for details. diff --git a/delivery.yaml b/delivery.yaml index 502aa75b2..c939e64f0 100644 --- a/delivery.yaml +++ b/delivery.yaml @@ -22,7 +22,7 @@ pipeline: go version - desc: 'Install Docker' cmd: | - curl -sSL https://get.docker.com/ | sh + curl -fLOsS https://delivery.cloud.zalando.com/utils/ensure-docker && sh ensure-docker && rm ensure-docker - desc: 'Symlink sources into the GOPATH' cmd: | mkdir -p $OPERATOR_TOP_DIR diff --git a/docs/administrator.md b/docs/administrator.md index 83594ef1f..f2c747cce 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -41,12 +41,12 @@ manifests: ```bash $ kubectl create namespace test - $ kubectl config set-context --namespace=test + $ kubectl config set-context $(kubectl config current-context) --namespace=test ``` All subsequent `kubectl` commands will work with the `test` namespace. The -operator will run in this namespace and look up needed resources - such as its -config map - there. +operator will run in this namespace and look up needed resources - such as its +config map - there. Please note that the namespace for service accounts and cluster role bindings in [operator RBAC rules](manifests/operator-service-account-rbac.yaml) needs to be adjusted to the non-default value. ## Specify the namespace to watch diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 76109c890..47f67228c 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -221,6 +221,9 @@ CRD-based configuration. memory limits for the postgres containers, unless overridden by cluster-specific settings. The default is `1Gi`. +* **set_memory_request_to_limit** + Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers (Postgres, Scalyr sidecar, and other sidecars). The default is `false`. 
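+  The comparison behind this option can be sketched with quantities from
+  `k8s.io/apimachinery`, mirroring the `RequestIsSmallerThanLimit` helper that
+  this change introduces (a minimal, illustrative example, not operator code):
+
+  ```go
+  package main
+
+  import (
+  	"fmt"
+
+  	"k8s.io/apimachinery/pkg/api/resource"
+  )
+
+  func main() {
+  	request := resource.MustParse("1Gi") // example values only
+  	limit := resource.MustParse("2Gi")
+  	// Cmp returns -1 when request < limit; in that case the operator
+  	// raises the memory request to match the limit.
+  	if request.Cmp(limit) == -1 {
+  		fmt.Println("bumping memory request to", limit.String())
+  	}
+  }
+  ```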
+ ## Operator timeouts This set of parameters define various timeouts related to some operator diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 60d39c94b..e0f76e4d4 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -6,7 +6,7 @@ metadata: spec: teamId: "ACID" volume: - size: 5Gi + size: 1Gi numberOfInstances: 2 users: #Application/Robot users zalando: @@ -31,7 +31,7 @@ spec: memory: 100Mi limits: cpu: 300m - memory: 3000Mi + memory: 300Mi patroni: initdb: encoding: "UTF8" diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index ed7652907..d127e72f2 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -10,11 +10,12 @@ data: debug_logging: "true" workers: "4" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29 + docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.5-p35 pod_service_account_name: "zalando-postgres-operator" secret_name_template: '{username}.{cluster}.credentials' super_username: postgres enable_teams_api: "false" + # set_memory_request_to_limit: "true" # postgres_superuser_teams: "postgres_superusers" # enable_team_superuser: "false" # team_admin_role: "admin" diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index de7681db4..da163620b 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -131,6 +131,7 @@ type OperatorConfigurationData struct { PostgresUsersConfiguration PostgresUsersConfiguration `json:"users"` Kubernetes KubernetesMetaConfiguration `json:"kubernetes"` PostgresPodResources PostgresPodResourcesDefaults `json:"postgres_pod_resources"` + SetMemoryRequestToLimit bool `json:"set_memory_request_to_limit,omitempty"` Timeouts OperatorTimeouts `json:"timeouts"` LoadBalancer LoadBalancerConfiguration `json:"load_balancer"` AWSGCP AWSGCPConfiguration `json:"aws_or_gcp"` diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index 380ba68d7..aedb0512f 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -90,8 +90,8 @@ type ResourceDescription struct { // Resources describes requests and limits for the cluster resouces. 
type Resources struct { - ResourceRequest ResourceDescription `json:"requests,omitempty"` - ResourceLimits ResourceDescription `json:"limits,omitempty"` + ResourceRequests ResourceDescription `json:"requests,omitempty"` + ResourceLimits ResourceDescription `json:"limits,omitempty"` } // Patroni contains Patroni-specific configuration diff --git a/pkg/apis/acid.zalan.do/v1/util_test.go b/pkg/apis/acid.zalan.do/v1/util_test.go index 99e3f2b7c..b6a27542c 100644 --- a/pkg/apis/acid.zalan.do/v1/util_test.go +++ b/pkg/apis/acid.zalan.do/v1/util_test.go @@ -240,8 +240,8 @@ var unmarshalCluster = []struct { Slots: map[string]map[string]string{"permanent_logical_1": {"type": "logical", "database": "foo", "plugin": "pgoutput"}}, }, Resources: Resources{ - ResourceRequest: ResourceDescription{CPU: "10m", Memory: "50Mi"}, - ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"}, + ResourceRequests: ResourceDescription{CPU: "10m", Memory: "50Mi"}, + ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"}, }, TeamID: "ACID", diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 4496f8c0a..55a5ac075 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -573,7 +573,7 @@ func (in *ResourceDescription) DeepCopy() *ResourceDescription { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Resources) DeepCopyInto(out *Resources) { *out = *in - out.ResourceRequest = in.ResourceRequest + out.ResourceRequests = in.ResourceRequests out.ResourceLimits = in.ResourceLimits return } diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 66ac55388..b775ee636 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -92,18 +92,18 @@ func (c *Cluster) makeDefaultResources() acidv1.Resources { defaultRequests := acidv1.ResourceDescription{CPU: config.DefaultCPURequest, Memory: config.DefaultMemoryRequest} defaultLimits := acidv1.ResourceDescription{CPU: config.DefaultCPULimit, Memory: config.DefaultMemoryLimit} - return acidv1.Resources{ResourceRequest: defaultRequests, ResourceLimits: defaultLimits} + return acidv1.Resources{ResourceRequests: defaultRequests, ResourceLimits: defaultLimits} } func generateResourceRequirements(resources acidv1.Resources, defaultResources acidv1.Resources) (*v1.ResourceRequirements, error) { var err error - specRequests := resources.ResourceRequest + specRequests := resources.ResourceRequests specLimits := resources.ResourceLimits result := v1.ResourceRequirements{} - result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequest) + result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequests) if err != nil { return nil, fmt.Errorf("could not fill resource requests: %v", err) } @@ -377,8 +377,8 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar, resources, err := generateResourceRequirements( makeResources( - sidecar.Resources.ResourceRequest.CPU, - sidecar.Resources.ResourceRequest.Memory, + sidecar.Resources.ResourceRequests.CPU, + sidecar.Resources.ResourceRequests.Memory, sidecar.Resources.ResourceLimits.CPU, sidecar.Resources.ResourceLimits.Memory, ), @@ -625,7 +625,7 @@ func getBucketScopeSuffix(uid string) string { func makeResources(cpuRequest, memoryRequest, cpuLimit, memoryLimit string) acidv1.Resources { return acidv1.Resources{ - ResourceRequest: 
acidv1.ResourceDescription{
+		ResourceRequests: acidv1.ResourceDescription{
 			CPU:    cpuRequest,
 			Memory: memoryRequest,
 		},
@@ -644,6 +644,60 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
 		podTemplate         *v1.PodTemplateSpec
 		volumeClaimTemplate *v1.PersistentVolumeClaim
 	)
+
+	if c.OpConfig.SetMemoryRequestToLimit {
+
+		// the controller already adjusted the default memory request at operator startup
+
+		request := spec.Resources.ResourceRequests.Memory
+		if request == "" {
+			request = c.OpConfig.DefaultMemoryRequest
+		}
+
+		limit := spec.Resources.ResourceLimits.Memory
+		if limit == "" {
+			limit = c.OpConfig.DefaultMemoryLimit
+		}
+
+		isSmaller, err := util.RequestIsSmallerThanLimit(request, limit)
+		if err != nil {
+			return nil, err
+		}
+		if isSmaller {
+			c.logger.Warningf("The memory request of %v for the Postgres container is increased to match the memory limit of %v.", request, limit)
+			spec.Resources.ResourceRequests.Memory = limit
+
+		}
+
+		// the controller adjusts the Scalyr sidecar request at operator startup
+		// as this sidecar is managed separately
+
+		// adjust sidecar containers defined for that particular cluster (iterate by index so changes persist)
+		for i := range spec.Sidecars {
+
+			// TODO #413
+			sidecarRequest := spec.Sidecars[i].Resources.ResourceRequests.Memory
+			if sidecarRequest == "" {
+				sidecarRequest = c.OpConfig.DefaultMemoryRequest
+			}
+
+			sidecarLimit := spec.Sidecars[i].Resources.ResourceLimits.Memory
+			if sidecarLimit == "" {
+				sidecarLimit = c.OpConfig.DefaultMemoryLimit
+			}
+
+			isSmaller, err := util.RequestIsSmallerThanLimit(sidecarRequest, sidecarLimit)
+			if err != nil {
+				return nil, err
+			}
+			if isSmaller {
+				c.logger.Warningf("The memory request of %v for the %v sidecar container is increased to match the memory limit of %v.", sidecarRequest, spec.Sidecars[i].Name, sidecarLimit)
+				spec.Sidecars[i].Resources.ResourceRequests.Memory = sidecarLimit
+			}
+		}
+
+	}
+
 	defaultResources := c.makeDefaultResources()
 
 	resourceRequirements, err := generateResourceRequirements(spec.Resources, defaultResources)
diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go
index 1bc4e08e0..fd1c099de 100644
--- a/pkg/controller/controller.go
+++ b/pkg/controller/controller.go
@@ -110,6 +110,29 @@ func (c *Controller) initOperatorConfig() {
 	c.opConfig = config.NewFromMap(configMapData)
 	c.warnOnDeprecatedOperatorParameters()
 
+	if c.opConfig.SetMemoryRequestToLimit {
+
+		isSmaller, err := util.RequestIsSmallerThanLimit(c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit)
+		if err != nil {
+			panic(err)
+		}
+		if isSmaller {
+			c.logger.Warningf("The default memory request of %v for Postgres containers is increased to match the default memory limit of %v.", c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit)
+			c.opConfig.DefaultMemoryRequest = c.opConfig.DefaultMemoryLimit
+		}
+
+		isSmaller, err = util.RequestIsSmallerThanLimit(c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit)
+		if err != nil {
+			panic(err)
+		}
+		if isSmaller {
+			c.logger.Warningf("The memory request of %v for the Scalyr sidecar container is increased to match the memory limit of %v.", c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit)
+			c.opConfig.ScalyrMemoryRequest = c.opConfig.ScalyrMemoryLimit
+		}
+
+		// generateStatefulSet adjusts values for individual Postgres clusters
+	}
+
 }
 
 func (c *Controller) modifyConfigFromEnvironment() {
diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go
index 93ba1a0f4..006cfd2d1 100644
--- a/pkg/controller/operator_config.go
+++ b/pkg/controller/operator_config.go
@@ -55,6 +55,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
 	result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest
 	result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit
 	result.DefaultMemoryLimit = fromCRD.PostgresPodResources.DefaultMemoryLimit
+	result.SetMemoryRequestToLimit = fromCRD.SetMemoryRequestToLimit
 
 	result.ResourceCheckInterval = time.Duration(fromCRD.Timeouts.ResourceCheckInterval)
 	result.ResourceCheckTimeout = time.Duration(fromCRD.Timeouts.ResourceCheckTimeout)
diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go
index 92fd3fd73..2bd7924ad 100644
--- a/pkg/util/config/config.go
+++ b/pkg/util/config/config.go
@@ -104,6 +104,7 @@ type Config struct {
 	PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"`
 	ProtectedRoles          []string      `name:"protected_role_names" default:"admin"`
 	PostgresSuperuserTeams  []string      `name:"postgres_superuser_teams" default:""`
+	SetMemoryRequestToLimit bool          `name:"set_memory_request_to_limit" default:"false"`
 }
 
 // MustMarshal marshals the config or panics
diff --git a/pkg/util/util.go b/pkg/util/util.go
index 7b7b58fc4..99e670af9 100644
--- a/pkg/util/util.go
+++ b/pkg/util/util.go
@@ -3,12 +3,14 @@ package util
 import (
 	"crypto/md5" // #nosec we need it to for PostgreSQL md5 passwords
 	"encoding/hex"
+	"fmt"
 	"math/rand"
 	"regexp"
 	"strings"
 	"time"
 
 	"github.com/motomux/pretty"
+	resource "k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/zalando-incubator/postgres-operator/pkg/spec"
@@ -127,3 +129,19 @@ func Coalesce(val, defaultVal string) string {
 	}
 	return val
 }
+
+// RequestIsSmallerThanLimit returns true if the request quantity is strictly smaller than the limit.
+func RequestIsSmallerThanLimit(requestStr, limitStr string) (bool, error) {
+
+	request, err := resource.ParseQuantity(requestStr)
+	if err != nil {
+		return false, fmt.Errorf("could not parse memory request %v: %v", requestStr, err)
+	}
+
+	limit, err2 := resource.ParseQuantity(limitStr)
+	if err2 != nil {
+		return false, fmt.Errorf("could not parse memory limit %v: %v", limitStr, err2)
+	}
+
+	return request.Cmp(limit) == -1, nil
+}
diff --git a/pkg/util/util_test.go b/pkg/util/util_test.go
index 53ac13768..3a02149b4 100644
--- a/pkg/util/util_test.go
+++ b/pkg/util/util_test.go
@@ -69,6 +69,17 @@ var substringMatch = []struct {
 	{regexp.MustCompile(`aaaa (\d+) bbbb`), "aaaa 123 bbbb", nil},
 }
 
+var requestIsSmallerThanLimitTests = []struct {
+	request string
+	limit   string
+	out     bool
+}{
+	{"1G", "2G", true},
+	{"1G", "1Gi", true}, // G is 1000^3 bytes, Gi is 1024^3 bytes
+	{"1024Mi", "1G", false},
+	{"1e9", "1G", false}, // 1e9 bytes == 1G
+}
+
 func TestRandomPassword(t *testing.T) {
 	const pwdLength = 10
 	pwd := RandomPassword(pwdLength)
@@ -143,3 +154,15 @@ func TestMapContains(t *testing.T) {
 		}
 	}
 }
+
+func TestRequestIsSmallerThanLimit(t *testing.T) {
+	for _, tt := range requestIsSmallerThanLimitTests {
+		res, err := RequestIsSmallerThanLimit(tt.request, tt.limit)
+		if err != nil {
+			t.Errorf("RequestIsSmallerThanLimit returned unexpected error: %#v", err)
+		}
+		if res != tt.out {
+			t.Errorf("RequestIsSmallerThanLimit expected: %#v, got: %#v", tt.out, res)
+		}
+	}
+}

From a0e09cd6a636e2a94077b77b8d3c9b61a3f45d89 Mon Sep 17 00:00:00 2001
From: Dmitry Dolgov <9erthalion6@gmail.com>
Date: Tue, 27 Nov 2018 11:59:59 +0100
Subject: [PATCH 02/11] Add golangci badge (#423)

Since we've connected it, it makes sense also to
display the results.
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 13adae24d..d96458eec 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 [![Coverage Status](https://coveralls.io/repos/github/zalando-incubator/postgres-operator/badge.svg)](https://coveralls.io/github/zalando-incubator/postgres-operator)
 [![Go Report Card](https://goreportcard.com/badge/github.com/zalando-incubator/postgres-operator)](https://goreportcard.com/report/github.com/zalando-incubator/postgres-operator)
 [![GoDoc](https://godoc.org/github.com/zalando-incubator/postgres-operator?status.svg)](https://godoc.org/github.com/zalando-incubator/postgres-operator)
+[![golangci](https://golangci.com/badges/github.com/zalando-incubator/postgres-operator.svg)](https://golangci.com/r/github.com/zalando-incubator/postgres-operator)
 
 ## Introduction

From ff5c63ddf13d7f7ed6810c64a2a697163cc01915 Mon Sep 17 00:00:00 2001
From: Isaev Denis
Date: Tue, 27 Nov 2018 14:00:15 +0300
Subject: [PATCH 03/11] add .golangci.yml (#422)

---
 .golangci.yml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .golangci.yml

diff --git a/.golangci.yml b/.golangci.yml
new file mode 100644
index 000000000..4ffc1915a
--- /dev/null
+++ b/.golangci.yml
@@ -0,0 +1,5 @@
+# https://github.com/golangci/golangci/wiki/Configuration

+service:
+  prepare:
+    - make deps

From d6e6b007707fb19f4ca8486188b2e540cb14110c Mon Sep 17 00:00:00 2001
From: Dmitry Dolgov <9erthalion6@gmail.com>
Date: Fri, 21 Dec 2018 16:22:30 +0100
Subject: [PATCH 04/11] Add shm_volume option (#427)

Add the possibility to mount a tmpfs volume to /dev/shm to avoid issues like
[this](https://github.com/docker-library/postgres/issues/416). To achieve that,
two new options were introduced:

* `enableShmVolume` in the PostgreSQL manifest, to specify whether or not to
mount this volume per database cluster

* `enable_shm_volume` in the operator configuration, to specify whether or not
to mount it operator-wide.

The first one, `enableShmVolume`, takes precedence to allow us to be more
flexible.

---
 docs/reference/cluster_manifest.md           | 15 +++++-
 docs/reference/operator_parameters.md        |  8 +++
 manifests/complete-postgres-manifest.yaml    |  3 +-
 pkg/apis/acid.zalan.do/v1/postgresql_type.go |  1 +
 pkg/apis/acid.zalan.do/v1/util_test.go       |  6 +--
 pkg/cluster/k8sres.go                        | 52 ++++++++++++++++++-
 pkg/cluster/k8sres_test.go                   | 54 ++++++++++++++++++++
 pkg/util/config/config.go                    |  1 +
 pkg/util/constants/kubernetes.go             |  1 +
 pkg/util/constants/postgresql.go             |  3 ++
 10 files changed, 137 insertions(+), 7 deletions(-)

diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md
index 75de35097..efc850aff 100644
--- a/docs/reference/cluster_manifest.md
+++ b/docs/reference/cluster_manifest.md
@@ -96,7 +96,19 @@ Those are parameters grouped directly under the  `spec` key in the manifest.
   that should be assigned to the cluster pods. When not specified, the value
   is taken from the `pod_priority_class_name` operator parameter, if not set
   then the default priority class is taken. The priority class itself must be defined in advance.
-
+
+* **enableShmVolume**
+  Start a database pod without limitations on shm memory. By default, Docker
+  limits `/dev/shm` to `64M` (see e.g. the [docker
+  issue](https://github.com/docker-library/postgres/issues/416)), which may not
+  be enough if PostgreSQL uses parallel workers heavily. If this option is
+  present and set to `true`, a new tmpfs volume is mounted on the target
+  database pod to remove this limitation. If it is not present, the decision
+  about mounting the volume is based on the operator configuration
+  (`enable_shm_volume`, which is `true` by default). If it is present and set
+  to `false`, no volume is mounted no matter how the operator was configured,
+  i.e. the manifest setting overrides the operator configuration.
+
 ## Postgres parameters
 
 Those parameters are grouped under the `postgresql` top-level key.
@@ -112,6 +124,7 @@ Those parameters are grouped under the `postgresql` top-level key.
    cluster. Optional (Spilo automatically sets reasonable defaults for
    parameters like work_mem or max_connections).
 
+
 ## Patroni parameters
 
 Those parameters are grouped under the `patroni` top-level key. See the [patroni
diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md
index 47f67228c..3f96b450c 100644
--- a/docs/reference/operator_parameters.md
+++ b/docs/reference/operator_parameters.md
@@ -224,6 +224,14 @@ CRD-based configuration.
 * **set_memory_request_to_limit**
   Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers (Postgres, Scalyr sidecar, and other sidecars). The default is `false`.
 
+* **enable_shm_volume**
+  Instruct the operator to start any new database pod without limitations on
+  shm memory. If this option is enabled, a new tmpfs volume is mounted on the
+  target database pod to remove the shm memory limitation (see e.g. the [docker
+  issue](https://github.com/docker-library/postgres/issues/416)). This option
+  is global for an operator object and can be overridden by the `enableShmVolume`
+  parameter from the Postgres manifest. 
The default is `true` + ## Operator timeouts This set of parameters define various timeouts related to some operator diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index e0f76e4d4..c5f80f373 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -13,12 +13,13 @@ spec: - superuser - createdb enableMasterLoadBalancer: true - enableReplicaLoadBalancer: true + enableReplicaLoadBalancer: true allowedSourceRanges: # load balancers' source ranges for both master and replica services - 127.0.0.1/32 databases: foo: zalando #Expert section + enableShmVolume: true postgresql: version: "10" parameters: diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index aedb0512f..2a8f60f71 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -51,6 +51,7 @@ type PostgresSpec struct { Tolerations []v1.Toleration `json:"tolerations,omitempty"` Sidecars []Sidecar `json:"sidecars,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` + ShmVolume *bool `json:"enableShmVolume,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/acid.zalan.do/v1/util_test.go b/pkg/apis/acid.zalan.do/v1/util_test.go index b6a27542c..01be31e88 100644 --- a/pkg/apis/acid.zalan.do/v1/util_test.go +++ b/pkg/apis/acid.zalan.do/v1/util_test.go @@ -499,7 +499,7 @@ func TestMarshal(t *testing.T) { t.Errorf("Marshal error: %v", err) } if !bytes.Equal(m, tt.marshal) { - t.Errorf("Marshal Postgresql expected: %q, got: %q", string(tt.marshal), string(m)) + t.Errorf("Marshal Postgresql \nexpected: %q, \ngot: %q", string(tt.marshal), string(m)) } } } @@ -507,11 +507,11 @@ func TestMarshal(t *testing.T) { func TestPostgresMeta(t *testing.T) { for _, tt := range unmarshalCluster { if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { - t.Errorf("GetObjectKindMeta expected: %v, got: %v", tt.out.TypeMeta, a) + t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot: %v", tt.out.TypeMeta, a) } if a := tt.out.GetObjectMeta(); reflect.DeepEqual(a, tt.out.ObjectMeta) { - t.Errorf("GetObjectMeta expected: %v, got: %v", tt.out.ObjectMeta, a) + t.Errorf("GetObjectMeta \nexpected: %v, \ngot: %v", tt.out.ObjectMeta, a) } } } diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index b775ee636..6a3a052bd 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -18,6 +18,7 @@ import ( acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" + "github.com/zalando-incubator/postgres-operator/pkg/util/config" "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "k8s.io/apimachinery/pkg/labels" ) @@ -396,6 +397,16 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar, return nil, nil } +// Check whether or not we're requested to mount an shm volume, +// taking into account that PostgreSQL manifest has precedence. 
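+// For example, with `enable_shm_volume: "true"` in the operator configuration,
+// a cluster manifest that sets `enableShmVolume: false` gets no tmpfs mount,
+// while a manifest that omits the field falls back to the operator-level setting.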
+func mountShmVolumeNeeded(opConfig config.Config, pgSpec *acidv1.PostgresSpec) bool { + if pgSpec.ShmVolume != nil { + return *pgSpec.ShmVolume + } + + return opConfig.ShmVolume +} + func generatePodTemplate( namespace string, labels labels.Set, @@ -407,6 +418,7 @@ func generatePodTemplate( podServiceAccountName string, kubeIAMRole string, priorityClassName string, + shmVolume bool, ) (*v1.PodTemplateSpec, error) { terminateGracePeriodSeconds := terminateGracePeriod @@ -420,6 +432,10 @@ func generatePodTemplate( Tolerations: *tolerationsSpec, } + if shmVolume { + addShmVolume(&podSpec) + } + if nodeAffinity != nil { podSpec.Affinity = nodeAffinity } @@ -733,7 +749,12 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State volumeMounts := generateVolumeMounts() // generate the spilo container - spiloContainer := generateSpiloContainer(c.containerName(), &effectiveDockerImage, resourceRequirements, spiloEnvVars, volumeMounts) + spiloContainer := generateSpiloContainer(c.containerName(), + &effectiveDockerImage, + resourceRequirements, + spiloEnvVars, + volumeMounts, + ) // resolve conflicts between operator-global and per-cluster sidecards sideCars := c.mergeSidecars(spec.Sidecars) @@ -775,7 +796,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State int64(c.OpConfig.PodTerminateGracePeriod.Seconds()), c.OpConfig.PodServiceAccountName, c.OpConfig.KubeIAMRole, - effectivePodPriorityClassName); err != nil { + effectivePodPriorityClassName, + mountShmVolumeNeeded(c.OpConfig, spec)); err != nil { return nil, fmt.Errorf("could not generate pod template: %v", err) } @@ -882,6 +904,32 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 { return newcur } +// To avoid issues with limited /dev/shm inside docker environment, when +// PostgreSQL can't allocate enough of dsa segments from it, we can +// mount an extra memory volume +// +// see https://docs.okd.io/latest/dev_guide/shared_memory.html +func addShmVolume(podSpec *v1.PodSpec) { + volumes := append(podSpec.Volumes, v1.Volume{ + Name: constants.ShmVolumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + Medium: "Memory", + }, + }, + }) + + pgIdx := constants.PostgresContainerIdx + mounts := append(podSpec.Containers[pgIdx].VolumeMounts, + v1.VolumeMount{ + Name: constants.ShmVolumeName, + MountPath: constants.ShmVolumePath, + }) + + podSpec.Containers[0].VolumeMounts = mounts + podSpec.Volumes = volumes +} + func generatePersistentVolumeClaimTemplate(volumeSize, volumeStorageClass string) (*v1.PersistentVolumeClaim, error) { var storageClassName *string diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 12e145c04..92946ab2b 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1,8 +1,11 @@ package cluster import ( + "k8s.io/api/core/v1" + acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/util/config" + "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "github.com/zalando-incubator/postgres-operator/pkg/util/k8sutil" "testing" ) @@ -75,3 +78,54 @@ func TestCreateLoadBalancerLogic(t *testing.T) { } } } + +func TestShmVolume(t *testing.T) { + testName := "TestShmVolume" + tests := []struct { + subTest string + podSpec *v1.PodSpec + shmPos int + }{ + { + subTest: "empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: 
[]v1.VolumeMount{}, + }, + }, + }, + shmPos: 0, + }, + { + subTest: "non empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{v1.Volume{}}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + v1.VolumeMount{}, + }, + }, + }, + }, + shmPos: 1, + }, + } + for _, tt := range tests { + addShmVolume(tt.podSpec) + + volumeName := tt.podSpec.Volumes[tt.shmPos].Name + volumeMountName := tt.podSpec.Containers[0].VolumeMounts[tt.shmPos].Name + + if volumeName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected volume %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeName) + } + if volumeMountName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected mount %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeMountName) + } + } +} diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 2bd7924ad..d855e0a2a 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,6 +38,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` + ShmVolume bool `name:"enable_shm_volume" default:"true"` } // Auth describes authentication specific configuration parameters diff --git a/pkg/util/constants/kubernetes.go b/pkg/util/constants/kubernetes.go index 2604f124d..a4ea73e80 100644 --- a/pkg/util/constants/kubernetes.go +++ b/pkg/util/constants/kubernetes.go @@ -5,6 +5,7 @@ import "time" // General kubernetes-related constants const ( PostgresContainerName = "postgres" + PostgresContainerIdx = 0 K8sAPIPath = "/apis" StatefulsetDeletionInterval = 1 * time.Second StatefulsetDeletionTimeout = 30 * time.Second diff --git a/pkg/util/constants/postgresql.go b/pkg/util/constants/postgresql.go index 7556e8858..e39fd423f 100644 --- a/pkg/util/constants/postgresql.go +++ b/pkg/util/constants/postgresql.go @@ -10,4 +10,7 @@ const ( PostgresConnectRetryTimeout = 2 * time.Minute PostgresConnectTimeout = 15 * time.Second + + ShmVolumeName = "dshm" + ShmVolumePath = "/dev/shm" ) From 4fa09e0dcbe556106beaed4078e6c2756245e931 Mon Sep 17 00:00:00 2001 From: zerg-junior Date: Fri, 21 Dec 2018 16:44:31 +0100 Subject: [PATCH 05/11] Unify warnings about unmovable pods (#389) * Unify warnings about unmovable pods * Log conditions that prevent master pod migration --- pkg/controller/node.go | 68 +++++++++++++++++++++++++---------------- run_operator_locally.sh | 2 +- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/pkg/controller/node.go b/pkg/controller/node.go index dc919c450..b3e30cc9b 100644 --- a/pkg/controller/node.go +++ b/pkg/controller/node.go @@ -7,7 +7,10 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" + "fmt" + "github.com/zalando-incubator/postgres-operator/pkg/cluster" + "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" ) @@ -55,15 +58,16 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { return } - if util.MapContains(nodeCur.Labels, map[string]string{"master": "true"}) { + if !c.nodeIsReady(nodePrev) { + c.logger.Debugf("The decommissioned node %v should have already triggered master pod migration. 
Previous k8s-reported state of the node: %v", util.NameFromMeta(nodePrev.ObjectMeta), nodePrev) return } - // do nothing if the node should have already triggered an update or - // if only one of the label and the unschedulability criteria are met. - if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) { + if c.nodeIsReady(nodeCur) { + c.logger.Debugf("The decommissioned node %v become schedulable again. Current k8s-reported state of the node: %v", util.NameFromMeta(nodeCur.ObjectMeta), nodeCur) return } + c.moveMasterPodsOffNode(nodeCur) } @@ -73,8 +77,9 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool { } func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { + nodeName := util.NameFromMeta(node.ObjectMeta) - c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q", + c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label %q", nodeName, c.opConfig.NodeReadinessLabel) opts := metav1.ListOptions{ @@ -82,7 +87,7 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { } podList, err := c.KubeClient.Pods(c.opConfig.WatchedNamespace).List(opts) if err != nil { - c.logger.Errorf("could not fetch list of the pods: %v", err) + c.logger.Errorf("could not fetch the list of Spilo pods: %v", err) return } @@ -93,17 +98,25 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { } } + movedMasterPods := 0 + movableMasterPods := make(map[*v1.Pod]*cluster.Cluster) + unmovablePods := make(map[spec.NamespacedName]string) + clusters := make(map[*cluster.Cluster]bool) - masterPods := make(map[*v1.Pod]*cluster.Cluster) - movedPods := 0 + for _, pod := range nodePods { + podName := util.NameFromMeta(pod.ObjectMeta) role, ok := pod.Labels[c.opConfig.PodRoleLabel] - if !ok || cluster.PostgresRole(role) != cluster.Master { - if !ok { - c.logger.Warningf("could not move pod %q: pod has no role", podName) - } + if !ok { + // pods with an unknown role cannot be safely moved to another node + unmovablePods[podName] = fmt.Sprintf("could not move pod %q from node %q: pod has no role label %q", podName, nodeName, c.opConfig.PodRoleLabel) + continue + } + + // deployments can transparently re-create replicas so we do not move away such pods + if cluster.PostgresRole(role) == cluster.Replica { continue } @@ -113,7 +126,7 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { cl, ok := c.clusters[clusterName] c.clustersMu.RUnlock() if !ok { - c.logger.Warningf("could not move pod %q: pod does not belong to a known cluster", podName) + unmovablePods[podName] = fmt.Sprintf("could not move master pod %q from node %q: pod belongs to an unknown Postgres cluster %q", podName, nodeName, clusterName) continue } @@ -121,20 +134,20 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { clusters[cl] = true } - masterPods[pod] = cl + movableMasterPods[pod] = cl } for cl := range clusters { cl.Lock() } - for pod, cl := range masterPods { - podName := util.NameFromMeta(pod.ObjectMeta) + for pod, cl := range movableMasterPods { - if err := cl.MigrateMasterPod(podName); err != nil { - c.logger.Errorf("could not move master pod %q: %v", podName, err) + podName := util.NameFromMeta(pod.ObjectMeta) + if err := cl.MigrateMasterPod(podName); err == nil { + movedMasterPods++ } else { - movedPods++ + unmovablePods[podName] = fmt.Sprintf("could not move master pod %q from node %q: %v", podName, nodeName, err) } } @@ -142,15 +155,16 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { cl.Unlock() } - totalPods := 
len(masterPods) - - c.logger.Infof("%d/%d master pods have been moved out from the %q node", - movedPods, totalPods, nodeName) - - if leftPods := totalPods - movedPods; leftPods > 0 { - c.logger.Warnf("could not move master %d/%d pods from the %q node", - leftPods, totalPods, nodeName) + if leftPods := len(unmovablePods); leftPods > 0 { + c.logger.Warnf("could not move %d master or unknown role pods from the node %q, you may have to delete them manually", + leftPods, nodeName) + for _, reason := range unmovablePods { + c.logger.Warning(reason) + } } + + c.logger.Infof("%d master pods have been moved out from the node %q", movedMasterPods, nodeName) + } func (c *Controller) nodeDelete(obj interface{}) { diff --git a/run_operator_locally.sh b/run_operator_locally.sh index 301803c35..d6c416d56 100755 --- a/run_operator_locally.sh +++ b/run_operator_locally.sh @@ -121,7 +121,7 @@ function deploy_self_built_image() { # update the tag in the postgres operator conf # since the image with this tag already exists on the machine, # docker should not attempt to fetch it from the registry due to imagePullPolicy - sed --expression "s/\(image\:.*\:\).*$/\1$TAG/" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" + sed --expression "s/\(image\:.*\:\).*$/\1$TAG/; s/smoke-tested-//" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" retry "kubectl create -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource" } From 26670408c48612e92cbb59f1f02d78783e642096 Mon Sep 17 00:00:00 2001 From: zerg-junior Date: Fri, 21 Dec 2018 17:39:34 +0100 Subject: [PATCH 06/11] Revert "Unify warnings about unmovable pods (#389)" (#430) This reverts commit 4fa09e0dcbe556106beaed4078e6c2756245e931. Reason: the reverted commit bloats the logs --- pkg/controller/node.go | 68 ++++++++++++++++------------------------- run_operator_locally.sh | 2 +- 2 files changed, 28 insertions(+), 42 deletions(-) diff --git a/pkg/controller/node.go b/pkg/controller/node.go index b3e30cc9b..dc919c450 100644 --- a/pkg/controller/node.go +++ b/pkg/controller/node.go @@ -7,10 +7,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" - "fmt" - "github.com/zalando-incubator/postgres-operator/pkg/cluster" - "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" ) @@ -58,16 +55,15 @@ func (c *Controller) nodeUpdate(prev, cur interface{}) { return } - if !c.nodeIsReady(nodePrev) { - c.logger.Debugf("The decommissioned node %v should have already triggered master pod migration. Previous k8s-reported state of the node: %v", util.NameFromMeta(nodePrev.ObjectMeta), nodePrev) + if util.MapContains(nodeCur.Labels, map[string]string{"master": "true"}) { return } - if c.nodeIsReady(nodeCur) { - c.logger.Debugf("The decommissioned node %v become schedulable again. Current k8s-reported state of the node: %v", util.NameFromMeta(nodeCur.ObjectMeta), nodeCur) + // do nothing if the node should have already triggered an update or + // if only one of the label and the unschedulability criteria are met. 
+ if !c.nodeIsReady(nodePrev) || c.nodeIsReady(nodeCur) { return } - c.moveMasterPodsOffNode(nodeCur) } @@ -77,9 +73,8 @@ func (c *Controller) nodeIsReady(node *v1.Node) bool { } func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { - nodeName := util.NameFromMeta(node.ObjectMeta) - c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label %q", + c.logger.Infof("moving pods: node %q became unschedulable and does not have a ready label: %q", nodeName, c.opConfig.NodeReadinessLabel) opts := metav1.ListOptions{ @@ -87,7 +82,7 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { } podList, err := c.KubeClient.Pods(c.opConfig.WatchedNamespace).List(opts) if err != nil { - c.logger.Errorf("could not fetch the list of Spilo pods: %v", err) + c.logger.Errorf("could not fetch list of the pods: %v", err) return } @@ -98,25 +93,17 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { } } - movedMasterPods := 0 - movableMasterPods := make(map[*v1.Pod]*cluster.Cluster) - unmovablePods := make(map[spec.NamespacedName]string) - clusters := make(map[*cluster.Cluster]bool) - + masterPods := make(map[*v1.Pod]*cluster.Cluster) + movedPods := 0 for _, pod := range nodePods { - podName := util.NameFromMeta(pod.ObjectMeta) role, ok := pod.Labels[c.opConfig.PodRoleLabel] - if !ok { - // pods with an unknown role cannot be safely moved to another node - unmovablePods[podName] = fmt.Sprintf("could not move pod %q from node %q: pod has no role label %q", podName, nodeName, c.opConfig.PodRoleLabel) - continue - } - - // deployments can transparently re-create replicas so we do not move away such pods - if cluster.PostgresRole(role) == cluster.Replica { + if !ok || cluster.PostgresRole(role) != cluster.Master { + if !ok { + c.logger.Warningf("could not move pod %q: pod has no role", podName) + } continue } @@ -126,7 +113,7 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { cl, ok := c.clusters[clusterName] c.clustersMu.RUnlock() if !ok { - unmovablePods[podName] = fmt.Sprintf("could not move master pod %q from node %q: pod belongs to an unknown Postgres cluster %q", podName, nodeName, clusterName) + c.logger.Warningf("could not move pod %q: pod does not belong to a known cluster", podName) continue } @@ -134,20 +121,20 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { clusters[cl] = true } - movableMasterPods[pod] = cl + masterPods[pod] = cl } for cl := range clusters { cl.Lock() } - for pod, cl := range movableMasterPods { - + for pod, cl := range masterPods { podName := util.NameFromMeta(pod.ObjectMeta) - if err := cl.MigrateMasterPod(podName); err == nil { - movedMasterPods++ + + if err := cl.MigrateMasterPod(podName); err != nil { + c.logger.Errorf("could not move master pod %q: %v", podName, err) } else { - unmovablePods[podName] = fmt.Sprintf("could not move master pod %q from node %q: %v", podName, nodeName, err) + movedPods++ } } @@ -155,16 +142,15 @@ func (c *Controller) moveMasterPodsOffNode(node *v1.Node) { cl.Unlock() } - if leftPods := len(unmovablePods); leftPods > 0 { - c.logger.Warnf("could not move %d master or unknown role pods from the node %q, you may have to delete them manually", - leftPods, nodeName) - for _, reason := range unmovablePods { - c.logger.Warning(reason) - } + totalPods := len(masterPods) + + c.logger.Infof("%d/%d master pods have been moved out from the %q node", + movedPods, totalPods, nodeName) + + if leftPods := totalPods - movedPods; leftPods > 0 { + c.logger.Warnf("could not move master %d/%d 
pods from the %q node", + leftPods, totalPods, nodeName) } - - c.logger.Infof("%d master pods have been moved out from the node %q", movedMasterPods, nodeName) - } func (c *Controller) nodeDelete(obj interface{}) { diff --git a/run_operator_locally.sh b/run_operator_locally.sh index d6c416d56..301803c35 100755 --- a/run_operator_locally.sh +++ b/run_operator_locally.sh @@ -121,7 +121,7 @@ function deploy_self_built_image() { # update the tag in the postgres operator conf # since the image with this tag already exists on the machine, # docker should not attempt to fetch it from the registry due to imagePullPolicy - sed --expression "s/\(image\:.*\:\).*$/\1$TAG/; s/smoke-tested-//" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" + sed --expression "s/\(image\:.*\:\).*$/\1$TAG/" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" retry "kubectl create -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource" } From c0b0b9a83282f8a0cbb089f68f011744a58e2e41 Mon Sep 17 00:00:00 2001 From: zerg-junior Date: Thu, 27 Dec 2018 10:14:33 +0100 Subject: [PATCH 07/11] [WIP] Add 'admin' option to create role (#425) * Add 'admin' option to create role * Fix run_locally_script --- docs/reference/operator_parameters.md | 3 +++ manifests/configmap.yaml | 1 + pkg/cluster/cluster.go | 13 +++++++++---- pkg/spec/types.go | 1 + pkg/util/config/config.go | 1 + pkg/util/users/users.go | 7 ++++++- run_operator_locally.sh | 2 +- 7 files changed, 22 insertions(+), 6 deletions(-) diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 3f96b450c..23c625bcb 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -373,6 +373,9 @@ key. role name to grant to team members created from the Teams API. The default is `admin`, that role is created by Spilo as a `NOLOGIN` role. +* **enable_admin_role_for_users** + if `true`, the `team_admin_role` will have the rights to grant roles coming from PG manifests. Such roles will be created as in "CREATE ROLE 'role_from_manifest' ... ADMIN 'team_admin_role'". The default is `true`. + * **pam_role_name** when set, the operator will add all team member roles to this group and add a `pg_hba` line to authenticate members of that role via `pam`. 
The default is diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index d127e72f2..be72ce2c5 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -19,6 +19,7 @@ data: # postgres_superuser_teams: "postgres_superusers" # enable_team_superuser: "false" # team_admin_role: "admin" + # enable_admin_role_for_users: "true" # teams_api_url: http://fake-teams-api.default.svc.cluster.local # team_api_role_configuration: "log_statement:all" # infrastructure_roles_secret_name: postgresql-infrastructure-roles diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index b2208705a..7eaa873fd 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -709,11 +709,16 @@ func (c *Cluster) initRobotUsers() error { if err != nil { return fmt.Errorf("invalid flags for user %q: %v", username, err) } + adminRole := "" + if c.OpConfig.EnableAdminRoleForUsers { + adminRole = c.OpConfig.TeamAdminRole + } newRole := spec.PgUser{ - Origin: spec.RoleOriginManifest, - Name: username, - Password: util.RandomPassword(constants.PasswordLength), - Flags: flags, + Origin: spec.RoleOriginManifest, + Name: username, + Password: util.RandomPassword(constants.PasswordLength), + Flags: flags, + AdminRole: adminRole, } if currentRole, present := c.pgUsers[username]; present { c.pgUsers[username] = c.resolveNameConflict(¤tRole, &newRole) diff --git a/pkg/spec/types.go b/pkg/spec/types.go index e394462d4..edcde5a3b 100644 --- a/pkg/spec/types.go +++ b/pkg/spec/types.go @@ -49,6 +49,7 @@ type PgUser struct { Flags []string `yaml:"user_flags"` MemberOf []string `yaml:"inrole"` Parameters map[string]string `yaml:"db_parameters"` + AdminRole string `yaml:"admin_role"` } // PgUserMap maps user names to the definitions. diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index d855e0a2a..124935a03 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -90,6 +90,7 @@ type Config struct { EnableTeamsAPI bool `name:"enable_teams_api" default:"true"` EnableTeamSuperuser bool `name:"enable_team_superuser" default:"false"` TeamAdminRole string `name:"team_admin_role" default:"admin"` + EnableAdminRoleForUsers bool `name:"enable_admin_role_for_users" default:"true"` EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"` EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"` // deprecated and kept for backward compatibility diff --git a/pkg/util/users/users.go b/pkg/util/users/users.go index cd76c621d..b436595ef 100644 --- a/pkg/util/users/users.go +++ b/pkg/util/users/users.go @@ -5,9 +5,10 @@ import ( "fmt" "strings" + "reflect" + "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" - "reflect" ) const ( @@ -19,6 +20,7 @@ const ( doBlockStmt = `SET LOCAL synchronous_commit = 'local'; DO $$ BEGIN %s; END;$$;` passwordTemplate = "ENCRYPTED PASSWORD '%s'" inRoleTemplate = `IN ROLE %s` + adminTemplate = `ADMIN %s` ) // DefaultUserSyncStrategy implements a user sync strategy that merges already existing database users @@ -113,6 +115,9 @@ func (strategy DefaultUserSyncStrategy) createPgUser(user spec.PgUser, db *sql.D if len(user.MemberOf) > 0 { userFlags = append(userFlags, fmt.Sprintf(inRoleTemplate, quoteMemberList(user))) } + if user.AdminRole != "" { + userFlags = append(userFlags, fmt.Sprintf(adminTemplate, user.AdminRole)) + } if user.Password == "" { userPassword = "PASSWORD NULL" diff --git a/run_operator_locally.sh b/run_operator_locally.sh 
index 301803c35..d6c416d56 100755 --- a/run_operator_locally.sh +++ b/run_operator_locally.sh @@ -121,7 +121,7 @@ function deploy_self_built_image() { # update the tag in the postgres operator conf # since the image with this tag already exists on the machine, # docker should not attempt to fetch it from the registry due to imagePullPolicy - sed --expression "s/\(image\:.*\:\).*$/\1$TAG/" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" + sed --expression "s/\(image\:.*\:\).*$/\1$TAG/; s/smoke-tested-//" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" retry "kubectl create -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource" } From 8d766e020c81deb5db365b44154126bc14626fb3 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 2 Jan 2019 10:31:28 +0100 Subject: [PATCH 08/11] Fix reference to enable_database_access in operator_parameters.md (#435) --- docs/reference/operator_parameters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 23c625bcb..5192f7f36 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -334,7 +334,7 @@ Options to aid debugging of the operator itself. Grouped under the `debug` key. boolean parameter that toggles verbose debug logs from the operator. The default is `true`. -* **enable_db_access** +* **enable_database_access** boolean parameter that toggles the functionality of the operator that require access to the postgres database, i.e. creating databases and users. The default is `true`. From 5cfcc453a90e2a8d12b9a916d62aecbd6f867708 Mon Sep 17 00:00:00 2001 From: zerg-junior Date: Wed, 2 Jan 2019 12:01:47 +0100 Subject: [PATCH 09/11] Update CRD configuration docs and fix the CDP build (#414) * Update CRD configuration docs * document resource consumption of the operator * Add talks by Oleksii --- docs/index.md | 4 ++- docs/reference/operator_parameters.md | 33 ++++++++++++++---------- manifests/minimal-postgres-manifest.yaml | 3 ++- manifests/postgres-operator.yaml | 7 +++++ pkg/cluster/k8sres.go | 1 + 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/index.md b/docs/index.md index 397dbea0d..e038f713e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -51,7 +51,9 @@ Please, report any issues discovered to https://github.com/zalando-incubator/pos ## Talks -1. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni) +1. "PostgreSQL and Kubernetes: DBaaS without a vendor-lock" talk by Oleksii Kliukin, PostgreSQL Sessions 2018: [slides](https://speakerdeck.com/alexeyklyukin/postgresql-and-kubernetes-dbaas-without-a-vendor-lock) + +2. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni) 2. 
"Blue elephant on-demand: Postgres + Kubernetes" talk by Oleksii Kliukin and Jan Mussler, FOSDEM 2018: [video](https://fosdem.org/2018/schedule/event/blue_elephant_on_demand_postgres_kubernetes/) | [slides (pdf)](https://www.postgresql.eu/events/fosdem2018/sessions/session/1735/slides/59/FOSDEM%202018_%20Blue_Elephant_On_Demand.pdf) diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 5192f7f36..e76a3627a 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -10,29 +10,37 @@ configuration. configuration structure. There is an [example](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/configmap.yaml) -* CRD-based configuration. The configuration is stored in the custom YAML - manifest, an instance of the custom resource definition (CRD) called - `OperatorConfiguration`. This CRD is registered by the operator - during the start when `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable is - set to a non-empty value. The CRD-based configuration is a regular YAML - document; non-scalar keys are simply represented in the usual YAML way. The - usage of the CRD-based configuration is triggered by setting the - `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable, which should point to the - `postgresql-operator-configuration` object name in the operators namespace. +* CRD-based configuration. The configuration is stored in a custom YAML + manifest. The manifest is an instance of the custom resource definition (CRD) called + `OperatorConfiguration`. The operator registers this CRD + during the start and uses it for configuration if the [operator deployment manifest ](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L21) sets the `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` env variable to a non-empty value. The variable should point to the + `postgresql-operator-configuration` object in the operator's namespace. + + The CRD-based configuration is a regular YAML + document; non-scalar keys are simply represented in the usual YAML way. There are no default values built-in in the operator, each parameter that is not supplied in the configuration receives an empty value. In order to create your own configuration just copy the [default one](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml) and change it. -CRD-based configuration is more natural and powerful then the one based on + To test the CRD-based configuration locally, use the following + ```bash + kubectl create -f manifests/operator-service-account-rbac.yaml + kubectl create -f manifests/postgres-operator.yaml # set the env var as mentioned above + kubectl create -f manifests/postgresql-operator-default-configuration.yaml + kubectl get operatorconfigurations postgresql-operator-default-configuration -o yaml + ``` + Note that the operator first registers the definition of the CRD `OperatorConfiguration` and then waits for an instance of the CRD to be created. In between these two event the operator pod may be failing since it cannot fetch the not-yet-existing `OperatorConfiguration` instance. + +The CRD-based configuration is more powerful than the one based on ConfigMaps and should be used unless there is a compatibility requirement to use an already existing configuration. 
Even in that case, it should be rather straightforward to convert the configmap based configuration into the CRD-based one and restart the operator. The ConfigMaps-based configuration will be deprecated and subsequently removed in future releases. -Note that for the CRD-based configuration configuration groups below correspond +Note that for the CRD-based configuration groups of configuration options below correspond to the non-leaf keys in the target YAML (i.e. for the Kubernetes resources the key is `kubernetes`). The key is mentioned alongside the group description. The ConfigMap-based configuration is flat and does not allow non-leaf keys. @@ -46,7 +54,6 @@ They will be deprecated and removed in the future. Variable names are underscore-separated words. - ## General Those are top-level keys, containing both leaf keys and groups. @@ -222,7 +229,7 @@ CRD-based configuration. settings. The default is `1Gi`. * **set_memory_request_to_limit** - Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers (Postgres, Scalyr sidecar, and other sidecars). The default is `false`. + Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers created by the operator (Postgres, Scalyr sidecar, and other sidecars); to set resources for the operator's own container, change the [operator deployment manually](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L13). The default is `false`. * **enable_shm_volume** Instruct operator to start any new database pod without limitations on shm diff --git a/manifests/minimal-postgres-manifest.yaml b/manifests/minimal-postgres-manifest.yaml index ae5d36cbc..402946b09 100644 --- a/manifests/minimal-postgres-manifest.yaml +++ b/manifests/minimal-postgres-manifest.yaml @@ -15,7 +15,8 @@ spec: - createdb # role for application foo - foo_user: + foo_user: [] + #databases: name->owner databases: diff --git a/manifests/postgres-operator.yaml b/manifests/postgres-operator.yaml index d8a4a6ac4..fffb8ede8 100644 --- a/manifests/postgres-operator.yaml +++ b/manifests/postgres-operator.yaml @@ -14,6 +14,13 @@ spec: - name: postgres-operator image: registry.opensource.zalan.do/acid/smoke-tested-postgres-operator:v1.0.0-21-ge39915c imagePullPolicy: IfNotPresent + resources: + requests: + cpu: 500m + memory: 250Mi + limits: + cpu: 2000m + memory: 500Mi env: # provided additional ENV vars can overwrite individual config map entries - name: CONFIG_MAP_NAME diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 6a3a052bd..bfd5ccb01 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -661,6 +661,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State volumeClaimTemplate *v1.PersistentVolumeClaim ) + // Improve me. Please. 
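+	// The block below widens the memory request of the Postgres container and
+	// of every sidecar up to the respective memory limit; the operator-wide
+	// defaults were already widened at startup (see initOperatorConfig).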
if c.OpConfig.SetMemoryRequestToLimit { // controller adjusts the default memory request at operator startup From 744567826115680bbadff90f8ea2a45f5dcb61df Mon Sep 17 00:00:00 2001 From: Jan Mussler Date: Fri, 4 Jan 2019 12:25:38 +0100 Subject: [PATCH 10/11] bump spilo versions. (#439) --- manifests/configmap.yaml | 2 +- manifests/postgresql-operator-default-configuration.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index be72ce2c5..30fbbb63d 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -10,7 +10,7 @@ data: debug_logging: "true" workers: "4" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.5-p35 + docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p42 pod_service_account_name: "zalando-postgres-operator" secret_name_template: '{username}.{cluster}.credentials' super_username: postgres diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 391702cdc..6d3c819b7 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -4,7 +4,7 @@ metadata: name: postgresql-operator-default-configuration configuration: etcd_host: "" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29 + docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p42 workers: 4 min_instances: -1 max_instances: -1 From f7058c754d69127b3ce6cc45c301ac2c59cc6f97 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Fri, 4 Jan 2019 13:42:52 +0100 Subject: [PATCH 11/11] Pass more variables to Spilo container (#437) Pass KUBERNETES_SCOPE_LABEL, KUBERNETES_ROLE_LABEL and KUBERNETES_LABELS to spilo container, so that they could be changed. 
Fix for #411 --- pkg/cluster/k8sres.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index bfd5ccb01..fec795ad0 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -339,7 +339,6 @@ func generateSpiloContainer( envVars []v1.EnvVar, volumeMounts []v1.VolumeMount, ) *v1.Container { - privilegedMode := true return &v1.Container{ Name: name, @@ -491,6 +490,18 @@ func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration stri Name: "PGUSER_SUPERUSER", Value: c.OpConfig.SuperUsername, }, + { + Name: "KUBERNETES_SCOPE_LABEL", + Value: c.OpConfig.ClusterNameLabel, + }, + { + Name: "KUBERNETES_ROLE_LABEL", + Value: c.OpConfig.PodRoleLabel, + }, + { + Name: "KUBERNETES_LABELS", + Value: labels.Set(c.OpConfig.ClusterLabels).String(), + }, { Name: "PGPASSWORD_SUPERUSER", ValueFrom: &v1.EnvVarSource{ @@ -741,8 +752,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State // generate environment variables for the spilo container spiloEnvVars := deduplicateEnvVars( - c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone, customPodEnvVarsList), - c.containerName(), c.logger) + c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone, + customPodEnvVarsList), c.containerName(), c.logger) // pickup the docker image for the spilo container effectiveDockerImage := util.Coalesce(spec.DockerImage, c.OpConfig.DockerImage) @@ -750,6 +761,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State volumeMounts := generateVolumeMounts() // generate the spilo container + c.logger.Debugf("Generating Spilo container, environment variables: %v", spiloEnvVars) spiloContainer := generateSpiloContainer(c.containerName(), &effectiveDockerImage, resourceRequirements, @@ -757,7 +769,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State volumeMounts, ) - // resolve conflicts between operator-global and per-cluster sidecards + // resolve conflicts between operator-global and per-cluster sidecars sideCars := c.mergeSidecars(spec.Sidecars) resourceRequirementsScalyrSidecar := makeResources( @@ -786,7 +798,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State tolerationSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration) effectivePodPriorityClassName := util.Coalesce(spec.PodPriorityClassName, c.OpConfig.PodPriorityClassName) - // generate pod template for the statefulset, based on the spilo container and sidecards + // generate pod template for the statefulset, based on the spilo container and sidecars if podTemplate, err = generatePodTemplate( c.Namespace, c.labelsSet(true),
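
The `KUBERNETES_LABELS` value passed above is the canonical string rendering of
the configured cluster labels. A minimal sketch of that rendering with
`labels.Set` (the `application=spilo` pair is only an illustrative assumption,
not necessarily the operator's configured default):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// labels.Set is a map[string]string whose String() method renders it as
	// comma-separated key=value pairs, the format Spilo receives.
	clusterLabels := labels.Set{"application": "spilo"} // illustrative value
	fmt.Println(clusterLabels.String())                 // prints: application=spilo
}
```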