diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 000000000..4ffc1915a --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,5 @@ +# https://github.com/golangci/golangci/wiki/Configuration + +service: + prepare: + - make deps diff --git a/README.md b/README.md index 18ea97538..d96458eec 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Coverage Status](https://coveralls.io/repos/github/zalando-incubator/postgres-operator/badge.svg)](https://coveralls.io/github/zalando-incubator/postgres-operator) [![Go Report Card](https://goreportcard.com/badge/github.com/zalando-incubator/postgres-operator)](https://goreportcard.com/report/github.com/zalando-incubator/postgres-operator) [![GoDoc](https://godoc.org/github.com/zalando-incubator/postgres-operator?status.svg)](https://godoc.org/github.com/zalando-incubator/postgres-operator) +[![golangci](https://golangci.com/badges/github.com/zalando-incubator/postgres-operator.svg)](https://golangci.com/r/github.com/zalando-incubator/postgres-operator) ## Introduction @@ -90,6 +91,8 @@ cd postgres-operator ./run_operator_locally.sh ``` +Note we provide the `/manifests` directory as an example only; you should consider adjusting the manifests to your particular setting. + ## Running and testing the operator The best way to test the operator is to run it locally in [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). See developer docs(`docs/developer.yaml`) for details. diff --git a/docs/administrator.md b/docs/administrator.md index 83594ef1f..f2c747cce 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -41,12 +41,12 @@ manifests: ```bash $ kubectl create namespace test - $ kubectl config set-context --namespace=test + $ kubectl config set-context $(kubectl config current-context) --namespace=test ``` All subsequent `kubectl` commands will work with the `test` namespace. The -operator will run in this namespace and look up needed resources - such as its -config map - there. +operator will run in this namespace and look up needed resources - such as its +config map - there. Please note that the namespace for service accounts and cluster role bindings in [operator RBAC rules](manifests/operator-service-account-rbac.yaml) needs to be adjusted to the non-default value. ## Specify the namespace to watch diff --git a/docs/index.md b/docs/index.md index 397dbea0d..e038f713e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -51,7 +51,9 @@ Please, report any issues discovered to https://github.com/zalando-incubator/pos ## Talks -1. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni) +1. "PostgreSQL and Kubernetes: DBaaS without a vendor-lock" talk by Oleksii Kliukin, PostgreSQL Sessions 2018: [slides](https://speakerdeck.com/alexeyklyukin/postgresql-and-kubernetes-dbaas-without-a-vendor-lock) + +2. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni) 2. "Blue elephant on-demand: Postgres + Kubernetes" talk by Oleksii Kliukin and Jan Mussler, FOSDEM 2018: [video](https://fosdem.org/2018/schedule/event/blue_elephant_on_demand_postgres_kubernetes/) | [slides (pdf)](https://www.postgresql.eu/events/fosdem2018/sessions/session/1735/slides/59/FOSDEM%202018_%20Blue_Elephant_On_Demand.pdf) diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 75de35097..efc850aff 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -96,7 +96,19 @@ Those are parameters grouped directly under the `spec` key in the manifest. that should be assigned to the cluster pods. When not specified, the value is taken from the `pod_priority_class_name` operator parameter, if not set then the default priority class is taken. The priority class itself must be defined in advance. - + +* **enableShmVolume** + Start a database pod without limitations on shm memory. By default docker + limit `/dev/shm` to `64M` (see e.g. the [docker + issue](https://github.com/docker-library/postgres/issues/416), which could be + not enough if PostgreSQL uses parallel workers heavily. If this option is + present and value is `true`, to the target database pod will be mounted a new + tmpfs volume to remove this limitation. If it's not present, the decision + about mounting a volume will be made based on operator configuration + (`enable_shm_volume`, which is `true` by default). It it's present and value + is `false`, then no volume will be mounted no matter how operator was + configured (so you can override the operator configuration). + ## Postgres parameters Those parameters are grouped under the `postgresql` top-level key. @@ -112,6 +124,7 @@ Those parameters are grouped under the `postgresql` top-level key. cluster. Optional (Spilo automatically sets reasonable defaults for parameters like work_mem or max_connections). + ## Patroni parameters Those parameters are grouped under the `patroni` top-level key. See the [patroni diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 60416e818..81920b342 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -10,29 +10,37 @@ configuration. configuration structure. There is an [example](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/configmap.yaml) -* CRD-based configuration. The configuration is stored in the custom YAML - manifest, an instance of the custom resource definition (CRD) called - `OperatorConfiguration`. This CRD is registered by the operator - during the start when `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable is - set to a non-empty value. The CRD-based configuration is a regular YAML - document; non-scalar keys are simply represented in the usual YAML way. The - usage of the CRD-based configuration is triggered by setting the - `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable, which should point to the - `postgresql-operator-configuration` object name in the operators namespace. +* CRD-based configuration. The configuration is stored in a custom YAML + manifest. The manifest is an instance of the custom resource definition (CRD) called + `OperatorConfiguration`. The operator registers this CRD + during the start and uses it for configuration if the [operator deployment manifest ](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L21) sets the `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` env variable to a non-empty value. The variable should point to the + `postgresql-operator-configuration` object in the operator's namespace. + + The CRD-based configuration is a regular YAML + document; non-scalar keys are simply represented in the usual YAML way. There are no default values built-in in the operator, each parameter that is not supplied in the configuration receives an empty value. In order to create your own configuration just copy the [default one](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml) and change it. -CRD-based configuration is more natural and powerful then the one based on + To test the CRD-based configuration locally, use the following + ```bash + kubectl create -f manifests/operator-service-account-rbac.yaml + kubectl create -f manifests/postgres-operator.yaml # set the env var as mentioned above + kubectl create -f manifests/postgresql-operator-default-configuration.yaml + kubectl get operatorconfigurations postgresql-operator-default-configuration -o yaml + ``` + Note that the operator first registers the definition of the CRD `OperatorConfiguration` and then waits for an instance of the CRD to be created. In between these two event the operator pod may be failing since it cannot fetch the not-yet-existing `OperatorConfiguration` instance. + +The CRD-based configuration is more powerful than the one based on ConfigMaps and should be used unless there is a compatibility requirement to use an already existing configuration. Even in that case, it should be rather straightforward to convert the configmap based configuration into the CRD-based one and restart the operator. The ConfigMaps-based configuration will be deprecated and subsequently removed in future releases. -Note that for the CRD-based configuration configuration groups below correspond +Note that for the CRD-based configuration groups of configuration options below correspond to the non-leaf keys in the target YAML (i.e. for the Kubernetes resources the key is `kubernetes`). The key is mentioned alongside the group description. The ConfigMap-based configuration is flat and does not allow non-leaf keys. @@ -46,7 +54,6 @@ They will be deprecated and removed in the future. Variable names are underscore-separated words. - ## General Those are top-level keys, containing both leaf keys and groups. @@ -221,6 +228,17 @@ CRD-based configuration. memory limits for the postgres containers, unless overridden by cluster-specific settings. The default is `1Gi`. +* **set_memory_request_to_limit** + Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers created by the operator (Postgres, Scalyr sidecar, and other sidecars); to set resources for the operator's own container, change the [operator deployment manually](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L13). The default is `false`. + +* **enable_shm_volume** + Instruct operator to start any new database pod without limitations on shm + memory. If this option is enabled, to the target database pod will be mounted + a new tmpfs volume to remove shm memory limitation (see e.g. the [docker + issue](https://github.com/docker-library/postgres/issues/416)). This option + is global for an operator object, and can be overwritten by `enableShmVolume` + parameter from Postgres manifest. The default is `true` + ## Operator timeouts This set of parameters define various timeouts related to some operator @@ -323,7 +341,7 @@ Options to aid debugging of the operator itself. Grouped under the `debug` key. boolean parameter that toggles verbose debug logs from the operator. The default is `true`. -* **enable_db_access** +* **enable_database_access** boolean parameter that toggles the functionality of the operator that require access to the postgres database, i.e. creating databases and users. The default is `true`. @@ -362,6 +380,9 @@ key. role name to grant to team members created from the Teams API. The default is `admin`, that role is created by Spilo as a `NOLOGIN` role. +* **enable_admin_role_for_users** + if `true`, the `team_admin_role` will have the rights to grant roles coming from PG manifests. Such roles will be created as in "CREATE ROLE 'role_from_manifest' ... ADMIN 'team_admin_role'". The default is `true`. + * **pam_role_name** when set, the operator will add all team member roles to this group and add a `pg_hba` line to authenticate members of that role via `pam`. The default is diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 60d39c94b..c5f80f373 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -6,19 +6,20 @@ metadata: spec: teamId: "ACID" volume: - size: 5Gi + size: 1Gi numberOfInstances: 2 users: #Application/Robot users zalando: - superuser - createdb enableMasterLoadBalancer: true - enableReplicaLoadBalancer: true + enableReplicaLoadBalancer: true allowedSourceRanges: # load balancers' source ranges for both master and replica services - 127.0.0.1/32 databases: foo: zalando #Expert section + enableShmVolume: true postgresql: version: "10" parameters: @@ -31,7 +32,7 @@ spec: memory: 100Mi limits: cpu: 300m - memory: 3000Mi + memory: 300Mi patroni: initdb: encoding: "UTF8" diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index ed7652907..30fbbb63d 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -10,14 +10,16 @@ data: debug_logging: "true" workers: "4" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29 + docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p42 pod_service_account_name: "zalando-postgres-operator" secret_name_template: '{username}.{cluster}.credentials' super_username: postgres enable_teams_api: "false" + # set_memory_request_to_limit: "true" # postgres_superuser_teams: "postgres_superusers" # enable_team_superuser: "false" # team_admin_role: "admin" + # enable_admin_role_for_users: "true" # teams_api_url: http://fake-teams-api.default.svc.cluster.local # team_api_role_configuration: "log_statement:all" # infrastructure_roles_secret_name: postgresql-infrastructure-roles diff --git a/manifests/minimal-postgres-manifest.yaml b/manifests/minimal-postgres-manifest.yaml index ae5d36cbc..402946b09 100644 --- a/manifests/minimal-postgres-manifest.yaml +++ b/manifests/minimal-postgres-manifest.yaml @@ -15,7 +15,8 @@ spec: - createdb # role for application foo - foo_user: + foo_user: [] + #databases: name->owner databases: diff --git a/manifests/postgres-operator.yaml b/manifests/postgres-operator.yaml index d8a4a6ac4..fffb8ede8 100644 --- a/manifests/postgres-operator.yaml +++ b/manifests/postgres-operator.yaml @@ -14,6 +14,13 @@ spec: - name: postgres-operator image: registry.opensource.zalan.do/acid/smoke-tested-postgres-operator:v1.0.0-21-ge39915c imagePullPolicy: IfNotPresent + resources: + requests: + cpu: 500m + memory: 250Mi + limits: + cpu: 2000m + memory: 500Mi env: # provided additional ENV vars can overwrite individual config map entries - name: CONFIG_MAP_NAME diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 391702cdc..6d3c819b7 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -4,7 +4,7 @@ metadata: name: postgresql-operator-default-configuration configuration: etcd_host: "" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29 + docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p42 workers: 4 min_instances: -1 max_instances: -1 diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 4a8884490..f2759f5ad 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -147,6 +147,7 @@ type OperatorConfigurationData struct { PostgresUsersConfiguration PostgresUsersConfiguration `json:"users"` Kubernetes KubernetesMetaConfiguration `json:"kubernetes"` PostgresPodResources PostgresPodResourcesDefaults `json:"postgres_pod_resources"` + SetMemoryRequestToLimit bool `json:"set_memory_request_to_limit,omitempty"` Timeouts OperatorTimeouts `json:"timeouts"` LoadBalancer LoadBalancerConfiguration `json:"load_balancer"` AWSGCP AWSGCPConfiguration `json:"aws_or_gcp"` diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index d56c19f47..4315faeca 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -52,6 +52,7 @@ type PostgresSpec struct { Tolerations []v1.Toleration `json:"tolerations,omitempty"` Sidecars []Sidecar `json:"sidecars,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` + ShmVolume *bool `json:"enableShmVolume,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -92,8 +93,8 @@ type ResourceDescription struct { // Resources describes requests and limits for the cluster resouces. type Resources struct { - ResourceRequest ResourceDescription `json:"requests,omitempty"` - ResourceLimits ResourceDescription `json:"limits,omitempty"` + ResourceRequests ResourceDescription `json:"requests,omitempty"` + ResourceLimits ResourceDescription `json:"limits,omitempty"` } // Patroni contains Patroni-specific configuration diff --git a/pkg/apis/acid.zalan.do/v1/util_test.go b/pkg/apis/acid.zalan.do/v1/util_test.go index 99e3f2b7c..01be31e88 100644 --- a/pkg/apis/acid.zalan.do/v1/util_test.go +++ b/pkg/apis/acid.zalan.do/v1/util_test.go @@ -240,8 +240,8 @@ var unmarshalCluster = []struct { Slots: map[string]map[string]string{"permanent_logical_1": {"type": "logical", "database": "foo", "plugin": "pgoutput"}}, }, Resources: Resources{ - ResourceRequest: ResourceDescription{CPU: "10m", Memory: "50Mi"}, - ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"}, + ResourceRequests: ResourceDescription{CPU: "10m", Memory: "50Mi"}, + ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"}, }, TeamID: "ACID", @@ -499,7 +499,7 @@ func TestMarshal(t *testing.T) { t.Errorf("Marshal error: %v", err) } if !bytes.Equal(m, tt.marshal) { - t.Errorf("Marshal Postgresql expected: %q, got: %q", string(tt.marshal), string(m)) + t.Errorf("Marshal Postgresql \nexpected: %q, \ngot: %q", string(tt.marshal), string(m)) } } } @@ -507,11 +507,11 @@ func TestMarshal(t *testing.T) { func TestPostgresMeta(t *testing.T) { for _, tt := range unmarshalCluster { if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { - t.Errorf("GetObjectKindMeta expected: %v, got: %v", tt.out.TypeMeta, a) + t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot: %v", tt.out.TypeMeta, a) } if a := tt.out.GetObjectMeta(); reflect.DeepEqual(a, tt.out.ObjectMeta) { - t.Errorf("GetObjectMeta expected: %v, got: %v", tt.out.ObjectMeta, a) + t.Errorf("GetObjectMeta \nexpected: %v, \ngot: %v", tt.out.ObjectMeta, a) } } } diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 4496f8c0a..55a5ac075 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -573,7 +573,7 @@ func (in *ResourceDescription) DeepCopy() *ResourceDescription { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Resources) DeepCopyInto(out *Resources) { *out = *in - out.ResourceRequest = in.ResourceRequest + out.ResourceRequests = in.ResourceRequests out.ResourceLimits = in.ResourceLimits return } diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index b2208705a..7eaa873fd 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -709,11 +709,16 @@ func (c *Cluster) initRobotUsers() error { if err != nil { return fmt.Errorf("invalid flags for user %q: %v", username, err) } + adminRole := "" + if c.OpConfig.EnableAdminRoleForUsers { + adminRole = c.OpConfig.TeamAdminRole + } newRole := spec.PgUser{ - Origin: spec.RoleOriginManifest, - Name: username, - Password: util.RandomPassword(constants.PasswordLength), - Flags: flags, + Origin: spec.RoleOriginManifest, + Name: username, + Password: util.RandomPassword(constants.PasswordLength), + Flags: flags, + AdminRole: adminRole, } if currentRole, present := c.pgUsers[username]; present { c.pgUsers[username] = c.resolveNameConflict(¤tRole, &newRole) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 66ac55388..fec795ad0 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -18,6 +18,7 @@ import ( acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" + "github.com/zalando-incubator/postgres-operator/pkg/util/config" "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "k8s.io/apimachinery/pkg/labels" ) @@ -92,18 +93,18 @@ func (c *Cluster) makeDefaultResources() acidv1.Resources { defaultRequests := acidv1.ResourceDescription{CPU: config.DefaultCPURequest, Memory: config.DefaultMemoryRequest} defaultLimits := acidv1.ResourceDescription{CPU: config.DefaultCPULimit, Memory: config.DefaultMemoryLimit} - return acidv1.Resources{ResourceRequest: defaultRequests, ResourceLimits: defaultLimits} + return acidv1.Resources{ResourceRequests: defaultRequests, ResourceLimits: defaultLimits} } func generateResourceRequirements(resources acidv1.Resources, defaultResources acidv1.Resources) (*v1.ResourceRequirements, error) { var err error - specRequests := resources.ResourceRequest + specRequests := resources.ResourceRequests specLimits := resources.ResourceLimits result := v1.ResourceRequirements{} - result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequest) + result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequests) if err != nil { return nil, fmt.Errorf("could not fill resource requests: %v", err) } @@ -338,7 +339,6 @@ func generateSpiloContainer( envVars []v1.EnvVar, volumeMounts []v1.VolumeMount, ) *v1.Container { - privilegedMode := true return &v1.Container{ Name: name, @@ -377,8 +377,8 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar, resources, err := generateResourceRequirements( makeResources( - sidecar.Resources.ResourceRequest.CPU, - sidecar.Resources.ResourceRequest.Memory, + sidecar.Resources.ResourceRequests.CPU, + sidecar.Resources.ResourceRequests.Memory, sidecar.Resources.ResourceLimits.CPU, sidecar.Resources.ResourceLimits.Memory, ), @@ -396,6 +396,16 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar, return nil, nil } +// Check whether or not we're requested to mount an shm volume, +// taking into account that PostgreSQL manifest has precedence. +func mountShmVolumeNeeded(opConfig config.Config, pgSpec *acidv1.PostgresSpec) bool { + if pgSpec.ShmVolume != nil { + return *pgSpec.ShmVolume + } + + return opConfig.ShmVolume +} + func generatePodTemplate( namespace string, labels labels.Set, @@ -407,6 +417,7 @@ func generatePodTemplate( podServiceAccountName string, kubeIAMRole string, priorityClassName string, + shmVolume bool, ) (*v1.PodTemplateSpec, error) { terminateGracePeriodSeconds := terminateGracePeriod @@ -420,6 +431,10 @@ func generatePodTemplate( Tolerations: *tolerationsSpec, } + if shmVolume { + addShmVolume(&podSpec) + } + if nodeAffinity != nil { podSpec.Affinity = nodeAffinity } @@ -475,6 +490,18 @@ func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration stri Name: "PGUSER_SUPERUSER", Value: c.OpConfig.SuperUsername, }, + { + Name: "KUBERNETES_SCOPE_LABEL", + Value: c.OpConfig.ClusterNameLabel, + }, + { + Name: "KUBERNETES_ROLE_LABEL", + Value: c.OpConfig.PodRoleLabel, + }, + { + Name: "KUBERNETES_LABELS", + Value: labels.Set(c.OpConfig.ClusterLabels).String(), + }, { Name: "PGPASSWORD_SUPERUSER", ValueFrom: &v1.EnvVarSource{ @@ -625,7 +652,7 @@ func getBucketScopeSuffix(uid string) string { func makeResources(cpuRequest, memoryRequest, cpuLimit, memoryLimit string) acidv1.Resources { return acidv1.Resources{ - ResourceRequest: acidv1.ResourceDescription{ + ResourceRequests: acidv1.ResourceDescription{ CPU: cpuRequest, Memory: memoryRequest, }, @@ -644,6 +671,61 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State podTemplate *v1.PodTemplateSpec volumeClaimTemplate *v1.PersistentVolumeClaim ) + + // Improve me. Please. + if c.OpConfig.SetMemoryRequestToLimit { + + // controller adjusts the default memory request at operator startup + + request := spec.Resources.ResourceRequests.Memory + if request == "" { + request = c.OpConfig.DefaultMemoryRequest + } + + limit := spec.Resources.ResourceLimits.Memory + if limit == "" { + limit = c.OpConfig.DefaultMemoryLimit + } + + isSmaller, err := util.RequestIsSmallerThanLimit(request, limit) + if err != nil { + return nil, err + } + if isSmaller { + c.logger.Warningf("The memory request of %v for the Postgres container is increased to match the memory limit of %v.", request, limit) + spec.Resources.ResourceRequests.Memory = limit + + } + + // controller adjusts the Scalyr sidecar request at operator startup + // as this sidecar is managed separately + + // adjust sidecar containers defined for that particular cluster + for _, sidecar := range spec.Sidecars { + + // TODO #413 + sidecarRequest := sidecar.Resources.ResourceRequests.Memory + if request == "" { + request = c.OpConfig.DefaultMemoryRequest + } + + sidecarLimit := sidecar.Resources.ResourceLimits.Memory + if limit == "" { + limit = c.OpConfig.DefaultMemoryLimit + } + + isSmaller, err := util.RequestIsSmallerThanLimit(sidecarRequest, sidecarLimit) + if err != nil { + return nil, err + } + if isSmaller { + c.logger.Warningf("The memory request of %v for the %v sidecar container is increased to match the memory limit of %v.", sidecar.Resources.ResourceRequests.Memory, sidecar.Name, sidecar.Resources.ResourceLimits.Memory) + sidecar.Resources.ResourceRequests.Memory = sidecar.Resources.ResourceLimits.Memory + } + } + + } + defaultResources := c.makeDefaultResources() resourceRequirements, err := generateResourceRequirements(spec.Resources, defaultResources) @@ -670,8 +752,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State // generate environment variables for the spilo container spiloEnvVars := deduplicateEnvVars( - c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone, customPodEnvVarsList), - c.containerName(), c.logger) + c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone, + customPodEnvVarsList), c.containerName(), c.logger) // pickup the docker image for the spilo container effectiveDockerImage := util.Coalesce(spec.DockerImage, c.OpConfig.DockerImage) @@ -679,9 +761,15 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State volumeMounts := generateVolumeMounts() // generate the spilo container - spiloContainer := generateSpiloContainer(c.containerName(), &effectiveDockerImage, resourceRequirements, spiloEnvVars, volumeMounts) + c.logger.Debugf("Generating Spilo container, environment variables: %v", spiloEnvVars) + spiloContainer := generateSpiloContainer(c.containerName(), + &effectiveDockerImage, + resourceRequirements, + spiloEnvVars, + volumeMounts, + ) - // resolve conflicts between operator-global and per-cluster sidecards + // resolve conflicts between operator-global and per-cluster sidecars sideCars := c.mergeSidecars(spec.Sidecars) resourceRequirementsScalyrSidecar := makeResources( @@ -710,7 +798,7 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State tolerationSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration) effectivePodPriorityClassName := util.Coalesce(spec.PodPriorityClassName, c.OpConfig.PodPriorityClassName) - // generate pod template for the statefulset, based on the spilo container and sidecards + // generate pod template for the statefulset, based on the spilo container and sidecars if podTemplate, err = generatePodTemplate( c.Namespace, c.labelsSet(true), @@ -721,7 +809,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State int64(c.OpConfig.PodTerminateGracePeriod.Seconds()), c.OpConfig.PodServiceAccountName, c.OpConfig.KubeIAMRole, - effectivePodPriorityClassName); err != nil { + effectivePodPriorityClassName, + mountShmVolumeNeeded(c.OpConfig, spec)); err != nil { return nil, fmt.Errorf("could not generate pod template: %v", err) } @@ -828,6 +917,32 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 { return newcur } +// To avoid issues with limited /dev/shm inside docker environment, when +// PostgreSQL can't allocate enough of dsa segments from it, we can +// mount an extra memory volume +// +// see https://docs.okd.io/latest/dev_guide/shared_memory.html +func addShmVolume(podSpec *v1.PodSpec) { + volumes := append(podSpec.Volumes, v1.Volume{ + Name: constants.ShmVolumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + Medium: "Memory", + }, + }, + }) + + pgIdx := constants.PostgresContainerIdx + mounts := append(podSpec.Containers[pgIdx].VolumeMounts, + v1.VolumeMount{ + Name: constants.ShmVolumeName, + MountPath: constants.ShmVolumePath, + }) + + podSpec.Containers[0].VolumeMounts = mounts + podSpec.Volumes = volumes +} + func generatePersistentVolumeClaimTemplate(volumeSize, volumeStorageClass string) (*v1.PersistentVolumeClaim, error) { var storageClassName *string diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 12e145c04..92946ab2b 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1,8 +1,11 @@ package cluster import ( + "k8s.io/api/core/v1" + acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/util/config" + "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "github.com/zalando-incubator/postgres-operator/pkg/util/k8sutil" "testing" ) @@ -75,3 +78,54 @@ func TestCreateLoadBalancerLogic(t *testing.T) { } } } + +func TestShmVolume(t *testing.T) { + testName := "TestShmVolume" + tests := []struct { + subTest string + podSpec *v1.PodSpec + shmPos int + }{ + { + subTest: "empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{}, + }, + }, + }, + shmPos: 0, + }, + { + subTest: "non empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{v1.Volume{}}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + v1.VolumeMount{}, + }, + }, + }, + }, + shmPos: 1, + }, + } + for _, tt := range tests { + addShmVolume(tt.podSpec) + + volumeName := tt.podSpec.Volumes[tt.shmPos].Name + volumeMountName := tt.podSpec.Containers[0].VolumeMounts[tt.shmPos].Name + + if volumeName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected volume %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeName) + } + if volumeMountName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected mount %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeMountName) + } + } +} diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 1bc4e08e0..fd1c099de 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -110,6 +110,29 @@ func (c *Controller) initOperatorConfig() { c.opConfig = config.NewFromMap(configMapData) c.warnOnDeprecatedOperatorParameters() + if c.opConfig.SetMemoryRequestToLimit { + + isSmaller, err := util.RequestIsSmallerThanLimit(c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit) + if err != nil { + panic(err) + } + if isSmaller { + c.logger.Warningf("The default memory request of %v for Postgres containers is increased to match the default memory limit of %v.", c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit) + c.opConfig.DefaultMemoryRequest = c.opConfig.DefaultMemoryLimit + } + + isSmaller, err = util.RequestIsSmallerThanLimit(c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit) + if err != nil { + panic(err) + } + if isSmaller { + c.logger.Warningf("The memory request of %v for the Scalyr sidecar container is increased to match the memory limit of %v.", c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit) + c.opConfig.ScalyrMemoryRequest = c.opConfig.ScalyrMemoryLimit + } + + // generateStatefulSet adjusts values for individual Postgres clusters + } + } func (c *Controller) modifyConfigFromEnvironment() { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 93ba1a0f4..006cfd2d1 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -55,6 +55,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit result.DefaultMemoryLimit = fromCRD.PostgresPodResources.DefaultMemoryLimit + result.SetMemoryRequestToLimit = fromCRD.SetMemoryRequestToLimit result.ResourceCheckInterval = time.Duration(fromCRD.Timeouts.ResourceCheckInterval) result.ResourceCheckTimeout = time.Duration(fromCRD.Timeouts.ResourceCheckTimeout) diff --git a/pkg/spec/types.go b/pkg/spec/types.go index c9da79297..3e6bec8db 100644 --- a/pkg/spec/types.go +++ b/pkg/spec/types.go @@ -49,6 +49,7 @@ type PgUser struct { Flags []string `yaml:"user_flags"` MemberOf []string `yaml:"inrole"` Parameters map[string]string `yaml:"db_parameters"` + AdminRole string `yaml:"admin_role"` } // PgUserMap maps user names to the definitions. diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 92fd3fd73..124935a03 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,6 +38,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` + ShmVolume bool `name:"enable_shm_volume" default:"true"` } // Auth describes authentication specific configuration parameters @@ -89,6 +90,7 @@ type Config struct { EnableTeamsAPI bool `name:"enable_teams_api" default:"true"` EnableTeamSuperuser bool `name:"enable_team_superuser" default:"false"` TeamAdminRole string `name:"team_admin_role" default:"admin"` + EnableAdminRoleForUsers bool `name:"enable_admin_role_for_users" default:"true"` EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"` EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"` // deprecated and kept for backward compatibility @@ -104,6 +106,7 @@ type Config struct { PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"` ProtectedRoles []string `name:"protected_role_names" default:"admin"` PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""` + SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" defaults:"false"` } // MustMarshal marshals the config or panics diff --git a/pkg/util/constants/kubernetes.go b/pkg/util/constants/kubernetes.go index 2604f124d..a4ea73e80 100644 --- a/pkg/util/constants/kubernetes.go +++ b/pkg/util/constants/kubernetes.go @@ -5,6 +5,7 @@ import "time" // General kubernetes-related constants const ( PostgresContainerName = "postgres" + PostgresContainerIdx = 0 K8sAPIPath = "/apis" StatefulsetDeletionInterval = 1 * time.Second StatefulsetDeletionTimeout = 30 * time.Second diff --git a/pkg/util/constants/postgresql.go b/pkg/util/constants/postgresql.go index 7556e8858..e39fd423f 100644 --- a/pkg/util/constants/postgresql.go +++ b/pkg/util/constants/postgresql.go @@ -10,4 +10,7 @@ const ( PostgresConnectRetryTimeout = 2 * time.Minute PostgresConnectTimeout = 15 * time.Second + + ShmVolumeName = "dshm" + ShmVolumePath = "/dev/shm" ) diff --git a/pkg/util/users/users.go b/pkg/util/users/users.go index cd76c621d..b436595ef 100644 --- a/pkg/util/users/users.go +++ b/pkg/util/users/users.go @@ -5,9 +5,10 @@ import ( "fmt" "strings" + "reflect" + "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" - "reflect" ) const ( @@ -19,6 +20,7 @@ const ( doBlockStmt = `SET LOCAL synchronous_commit = 'local'; DO $$ BEGIN %s; END;$$;` passwordTemplate = "ENCRYPTED PASSWORD '%s'" inRoleTemplate = `IN ROLE %s` + adminTemplate = `ADMIN %s` ) // DefaultUserSyncStrategy implements a user sync strategy that merges already existing database users @@ -113,6 +115,9 @@ func (strategy DefaultUserSyncStrategy) createPgUser(user spec.PgUser, db *sql.D if len(user.MemberOf) > 0 { userFlags = append(userFlags, fmt.Sprintf(inRoleTemplate, quoteMemberList(user))) } + if user.AdminRole != "" { + userFlags = append(userFlags, fmt.Sprintf(adminTemplate, user.AdminRole)) + } if user.Password == "" { userPassword = "PASSWORD NULL" diff --git a/pkg/util/util.go b/pkg/util/util.go index 7b7b58fc4..99e670af9 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -3,12 +3,14 @@ package util import ( "crypto/md5" // #nosec we need it to for PostgreSQL md5 passwords "encoding/hex" + "fmt" "math/rand" "regexp" "strings" "time" "github.com/motomux/pretty" + resource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/zalando-incubator/postgres-operator/pkg/spec" @@ -127,3 +129,19 @@ func Coalesce(val, defaultVal string) string { } return val } + +// RequestIsSmallerThanLimit +func RequestIsSmallerThanLimit(requestStr, limitStr string) (bool, error) { + + request, err := resource.ParseQuantity(requestStr) + if err != nil { + return false, fmt.Errorf("could not parse memory request %v : %v", requestStr, err) + } + + limit, err2 := resource.ParseQuantity(limitStr) + if err2 != nil { + return false, fmt.Errorf("could not parse memory limit %v : %v", limitStr, err2) + } + + return request.Cmp(limit) == -1, nil +} diff --git a/pkg/util/util_test.go b/pkg/util/util_test.go index 53ac13768..3a02149b4 100644 --- a/pkg/util/util_test.go +++ b/pkg/util/util_test.go @@ -69,6 +69,17 @@ var substringMatch = []struct { {regexp.MustCompile(`aaaa (\d+) bbbb`), "aaaa 123 bbbb", nil}, } +var requestIsSmallerThanLimitTests = []struct { + request string + limit string + out bool +}{ + {"1G", "2G", true}, + {"1G", "1Gi", true}, // G is 1000^3 bytes, Gi is 1024^3 bytes + {"1024Mi", "1G", false}, + {"1e9", "1G", false}, // 1e9 bytes == 1G +} + func TestRandomPassword(t *testing.T) { const pwdLength = 10 pwd := RandomPassword(pwdLength) @@ -143,3 +154,15 @@ func TestMapContains(t *testing.T) { } } } + +func TestRequestIsSmallerThanLimit(t *testing.T) { + for _, tt := range requestIsSmallerThanLimitTests { + res, err := RequestIsSmallerThanLimit(tt.request, tt.limit) + if err != nil { + t.Errorf("RequestIsSmallerThanLimit returned unexpected error: %#v", err) + } + if res != tt.out { + t.Errorf("RequestIsSmallerThanLimit expected: %#v, got: %#v", tt.out, res) + } + } +} diff --git a/run_operator_locally.sh b/run_operator_locally.sh index 301803c35..d6c416d56 100755 --- a/run_operator_locally.sh +++ b/run_operator_locally.sh @@ -121,7 +121,7 @@ function deploy_self_built_image() { # update the tag in the postgres operator conf # since the image with this tag already exists on the machine, # docker should not attempt to fetch it from the registry due to imagePullPolicy - sed --expression "s/\(image\:.*\:\).*$/\1$TAG/" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" + sed --expression "s/\(image\:.*\:\).*$/\1$TAG/; s/smoke-tested-//" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" retry "kubectl create -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource" }