diff --git a/.gitignore b/.gitignore index ad08aa383..e09b6644b 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,7 @@ _testmain.go .idea scm-source.json + +# diagrams +*.aux +*.log diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 000000000..4ffc1915a --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,5 @@ +# https://github.com/golangci/golangci/wiki/Configuration + +service: + prepare: + - make deps diff --git a/CODEOWNERS b/CODEOWNERS index 7e6db5933..98d9cd7bb 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,2 +1,2 @@ # global owners -* @alexeyklyukin @erthalion @zerg-junior @Jan-M @CyberDem0n @avaczi +* @alexeyklyukin @erthalion @sdudoladov @Jan-M @CyberDem0n @avaczi @FxKu diff --git a/README.md b/README.md index 18ea97538..432588b27 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,10 @@ [![Coverage Status](https://coveralls.io/repos/github/zalando-incubator/postgres-operator/badge.svg)](https://coveralls.io/github/zalando-incubator/postgres-operator) [![Go Report Card](https://goreportcard.com/badge/github.com/zalando-incubator/postgres-operator)](https://goreportcard.com/report/github.com/zalando-incubator/postgres-operator) [![GoDoc](https://godoc.org/github.com/zalando-incubator/postgres-operator?status.svg)](https://godoc.org/github.com/zalando-incubator/postgres-operator) +[![golangci](https://golangci.com/badges/github.com/zalando-incubator/postgres-operator.svg)](https://golangci.com/r/github.com/zalando-incubator/postgres-operator) + + + ## Introduction @@ -27,8 +31,22 @@ manages PostgreSQL clusters on Kubernetes: 3. Finally, the operator periodically synchronizes the actual state of each Postgres cluster with the desired state defined in the cluster's manifest. 
-There is a browser-friendly version of this documentation at -[postgres-operator.readthedocs.io](https://postgres-operator.readthedocs.io) +Here is a diagram that summarizes what would be created by the operator when a +new Postgres cluster CRD is submitted: + +![postgresql-operator](docs/diagrams/operator.png "K8S resources, created by operator") + +This picture is not complete without an overview of what is inside a pod, so +let's zoom in: + +![pod](docs/diagrams/pod.png "Database pod components") + +These two diagrams should help you to understand the basics of what kind of +functionality the operator provides. Below we discuss everything in more +detail. + +There is a browser-friendly version of this documentation at [postgres-operator.readthedocs.io](https://postgres-operator.readthedocs.io) + ## Table of contents @@ -42,6 +60,13 @@ There is a browser-friendly version of this documentation at the rest of the document is a tutorial to get you up and running with the operator on Minikube. + +## Community + +There are two places to get in touch with the community: +1. The [GitHub issue tracker](https://github.com/zalando-incubator/postgres-operator/issues) +2. The #postgres-operator slack channel under [Postgres Slack](https://postgres-slack.herokuapp.com) + ## Quickstart Prerequisites: @@ -90,6 +115,8 @@ cd postgres-operator ./run_operator_locally.sh ``` +Note we provide the `/manifests` directory as an example only; you should consider adjusting the manifests to your particular setting. + ## Running and testing the operator The best way to test the operator is to run it locally in [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). See developer docs(`docs/developer.yaml`) for details. 
diff --git a/cmd/main.go b/cmd/main.go index b400630f6..09ab40a87 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -47,14 +47,14 @@ func init() { log.Printf("Fully qualified configmap name: %v", config.ConfigMapName) } - if crd_interval := os.Getenv("CRD_READY_WAIT_INTERVAL"); crd_interval != "" { - config.CRDReadyWaitInterval = mustParseDuration(crd_interval) + if crdInterval := os.Getenv("CRD_READY_WAIT_INTERVAL"); crdInterval != "" { + config.CRDReadyWaitInterval = mustParseDuration(crdInterval) } else { config.CRDReadyWaitInterval = 4 * time.Second } - if crd_timeout := os.Getenv("CRD_READY_WAIT_TIMEOUT"); crd_timeout != "" { - config.CRDReadyWaitTimeout = mustParseDuration(crd_timeout) + if crdTimeout := os.Getenv("CRD_READY_WAIT_TIMEOUT"); crdTimeout != "" { + config.CRDReadyWaitTimeout = mustParseDuration(crdTimeout) } else { config.CRDReadyWaitTimeout = 30 * time.Second } diff --git a/delivery.yaml b/delivery.yaml index c939e64f0..5f1f5384f 100644 --- a/delivery.yaml +++ b/delivery.yaml @@ -20,9 +20,6 @@ pipeline: mv go /usr/local ln -s /usr/local/go/bin/go /usr/bin/go go version - - desc: 'Install Docker' - cmd: | - curl -fLOsS https://delivery.cloud.zalando.com/utils/ensure-docker && sh ensure-docker && rm ensure-docker - desc: 'Symlink sources into the GOPATH' cmd: | mkdir -p $OPERATOR_TOP_DIR diff --git a/docs/administrator.md b/docs/administrator.md index 83594ef1f..208e9ddb9 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -1,6 +1,6 @@ ## Create ConfigMap -ConfigMap is used to store the configuration of the operator +A ConfigMap is used to store the configuration of the operator. ```bash $ kubectl create -f manifests/configmap.yaml @@ -41,12 +41,14 @@ manifests: ```bash $ kubectl create namespace test - $ kubectl config set-context --namespace=test + $ kubectl config set-context $(kubectl config current-context) --namespace=test ``` All subsequent `kubectl` commands will work with the `test` namespace. 
The -operator will run in this namespace and look up needed resources - such as its -config map - there. +operator will run in this namespace and look up needed resources - such as its +ConfigMap - there. Please note that the namespace for service accounts and +cluster role bindings in [operator RBAC rules](../manifests/operator-service-account-rbac.yaml) +needs to be adjusted to the non-default value. ## Specify the namespace to watch @@ -56,8 +58,10 @@ replicas to 5" and reacting to the requests, in this example by actually scaling up. By default, the operator watches the namespace it is deployed to. You can -change this by altering the `WATCHED_NAMESPACE` env var in the operator -deployment manifest or the `watched_namespace` field in the operator configmap. +change this by setting the `WATCHED_NAMESPACE` var in the `env` section of the +[operator deployment](../manifests/postgres-operator.yaml) manifest or by +altering the `watched_namespace` field in the operator +[ConfigMap](../manifests/configmap.yaml#L6). In the case both are set, the env var takes the precedence. To make the operator listen to all namespaces, explicitly set the field/env var to "`*`". @@ -75,7 +79,7 @@ in the case database pods need to talk to the Kubernetes API (e.g. when using Kubernetes-native configuration of Patroni). The operator checks that the `pod_service_account_name` exists in the target namespace, and, if not, deploys there the `pod_service_account_definition` from the operator -[`Config`](pkg/util/config/config.go) with the default value of: +[`Config`](../pkg/util/config/config.go) with the default value of: ```yaml apiVersion: v1 @@ -86,13 +90,13 @@ metadata: In this definition, the operator overwrites the account's name to match `pod_service_account_name` and the `default` namespace to match the target -namespace. The operator performs **no** further syncing of this account. +namespace. The operator performs **no** further syncing of this account. 
## Role-based access control for the operator -The `manifests/operator-service-account-rbac.yaml` defines cluster roles and bindings needed -for the operator to function under access control restrictions. To deploy the -operator with this RBAC policy use: +The `manifests/operator-service-account-rbac.yaml` defines cluster roles and +bindings needed for the operator to function under access control restrictions. +To deploy the operator with this RBAC policy use: ```bash $ kubectl create -f manifests/configmap.yaml @@ -103,18 +107,18 @@ operator with this RBAC policy use: Note that the service account in `operator-rbac.yaml` is named `zalando-postgres-operator`. You may have to change the `service_account_name` -in the operator configmap and `serviceAccountName` in the postgres-operator +in the operator ConfigMap and `serviceAccountName` in the postgres-operator deployment appropriately. -This is done intentionally, as to avoid breaking those setups that already work +This is done intentionally to avoid breaking those setups that already work with the default `operator` account. In the future the operator should ideally be run under the `zalando-postgres-operator` service account. -The service account defined in `operator-rbac.yaml` acquires some privileges -not really used by the operator (i.e. we only need list and watch on -configmaps), this is also done intentionally to avoid breaking things if -someone decides to configure the same service account in the operator's -configmap to run postgres clusters. +The service account defined in `operator-rbac.yaml` acquires some privileges +not really used by the operator (i.e. we only need `list` and `watch` on +`configmaps` resources), this is also done intentionally to avoid breaking +things if someone decides to configure the same service account in the +operator's ConfigMap to run postgres clusters. ### Use taints and tolerations for dedicated PostgreSQL nodes @@ -142,14 +146,101 @@ data: ... 
``` +Note that the Kubernetes version 1.13 brings [taint-based eviction](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/#taint-based-evictions) to the beta stage and enables it by default. +Postgres pods by default receive tolerations for `unreachable` and `noExecute` taints with the timeout of `5m`. +Depending on your setup, you may want to adjust these parameters to prevent master pods from being evicted by the Kubernetes runtime. +To prevent eviction completely, specify the toleration by leaving out the `tolerationSeconds` value (similar to how Kubernetes' own DaemonSets are configured) + +### Enable pod anti affinity + +To ensure Postgres pods are running on different topologies, you can use [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) +and configure the required topology in the operator ConfigMap. + +Enable pod anti affinity by adding following line to the operator ConfigMap: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgres-operator +data: + enable_pod_antiaffinity: "true" +``` + +By default the topology key for the pod anti affinity is set to `kubernetes.io/hostname`, +you can set another topology key e.g. `failure-domain.beta.kubernetes.io/zone` by adding following line +to the operator ConfigMap, see [built-in node labels](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels) for available topology keys: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgres-operator +data: + enable_pod_antiaffinity: "true" + pod_antiaffinity_topology_key: "failure-domain.beta.kubernetes.io/zone" +``` + +### Add cluster-specific labels + +In some cases, you might want to add `labels` that are specific to a given +postgres cluster, in order to identify its child objects. +The typical use case is to add labels that identifies the `Pods` created by the +operator, in order to implement fine-controlled `NetworkPolicies`. 
+ +**OperatorConfiguration** + +```yaml +apiVersion: "acid.zalan.do/v1" +kind: OperatorConfiguration +metadata: + name: postgresql-operator-configuration +configuration: + kubernetes: + inherited_labels: + - application + - environment +... +``` + +**cluster manifest** + +```yaml +apiVersion: "acid.zalan.do/v1" +kind: postgresql +metadata: + name: demo-cluster + labels: + application: my-app + environment: demo +spec: +... +``` + +**network policy** + +```yaml +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: netpol-example +spec: + podSelector: + matchLabels: + application: my-app + environment: demo +... +``` + + ## Custom Pod Environment Variables -It is possible to configure a config map which is used by the Postgres pods as +It is possible to configure a ConfigMap which is used by the Postgres pods as an additional provider for environment variables. One use case is to customize the Spilo image and configure it with environment -variables. The config map with the additional settings is configured in the -operator's main config map: +variables. The ConfigMap with the additional settings is configured in the +operator's main ConfigMap: **postgres-operator ConfigMap** @@ -186,12 +277,12 @@ instances permitted by each Postgres cluster managed by the operator. If either `min_instances` or `max_instances` is set to a non-zero value, the operator may adjust the number of instances specified in the cluster manifest to match either the min or the max boundary. For instance, of a cluster manifest has 1 -instance and the min_instances is set to 3, the cluster will be created with 3 -instances. By default, both parameters are set to -1. +instance and the `min_instances` is set to 3, the cluster will be created with 3 +instances. By default, both parameters are set to `-1`. 
## Load balancers -For any Postgresql/Spilo cluster, the operator creates two separate k8s +For any Postgresql/Spilo cluster, the operator creates two separate Kubernetes services: one for the master pod and one for replica pods. To expose these services to an outer network, one can attach load balancers to them by setting `enableMasterLoadBalancer` and/or `enableReplicaLoadBalancer` to `true` in the @@ -200,29 +291,47 @@ manifest, the operator configmap's settings `enable_master_load_balancer` and `enable_replica_load_balancer` apply. Note that the operator settings affect all Postgresql services running in all namespaces watched by the operator. -To limit the range of IP adresses that can reach a load balancer, specify desired ranges in the `allowedSourceRanges` field (applies to both master and replica LBs). To prevent exposing LBs to the entire Internet, this field is set at cluster creation time to `127.0.0.1/32` unless overwritten explicitly. If you want to revoke all IP ranges from an existing cluster, please set the `allowedSourceRanges` field to `127.0.0.1/32` or to the empty sequence `[]`. Setting the field to `null` or omitting entirely may lead to k8s removing this field from the manifest due to [the k8s handling of null fields](https://kubernetes.io/docs/concepts/overview/object-management-kubectl/declarative-config/#how-apply-calculates-differences-and-merges-changes). Then the resultant manifest will not have the necessary change, and the operator will respectively do noting with the existing source ranges. +To limit the range of IP addresses that can reach a load balancer, specify the +desired ranges in the `allowedSourceRanges` field (applies to both master and +replica load balancers). To prevent exposing load balancers to the entire +Internet, this field is set at cluster creation time to `127.0.0.1/32` unless +overwritten explicitly. 
If you want to revoke all IP ranges from an existing +cluster, please set the `allowedSourceRanges` field to `127.0.0.1/32` or to an +empty sequence `[]`. Setting the field to `null` or omitting it entirely may +lead to Kubernetes removing this field from the manifest due to its +[handling of null fields](https://kubernetes.io/docs/concepts/overview/object-management-kubectl/declarative-config/#how-apply-calculates-differences-and-merges-changes). +Then the resultant manifest will not contain the necessary change, and the +operator will respectively do nothing with the existing source ranges. ## Running periodic 'autorepair' scans of Kubernetes objects The Postgres operator periodically scans all Kubernetes objects belonging to each cluster and repairs all discrepancies between them and the definitions -generated from the current cluster manifest. There are two types of scans: a -`sync scan`, running every `resync_period` seconds for every cluster, and the -`repair scan`, coming every `repair_period` only for those clusters that didn't -report success as a result of the last operation applied to them. +generated from the current cluster manifest. There are two types of scans: + +* `sync scan`, running every `resync_period` seconds for every cluster + +* `repair scan`, coming every `repair_period` only for those clusters that didn't +report success as a result of the last operation applied to them. ## Postgres roles supported by the operator -The operator is capable of maintaining roles of multiple kinds within a Postgres database cluster: +The operator is capable of maintaining roles of multiple kinds within a +Postgres database cluster: -1. **System roles** are roles necessary for the proper work of Postgres itself such as a replication role or the initial superuser role. The operator delegates creating such roles to Patroni and only establishes relevant secrets. 
+* **System roles** are roles necessary for the proper work of Postgres itself such as a replication role or the initial superuser role. The operator delegates creating such roles to Patroni and only establishes relevant secrets. -2. **Infrastructure roles** are roles for processes originating from external systems, e.g. monitoring robots. The operator creates such roles in all PG clusters it manages assuming k8s secrets with the relevant credentials exist beforehand. +* **Infrastructure roles** are roles for processes originating from external systems, e.g. monitoring robots. The operator creates such roles in all Postgres clusters it manages assuming that Kubernetes secrets with the relevant credentials exist beforehand. -3. **Per-cluster robot users** are also roles for processes originating from external systems but defined for an individual Postgres cluster in its manifest. A typical example is a role for connections from an application that uses the database. +* **Per-cluster robot users** are also roles for processes originating from external systems but defined for an individual Postgres cluster in its manifest. A typical example is a role for connections from an application that uses the database. -4. **Human users** originate from the Teams API that returns list of the team members given a team id. Operator differentiates between (a) product teams that own a particular Postgres cluster and are granted admin rights to maintain it, and (b) Postgres superuser teams that get the superuser access to all PG databases running in a k8s cluster for the purposes of maintaining and troubleshooting. +* **Human users** originate from the Teams API that returns a list of the team members given a team id. 
The operator differentiates between (a) product teams that own a particular Postgres cluster and are granted admin rights to maintain it, and (b) Postgres superuser teams that get the superuser access to all Postgres databases running in a Kubernetes cluster for the purposes of maintaining and troubleshooting. ## Understanding rolling update of Spilo pods -The operator logs reasons for a rolling update with the `info` level and a diff between the old and new StatefulSet specs with the `debug` level. To benefit from numerous escape characters in the latter log entry, view it in CLI with `echo -e`. Note that the resultant message will contain some noise because the `PodTemplate` used by the operator is yet to be updated with the default values used internally in Kubernetes. +The operator logs reasons for a rolling update with the `info` level and +a diff between the old and new StatefulSet specs with the `debug` level. +To read the latter log entry with the escaped characters rendered, view it +in CLI with `echo -e`. Note that the resultant message will contain some +noise because the `PodTemplate` used by the operator is yet to be updated +with the default values used internally in Kubernetes. diff --git a/docs/developer.md b/docs/developer.md index 5d766b023..220d764d0 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -188,13 +188,13 @@ defaults to 4) * /workers/$id/logs - log of the operations performed by a given worker * /clusters/ - list of teams and clusters known to the operator * /clusters/$team - list of clusters for the given team -* /cluster/$team/$clustername - detailed status of the cluster, including the +* /clusters/$team/$namespace/$clustername - detailed status of the cluster, including the specifications for CRD, master and replica services, endpoints and statefulsets, as well as any errors and the worker that cluster is assigned to. 
-* /cluster/$team/$clustername/logs/ - logs of all operations performed to the +* /clusters/$team/$namespace/$clustername/logs/ - logs of all operations performed to the cluster so far. -* /cluster/$team/$clustername/history/ - history of cluster changes triggered +* /clusters/$team/$namespace/$clustername/history/ - history of cluster changes triggered by the changes of the manifest (shows the somewhat obscure diff and what exactly has triggered the change) diff --git a/docs/diagrams/Makefile b/docs/diagrams/Makefile new file mode 100644 index 000000000..0c0c7c918 --- /dev/null +++ b/docs/diagrams/Makefile @@ -0,0 +1,11 @@ +OBJ=$(patsubst %.tex, %.png, $(wildcard *.tex)) + +.PHONY: all + +all: $(OBJ) + +%.pdf: %.tex + lualatex $< -shell-escape $@ + +%.png: %.pdf + convert -flatten -density 300 $< -quality 90 $@ diff --git a/docs/diagrams/logo.png b/docs/diagrams/logo.png new file mode 100644 index 000000000..8b8372fa3 Binary files /dev/null and b/docs/diagrams/logo.png differ diff --git a/docs/diagrams/operator.png b/docs/diagrams/operator.png new file mode 100644 index 000000000..5e1cbfe83 Binary files /dev/null and b/docs/diagrams/operator.png differ diff --git a/docs/diagrams/operator.tex b/docs/diagrams/operator.tex new file mode 100644 index 000000000..a8ee0a05f --- /dev/null +++ b/docs/diagrams/operator.tex @@ -0,0 +1,101 @@ +\documentclass{article} +\usepackage{tikz} +\usepackage[graphics,tightpage,active]{preview} +\usetikzlibrary{arrows, shadows.blur, positioning, fit, calc, backgrounds} +\usepackage{lscape} + +\pagenumbering{gobble} + +\PreviewEnvironment{tikzpicture} +\PreviewEnvironment{equation} +\PreviewEnvironment{equation*} +\newlength{\imagewidth} +\newlength{\imagescale} +\pagestyle{empty} +\thispagestyle{empty} + +\begin{document} +\begin{center} +\begin{tikzpicture}[ + scale=0.5,transform shape, + font=\sffamily, + every matrix/.style={ampersand replacement=\&,column sep=2cm,row sep=2cm}, + 
operator/.style={draw,solid,thick,circle,fill=red!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + component/.style={draw,solid,thick,rounded corners,fill=yellow!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + border/.style={draw,dashed,rounded corners,fill=gray!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + pod/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + service/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + endpoint/.style={draw,solid,thick,rounded corners,fill=blue!20, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + secret/.style={draw,solid,thick,rounded corners,fill=blue!20, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + pvc/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + label/.style={rectangle,inner sep=0,outer sep=0}, + to/.style={->,>=stealth',shorten >=1pt,semithick,font=\sffamily\footnotesize}, + every node/.style={align=center}] + + % Position the nodes using a matrix layout + + \matrix{ + \& \node[component] (crd) {CRD}; \\ + \& \node[operator] (operator) {Operator}; \\ + \path + node[service] (service-master) {Master} + node[label, right of=service-master] (service-middle) {} + node[label, below of=service-middle] (services-label) {Services} + node[service, right=.5cm of service-master] (service-replica) {Replica} + node[border, behind path, + fit=(service-master)(service-replica)(services-label) + ] (services) {}; + \& + \node[component] (sts) {Statefulset}; \& \node[component] (pdb) {Pod Disruption Budget}; \\ + \path + node[service] (master-endpoint) {Master} + node[service, right=.5cm of 
master-endpoint] (replica-endpoint) {Replica} + node[label, right of=master-endpoint] (endpoint-middle) {} + node[label, below of=endpoint-middle] (endpoint-label) {Endpoints} + node[border, behind path, + fit=(master-endpoint)(replica-endpoint)(endpoint-label) + ] (endpoints) {}; \& + \node[component] (pod-template) {Pod Template}; \& + \node[border] (secrets) { + \begin{tikzpicture}[] + \node[secret] (users-secret) at (0, 0) {Users}; + \node[secret] (robots-secret) at (2, 0) {Robots}; + \node[secret] (standby-secret) at (4, 0) {Standby}; + \end{tikzpicture} \\ + Secrets + }; \\ \& + \path + node[pod] (replica1-pod) {Replica} + node[pod, left=.5cm of replica1-pod] (master-pod) {Master} + node[pod, right=.5cm of replica1-pod] (replica2-pod) {Replica} + node[label, below of=replica1-pod] (pod-label) {Pods} + node[border, behind path, + fit=(master-pod)(replica1-pod)(replica2-pod)(pod-label) + ] (pods) {}; \\ \& + \path + node[pvc] (replica1-pvc) {Replica} + node[pvc, left=.5cm of replica1-pvc] (master-pvc) {Master} + node[pvc, right=.5cm of replica1-pvc] (replica2-pvc) {Replica} + node[label, below of=replica1-pvc] (pvc-label) {Persistent Volume Claims} + node[border, behind path, + fit=(master-pvc)(replica1-pvc)(replica2-pvc)(pvc-label) + ] (pvcs) {}; \& + \\ \& \\ + }; + + % Draw the arrows between the nodes and label them. 
+ \draw[to] (crd) -- node[midway,above] {} node[midway,below] {} (operator); + \draw[to] (operator) -- node[midway,above] {} node[midway,below] {} (sts); + \draw[to] (operator) -- node[midway,above] {} node[midway,below] {} (secrets); + \draw[to] (operator) -| node[midway,above] {} node[midway,below] {} (pdb); + \draw[to] (service-master) -- node[midway,above] {} node[midway,below] {} (master-endpoint); + \draw[to] (service-replica) -- node[midway,above] {} node[midway,below] {} (replica-endpoint); + \draw[to] (master-pod) -- node[midway,above] {} node[midway,below] {} (master-pvc); + \draw[to] (replica1-pod) -- node[midway,above] {} node[midway,below] {} (replica1-pvc); + \draw[to] (replica2-pod) -- node[midway,above] {} node[midway,below] {} (replica2-pvc); + \draw[to] (operator) -| node[midway,above] {} node[midway,below] {} (services); + \draw[to] (sts) -- node[midway,above] {} node[midway,below] {} (pod-template); + \draw[to] (pod-template) -- node[midway,above] {} node[midway,below] {} (pods); +\end{tikzpicture} +\end{center} +\end{document} diff --git a/docs/diagrams/pod.png b/docs/diagrams/pod.png new file mode 100644 index 000000000..f54d1a2bd Binary files /dev/null and b/docs/diagrams/pod.png differ diff --git a/docs/diagrams/pod.tex b/docs/diagrams/pod.tex new file mode 100644 index 000000000..f4451399a --- /dev/null +++ b/docs/diagrams/pod.tex @@ -0,0 +1,92 @@ +\documentclass{article} +\usepackage{tikz} +\usepackage[graphics,tightpage,active]{preview} +\usetikzlibrary{arrows, shadows.blur, positioning, fit, calc, backgrounds} +\usepackage{lscape} + +\pagenumbering{gobble} + +\PreviewEnvironment{tikzpicture} +\PreviewEnvironment{equation} +\PreviewEnvironment{equation*} +\newlength{\imagewidth} +\newlength{\imagescale} +\pagestyle{empty} +\thispagestyle{empty} + +\begin{document} +\begin{center} +\begin{tikzpicture}[ + scale=0.5,transform shape, + font=\sffamily, + every matrix/.style={ampersand replacement=\&,column sep=2cm,row sep=2cm}, + 
pod/.style={draw,solid,thick,circle,fill=red!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + component/.style={draw,solid,thick,rounded corners,fill=yellow!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + border/.style={draw,dashed,rounded corners,fill=gray!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + volume/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + sidecar/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + k8s-label/.style={draw,solid,thick,rounded corners,fill=blue!20, minimum width=1.5cm, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + affinity/.style={draw,solid,thick,rounded corners,fill=blue!20, minimum width=2cm, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}}, + label/.style={rectangle,inner sep=0,outer sep=0}, + to/.style={->,>=stealth',shorten >=1pt,semithick,font=\sffamily\footnotesize}, + every node/.style={align=center}] + + % Position the nodes using a matrix layout + + \matrix{ + \path + node[k8s-label] (app-label) {App} + node[k8s-label, right=.25cm of app-label] (role-label) {Role} + node[k8s-label, right=.25cm of role-label] (custom-label) {Custom} + node[label, below of=role-label] (k8s-label-label) {K8S Labels} + node[border, behind path, + fit=(app-label)(role-label)(custom-label)(k8s-label-label) + ] (k8s-labels) {}; \& \& + \path + node[affinity] (affinity) {Affinity} + node[label, right=.25cm of affinity] (affinity-middle) {} + node[affinity, right=.25cm of affinity-middle] (anti-affinity) {Anti-affinity} + node[label, below of=affinity-middle] (affinity-label) {Assigning to nodes} + node[border, behind path, + 
fit=(affinity)(anti-affinity)(affinity-label) + ] (affinity) {}; \\ + \& \node[pod] (pod) {Pod}; \& \\ + \path + node[volume, minimum width={width("shm-volume")}] (data-volume) {Data} + node[volume, right=.25cm of data-volume, minimum width={width("shm-volume")}] (tokens-volume) {Tokens} + node[volume, right=.25cm of tokens-volume] (shm-volume) {/dev/shm} + node[label, below of=tokens-volume] (volumes-label) {Volumes} + node[border, behind path, + fit=(data-volume)(shm-volume)(tokens-volume)(volumes-label) + ] (volumes) {}; \& + \node[component] (spilo) {Spilo}; \& + \node[sidecar] (scalyr) {Scalyr}; \& \\ \& + \path + node[component] (patroni) {Patroni} + node[component, below=.25cm of patroni] (postgres) {PostgreSQL} + node[border, behind path, + fit=(postgres)(patroni) + ] (spilo-components) {}; \& + \path + node[sidecar] (custom-sidecar1) {User defined} + node[label, right=.25cm of custom-sidecar1] (sidecars-middle) {} + node[sidecar, right=.25cm of sidecars-middle] (custom-sidecar2) {User defined} + node[label, below of=sidecars-middle] (sidecars-label) {Custom sidecars} + node[border, behind path, + fit=(custom-sidecar1)(custom-sidecar2)(sidecars-label) + ] (sidecars) {}; + \\ \& \\ + }; + + % Draw the arrows between the nodes and label them. + \draw[to] (pod) to [bend left=25] (volumes); + \draw[to] (pod) to [bend left=25] (k8s-labels); + \draw[to] (pod) to [bend right=25] (affinity); + \draw[to] (pod) to [bend right=25] (scalyr); + \draw[to] (pod) to [bend right=25] (sidecars); + \draw[to] (pod) -- node[midway,above] {} node[midway,below] {} (spilo); + \draw[to] (spilo) -- node[midway,above] {} node[midway,below] {} (spilo-components); + +\end{tikzpicture} +\end{center} +\end{document} diff --git a/docs/gsoc-2019/ideas.md b/docs/gsoc-2019/ideas.md new file mode 100644 index 000000000..2e3dc7b4e --- /dev/null +++ b/docs/gsoc-2019/ideas.md @@ -0,0 +1,64 @@ + +# Google Summer of Code 2019 + +## Applications steps + +1. 
Please carefully read the official [Google Summer of Code Student Guide](https://google.github.io/gsocguides/student/) +2. Join the #postgres-operator slack channel under [Postgres Slack](https://postgres-slack.herokuapp.com) to introduce yourself to the community and get quick feedback on your application. +3. Select a project from the list of ideas below or propose your own. +4. Write a proposal draft. Please open an issue with the label `gsoc2019_application` in the [operator repository](https://github.com/zalando-incubator/postgres-operator/issues) so that the community members can publicly review it. See proposal instructions below for details. +5. Submit proposal and the proof of enrollment before April 9 2019 18:00 UTC through the web site of the Program. + +## Project ideas + + +### Place database pods into the "Guaranteed" Quality-of-Service class + +* **Description**: Kubernetes runtime does not kill pods in this class on condition they stay within their resource limits, which is desirable for the DB pods serving production workloads. To be assigned to that class, pod's resources must equal its limits. The task is to add the `enableGuaranteedQoSClass` or the like option to the Postgres manifest and the operator configmap that forcibly re-write pod resources to match the limits. +* **Recommended skills**: golang, basic Kubernetes abstractions +* **Difficulty**: moderate +* **Mentor(s)**: Felix Kunde [@FxKu](https://github.com/fxku), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov) + +### Implement the kubectl plugin for the Postgres CustomResourceDefinition + +* **Description**: [kubectl plugins](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) enable extending the Kubernetes command-line client `kubectl` with commands to manage custom resources. The task is to design and implement a plugin for the `kubectl postgres` command, +that can enable, for example, correct deletion or major version upgrade of Postgres clusters. 
+* **Recommended skills**: golang, shell scripting, operational experience with Kubernetes +* **Difficulty**: moderate to medium, depending on the plugin design +* **Mentor(s)**: Felix Kunde [@FxKu](https://github.com/fxku), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov) + +### Implement the openAPIV3Schema for the Postgres CRD + +* **Description**: at present the operator validates a database manifest on its own. +It will be helpful to reject erroneous manifests before they reach the operator using the [native Kubernetes CRD validation](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#validation). It is up to the student to decide whether to write the schema manually or to adopt existing [schema generator developed for the Prometheus project](https://github.com/ant31/crd-validation). +* **Recommended skills**: golang, JSON schema +* **Difficulty**: medium +* **Mentor(s)**: Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov) +* **Issue**: [#388](https://github.com/zalando-incubator/postgres-operator/issues/388) + +### Design a solution for the local testing of the operator + +* **Description**: The current way of testing is to run minikube, either manually or with some tooling around it like `/run-operator_locally.sh` or Vagrant. This has at least three problems: +First, minikube is a single node cluster, so it is unsuitable for testing vital functions such as pod migration between nodes. Second, minikube starts slowly; that prolongs local testing. +Third, every contributor needs to come up with their own solution for local testing. The task is to come up with a better option which will enable us to conveniently and uniformly run e2e tests locally / potentially in Travis CI. 
+A promising option is the Kubernetes own [kind](https://github.com/kubernetes-sigs/kind) +* **Recommended skills**: Docker, shell scripting, basic Kubernetes abstractions +* **Difficulty**: medium to hard depending on the selected design +* **Mentor(s)**: Dmitry Dolgov [@erthalion](https://github.com/erthalion), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov) +* **Issue**: [#475](https://github.com/zalando-incubator/postgres-operator/issues/475) + +### Detach a Postgres cluster from the operator for maintenance + +* **Description**: sometimes a Postgres cluster requires manual maintenance. During such maintenance the operator should ignore all the changes manually applied to the cluster. + Currently the only way to achieve this behavior is to shutdown the operator altogether, for instance by scaling down the operator's own deployment to zero pods. That approach evidently affects all Postgres databases under the operator control and thus is highly undesirable in production Kubernetes clusters. It would be much better to be able to detach only the desired Postgres cluster from the operator for the time being and re-attach it again after maintenance. +* **Recommended skills**: golang, architecture of a Kubernetes operator +* **Difficulty**: hard - requires significant modification of the operator's internals and careful consideration of the corner cases. +* **Mentor(s)**: Dmitry Dolgov [@erthalion](https://github.com/erthalion), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov) +* **Issue**: [#421](https://github.com/zalando-incubator/postgres-operator/issues/421) + +### Propose your own idea + +Feel free to come up with your own ideas. 
For inspiration, +see [our bug tracker](https://github.com/zalando-incubator/postgres-operator/issues), +the [official `CustomResourceDefinition` docs](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/) +and [other operators](https://github.com/operator-framework/awesome-operators). \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 397dbea0d..f17dbf11b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -51,7 +51,9 @@ Please, report any issues discovered to https://github.com/zalando-incubator/pos ## Talks -1. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni) +1. "PostgreSQL and Kubernetes: DBaaS without a vendor-lock" talk by Oleksii Kliukin, PostgreSQL Sessions 2018: [video](https://www.youtube.com/watch?v=q26U2rQcqMw) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-and-kubernetes-dbaas-without-a-vendor-lock) + +2. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni) 2. 
"Blue elephant on-demand: Postgres + Kubernetes" talk by Oleksii Kliukin and Jan Mussler, FOSDEM 2018: [video](https://fosdem.org/2018/schedule/event/blue_elephant_on_demand_postgres_kubernetes/) | [slides (pdf)](https://www.postgresql.eu/events/fosdem2018/sessions/session/1735/slides/59/FOSDEM%202018_%20Blue_Elephant_On_Demand.pdf) diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 75de35097..a7c0e9840 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -33,7 +33,15 @@ Those parameters are grouped under the `metadata` top-level key. services, secrets) for the cluster. Changing it after the cluster creation results in deploying or updating a completely separate cluster in the target namespace. Optional (if present, should match the namespace where the - manifest is applied). + manifest is applied). + +* **labels** + if labels are matching one of the `inherited_labels` [configured in the + operator parameters](operator_parameters.md#kubernetes-resources), + they will automatically be added to all the objects (StatefulSet, Service, + Endpoints, etc.) that are created by the operator. + Labels that are set here but not listed as `inherited_labels` in the operator + parameters are ignored. ## Top-level parameters @@ -89,14 +97,26 @@ Those are parameters grouped directly under the `spec` key in the manifest. examples](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) for details on tolerations and possible values of those keys. When set, this value overrides the `pod_toleration` setting from the operator. Optional. - + * **podPriorityClassName** a name of the [priority class](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass) that should be assigned to the cluster pods. When not specified, the value is taken from the `pod_priority_class_name` operator parameter, if not set then the default priority class is taken. 
The priority class itself must be defined in advance. - + +* **enableShmVolume** + Start a database pod without limitations on shm memory. By default docker + limits `/dev/shm` to `64M` (see e.g. the [docker + issue](https://github.com/docker-library/postgres/issues/416)), which could be + not enough if PostgreSQL uses parallel workers heavily. If this option is + present and value is `true`, to the target database pod will be mounted a new + tmpfs volume to remove this limitation. If it's not present, the decision + about mounting a volume will be made based on operator configuration + (`enable_shm_volume`, which is `true` by default). If it's present and value + is `false`, then no volume will be mounted no matter how operator was + configured (so you can override the operator configuration). + ## Postgres parameters Those parameters are grouped under the `postgresql` top-level key. @@ -112,6 +132,7 @@ Those parameters are grouped under the `postgresql` top-level key. cluster. Optional (Spilo automatically sets reasonable defaults for parameters like work_mem or max_connections). + ## Patroni parameters Those parameters are grouped under the `patroni` top-level key. See the [patroni @@ -122,7 +143,7 @@ explanation of `ttl` and `loop_wait` parameters. a map of key-value pairs describing initdb parameters. For `data-checksum`, `debug`, `no-locale`, `noclean`, `nosync` and `sync-only` parameters use `true` as the value if you want to set them. Changes to this option do not - affect the already initialized clusters. Optional. + affect the already initialized clusters. Optional. * **pg_hba** list of custom `pg_hba` lines to replace default ones. Note that the default @@ -202,7 +223,7 @@ under the `clone` top-level key and do not affect the already running cluster. different namespaces) , the operator uses UID in the S3 bucket name in order to guarantee uniqueness. Has no effect when cloning from the running clusters. Optional. 
- + * **timestamp** the timestamp up to which the recovery should proceed. The operator always configures non-inclusive recovery target, stopping right before the given @@ -222,7 +243,7 @@ properties of the persistent storage that stores postgres data. the name of the Kubernetes storage class to draw the persistent volume from. See [Kubernetes documentation](https://kubernetes.io/docs/concepts/storage/storage-classes/) - for the details on storage classes. Optional. + for the details on storage classes. Optional. ### Sidecar definitions diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 76109c890..69d903427 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -10,29 +10,37 @@ configuration. configuration structure. There is an [example](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/configmap.yaml) -* CRD-based configuration. The configuration is stored in the custom YAML - manifest, an instance of the custom resource definition (CRD) called - `OperatorConfiguration`. This CRD is registered by the operator - during the start when `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable is - set to a non-empty value. The CRD-based configuration is a regular YAML - document; non-scalar keys are simply represented in the usual YAML way. The - usage of the CRD-based configuration is triggered by setting the - `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable, which should point to the - `postgresql-operator-configuration` object name in the operators namespace. +* CRD-based configuration. The configuration is stored in a custom YAML + manifest. The manifest is an instance of the custom resource definition (CRD) called + `OperatorConfiguration`. 
The operator registers this CRD + during the start and uses it for configuration if the [operator deployment manifest ](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L21) sets the `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` env variable to a non-empty value. The variable should point to the + `postgresql-operator-configuration` object in the operator's namespace. + + The CRD-based configuration is a regular YAML + document; non-scalar keys are simply represented in the usual YAML way. There are no default values built-in in the operator, each parameter that is not supplied in the configuration receives an empty value. In order to create your own configuration just copy the [default one](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml) and change it. -CRD-based configuration is more natural and powerful then the one based on + To test the CRD-based configuration locally, use the following + ```bash + kubectl create -f manifests/operator-service-account-rbac.yaml + kubectl create -f manifests/postgres-operator.yaml # set the env var as mentioned above + kubectl create -f manifests/postgresql-operator-default-configuration.yaml + kubectl get operatorconfigurations postgresql-operator-default-configuration -o yaml + ``` + Note that the operator first registers the definition of the CRD `OperatorConfiguration` and then waits for an instance of the CRD to be created. In between these two event the operator pod may be failing since it cannot fetch the not-yet-existing `OperatorConfiguration` instance. + +The CRD-based configuration is more powerful than the one based on ConfigMaps and should be used unless there is a compatibility requirement to use an already existing configuration. Even in that case, it should be rather straightforward to convert the configmap based configuration into the CRD-based one and restart the operator. 
The ConfigMaps-based configuration will be deprecated and subsequently removed in future releases. -Note that for the CRD-based configuration configuration groups below correspond +Note that for the CRD-based configuration groups of configuration options below correspond to the non-leaf keys in the target YAML (i.e. for the Kubernetes resources the key is `kubernetes`). The key is mentioned alongside the group description. The ConfigMap-based configuration is flat and does not allow non-leaf keys. @@ -46,7 +54,6 @@ They will be deprecated and removed in the future. Variable names are underscore-separated words. - ## General Those are top-level keys, containing both leaf keys and groups. @@ -165,6 +172,14 @@ configuration they are grouped under the `kubernetes` key. list of `name:value` pairs for additional labels assigned to the cluster objects. The default is `application:spilo`. +* **inherited_labels** + list of labels that can be inherited from the cluster manifest, and added to + each child object (`StatefulSet`, `Pod`, `Service` and `Endpoints`) created by + the operator. + Typical use case is to dynamically pass labels that are specific to a given + postgres cluster, in order to implement `NetworkPolicy`. + The default is empty. + * **cluster_name_label** name of the label assigned to Kubernetes objects created by the operator that indicates which cluster a given object belongs to. The default is @@ -191,13 +206,21 @@ configuration they are grouped under the `kubernetes` key. All variables from that ConfigMap are injected to the pod's environment, on conflicts they are overridden by the environment variables generated by the operator. The default is empty. - + * **pod_priority_class_name** a name of the [priority class](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass) that should be assigned to the Postgres pods. The priority class itself must be defined in advance. Default is empty (use the default priority class). 
- + +* **enable_pod_antiaffinity** + toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid multiple pods + of the same Postgres cluster in the same topology , e.g. node. The default is `false`. + +* **pod_antiaffinity_topology_key** + override + [topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels) + for pod anti affinity. The default is `kubernetes.io/hostname`. ## Kubernetes resource requests @@ -221,6 +244,17 @@ CRD-based configuration. memory limits for the postgres containers, unless overridden by cluster-specific settings. The default is `1Gi`. +* **set_memory_request_to_limit** + Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers created by the operator (Postgres, Scalyr sidecar, and other sidecars); to set resources for the operator's own container, change the [operator deployment manually](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L13). The default is `false`. + +* **enable_shm_volume** + Instruct operator to start any new database pod without limitations on shm + memory. If this option is enabled, to the target database pod will be mounted + a new tmpfs volume to remove shm memory limitation (see e.g. the [docker + issue](https://github.com/docker-library/postgres/issues/416)). This option + is global for an operator object, and can be overwritten by `enableShmVolume` + parameter from Postgres manifest. 
The default is `true` + ## Operator timeouts This set of parameters define various timeouts related to some operator @@ -276,6 +310,11 @@ In the CRD-based configuration they are grouped under the `load_balancer` key. cluster. Can be overridden by individual cluster settings. The default is `false`. +* **custom_service_annotations** + when load balancing is enabled, LoadBalancer service is created and + this parameter takes service annotations that are applied to service. + Optional. + * **master_dns_name_format** defines the DNS name string template for the master load balancer cluster. The default is `{cluster}.{team}.{hostedzone}`, where `{cluster}` is replaced by the cluster @@ -290,12 +329,12 @@ In the CRD-based configuration they are grouped under the `load_balancer` key. replaced with the hosted zone (the value of the `db_hosted_zone` parameter). No other placeholders are allowed. -## AWS or GSC interaction +## AWS or GCP interaction The options in this group configure operator interactions with non-Kubernetes -objects from AWS or Google cloud. They have no effect unless you are using +objects from Amazon Web Services (AWS) or Google Cloud Platform (GCP). They have no effect unless you are using either. In the CRD-based configuration those options are grouped under the -`aws_or_gcp` key. +`aws_or_gcp` key. Note the GCP integration is not yet officially supported. * **wal_s3_bucket** S3 bucket to use for shipping WAL segments with WAL-E. A bucket has to be @@ -323,11 +362,11 @@ Options to aid debugging of the operator itself. Grouped under the `debug` key. boolean parameter that toggles verbose debug logs from the operator. The default is `true`. -* **enable_db_access** +* **enable_database_access** boolean parameter that toggles the functionality of the operator that require access to the postgres database, i.e. creating databases and users. The default is `true`. 
- + ## Automatic creation of human users in the database Options to automate creation of human users with the aid of the teams API @@ -362,6 +401,9 @@ key. role name to grant to team members created from the Teams API. The default is `admin`, that role is created by Spilo as a `NOLOGIN` role. +* **enable_admin_role_for_users** + if `true`, the `team_admin_role` will have the rights to grant roles coming from PG manifests. Such roles will be created as in "CREATE ROLE 'role_from_manifest' ... ADMIN 'team_admin_role'". The default is `true`. + * **pam_role_name** when set, the operator will add all team member roles to this group and add a `pg_hba` line to authenticate members of that role via `pam`. The default is @@ -422,4 +464,4 @@ scalyr sidecar. In the CRD-based configuration they are grouped under the Memory limit value for the Scalyr sidecar. The default is `1Gi`. -For the configmap operator configuration, the [default parameter values](https://github.com/zalando-incubator/postgres-operator/blob/master/pkg/util/config/config.go#L14) mentioned here are likely to be overwritten in your local operator installation via your local version of the operator configmap. In the case you use the operator CRD, all the CRD defaults are provided in the [operator's default configuration manifest](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml) \ No newline at end of file +For the configmap operator configuration, the [default parameter values](https://github.com/zalando-incubator/postgres-operator/blob/master/pkg/util/config/config.go#L14) mentioned here are likely to be overwritten in your local operator installation via your local version of the operator configmap. 
In the case you use the operator CRD, all the CRD defaults are provided in the [operator's default configuration manifest](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml) diff --git a/docs/user.md b/docs/user.md index ae6abcbe9..ba91530eb 100644 --- a/docs/user.md +++ b/docs/user.md @@ -57,12 +57,11 @@ $ psql -U postgres Postgres operator allows defining roles to be created in the resulting database cluster. It covers three use-cases: -* create application roles specific to the cluster described in the manifest: - `manifest roles`. -* create application roles that should be automatically created on every - cluster managed by the operator: `infrastructure roles`. -* automatically create users for every member of the team owning the database - cluster: `teams API roles`. +* `manifest roles`: create application roles specific to the cluster described in the manifest. +* `infrastructure roles`: create application roles that should be automatically created on every + cluster managed by the operator. +* `teams API roles`: automatically create users for every member of the team owning the database + cluster. In the next sections, we will cover those use cases in more details. @@ -75,7 +74,7 @@ flags. Manifest roles are defined as a dictionary, with a role name as a key and a list of role options as a value. For a role without any options it is best to supply the empty -list `[]`. It is also possible to leave this field empty as in our example manifests, but in certain cases such empty field may removed by Kubernetes [due to the `null` value it gets](https://kubernetes.io/docs/concepts/overview/object-management-kubectl/declarative-config/#how-apply-calculates-differences-and-merges-changes) (`foobar_user:` is equivalent to `foobar_user: null`). +list `[]`. 
It is also possible to leave this field empty as in our example manifests, but in certain cases such empty field may be removed by Kubernetes [due to the `null` value it gets](https://kubernetes.io/docs/concepts/overview/object-management-kubectl/declarative-config/#how-apply-calculates-differences-and-merges-changes) (`foobar_user:` is equivalent to `foobar_user: null`). The operator accepts the following options: `superuser`, `inherit`, `login`, `nologin`, `createrole`, `createdb`, `replication`, `bypassrls`. @@ -99,10 +98,13 @@ An infrastructure role is a role that should be present on every PostgreSQL cluster managed by the operator. An example of such a role is a monitoring user. There are two ways to define them: -* Exclusively via the infrastructure roles secret (specified by the - `infrastructure_roles_secret_name` parameter). +* With the infrastructure roles secret only +* With both the secret and the infrastructure role ConfigMap. -The role definition looks like this (values are base64 encoded): +### Infrastructure roles secret + +The infrastructure roles secret is specified by the `infrastructure_roles_secret_name` +parameter. The role definition looks like this (values are base64 encoded): ```yaml user1: ZGJ1c2Vy @@ -110,25 +112,29 @@ The role definition looks like this (values are base64 encoded): inrole1: b3BlcmF0b3I= ``` -A block above describes the infrastructure role 'dbuser' with the password -'secret' that is the member of the 'operator' role. For the following +The block above describes the infrastructure role 'dbuser' with password -'secret' that is a member of the 'operator' role. For the following definitions one must increase the index, i.e. the next role will be defined as -'user2' and so on. Note that there is no way to specify role options (like -superuser or nologin) this way, and the resulting role will automatically be a -login role. +'user2' and so on. The resulting role will automatically be a login role. 
-* Via both the infrastructure roles secret and the infrastructure role - configmap (with the same name as the infrastructure roles secret). +Note that with definitions that solely use the infrastructure roles secret +there is no way to specify role options (like superuser or nologin) or role +memberships. This is where the ConfigMap comes into play. -The infrastructure roles secret should contain an entry with 'rolename: -rolepassword' for each role, and the role description should be specified in -the configmap. Below is the example: +### Secret plus ConfigMap + +A [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/) +allows for defining more details regarding the infrastructure roles. +Therefore, one should use the new style that specifies infrastructure roles +using both the secret and a ConfigMap. The ConfigMap must have the same name as +the secret. The secret should contain an entry with 'rolename:rolepassword' for +each role. ```yaml dbuser: c2VjcmV0 ``` -and the configmap definition for that user: +And the role description for that user should be specified in the ConfigMap. ```yaml data: @@ -140,18 +146,13 @@ and the configmap definition for that user: log_statement: all ``` -Note that the definition above allows for more details than the one that relies -solely on the infrastructure role secret. In particular, one can allow -membership in multiple roles via the `inrole` array parameter, define role -flags via the `user_flags` list and supply per-role options through the -`db_parameters` dictionary. All those parameters are optional. +One can allow membership in multiple roles via the `inrole` array parameter, +define role flags via the `user_flags` list and supply per-role options through +the `db_parameters` dictionary. All those parameters are optional. 
-The definitions that solely use the infrastructure roles secret are more -limited and considered legacy ones; one should use the new style that specifies -infrastructure roles using both the secret and the configmap. You can mix both -in the infrastructure role secret, as long as your new-style definition can be -clearly distinguished from the old-style one (for instance, do not name -new-style roles`userN`). +Both definitions can be mixed in the infrastructure role secret, as long as +your new-style definition can be clearly distinguished from the old-style one +(for instance, do not name new-style roles `userN`). Since an infrastructure role is created uniformly on all clusters managed by the operator, it makes no sense to define it without the password. Such @@ -272,6 +273,32 @@ are always passed to sidecars: The PostgreSQL volume is shared with sidecars and is mounted at `/home/postgres/pgdata`. + +## InitContainers Support + +Each cluster can specify arbitrary init containers to run. These containers can be +used to run custom actions before any normal and sidecar containers start. +An init container can be specified like this: + +```yaml +apiVersion: "acid.zalan.do/v1" +kind: postgresql + +metadata: + name: acid-minimal-cluster +spec: + ... + init_containers: + - name: "container-name" + image: "company/image:tag" + env: + - name: "ENV_VAR_NAME" + value: "any-k8s-env-things" +``` + +`init_containers` accepts full `v1.Container` definition. 
+ + ## Increase volume size PostgreSQL operator supports statefulset volume resize if you're using the diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 60d39c94b..4a4c6078a 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -4,21 +4,26 @@ kind: postgresql metadata: name: acid-test-cluster spec: + init_containers: + - name: date + image: busybox + command: [ "/bin/date" ] teamId: "ACID" volume: - size: 5Gi + size: 1Gi numberOfInstances: 2 users: #Application/Robot users zalando: - superuser - createdb enableMasterLoadBalancer: true - enableReplicaLoadBalancer: true + enableReplicaLoadBalancer: true allowedSourceRanges: # load balancers' source ranges for both master and replica services - 127.0.0.1/32 databases: foo: zalando #Expert section + enableShmVolume: true postgresql: version: "10" parameters: @@ -31,7 +36,7 @@ spec: memory: 100Mi limits: cpu: 300m - memory: 3000Mi + memory: 300Mi patroni: initdb: encoding: "UTF8" diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index ed7652907..19eb3f5de 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -10,14 +10,18 @@ data: debug_logging: "true" workers: "4" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29 + docker_image: registry.opensource.zalan.do/acid/spilo-11:1.5-p4 pod_service_account_name: "zalando-postgres-operator" secret_name_template: '{username}.{cluster}.credentials' super_username: postgres enable_teams_api: "false" + # custom_service_annotations: + # "keyx:valuez,keya:valuea" + # set_memory_request_to_limit: "true" # postgres_superuser_teams: "postgres_superusers" # enable_team_superuser: "false" # team_admin_role: "admin" + # enable_admin_role_for_users: "true" # teams_api_url: http://fake-teams-api.default.svc.cluster.local # team_api_role_configuration: "log_statement:all" # infrastructure_roles_secret_name: 
postgresql-infrastructure-roles diff --git a/manifests/minimal-postgres-manifest.yaml b/manifests/minimal-postgres-manifest.yaml index ae5d36cbc..37d772567 100644 --- a/manifests/minimal-postgres-manifest.yaml +++ b/manifests/minimal-postgres-manifest.yaml @@ -2,7 +2,7 @@ apiVersion: "acid.zalan.do/v1" kind: postgresql metadata: name: acid-minimal-cluster - namespace: test # assumes namespace exists beforehand + namespace: default spec: teamId: "ACID" volume: @@ -15,7 +15,8 @@ spec: - createdb # role for application foo - foo_user: + foo_user: [] + #databases: name->owner databases: diff --git a/manifests/operator-service-account-rbac.yaml b/manifests/operator-service-account-rbac.yaml index 8a1bfb857..7bd539ac5 100644 --- a/manifests/operator-service-account-rbac.yaml +++ b/manifests/operator-service-account-rbac.yaml @@ -14,6 +14,7 @@ rules: - acid.zalan.do resources: - postgresqls + - operatorconfigurations verbs: - "*" - apiGroups: diff --git a/manifests/postgres-operator.yaml b/manifests/postgres-operator.yaml index 0c4cf84cb..d43c0f8a8 100644 --- a/manifests/postgres-operator.yaml +++ b/manifests/postgres-operator.yaml @@ -12,9 +12,20 @@ spec: serviceAccountName: zalando-postgres-operator containers: - name: postgres-operator - image: registry.opensource.zalan.do/acid/postgres-operator:v1.0.0 + image: registry.opensource.zalan.do/acid/smoke-tested-postgres-operator:v1.0.0-37-g2422d72 imagePullPolicy: IfNotPresent + resources: + requests: + cpu: 500m + memory: 250Mi + limits: + cpu: 2000m + memory: 500Mi env: # provided additional ENV vars can overwrite individual config map entries - name: CONFIG_MAP_NAME value: "postgres-operator" + # In order to use the CRD OperatorConfiguration instead, uncomment these lines and comment out the two lines above + # - name: POSTGRES_OPERATOR_CONFIGURATION_OBJECT + # value: postgresql-operator-default-configuration + diff --git a/manifests/postgresql-operator-default-configuration.yaml 
b/manifests/postgresql-operator-default-configuration.yaml index 391702cdc..5b9de1073 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -4,7 +4,7 @@ metadata: name: postgresql-operator-default-configuration configuration: etcd_host: "" - docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29 + docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p42 workers: 4 min_instances: -1 max_instances: -1 @@ -25,8 +25,11 @@ configuration: pod_role_label: spilo-role cluster_labels: application: spilo + # inherited_labels: + # - application + # - app cluster_name_label: cluster-name - # watched_namespace:"" + # watched_namespace:"" # node_readiness_label: "" # toleration: {} # infrastructure_roles_secret_name: "" @@ -46,11 +49,14 @@ configuration: load_balancer: enable_master_load_balancer: false enable_replica_load_balancer: false + # custom_service_annotations: + # keyx: valuex + # keyy: valuey master_dns_name_format: "{cluster}.{team}.{hostedzone}" replica_dns_name_format: "{cluster}-repl.{team}.{hostedzone}" aws_or_gcp: # db_hosted_zone: "" - # wal_s3_bucket: "" + # wal_s3_bucket: "" # log_s3_bucket: "" # kube_iam_role: "" aws_region: eu-central-1 @@ -59,13 +65,13 @@ configuration: enable_database_access: true teams_api: enable_teams_api: false - team_api_role_configuration: + team_api_role_configuration: log_statement: all enable_team_superuser: false team_admin_role: admin pam_role_name: zalandos # pam_configuration: "" - protected_role_names: + protected_role_names: - admin # teams_api_url: "" # postgres_superuser_teams: "postgres_superusers" diff --git a/pkg/apis/acid.zalan.do/v1/const.go b/pkg/apis/acid.zalan.do/v1/const.go index 4592a2d68..59d6c1406 100644 --- a/pkg/apis/acid.zalan.do/v1/const.go +++ b/pkg/apis/acid.zalan.do/v1/const.go @@ -1,10 +1,7 @@ package v1 +// ClusterStatusUnknown etc : status of a Postgres cluster known to the operator const ( - 
serviceNameMaxLength = 63 - clusterNameMaxLength = serviceNameMaxLength - len("-repl") - serviceNameRegexString = `^[a-z]([-a-z0-9]*[a-z0-9])?$` - ClusterStatusUnknown PostgresStatus = "" ClusterStatusCreating PostgresStatus = "Creating" ClusterStatusUpdating PostgresStatus = "Updating" @@ -14,3 +11,9 @@ const ( ClusterStatusRunning PostgresStatus = "Running" ClusterStatusInvalid PostgresStatus = "Invalid" ) + +const ( + serviceNameMaxLength = 63 + clusterNameMaxLength = serviceNameMaxLength - len("-repl") + serviceNameRegexString = `^[a-z]([-a-z0-9]*[a-z0-9])?$` +) diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 5cefa1c83..5f1704527 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -6,6 +6,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +// CRDResource* define names necesssary for the k8s CRD API const ( PostgresCRDResourceKind = "postgresql" PostgresCRDResourcePlural = "postgresqls" @@ -39,6 +40,7 @@ func buildCRD(name, kind, plural, short string) *apiextv1beta1.CustomResourceDef } } +// PostgresCRD returns CustomResourceDefinition built from PostgresCRDResource func PostgresCRD() *apiextv1beta1.CustomResourceDefinition { return buildCRD(PostgresCRDResouceName, PostgresCRDResourceKind, @@ -46,6 +48,7 @@ func PostgresCRD() *apiextv1beta1.CustomResourceDefinition { PostgresCRDResourceShort) } +// ConfigurationCRD returns CustomResourceDefinition built from OperatorConfigCRDResource func ConfigurationCRD() *apiextv1beta1.CustomResourceDefinition { return buildCRD(OperatorConfigCRDResourceName, OperatorConfigCRDResouceKind, diff --git a/pkg/apis/acid.zalan.do/v1/doc.go b/pkg/apis/acid.zalan.do/v1/doc.go index 5accd806d..159378752 100644 --- a/pkg/apis/acid.zalan.do/v1/doc.go +++ b/pkg/apis/acid.zalan.do/v1/doc.go @@ -1,6 +1,6 @@ +// Package v1 is the v1 version of the API. // +k8s:deepcopy-gen=package,register -// Package v1 is the v1 version of the API. 
// +groupName=acid.zalan.do package v1 diff --git a/pkg/apis/acid.zalan.do/v1/marshal.go b/pkg/apis/acid.zalan.do/v1/marshal.go index b24c4e49d..823ff0ef2 100644 --- a/pkg/apis/acid.zalan.do/v1/marshal.go +++ b/pkg/apis/acid.zalan.do/v1/marshal.go @@ -104,6 +104,7 @@ func (p *Postgresql) UnmarshalJSON(data []byte) error { return nil } +// UnmarshalJSON convert to Duration from byte slice of json func (d *Duration) UnmarshalJSON(b []byte) error { var ( v interface{} diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index de7681db4..99d79b64b 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -13,6 +13,8 @@ import ( // +genclient:onlyVerbs=get // +genclient:noStatus // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// OperatorConfiguration defines the specification for the OperatorConfiguration. type OperatorConfiguration struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` @@ -21,6 +23,8 @@ type OperatorConfiguration struct { } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// OperatorConfigurationList is used in the k8s API calls type OperatorConfigurationList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata"` @@ -28,11 +32,13 @@ type OperatorConfigurationList struct { Items []OperatorConfiguration `json:"items"` } +// PostgresUsersConfiguration defines the system users of Postgres. 
type PostgresUsersConfiguration struct { SuperUsername string `json:"super_username,omitempty"` ReplicationUsername string `json:"replication_username,omitempty"` } +// KubernetesMetaConfiguration defines k8s conf required for all Postgres clusters and the operator itself type KubernetesMetaConfiguration struct { PodServiceAccountName string `json:"pod_service_account_name,omitempty"` // TODO: change it to the proper json @@ -46,6 +52,7 @@ type KubernetesMetaConfiguration struct { InfrastructureRolesSecretName spec.NamespacedName `json:"infrastructure_roles_secret_name,omitempty"` PodRoleLabel string `json:"pod_role_label,omitempty"` ClusterLabels map[string]string `json:"cluster_labels,omitempty"` + InheritedLabels []string `json:"inherited_labels,omitempty"` ClusterNameLabel string `json:"cluster_name_label,omitempty"` NodeReadinessLabel map[string]string `json:"node_readiness_label,omitempty"` // TODO: use a proper toleration structure? @@ -53,8 +60,11 @@ type KubernetesMetaConfiguration struct { // TODO: use namespacedname PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` + EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity" default:"false"` + PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"` } +// PostgresPodResourcesDefaults defines the spec of default resources type PostgresPodResourcesDefaults struct { DefaultCPURequest string `json:"default_cpu_request,omitempty"` DefaultMemoryRequest string `json:"default_memory_request,omitempty"` @@ -62,6 +72,7 @@ type PostgresPodResourcesDefaults struct { DefaultMemoryLimit string `json:"default_memory_limit,omitempty"` } +// OperatorTimeouts defines the timeout of ResourceCheck, PodWait, ReadyWait type OperatorTimeouts struct { ResourceCheckInterval Duration `json:"resource_check_interval,omitempty"` ResourceCheckTimeout Duration 
`json:"resource_check_timeout,omitempty"` @@ -71,14 +82,18 @@ type OperatorTimeouts struct { ReadyWaitTimeout Duration `json:"ready_wait_timeout,omitempty"` } +// LoadBalancerConfiguration defines the LB configuration type LoadBalancerConfiguration struct { DbHostedZone string `json:"db_hosted_zone,omitempty"` EnableMasterLoadBalancer bool `json:"enable_master_load_balancer,omitempty"` EnableReplicaLoadBalancer bool `json:"enable_replica_load_balancer,omitempty"` + CustomServiceAnnotations map[string]string `json:"custom_service_annotations,omitempty"` MasterDNSNameFormat config.StringTemplate `json:"master_dns_name_format,omitempty"` ReplicaDNSNameFormat config.StringTemplate `json:"replica_dns_name_format,omitempty"` } +// AWSGCPConfiguration defines the configuration for AWS +// TODO complete Google Cloud Platform (GCP) configuration type AWSGCPConfiguration struct { WALES3Bucket string `json:"wal_s3_bucket,omitempty"` AWSRegion string `json:"aws_region,omitempty"` @@ -86,11 +101,13 @@ type AWSGCPConfiguration struct { KubeIAMRole string `json:"kube_iam_role,omitempty"` } +// OperatorDebugConfiguration defines options for the debug mode type OperatorDebugConfiguration struct { DebugLogging bool `json:"debug_logging,omitempty"` EnableDBAccess bool `json:"enable_database_access,omitempty"` } +// TeamsAPIConfiguration defines the configuration of TeamsAPI type TeamsAPIConfiguration struct { EnableTeamsAPI bool `json:"enable_teams_api,omitempty"` TeamsAPIUrl string `json:"teams_api_url,omitempty"` @@ -103,12 +120,14 @@ type TeamsAPIConfiguration struct { PostgresSuperuserTeams []string `json:"postgres_superuser_teams,omitempty"` } +// LoggingRESTAPIConfiguration defines Logging API conf type LoggingRESTAPIConfiguration struct { APIPort int `json:"api_port,omitempty"` RingLogLines int `json:"ring_log_lines,omitempty"` ClusterHistoryEntries int `json:"cluster_history_entries,omitempty"` } +// ScalyrConfiguration defines the configuration for ScalyrAPI type 
ScalyrConfiguration struct { ScalyrAPIKey string `json:"scalyr_api_key,omitempty"` ScalyrImage string `json:"scalyr_image,omitempty"` @@ -119,6 +138,7 @@ type ScalyrConfiguration struct { ScalyrMemoryLimit string `json:"scalyr_memory_limit,omitempty"` } +// OperatorConfigurationData defines the operator config type OperatorConfigurationData struct { EtcdHost string `json:"etcd_host,omitempty"` DockerImage string `json:"docker_image,omitempty"` @@ -131,6 +151,7 @@ type OperatorConfigurationData struct { PostgresUsersConfiguration PostgresUsersConfiguration `json:"users"` Kubernetes KubernetesMetaConfiguration `json:"kubernetes"` PostgresPodResources PostgresPodResourcesDefaults `json:"postgres_pod_resources"` + SetMemoryRequestToLimit bool `json:"set_memory_request_to_limit,omitempty"` Timeouts OperatorTimeouts `json:"timeouts"` LoadBalancer LoadBalancerConfiguration `json:"load_balancer"` AWSGCP AWSGCPConfiguration `json:"aws_or_gcp"` @@ -140,6 +161,7 @@ type OperatorConfigurationData struct { Scalyr ScalyrConfiguration `json:"scalyr"` } +// OperatorConfigurationUsers defines configuration for super user type OperatorConfigurationUsers struct { SuperUserName string `json:"superuser_name,omitempty"` Replication string `json:"replication_user_name,omitempty"` @@ -147,4 +169,5 @@ type OperatorConfigurationUsers struct { TeamAPIRoleConfiguration map[string]string `json:"team_api_role_configuration,omitempty"` } +// Duration shortens this frequently used name type Duration time.Duration diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index 380ba68d7..ccd7fe08c 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -9,7 +9,8 @@ import ( // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object -//Postgresql defines PostgreSQL Custom Resource Definition Object. + +// Postgresql defines PostgreSQL Custom Resource Definition Object. 
type Postgresql struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` @@ -50,10 +51,13 @@ type PostgresSpec struct { Databases map[string]string `json:"databases,omitempty"` Tolerations []v1.Toleration `json:"tolerations,omitempty"` Sidecars []Sidecar `json:"sidecars,omitempty"` + InitContainers []v1.Container `json:"init_containers,omitempty"` PodPriorityClassName string `json:"pod_priority_class_name,omitempty"` + ShmVolume *bool `json:"enableShmVolume,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + // PostgresqlList defines a list of PostgreSQL clusters. type PostgresqlList struct { metav1.TypeMeta `json:",inline"` @@ -90,8 +94,8 @@ type ResourceDescription struct { // Resources describes requests and limits for the cluster resouces. type Resources struct { - ResourceRequest ResourceDescription `json:"requests,omitempty"` - ResourceLimits ResourceDescription `json:"limits,omitempty"` + ResourceRequests ResourceDescription `json:"requests,omitempty"` + ResourceLimits ResourceDescription `json:"limits,omitempty"` } // Patroni contains Patroni-specific configuration diff --git a/pkg/apis/acid.zalan.do/v1/register.go b/pkg/apis/acid.zalan.do/v1/register.go index 7dd03fad1..165981a68 100644 --- a/pkg/apis/acid.zalan.do/v1/register.go +++ b/pkg/apis/acid.zalan.do/v1/register.go @@ -8,15 +8,20 @@ import ( "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do" ) +// APIVersion of the `postgresql` and `operator` CRDs const ( APIVersion = "v1" ) var ( // localSchemeBuilder and AddToScheme will stay in k8s.io/kubernetes. 
+ + // An instance of runtime.SchemeBuilder, global for this package SchemeBuilder runtime.SchemeBuilder localSchemeBuilder = &SchemeBuilder - AddToScheme = localSchemeBuilder.AddToScheme + //AddToScheme is localSchemeBuilder.AddToScheme + AddToScheme = localSchemeBuilder.AddToScheme + //SchemeGroupVersion has GroupName and APIVersion SchemeGroupVersion = schema.GroupVersion{Group: acidzalando.GroupName, Version: APIVersion} ) diff --git a/pkg/apis/acid.zalan.do/v1/util.go b/pkg/apis/acid.zalan.do/v1/util.go index 2d3c90db8..0a3267972 100644 --- a/pkg/apis/acid.zalan.do/v1/util.go +++ b/pkg/apis/acid.zalan.do/v1/util.go @@ -14,6 +14,7 @@ var ( serviceNameRegex = regexp.MustCompile(serviceNameRegexString) ) +// Clone convenience wrapper around DeepCopy func (p *Postgresql) Clone() *Postgresql { if p == nil { return nil @@ -83,6 +84,7 @@ func validateCloneClusterDescription(clone *CloneDescription) error { return nil } +// Success of the current Status func (status PostgresStatus) Success() bool { return status != ClusterStatusAddFailed && status != ClusterStatusUpdateFailed && diff --git a/pkg/apis/acid.zalan.do/v1/util_test.go b/pkg/apis/acid.zalan.do/v1/util_test.go index 99e3f2b7c..01be31e88 100644 --- a/pkg/apis/acid.zalan.do/v1/util_test.go +++ b/pkg/apis/acid.zalan.do/v1/util_test.go @@ -240,8 +240,8 @@ var unmarshalCluster = []struct { Slots: map[string]map[string]string{"permanent_logical_1": {"type": "logical", "database": "foo", "plugin": "pgoutput"}}, }, Resources: Resources{ - ResourceRequest: ResourceDescription{CPU: "10m", Memory: "50Mi"}, - ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"}, + ResourceRequests: ResourceDescription{CPU: "10m", Memory: "50Mi"}, + ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"}, }, TeamID: "ACID", @@ -499,7 +499,7 @@ func TestMarshal(t *testing.T) { t.Errorf("Marshal error: %v", err) } if !bytes.Equal(m, tt.marshal) { - t.Errorf("Marshal Postgresql expected: %q, got: %q", 
string(tt.marshal), string(m)) + t.Errorf("Marshal Postgresql \nexpected: %q, \ngot: %q", string(tt.marshal), string(m)) } } } @@ -507,11 +507,11 @@ func TestMarshal(t *testing.T) { func TestPostgresMeta(t *testing.T) { for _, tt := range unmarshalCluster { if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta { - t.Errorf("GetObjectKindMeta expected: %v, got: %v", tt.out.TypeMeta, a) + t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot: %v", tt.out.TypeMeta, a) } if a := tt.out.GetObjectMeta(); reflect.DeepEqual(a, tt.out.ObjectMeta) { - t.Errorf("GetObjectMeta expected: %v, got: %v", tt.out.ObjectMeta, a) + t.Errorf("GetObjectMeta \nexpected: %v, \ngot: %v", tt.out.ObjectMeta, a) } } } diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index 4496f8c0a..66cef6e6d 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -1,7 +1,7 @@ // +build !ignore_autogenerated /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -442,6 +442,18 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.InitContainers != nil { + in, out := &in.InitContainers, &out.InitContainers + *out = make([]corev1.Container, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.ShmVolume != nil { + in, out := &in.ShmVolume, &out.ShmVolume + *out = new(bool) + **out = **in + } return } @@ -573,7 +585,7 @@ func (in *ResourceDescription) DeepCopy() *ResourceDescription { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Resources) DeepCopyInto(out *Resources) { *out = *in - out.ResourceRequest = in.ResourceRequest + out.ResourceRequests = in.ResourceRequests out.ResourceLimits = in.ResourceLimits return } diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index b2208705a..0230f0c18 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -12,7 +12,7 @@ import ( "github.com/sirupsen/logrus" "k8s.io/api/apps/v1beta1" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" policybeta1 "k8s.io/api/policy/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -494,7 +494,7 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error { defer func() { if updateFailed { c.setStatus(acidv1.ClusterStatusUpdateFailed) - } else if c.Status != acidv1.ClusterStatusRunning { + } else { c.setStatus(acidv1.ClusterStatusRunning) } }() @@ -709,11 +709,16 @@ func (c *Cluster) initRobotUsers() error { if err != nil { return fmt.Errorf("invalid flags for user %q: %v", username, err) } + adminRole := "" + if c.OpConfig.EnableAdminRoleForUsers { + adminRole = c.OpConfig.TeamAdminRole + } newRole := spec.PgUser{ - Origin: spec.RoleOriginManifest, - Name: username, - Password: util.RandomPassword(constants.PasswordLength), - Flags: flags, + Origin: spec.RoleOriginManifest, + Name: username, + Password: util.RandomPassword(constants.PasswordLength), + Flags: flags, + AdminRole: adminRole, } if currentRole, present := c.pgUsers[username]; present { c.pgUsers[username] = c.resolveNameConflict(¤tRole, &newRole) @@ -872,7 +877,7 @@ func (c *Cluster) GetStatus() *ClusterStatus { func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) error { var err error - c.logger.Debugf("failing over from %q to %q", curMaster.Name, candidate) + c.logger.Debugf("switching over from %q to %q", curMaster.Name, candidate) var wg sync.WaitGroup @@ -898,12 +903,12 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate 
spec.NamespacedName) e }() if err = c.patroni.Switchover(curMaster, candidate.Name); err == nil { - c.logger.Debugf("successfully failed over from %q to %q", curMaster.Name, candidate) + c.logger.Debugf("successfully switched over from %q to %q", curMaster.Name, candidate) if err = <-podLabelErr; err != nil { err = fmt.Errorf("could not get master pod label: %v", err) } } else { - err = fmt.Errorf("could not failover: %v", err) + err = fmt.Errorf("could not switch over: %v", err) } // signal the role label waiting goroutine to close the shop and go home diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index e1ab471ee..730b575f4 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -18,6 +18,7 @@ import ( acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" + "github.com/zalando-incubator/postgres-operator/pkg/util/config" "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "k8s.io/apimachinery/pkg/labels" ) @@ -92,18 +93,18 @@ func (c *Cluster) makeDefaultResources() acidv1.Resources { defaultRequests := acidv1.ResourceDescription{CPU: config.DefaultCPURequest, Memory: config.DefaultMemoryRequest} defaultLimits := acidv1.ResourceDescription{CPU: config.DefaultCPULimit, Memory: config.DefaultMemoryLimit} - return acidv1.Resources{ResourceRequest: defaultRequests, ResourceLimits: defaultLimits} + return acidv1.Resources{ResourceRequests: defaultRequests, ResourceLimits: defaultLimits} } func generateResourceRequirements(resources acidv1.Resources, defaultResources acidv1.Resources) (*v1.ResourceRequirements, error) { var err error - specRequests := resources.ResourceRequest + specRequests := resources.ResourceRequests specLimits := resources.ResourceLimits result := v1.ResourceRequirements{} - result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequest) + 
result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequests) if err != nil { return nil, fmt.Errorf("could not fill resource requests: %v", err) } @@ -289,6 +290,26 @@ func nodeAffinity(nodeReadinessLabel map[string]string) *v1.Affinity { } } +func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity) *v1.Affinity { + // generate pod anti-affinity to avoid multiple pods of the same Postgres cluster in the same topology , e.g. node + podAffinity := v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + TopologyKey: topologyKey, + }}, + }, + } + + if nodeAffinity != nil && nodeAffinity.NodeAffinity != nil { + podAffinity.NodeAffinity = nodeAffinity.NodeAffinity + } + + return &podAffinity +} + func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]string) []v1.Toleration { // allow to override tolerations by postgresql manifest if len(*tolerationsSpec) > 0 { @@ -338,7 +359,6 @@ func generateSpiloContainer( envVars []v1.EnvVar, volumeMounts []v1.VolumeMount, ) *v1.Container { - privilegedMode := true return &v1.Container{ Name: name, @@ -377,8 +397,8 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar, resources, err := generateResourceRequirements( makeResources( - sidecar.Resources.ResourceRequest.CPU, - sidecar.Resources.ResourceRequest.Memory, + sidecar.Resources.ResourceRequests.CPU, + sidecar.Resources.ResourceRequests.Memory, sidecar.Resources.ResourceLimits.CPU, sidecar.Resources.ResourceLimits.Memory, ), @@ -396,10 +416,21 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar, return nil, nil } +// Check whether or not we're requested to mount an shm volume, +// taking into account that PostgreSQL manifest has precedence. 
+func mountShmVolumeNeeded(opConfig config.Config, pgSpec *acidv1.PostgresSpec) bool { + if pgSpec.ShmVolume != nil { + return *pgSpec.ShmVolume + } + + return opConfig.ShmVolume +} + func generatePodTemplate( namespace string, labels labels.Set, spiloContainer *v1.Container, + initContainers []v1.Container, sidecarContainers []v1.Container, tolerationsSpec *[]v1.Toleration, nodeAffinity *v1.Affinity, @@ -407,6 +438,9 @@ func generatePodTemplate( podServiceAccountName string, kubeIAMRole string, priorityClassName string, + shmVolume bool, + podAntiAffinity bool, + podAntiAffinityTopologyKey string, ) (*v1.PodTemplateSpec, error) { terminateGracePeriodSeconds := terminateGracePeriod @@ -417,10 +451,17 @@ func generatePodTemplate( ServiceAccountName: podServiceAccountName, TerminationGracePeriodSeconds: &terminateGracePeriodSeconds, Containers: containers, + InitContainers: initContainers, Tolerations: *tolerationsSpec, } - if nodeAffinity != nil { + if shmVolume { + addShmVolume(&podSpec) + } + + if podAntiAffinity { + podSpec.Affinity = generatePodAffinity(labels, podAntiAffinityTopologyKey, nodeAffinity) + } else if nodeAffinity != nil { podSpec.Affinity = nodeAffinity } @@ -475,6 +516,18 @@ func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration stri Name: "PGUSER_SUPERUSER", Value: c.OpConfig.SuperUsername, }, + { + Name: "KUBERNETES_SCOPE_LABEL", + Value: c.OpConfig.ClusterNameLabel, + }, + { + Name: "KUBERNETES_ROLE_LABEL", + Value: c.OpConfig.PodRoleLabel, + }, + { + Name: "KUBERNETES_LABELS", + Value: labels.Set(c.OpConfig.ClusterLabels).String(), + }, { Name: "PGPASSWORD_SUPERUSER", ValueFrom: &v1.EnvVarSource{ @@ -629,7 +682,7 @@ func getBucketScopeSuffix(uid string) string { func makeResources(cpuRequest, memoryRequest, cpuLimit, memoryLimit string) acidv1.Resources { return acidv1.Resources{ - ResourceRequest: acidv1.ResourceDescription{ + ResourceRequests: acidv1.ResourceDescription{ CPU: cpuRequest, Memory: memoryRequest, }, @@ 
-648,6 +701,61 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State podTemplate *v1.PodTemplateSpec volumeClaimTemplate *v1.PersistentVolumeClaim ) + + // Improve me. Please. + if c.OpConfig.SetMemoryRequestToLimit { + + // controller adjusts the default memory request at operator startup + + request := spec.Resources.ResourceRequests.Memory + if request == "" { + request = c.OpConfig.DefaultMemoryRequest + } + + limit := spec.Resources.ResourceLimits.Memory + if limit == "" { + limit = c.OpConfig.DefaultMemoryLimit + } + + isSmaller, err := util.RequestIsSmallerThanLimit(request, limit) + if err != nil { + return nil, err + } + if isSmaller { + c.logger.Warningf("The memory request of %v for the Postgres container is increased to match the memory limit of %v.", request, limit) + spec.Resources.ResourceRequests.Memory = limit + + } + + // controller adjusts the Scalyr sidecar request at operator startup + // as this sidecar is managed separately + + // adjust sidecar containers defined for that particular cluster + for _, sidecar := range spec.Sidecars { + + // TODO #413 + sidecarRequest := sidecar.Resources.ResourceRequests.Memory + if request == "" { + request = c.OpConfig.DefaultMemoryRequest + } + + sidecarLimit := sidecar.Resources.ResourceLimits.Memory + if limit == "" { + limit = c.OpConfig.DefaultMemoryLimit + } + + isSmaller, err := util.RequestIsSmallerThanLimit(sidecarRequest, sidecarLimit) + if err != nil { + return nil, err + } + if isSmaller { + c.logger.Warningf("The memory request of %v for the %v sidecar container is increased to match the memory limit of %v.", sidecar.Resources.ResourceRequests.Memory, sidecar.Name, sidecar.Resources.ResourceLimits.Memory) + sidecar.Resources.ResourceRequests.Memory = sidecar.Resources.ResourceLimits.Memory + } + } + + } + defaultResources := c.makeDefaultResources() resourceRequirements, err := generateResourceRequirements(spec.Resources, defaultResources) @@ -674,8 +782,8 @@ func 
(c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State // generate environment variables for the spilo container spiloEnvVars := deduplicateEnvVars( - c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone, customPodEnvVarsList), - c.containerName(), c.logger) + c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone, + customPodEnvVarsList), c.containerName(), c.logger) // pickup the docker image for the spilo container effectiveDockerImage := util.Coalesce(spec.DockerImage, c.OpConfig.DockerImage) @@ -683,9 +791,15 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State volumeMounts := generateVolumeMounts() // generate the spilo container - spiloContainer := generateSpiloContainer(c.containerName(), &effectiveDockerImage, resourceRequirements, spiloEnvVars, volumeMounts) + c.logger.Debugf("Generating Spilo container, environment variables: %v", spiloEnvVars) + spiloContainer := generateSpiloContainer(c.containerName(), + &effectiveDockerImage, + resourceRequirements, + spiloEnvVars, + volumeMounts, + ) - // resolve conflicts between operator-global and per-cluster sidecards + // resolve conflicts between operator-global and per-cluster sidecars sideCars := c.mergeSidecars(spec.Sidecars) resourceRequirementsScalyrSidecar := makeResources( @@ -714,18 +828,22 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State tolerationSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration) effectivePodPriorityClassName := util.Coalesce(spec.PodPriorityClassName, c.OpConfig.PodPriorityClassName) - // generate pod template for the statefulset, based on the spilo container and sidecards + // generate pod template for the statefulset, based on the spilo container and sidecars if podTemplate, err = generatePodTemplate( c.Namespace, c.labelsSet(true), spiloContainer, + spec.InitContainers, sidecarContainers, &tolerationSpec, 
nodeAffinity(c.OpConfig.NodeReadinessLabel), int64(c.OpConfig.PodTerminateGracePeriod.Seconds()), c.OpConfig.PodServiceAccountName, c.OpConfig.KubeIAMRole, - effectivePodPriorityClassName); err != nil { + effectivePodPriorityClassName, + mountShmVolumeNeeded(c.OpConfig, spec), + c.OpConfig.EnablePodAntiAffinity, + c.OpConfig.PodAntiAffinityTopologyKey); err != nil { return nil, fmt.Errorf("could not generate pod template: %v", err) } @@ -832,6 +950,32 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 { return newcur } +// To avoid issues with limited /dev/shm inside docker environment, when +// PostgreSQL can't allocate enough of dsa segments from it, we can +// mount an extra memory volume +// +// see https://docs.okd.io/latest/dev_guide/shared_memory.html +func addShmVolume(podSpec *v1.PodSpec) { + volumes := append(podSpec.Volumes, v1.Volume{ + Name: constants.ShmVolumeName, + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{ + Medium: "Memory", + }, + }, + }) + + pgIdx := constants.PostgresContainerIdx + mounts := append(podSpec.Containers[pgIdx].VolumeMounts, + v1.VolumeMount{ + Name: constants.ShmVolumeName, + MountPath: constants.ShmVolumePath, + }) + + podSpec.Containers[0].VolumeMounts = mounts + podSpec.Volumes = volumes +} + func generatePersistentVolumeClaimTemplate(volumeSize, volumeStorageClass string) (*v1.PersistentVolumeClaim, error) { var storageClassName *string @@ -959,7 +1103,7 @@ func (c *Cluster) generateService(role PostgresRole, spec *acidv1.PostgresSpec) } if role == Replica { - serviceSpec.Selector = c.roleLabelsSet(role) + serviceSpec.Selector = c.roleLabelsSet(false, role) } var annotations map[string]string @@ -982,6 +1126,13 @@ func (c *Cluster) generateService(role PostgresRole, spec *acidv1.PostgresSpec) constants.ZalandoDNSNameAnnotation: dnsName, constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, } + + if len(c.OpConfig.CustomServiceAnnotations) != 0 { + 
c.logger.Debugf("There are custom annotations defined, creating them.") + for customAnnotationKey, customAnnotationValue := range c.OpConfig.CustomServiceAnnotations { + annotations[customAnnotationKey] = customAnnotationValue + } + } } else if role == Replica { // before PR #258, the replica service was only created if allocated a LB // now we always create the service but warn if the LB is absent @@ -992,7 +1143,7 @@ func (c *Cluster) generateService(role PostgresRole, spec *acidv1.PostgresSpec) ObjectMeta: metav1.ObjectMeta{ Name: c.serviceName(role), Namespace: c.Namespace, - Labels: c.roleLabelsSet(role), + Labels: c.roleLabelsSet(true, role), Annotations: annotations, }, Spec: serviceSpec, @@ -1006,7 +1157,7 @@ func (c *Cluster) generateEndpoint(role PostgresRole, subsets []v1.EndpointSubse ObjectMeta: metav1.ObjectMeta{ Name: c.endpointName(role), Namespace: c.Namespace, - Labels: c.roleLabelsSet(role), + Labels: c.roleLabelsSet(true, role), }, } if len(subsets) > 0 { @@ -1070,7 +1221,7 @@ func (c *Cluster) generatePodDisruptionBudget() *policybeta1.PodDisruptionBudget Spec: policybeta1.PodDisruptionBudgetSpec{ MinAvailable: &minAvailable, Selector: &metav1.LabelSelector{ - MatchLabels: c.roleLabelsSet(Master), + MatchLabels: c.roleLabelsSet(false, Master), }, }, } diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 12e145c04..92946ab2b 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1,8 +1,11 @@ package cluster import ( + "k8s.io/api/core/v1" + acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando-incubator/postgres-operator/pkg/util/config" + "github.com/zalando-incubator/postgres-operator/pkg/util/constants" "github.com/zalando-incubator/postgres-operator/pkg/util/k8sutil" "testing" ) @@ -75,3 +78,54 @@ func TestCreateLoadBalancerLogic(t *testing.T) { } } } + +func TestShmVolume(t *testing.T) { + testName := "TestShmVolume" + tests := []struct { + subTest 
string + podSpec *v1.PodSpec + shmPos int + }{ + { + subTest: "empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{}, + }, + }, + }, + shmPos: 0, + }, + { + subTest: "non empty PodSpec", + podSpec: &v1.PodSpec{ + Volumes: []v1.Volume{v1.Volume{}}, + Containers: []v1.Container{ + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + v1.VolumeMount{}, + }, + }, + }, + }, + shmPos: 1, + }, + } + for _, tt := range tests { + addShmVolume(tt.podSpec) + + volumeName := tt.podSpec.Volumes[tt.shmPos].Name + volumeMountName := tt.podSpec.Containers[0].VolumeMounts[tt.shmPos].Name + + if volumeName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected volume %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeName) + } + if volumeMountName != constants.ShmVolumeName { + t.Errorf("%s %s: Expected mount %s was not created, have %s instead", + testName, tt.subTest, constants.ShmVolumeName, volumeMountName) + } + } +} diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index ab282b6b9..52b8b4e09 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -4,7 +4,7 @@ import ( "fmt" "math/rand" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/zalando-incubator/postgres-operator/pkg/spec" @@ -27,7 +27,7 @@ func (c *Cluster) listPods() ([]v1.Pod, error) { func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) { listOptions := metav1.ListOptions{ - LabelSelector: c.roleLabelsSet(role).String(), + LabelSelector: c.roleLabelsSet(false, role).String(), } pods, err := c.KubeClient.Pods(c.Namespace).List(listOptions) @@ -77,11 +77,7 @@ func (c *Cluster) deletePod(podName spec.NamespacedName) error { return err } - if err := c.waitForPodDeletion(ch); err != nil { - return err - } - - return nil + return c.waitForPodDeletion(ch) } func (c *Cluster) unregisterPodSubscriber(podName 
spec.NamespacedName) { @@ -122,7 +118,7 @@ func (c *Cluster) movePodFromEndOfLifeNode(pod *v1.Pod) (*v1.Pod, error) { if eol, err = c.podIsEndOfLife(pod); err != nil { return nil, fmt.Errorf("could not get node %q: %v", pod.Spec.NodeName, err) } else if !eol { - c.logger.Infof("pod %q is already on a live node", podName) + c.logger.Infof("check failed: pod %q is already on a live node", podName) return pod, nil } @@ -162,7 +158,7 @@ func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) { } if len(replicas) == 0 { - c.logger.Warningf("no available master candidates, migration will cause longer downtime of the master instance") + c.logger.Warningf("no available master candidates, migration will cause longer downtime of Postgres cluster") return nil, nil } @@ -193,18 +189,18 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error { return fmt.Errorf("could not get pod: %v", err) } - c.logger.Infof("migrating master pod %q", podName) + c.logger.Infof("starting process to migrate master pod %q", podName) if eol, err = c.podIsEndOfLife(oldMaster); err != nil { return fmt.Errorf("could not get node %q: %v", oldMaster.Spec.NodeName, err) } if !eol { - c.logger.Debugf("pod is already on a live node") + c.logger.Debugf("no action needed: master pod is already on a live node") return nil } if role := PostgresRole(oldMaster.Labels[c.OpConfig.PodRoleLabel]); role != Master { - c.logger.Warningf("pod %q is not a master", podName) + c.logger.Warningf("no action needed: pod %q is not the master (anymore)", podName) return nil } // we must have a statefulset in the cluster for the migration to work @@ -219,10 +215,10 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error { // We may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case. 
if *c.Statefulset.Spec.Replicas > 1 { if masterCandidatePod, err = c.masterCandidate(oldMaster.Spec.NodeName); err != nil { - return fmt.Errorf("could not get new master candidate: %v", err) + return fmt.Errorf("could not find suitable replica pod as candidate for failover: %v", err) } } else { - c.logger.Warningf("single master pod for cluster %q, migration will cause longer downtime of the master instance", c.clusterName()) + c.logger.Warningf("migrating single pod cluster %q, this will cause downtime of the Postgres cluster until pod is back", c.clusterName()) } // there are two cases for each postgres cluster that has its master pod on the node to migrate from: @@ -256,15 +252,15 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st return fmt.Errorf("could not get pod: %v", err) } - c.logger.Infof("migrating replica pod %q", podName) + c.logger.Infof("migrating replica pod %q to live node", podName) if replicaPod.Spec.NodeName != fromNodeName { - c.logger.Infof("pod %q has already migrated to node %q", podName, replicaPod.Spec.NodeName) + c.logger.Infof("check failed: pod %q has already migrated to node %q", podName, replicaPod.Spec.NodeName) return nil } if role := PostgresRole(replicaPod.Labels[c.OpConfig.PodRoleLabel]); role != Replica { - return fmt.Errorf("pod %q is not a replica", podName) + return fmt.Errorf("check failed: pod %q is not a replica", podName) } _, err = c.movePodFromEndOfLifeNode(replicaPod) @@ -296,7 +292,7 @@ func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { } func (c *Cluster) recreatePods() error { - c.setProcessName("recreating pods") + c.setProcessName("starting to recreate pods") ls := c.labelsSet(false) namespace := c.Namespace @@ -337,10 +333,10 @@ func (c *Cluster) recreatePods() error { // failover if we have not observed a master pod when re-creating former replicas. 
if newMasterPod == nil && len(replicas) > 0 { if err := c.Switchover(masterPod, masterCandidate(replicas)); err != nil { - c.logger.Warningf("could not perform failover: %v", err) + c.logger.Warningf("could not perform switch over: %v", err) } } else if newMasterPod == nil && len(replicas) == 0 { - c.logger.Warningf("cannot switch master role before re-creating the pod: no replicas") + c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas") } c.logger.Infof("recreating old master pod %q", util.NameFromMeta(masterPod.ObjectMeta)) diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index 10e4201cb..886a4bac9 100644 --- a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -437,7 +437,11 @@ func (c *Cluster) updateService(role PostgresRole, newService *v1.Service) error func (c *Cluster) deleteService(role PostgresRole) error { c.logger.Debugf("deleting service %s", role) - service := c.Services[role] + service, ok := c.Services[role] + if !ok { + c.logger.Debugf("No service for %s role was found, nothing to delete", role) + return nil + } if err := c.KubeClient.Services(service.Namespace).Delete(service.Name, c.deleteOptions); err != nil { return err diff --git a/pkg/cluster/types.go b/pkg/cluster/types.go index 83b7e73fb..681f99e1f 100644 --- a/pkg/cluster/types.go +++ b/pkg/cluster/types.go @@ -1,12 +1,13 @@ package cluster import ( + "time" + acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1" "k8s.io/api/apps/v1beta1" "k8s.io/api/core/v1" policybeta1 "k8s.io/api/policy/v1beta1" "k8s.io/apimachinery/pkg/types" - "time" ) // PostgresRole describes role of the node @@ -20,6 +21,7 @@ const ( Replica PostgresRole = "replica" ) +// PodEventType represents the type of a pod-related event type PodEventType string // Possible values for the EventType diff --git a/pkg/cluster/util.go b/pkg/cluster/util.go index dbfda1c0e..74a009d2c 100644 --- a/pkg/cluster/util.go +++ b/pkg/cluster/util.go @@ 
-389,6 +389,19 @@ func (c *Cluster) labelsSet(shouldAddExtraLabels bool) labels.Set { if shouldAddExtraLabels { // enables filtering resources owned by a team lbls["team"] = c.Postgresql.Spec.TeamID + + // allow to inherit certain labels from the 'postgres' object + if spec, err := c.GetSpec(); err == nil { + for k, v := range spec.ObjectMeta.Labels { + for _, match := range c.OpConfig.InheritedLabels { + if k == match { + lbls[k] = v + } + } + } + } else { + c.logger.Warningf("could not get the list of InheritedLabels for cluster %q: %v", c.Name, err) + } } return labels.Set(lbls) @@ -398,8 +411,8 @@ func (c *Cluster) labelsSelector() *metav1.LabelSelector { return &metav1.LabelSelector{MatchLabels: c.labelsSet(false), MatchExpressions: nil} } -func (c *Cluster) roleLabelsSet(role PostgresRole) labels.Set { - lbls := c.labelsSet(false) +func (c *Cluster) roleLabelsSet(shouldAddExtraLabels bool, role PostgresRole) labels.Set { + lbls := c.labelsSet(shouldAddExtraLabels) lbls[c.OpConfig.PodRoleLabel] = string(role) return lbls } @@ -460,6 +473,7 @@ func (c *Cluster) setSpec(newSpec *acidv1.Postgresql) { c.specMu.Unlock() } +// GetSpec returns a copy of the operator-side spec of a Postgres cluster in a thread-safe manner func (c *Cluster) GetSpec() (*acidv1.Postgresql, error) { c.specMu.RLock() defer c.specMu.RUnlock() diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 1bc4e08e0..fd1c099de 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -110,6 +110,29 @@ func (c *Controller) initOperatorConfig() { c.opConfig = config.NewFromMap(configMapData) c.warnOnDeprecatedOperatorParameters() + if c.opConfig.SetMemoryRequestToLimit { + + isSmaller, err := util.RequestIsSmallerThanLimit(c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit) + if err != nil { + panic(err) + } + if isSmaller { + c.logger.Warningf("The default memory request of %v for Postgres containers is increased to match the default memory 
limit of %v.", c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit) + c.opConfig.DefaultMemoryRequest = c.opConfig.DefaultMemoryLimit + } + + isSmaller, err = util.RequestIsSmallerThanLimit(c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit) + if err != nil { + panic(err) + } + if isSmaller { + c.logger.Warningf("The memory request of %v for the Scalyr sidecar container is increased to match the memory limit of %v.", c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit) + c.opConfig.ScalyrMemoryRequest = c.opConfig.ScalyrMemoryLimit + } + + // generateStatefulSet adjusts values for individual Postgres clusters + } + } func (c *Controller) modifyConfigFromEnvironment() { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 93ba1a0f4..08df7e97c 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -39,6 +39,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.PodServiceAccountName = fromCRD.Kubernetes.PodServiceAccountName result.PodServiceAccountDefinition = fromCRD.Kubernetes.PodServiceAccountDefinition result.PodServiceAccountRoleBindingDefinition = fromCRD.Kubernetes.PodServiceAccountRoleBindingDefinition + result.PodEnvironmentConfigMap = fromCRD.Kubernetes.PodEnvironmentConfigMap result.PodTerminateGracePeriod = time.Duration(fromCRD.Kubernetes.PodTerminateGracePeriod) result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat @@ -47,14 +48,19 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.InfrastructureRolesSecretName = fromCRD.Kubernetes.InfrastructureRolesSecretName result.PodRoleLabel = fromCRD.Kubernetes.PodRoleLabel result.ClusterLabels = fromCRD.Kubernetes.ClusterLabels + result.InheritedLabels = fromCRD.Kubernetes.InheritedLabels result.ClusterNameLabel = fromCRD.Kubernetes.ClusterNameLabel 
result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName + result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity + result.PodAntiAffinityTopologyKey = fromCRD.Kubernetes.PodAntiAffinityTopologyKey + result.DefaultCPURequest = fromCRD.PostgresPodResources.DefaultCPURequest result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit result.DefaultMemoryLimit = fromCRD.PostgresPodResources.DefaultMemoryLimit + result.SetMemoryRequestToLimit = fromCRD.SetMemoryRequestToLimit result.ResourceCheckInterval = time.Duration(fromCRD.Timeouts.ResourceCheckInterval) result.ResourceCheckTimeout = time.Duration(fromCRD.Timeouts.ResourceCheckTimeout) @@ -66,6 +72,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.DbHostedZone = fromCRD.LoadBalancer.DbHostedZone result.EnableMasterLoadBalancer = fromCRD.LoadBalancer.EnableMasterLoadBalancer result.EnableReplicaLoadBalancer = fromCRD.LoadBalancer.EnableReplicaLoadBalancer + result.CustomServiceAnnotations = fromCRD.LoadBalancer.CustomServiceAnnotations result.MasterDNSNameFormat = fromCRD.LoadBalancer.MasterDNSNameFormat result.ReplicaDNSNameFormat = fromCRD.LoadBalancer.ReplicaDNSNameFormat diff --git a/pkg/controller/postgresql.go b/pkg/controller/postgresql.go index e67b47193..e551930bd 100644 --- a/pkg/controller/postgresql.go +++ b/pkg/controller/postgresql.go @@ -385,8 +385,14 @@ func (c *Controller) queueClusterEvent(informerOldSpec, informerNewSpec *acidv1.
if informerOldSpec != nil { //update, delete uid = informerOldSpec.GetUID() clusterName = util.NameFromMeta(informerOldSpec.ObjectMeta) + + // user is fixing previously incorrect spec if eventType == EventUpdate && informerNewSpec.Error == "" && informerOldSpec.Error != "" { eventType = EventSync + } + + // set current error to be one of the new spec if present + if informerNewSpec != nil { clusterError = informerNewSpec.Error } else { clusterError = informerOldSpec.Error diff --git a/pkg/generated/clientset/versioned/clientset.go b/pkg/generated/clientset/versioned/clientset.go index d42fa3b21..4bddb3eed 100644 --- a/pkg/generated/clientset/versioned/clientset.go +++ b/pkg/generated/clientset/versioned/clientset.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/doc.go b/pkg/generated/clientset/versioned/doc.go index 4ef8c1bb8..d514b90a4 100644 --- a/pkg/generated/clientset/versioned/doc.go +++ b/pkg/generated/clientset/versioned/doc.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/fake/clientset_generated.go b/pkg/generated/clientset/versioned/fake/clientset_generated.go index 19d9ab805..defa99ce7 100644 --- a/pkg/generated/clientset/versioned/fake/clientset_generated.go +++ b/pkg/generated/clientset/versioned/fake/clientset_generated.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal 
diff --git a/pkg/generated/clientset/versioned/fake/doc.go b/pkg/generated/clientset/versioned/fake/doc.go index c249c43fa..960df4951 100644 --- a/pkg/generated/clientset/versioned/fake/doc.go +++ b/pkg/generated/clientset/versioned/fake/doc.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/fake/register.go b/pkg/generated/clientset/versioned/fake/register.go index 5269b757a..a2c11fe33 100644 --- a/pkg/generated/clientset/versioned/fake/register.go +++ b/pkg/generated/clientset/versioned/fake/register.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/scheme/doc.go b/pkg/generated/clientset/versioned/scheme/doc.go index d17209947..ea0df2783 100644 --- a/pkg/generated/clientset/versioned/scheme/doc.go +++ b/pkg/generated/clientset/versioned/scheme/doc.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/scheme/register.go b/pkg/generated/clientset/versioned/scheme/register.go index 346cd4b16..8d1fea84f 100644 --- a/pkg/generated/clientset/versioned/scheme/register.go +++ b/pkg/generated/clientset/versioned/scheme/register.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 
"Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/acid.zalan.do_client.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/acid.zalan.do_client.go index 4e73a425f..23623ad26 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/acid.zalan.do_client.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/acid.zalan.do_client.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/doc.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/doc.go index 97d91a36a..8bff3bf2d 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/doc.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/doc.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/doc.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/doc.go index 58640649f..c9373b6d8 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/doc.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/doc.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_acid.zalan.do_client.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_acid.zalan.do_client.go 
index 9d401ef7c..310e1435e 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_acid.zalan.do_client.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_acid.zalan.do_client.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_operatorconfiguration.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_operatorconfiguration.go index b2fada626..f3587f267 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_operatorconfiguration.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_operatorconfiguration.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_postgresql.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_postgresql.go index 6feb72eb8..6cc46d47c 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_postgresql.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/fake/fake_postgresql.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/generated_expansion.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/generated_expansion.go index 775a4b21f..e1d824486 100644 --- 
a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/generated_expansion.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/generated_expansion.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/operatorconfiguration.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/operatorconfiguration.go index 2541f0e3f..873ecf0b4 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/operatorconfiguration.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/operatorconfiguration.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/postgresql.go b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/postgresql.go index df1045ee3..4a7acac79 100644 --- a/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/postgresql.go +++ b/pkg/generated/clientset/versioned/typed/acid.zalan.do/v1/postgresql.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/informers/externalversions/acid.zalan.do/interface.go b/pkg/generated/informers/externalversions/acid.zalan.do/interface.go index 9dfa60021..4407c1cde 100644 --- a/pkg/generated/informers/externalversions/acid.zalan.do/interface.go +++ b/pkg/generated/informers/externalversions/acid.zalan.do/interface.go @@ -1,5 +1,5 @@ /* 
-Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/informers/externalversions/acid.zalan.do/v1/interface.go b/pkg/generated/informers/externalversions/acid.zalan.do/v1/interface.go index f0f35b65c..e519b9716 100644 --- a/pkg/generated/informers/externalversions/acid.zalan.do/v1/interface.go +++ b/pkg/generated/informers/externalversions/acid.zalan.do/v1/interface.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/informers/externalversions/acid.zalan.do/v1/postgresql.go b/pkg/generated/informers/externalversions/acid.zalan.do/v1/postgresql.go index 50f3126cf..f44fccbc2 100644 --- a/pkg/generated/informers/externalversions/acid.zalan.do/v1/postgresql.go +++ b/pkg/generated/informers/externalversions/acid.zalan.do/v1/postgresql.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/informers/externalversions/factory.go b/pkg/generated/informers/externalversions/factory.go index 395bc25b5..30dd7b8d0 100644 --- a/pkg/generated/informers/externalversions/factory.go +++ b/pkg/generated/informers/externalversions/factory.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/informers/externalversions/generic.go 
b/pkg/generated/informers/externalversions/generic.go index 1b1988212..3ee0f89b6 100644 --- a/pkg/generated/informers/externalversions/generic.go +++ b/pkg/generated/informers/externalversions/generic.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/informers/externalversions/internalinterfaces/factory_interfaces.go b/pkg/generated/informers/externalversions/internalinterfaces/factory_interfaces.go index f3b4ab9fa..37857d2bc 100644 --- a/pkg/generated/informers/externalversions/internalinterfaces/factory_interfaces.go +++ b/pkg/generated/informers/externalversions/internalinterfaces/factory_interfaces.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/listers/acid.zalan.do/v1/expansion_generated.go b/pkg/generated/listers/acid.zalan.do/v1/expansion_generated.go index 071a413d6..4c353bec3 100644 --- a/pkg/generated/listers/acid.zalan.do/v1/expansion_generated.go +++ b/pkg/generated/listers/acid.zalan.do/v1/expansion_generated.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/generated/listers/acid.zalan.do/v1/postgresql.go b/pkg/generated/listers/acid.zalan.do/v1/postgresql.go index c8603bc79..a9028987b 100644 --- a/pkg/generated/listers/acid.zalan.do/v1/postgresql.go +++ b/pkg/generated/listers/acid.zalan.do/v1/postgresql.go @@ -1,5 +1,5 @@ /* -Copyright 2018 Compose, Zalando SE +Copyright 2019 Compose, 
Zalando SE Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pkg/spec/types.go b/pkg/spec/types.go index e394462d4..3e6bec8db 100644 --- a/pkg/spec/types.go +++ b/pkg/spec/types.go @@ -49,6 +49,7 @@ type PgUser struct { Flags []string `yaml:"user_flags"` MemberOf []string `yaml:"inrole"` Parameters map[string]string `yaml:"db_parameters"` + AdminRole string `yaml:"admin_role"` } // PgUserMap maps user names to the definitions. @@ -125,6 +126,7 @@ func (n *NamespacedName) Decode(value string) error { return n.DecodeWorker(value, GetOperatorNamespace()) } +// UnmarshalJSON converts a byte slice to NamespacedName func (n *NamespacedName) UnmarshalJSON(data []byte) error { result := NamespacedName{} var tmp string diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 92fd3fd73..a82f4c17d 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -27,6 +27,7 @@ type Resources struct { PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"` PodPriorityClassName string `name:"pod_priority_class_name"` ClusterLabels map[string]string `name:"cluster_labels" default:"application:spilo"` + InheritedLabels []string `name:"inherited_labels" default:""` ClusterNameLabel string `name:"cluster_name_label" default:"cluster-name"` PodRoleLabel string `name:"pod_role_label" default:"spilo-role"` PodToleration map[string]string `name:"toleration" default:""` @@ -38,6 +39,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"` + ShmVolume bool `name:"enable_shm_volume" default:"true"` } // Auth describes authentication specific configuration parameters @@ -89,8 +91,12 @@ type Config struct { EnableTeamsAPI bool `name:"enable_teams_api" 
default:"true"` EnableTeamSuperuser bool `name:"enable_team_superuser" default:"false"` TeamAdminRole string `name:"team_admin_role" default:"admin"` + EnableAdminRoleForUsers bool `name:"enable_admin_role_for_users" default:"true"` EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"` EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"` + CustomServiceAnnotations map[string]string `name:"custom_service_annotations"` + EnablePodAntiAffinity bool `name:"enable_pod_antiaffinity" default:"false"` + PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"` // deprecated and kept for backward compatibility EnableLoadBalancer *bool `name:"enable_load_balancer"` MasterDNSNameFormat StringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"` @@ -104,6 +110,7 @@ type Config struct { PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"` ProtectedRoles []string `name:"protected_role_names" default:"admin"` PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""` + SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"` } // MustMarshal marshals the config or panics diff --git a/pkg/util/config/util.go b/pkg/util/config/util.go index 498810bb7..4c1bdf7e0 100644 --- a/pkg/util/config/util.go +++ b/pkg/util/config/util.go @@ -19,6 +19,7 @@ type fieldInfo struct { Field reflect.Value } +// StringTemplate is a convenience alias type StringTemplate string func decoderFrom(field reflect.Value) (d decoder) { @@ -221,12 +222,14 @@ func getMapPairsFromString(value string) (pairs []string, err error) { return } +// Decode casts a string value to a StringTemplate func (f *StringTemplate) Decode(value string) error { *f = StringTemplate(value) return nil } +// Format builds the formatted string from the StringTemplate func (f *StringTemplate) Format(a ...string) string { res := string(*f) @@ -237,6
+240,7 @@ func (f *StringTemplate) Format(a ...string) string { return res } +// MarshalJSON converts a StringTemplate to byte slice func (f StringTemplate) MarshalJSON() ([]byte, error) { return json.Marshal(string(f)) } diff --git a/pkg/util/constants/kubernetes.go b/pkg/util/constants/kubernetes.go index 2604f124d..a4ea73e80 100644 --- a/pkg/util/constants/kubernetes.go +++ b/pkg/util/constants/kubernetes.go @@ -5,6 +5,7 @@ import "time" // General kubernetes-related constants const ( PostgresContainerName = "postgres" + PostgresContainerIdx = 0 K8sAPIPath = "/apis" StatefulsetDeletionInterval = 1 * time.Second StatefulsetDeletionTimeout = 30 * time.Second diff --git a/pkg/util/constants/postgresql.go b/pkg/util/constants/postgresql.go index 7556e8858..e39fd423f 100644 --- a/pkg/util/constants/postgresql.go +++ b/pkg/util/constants/postgresql.go @@ -10,4 +10,7 @@ const ( PostgresConnectRetryTimeout = 2 * time.Minute PostgresConnectTimeout = 15 * time.Second + + ShmVolumeName = "dshm" + ShmVolumePath = "/dev/shm" ) diff --git a/pkg/util/retryutil/retry_util.go b/pkg/util/retryutil/retry_util.go index cbae3bb1b..f8b61fc39 100644 --- a/pkg/util/retryutil/retry_util.go +++ b/pkg/util/retryutil/retry_util.go @@ -17,8 +17,10 @@ type Ticker struct { ticker *time.Ticker } +// Stop is a convenience wrapper around ticker.Stop func (t *Ticker) Stop() { t.ticker.Stop() } +// Tick is a convenience wrapper around ticker.C func (t *Ticker) Tick() { <-t.ticker.C } // Retry is a wrapper around RetryWorker that provides a real RetryTicker diff --git a/pkg/util/teams/teams.go b/pkg/util/teams/teams.go index 8afcd1a3b..d7413ab9c 100644 --- a/pkg/util/teams/teams.go +++ b/pkg/util/teams/teams.go @@ -43,6 +43,7 @@ type httpClient interface { Do(req *http.Request) (*http.Response, error) } +// Interface to the TeamsAPIClient type Interface interface { TeamInfo(teamID, token string) (tm *Team, err error) } diff --git a/pkg/util/users/users.go b/pkg/util/users/users.go index 
cd76c621d..b436595ef 100644 --- a/pkg/util/users/users.go +++ b/pkg/util/users/users.go @@ -5,9 +5,10 @@ import ( "fmt" "strings" + "reflect" + "github.com/zalando-incubator/postgres-operator/pkg/spec" "github.com/zalando-incubator/postgres-operator/pkg/util" - "reflect" ) const ( @@ -19,6 +20,7 @@ const ( doBlockStmt = `SET LOCAL synchronous_commit = 'local'; DO $$ BEGIN %s; END;$$;` passwordTemplate = "ENCRYPTED PASSWORD '%s'" inRoleTemplate = `IN ROLE %s` + adminTemplate = `ADMIN %s` ) // DefaultUserSyncStrategy implements a user sync strategy that merges already existing database users @@ -113,6 +115,9 @@ func (strategy DefaultUserSyncStrategy) createPgUser(user spec.PgUser, db *sql.D if len(user.MemberOf) > 0 { userFlags = append(userFlags, fmt.Sprintf(inRoleTemplate, quoteMemberList(user))) } + if user.AdminRole != "" { + userFlags = append(userFlags, fmt.Sprintf(adminTemplate, user.AdminRole)) + } if user.Password == "" { userPassword = "PASSWORD NULL" diff --git a/pkg/util/util.go b/pkg/util/util.go index 7b7b58fc4..99e670af9 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -3,12 +3,14 @@ package util import ( "crypto/md5" // #nosec we need it to for PostgreSQL md5 passwords "encoding/hex" + "fmt" "math/rand" "regexp" "strings" "time" "github.com/motomux/pretty" + resource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/zalando-incubator/postgres-operator/pkg/spec" @@ -127,3 +129,19 @@ func Coalesce(val, defaultVal string) string { } return val } + +// RequestIsSmallerThanLimit parses the two resource quantities and reports whether request is strictly smaller than limit +func RequestIsSmallerThanLimit(requestStr, limitStr string) (bool, error) { + + request, err := resource.ParseQuantity(requestStr) + if err != nil { + return false, fmt.Errorf("could not parse memory request %v : %v", requestStr, err) + } + + limit, err2 := resource.ParseQuantity(limitStr) + if err2 != nil { + return false, fmt.Errorf("could not parse memory limit %v : %v", limitStr, err2) + } + + return request.Cmp(limit) ==
-1, nil +} diff --git a/pkg/util/util_test.go b/pkg/util/util_test.go index 53ac13768..3a02149b4 100644 --- a/pkg/util/util_test.go +++ b/pkg/util/util_test.go @@ -69,6 +69,17 @@ var substringMatch = []struct { {regexp.MustCompile(`aaaa (\d+) bbbb`), "aaaa 123 bbbb", nil}, } +var requestIsSmallerThanLimitTests = []struct { + request string + limit string + out bool +}{ + {"1G", "2G", true}, + {"1G", "1Gi", true}, // G is 1000^3 bytes, Gi is 1024^3 bytes + {"1024Mi", "1G", false}, + {"1e9", "1G", false}, // 1e9 bytes == 1G +} + func TestRandomPassword(t *testing.T) { const pwdLength = 10 pwd := RandomPassword(pwdLength) @@ -143,3 +154,15 @@ func TestMapContains(t *testing.T) { } } } + +func TestRequestIsSmallerThanLimit(t *testing.T) { + for _, tt := range requestIsSmallerThanLimitTests { + res, err := RequestIsSmallerThanLimit(tt.request, tt.limit) + if err != nil { + t.Errorf("RequestIsSmallerThanLimit returned unexpected error: %#v", err) + } + if res != tt.out { + t.Errorf("RequestIsSmallerThanLimit expected: %#v, got: %#v", tt.out, res) + } + } +} diff --git a/run_operator_locally.sh b/run_operator_locally.sh index 301803c35..2594097b2 100755 --- a/run_operator_locally.sh +++ b/run_operator_locally.sh @@ -3,6 +3,11 @@ # Deploy a Postgres operator to a minikube aka local Kubernetes cluster # Optionally re-build the operator binary beforehand to test local changes +# Known limitations: +# 1) minikube provides a single node k8s cluster. 
That is, you will not be able to test functions like pod +# migration between multiple nodes locally +# 2) this script configures the operator via configmap, not the operator CRD + # enable unofficial bash strict mode set -o errexit @@ -13,6 +18,7 @@ IFS=$'\n\t' readonly PATH_TO_LOCAL_OPERATOR_MANIFEST="/tmp/local-postgres-operator-manifest.yaml" readonly PATH_TO_PORT_FORWARED_KUBECTL_PID="/tmp/kubectl-port-forward.pid" +readonly PATH_TO_THE_PG_CLUSTER_MANIFEST="/tmp/minimal-postgres-manifest.yaml" readonly LOCAL_PORT="8080" readonly OPERATOR_PORT="8080" @@ -37,18 +43,16 @@ function retry(){ return 1 } - function display_help(){ - echo "Usage: $0 [ -r | --rebuild-operator ] [ -h | --help ]" + echo "Usage: $0 [ -r | --rebuild-operator ] [ -h | --help ] [ -n | --deploy-new-operator-image ] [ -t | --deploy-pg-to-namespace-test ]" } - function clean_up(){ echo "==== CLEAN UP PREVIOUS RUN ==== " local status - status=$(minikube status --format "{{.MinikubeStatus}}" || true) + status=$(minikube status --format "{{.Host}}" || true) if [[ "$status" = "Running" ]] || [[ "$status" = "Stopped" ]]; then echo "Delete the existing local cluster so that we can cleanly apply resources from scratch..." @@ -56,7 +60,7 @@ function clean_up(){ fi if [[ -e "$PATH_TO_LOCAL_OPERATOR_MANIFEST" ]]; then - rm --verbose "$PATH_TO_LOCAL_OPERATOR_MANIFEST" + rm -v "$PATH_TO_LOCAL_OPERATOR_MANIFEST" fi # the kubectl process does the port-forwarding between operator and local ports @@ -70,7 +74,7 @@ function clean_up(){ if kill "$pid" > /dev/null 2>&1; then echo "Kill the kubectl process responsible for port forwarding for minikube so that we can re-use the same ports for forwarding later..."
fi - rm --verbose "$PATH_TO_PORT_FORWARED_KUBECTL_PID" + rm -v "$PATH_TO_PORT_FORWARED_KUBECTL_PID" fi } @@ -121,9 +125,9 @@ function deploy_self_built_image() { # update the tag in the postgres operator conf # since the image with this tag already exists on the machine, # docker should not attempt to fetch it from the registry due to imagePullPolicy - sed --expression "s/\(image\:.*\:\).*$/\1$TAG/" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" + sed -e "s/\(image\:.*\:\).*$/\1$TAG/; s/smoke-tested-//" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST" - retry "kubectl create -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource" + retry "kubectl apply -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource" } @@ -139,17 +143,18 @@ function start_operator(){ retry "kubectl create -f manifests/\"$file\"" "attempt to create $file resource" done + cp manifests/postgres-operator.yaml $PATH_TO_LOCAL_OPERATOR_MANIFEST + if [[ "$should_build_custom_operator" = true ]]; then # set in main() deploy_self_built_image else - retry "kubectl create -f manifests/postgres-operator.yaml" "attempt to create /postgres-operator.yaml resource" + retry "kubectl create -f ${PATH_TO_LOCAL_OPERATOR_MANIFEST}" "attempt to create ${PATH_TO_LOCAL_OPERATOR_MANIFEST} resource" fi local -r msg="Wait for the postgresql custom resource definition to register..." 
local -r cmd="kubectl get crd | grep --quiet 'postgresqls.acid.zalan.do'" retry "$cmd" "$msg " - kubectl create -f manifests/minimal-postgres-manifest.yaml } @@ -186,16 +191,38 @@ function check_health(){ } +function submit_postgresql_manifest(){ + + echo "==== SUBMIT MINIMAL POSTGRES MANIFEST ==== " + + local namespace="default" + cp manifests/minimal-postgres-manifest.yaml $PATH_TO_THE_PG_CLUSTER_MANIFEST + + if $should_deploy_pg_to_namespace_test; then + kubectl create namespace test + namespace="test" + sed --in-place 's/namespace: default/namespace: test/' $PATH_TO_THE_PG_CLUSTER_MANIFEST + fi + + kubectl create -f $PATH_TO_THE_PG_CLUSTER_MANIFEST + echo "The operator will create the PG cluster with minimal manifest $PATH_TO_THE_PG_CLUSTER_MANIFEST in the ${namespace} namespace" + +} + + function main(){ if ! [[ $(basename "$PWD") == "postgres-operator" ]]; then - echo "Please execute the script only from the root directory of the Postgres opepator repo." + echo "Please execute the script only from the root directory of the Postgres operator repo." 
exit 1 fi trap "echo 'If you observe issues with minikube VM not starting/not proceeding, consider deleting the .minikube dir and/or rebooting before re-running the script'" EXIT - local should_build_custom_operator=false # used in start_operator() + local should_build_custom_operator=false + local should_deploy_pg_to_namespace_test=false + local should_replace_operator_image=false + while true do # if the 1st param is unset, use the empty string as a default value @@ -204,19 +231,32 @@ function main(){ display_help exit 0 ;; - -r | --rebuild-operator) + -r | --rebuild-operator) # with minikube restart should_build_custom_operator=true break ;; + -n | --deploy-new-operator-image) # without minikube restart that takes minutes + should_replace_operator_image=true + break + ;; + -t | --deploy-pg-to-namespace-test) # to test multi-namespace support locally + should_deploy_pg_to_namespace_test=true + break + ;; *) break ;; esac done + if ${should_replace_operator_image}; then + deploy_self_built_image + exit 0 + fi + clean_up start_minikube - kubectl create namespace test start_operator + submit_postgresql_manifest forward_ports check_health