merge master

This commit is contained in:
Sergey Dudoladov 2019-02-21 17:55:55 +01:00
commit ccf4c7c492
80 changed files with 1176 additions and 225 deletions

4
.gitignore vendored
View File

@ -30,3 +30,7 @@ _testmain.go
.idea
scm-source.json
# diagrams
*.aux
*.log

5
.golangci.yml Normal file
View File

@ -0,0 +1,5 @@
# https://github.com/golangci/golangci/wiki/Configuration
service:
prepare:
- make deps

View File

@ -1,2 +1,2 @@
# global owners
* @alexeyklyukin @erthalion @zerg-junior @Jan-M @CyberDem0n @avaczi
* @alexeyklyukin @erthalion @sdudoladov @Jan-M @CyberDem0n @avaczi @FxKu

View File

@ -4,6 +4,10 @@
[![Coverage Status](https://coveralls.io/repos/github/zalando-incubator/postgres-operator/badge.svg)](https://coveralls.io/github/zalando-incubator/postgres-operator)
[![Go Report Card](https://goreportcard.com/badge/github.com/zalando-incubator/postgres-operator)](https://goreportcard.com/report/github.com/zalando-incubator/postgres-operator)
[![GoDoc](https://godoc.org/github.com/zalando-incubator/postgres-operator?status.svg)](https://godoc.org/github.com/zalando-incubator/postgres-operator)
[![golangci](https://golangci.com/badges/github.com/zalando-incubator/postgres-operator.svg)](https://golangci.com/r/github.com/zalando-incubator/postgres-operator)
<img src="docs/diagrams/logo.png" width="200">
## Introduction
@ -27,8 +31,22 @@ manages PostgreSQL clusters on Kubernetes:
3. Finally, the operator periodically synchronizes the actual state of each
Postgres cluster with the desired state defined in the cluster's manifest.
There is a browser-friendly version of this documentation at
[postgres-operator.readthedocs.io](https://postgres-operator.readthedocs.io)
Here is a diagram that summarizes what is created by the operator when a
new Postgres cluster CRD is submitted:
![postgresql-operator](docs/diagrams/operator.png "K8S resources, created by operator")
This picture is not complete without an overview of what is inside a pod, so
let's zoom in:
![pod](docs/diagrams/pod.png "Database pod components")
These two diagrams should help you to understand the basics of what kind of
functionality the operator provides. Below we discuss everything in more
detail.
There is a browser-friendly version of this documentation at [postgres-operator.readthedocs.io](https://postgres-operator.readthedocs.io)
## Table of contents
@ -42,6 +60,13 @@ There is a browser-friendly version of this documentation at
the rest of the document is a tutorial to get you up and running with the operator on Minikube.
## Community
There are two places to get in touch with the community:
1. The [GitHub issue tracker](https://github.com/zalando-incubator/postgres-operator/issues)
2. The #postgres-operator slack channel under [Postgres Slack](https://postgres-slack.herokuapp.com)
## Quickstart
Prerequisites:
@ -90,6 +115,8 @@ cd postgres-operator
./run_operator_locally.sh
```
Note that we provide the `/manifests` directory as an example only; you should consider adjusting the manifests to your particular setting.
## Running and testing the operator
The best way to test the operator is to run it locally in [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). See the developer docs (`docs/developer.yaml`) for details.

View File

@ -47,14 +47,14 @@ func init() {
log.Printf("Fully qualified configmap name: %v", config.ConfigMapName)
}
if crd_interval := os.Getenv("CRD_READY_WAIT_INTERVAL"); crd_interval != "" {
config.CRDReadyWaitInterval = mustParseDuration(crd_interval)
if crdInterval := os.Getenv("CRD_READY_WAIT_INTERVAL"); crdInterval != "" {
config.CRDReadyWaitInterval = mustParseDuration(crdInterval)
} else {
config.CRDReadyWaitInterval = 4 * time.Second
}
if crd_timeout := os.Getenv("CRD_READY_WAIT_TIMEOUT"); crd_timeout != "" {
config.CRDReadyWaitTimeout = mustParseDuration(crd_timeout)
if crdTimeout := os.Getenv("CRD_READY_WAIT_TIMEOUT"); crdTimeout != "" {
config.CRDReadyWaitTimeout = mustParseDuration(crdTimeout)
} else {
config.CRDReadyWaitTimeout = 30 * time.Second
}

View File

@ -20,9 +20,6 @@ pipeline:
mv go /usr/local
ln -s /usr/local/go/bin/go /usr/bin/go
go version
- desc: 'Install Docker'
cmd: |
curl -fLOsS https://delivery.cloud.zalando.com/utils/ensure-docker && sh ensure-docker && rm ensure-docker
- desc: 'Symlink sources into the GOPATH'
cmd: |
mkdir -p $OPERATOR_TOP_DIR

View File

@ -1,6 +1,6 @@
## Create ConfigMap
ConfigMap is used to store the configuration of the operator
A ConfigMap is used to store the configuration of the operator.
```bash
$ kubectl create -f manifests/configmap.yaml
@ -41,12 +41,14 @@ manifests:
```bash
$ kubectl create namespace test
$ kubectl config set-context --namespace=test
$ kubectl config set-context $(kubectl config current-context) --namespace=test
```
All subsequent `kubectl` commands will work with the `test` namespace. The
operator will run in this namespace and look up needed resources - such as its
config map - there.
operator will run in this namespace and look up needed resources - such as its
ConfigMap - there. Please note that the namespace for service accounts and
cluster role bindings in [operator RBAC rules](../manifests/operator-service-account-rbac.yaml)
needs to be adjusted to the non-default value.
## Specify the namespace to watch
@ -56,8 +58,10 @@ replicas to 5" and reacting to the requests, in this example by actually
scaling up.
By default, the operator watches the namespace it is deployed to. You can
change this by altering the `WATCHED_NAMESPACE` env var in the operator
deployment manifest or the `watched_namespace` field in the operator configmap.
change this by setting the `WATCHED_NAMESPACE` var in the `env` section of the
[operator deployment](../manifests/postgres-operator.yaml) manifest or by
altering the `watched_namespace` field in the operator
[ConfigMap](../manifests/configmap.yaml#L6).
In case both are set, the env var takes precedence. To make the
operator listen to all namespaces, explicitly set the field/env var to "`*`".
@ -75,7 +79,7 @@ in the case database pods need to talk to the Kubernetes API (e.g. when using
Kubernetes-native configuration of Patroni). The operator checks that the
`pod_service_account_name` exists in the target namespace, and, if not, deploys
there the `pod_service_account_definition` from the operator
[`Config`](pkg/util/config/config.go) with the default value of:
[`Config`](../pkg/util/config/config.go) with the default value of:
```yaml
apiVersion: v1
@ -86,13 +90,13 @@ metadata:
In this definition, the operator overwrites the account's name to match
`pod_service_account_name` and the `default` namespace to match the target
namespace. The operator performs **no** further syncing of this account.
namespace. The operator performs **no** further syncing of this account.
## Role-based access control for the operator
The `manifests/operator-service-account-rbac.yaml` defines cluster roles and bindings needed
for the operator to function under access control restrictions. To deploy the
operator with this RBAC policy use:
The `manifests/operator-service-account-rbac.yaml` defines cluster roles and
bindings needed for the operator to function under access control restrictions.
To deploy the operator with this RBAC policy use:
```bash
$ kubectl create -f manifests/configmap.yaml
@ -103,18 +107,18 @@ operator with this RBAC policy use:
Note that the service account in `operator-rbac.yaml` is named
`zalando-postgres-operator`. You may have to change the `service_account_name`
in the operator configmap and `serviceAccountName` in the postgres-operator
in the operator ConfigMap and `serviceAccountName` in the postgres-operator
deployment appropriately.
This is done intentionally, as to avoid breaking those setups that already work
This is done intentionally to avoid breaking those setups that already work
with the default `operator` account. In the future the operator should ideally
be run under the `zalando-postgres-operator` service account.
The service account defined in `operator-rbac.yaml` acquires some privileges
not really used by the operator (i.e. we only need list and watch on
configmaps), this is also done intentionally to avoid breaking things if
someone decides to configure the same service account in the operator's
configmap to run postgres clusters.
The service account defined in `operator-rbac.yaml` acquires some privileges
not really used by the operator (e.g. we only need `list` and `watch` on
`configmaps` resources). This is also done intentionally to avoid breaking
things if someone decides to configure the same service account in the
operator's ConfigMap to run postgres clusters.
### Use taints and tolerations for dedicated PostgreSQL nodes
@ -142,14 +146,101 @@ data:
...
```
Note that the Kubernetes version 1.13 brings [taint-based eviction](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/#taint-based-evictions) to the beta stage and enables it by default.
Postgres pods by default receive tolerations for `unreachable` and `noExecute` taints with the timeout of `5m`.
Depending on your setup, you may want to adjust these parameters to prevent master pods from being evicted by the Kubernetes runtime.
To prevent eviction completely, specify the toleration by leaving out the `tolerationSeconds` value (similar to how Kubernetes' own DaemonSets are configured).
### Enable pod anti affinity
To ensure Postgres pods are running on different topologies, you can use [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/)
and configure the required topology in the operator ConfigMap.
Enable pod anti affinity by adding the following line to the operator ConfigMap:
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: postgres-operator
data:
enable_pod_antiaffinity: "true"
```
By default the topology key for the pod anti affinity is set to `kubernetes.io/hostname`.
You can set another topology key, e.g. `failure-domain.beta.kubernetes.io/zone`, by adding the following line
to the operator ConfigMap (see [built-in node labels](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels) for available topology keys):
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: postgres-operator
data:
enable_pod_antiaffinity: "true"
pod_antiaffinity_topology_key: "failure-domain.beta.kubernetes.io/zone"
```
### Add cluster-specific labels
In some cases, you might want to add `labels` that are specific to a given
postgres cluster, in order to identify its child objects.
The typical use case is to add labels that identify the `Pods` created by the
operator, in order to implement fine-controlled `NetworkPolicies`.
**OperatorConfiguration**
```yaml
apiVersion: "acid.zalan.do/v1"
kind: OperatorConfiguration
metadata:
name: postgresql-operator-configuration
configuration:
kubernetes:
inherited_labels:
- application
- environment
...
```
**cluster manifest**
```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
name: demo-cluster
labels:
application: my-app
environment: demo
spec:
...
```
**network policy**
```yaml
kind: NetworkPolicy
apiVersion: networking.k8s.io/v1
metadata:
name: netpol-example
spec:
podSelector:
matchLabels:
application: my-app
environment: demo
...
```
## Custom Pod Environment Variables
It is possible to configure a config map which is used by the Postgres pods as
It is possible to configure a ConfigMap which is used by the Postgres pods as
an additional provider for environment variables.
One use case is to customize the Spilo image and configure it with environment
variables. The config map with the additional settings is configured in the
operator's main config map:
variables. The ConfigMap with the additional settings is configured in the
operator's main ConfigMap:
**postgres-operator ConfigMap**
@ -186,12 +277,12 @@ instances permitted by each Postgres cluster managed by the operator. If either
`min_instances` or `max_instances` is set to a non-zero value, the operator may
adjust the number of instances specified in the cluster manifest to match
either the min or the max boundary. For instance, if a cluster manifest has 1
instance and the min_instances is set to 3, the cluster will be created with 3
instances. By default, both parameters are set to -1.
instance and the `min_instances` is set to 3, the cluster will be created with 3
instances. By default, both parameters are set to `-1`.
## Load balancers
For any Postgresql/Spilo cluster, the operator creates two separate k8s
For any Postgresql/Spilo cluster, the operator creates two separate Kubernetes
services: one for the master pod and one for replica pods. To expose these
services to an outer network, one can attach load balancers to them by setting
`enableMasterLoadBalancer` and/or `enableReplicaLoadBalancer` to `true` in the
@ -200,29 +291,47 @@ manifest, the operator configmap's settings `enable_master_load_balancer` and
`enable_replica_load_balancer` apply. Note that the operator settings affect
all Postgresql services running in all namespaces watched by the operator.
To limit the range of IP adresses that can reach a load balancer, specify desired ranges in the `allowedSourceRanges` field (applies to both master and replica LBs). To prevent exposing LBs to the entire Internet, this field is set at cluster creation time to `127.0.0.1/32` unless overwritten explicitly. If you want to revoke all IP ranges from an existing cluster, please set the `allowedSourceRanges` field to `127.0.0.1/32` or to the empty sequence `[]`. Setting the field to `null` or omitting entirely may lead to k8s removing this field from the manifest due to [the k8s handling of null fields](https://kubernetes.io/docs/concepts/overview/object-management-kubectl/declarative-config/#how-apply-calculates-differences-and-merges-changes). Then the resultant manifest will not have the necessary change, and the operator will respectively do noting with the existing source ranges.
To limit the range of IP addresses that can reach a load balancer, specify the
desired ranges in the `allowedSourceRanges` field (applies to both master and
replica load balancers). To prevent exposing load balancers to the entire
Internet, this field is set at cluster creation time to `127.0.0.1/32` unless
overwritten explicitly. If you want to revoke all IP ranges from an existing
cluster, please set the `allowedSourceRanges` field to `127.0.0.1/32` or to an
empty sequence `[]`. Setting the field to `null` or omitting it entirely may
lead to Kubernetes removing this field from the manifest due to its
[handling of null fields](https://kubernetes.io/docs/concepts/overview/object-management-kubectl/declarative-config/#how-apply-calculates-differences-and-merges-changes).
Then the resultant manifest will not contain the necessary change, and the
operator will accordingly do nothing with the existing source ranges.
## Running periodic 'autorepair' scans of Kubernetes objects
The Postgres operator periodically scans all Kubernetes objects belonging to
each cluster and repairs all discrepancies between them and the definitions
generated from the current cluster manifest. There are two types of scans: a
`sync scan`, running every `resync_period` seconds for every cluster, and the
`repair scan`, coming every `repair_period` only for those clusters that didn't
generated from the current cluster manifest. There are two types of scans:
* `sync scan`, running every `resync_period` seconds for every cluster
* `repair scan`, coming every `repair_period` only for those clusters that didn't
report success as a result of the last operation applied to them.
## Postgres roles supported by the operator
The operator is capable of maintaining roles of multiple kinds within a Postgres database cluster:
The operator is capable of maintaining roles of multiple kinds within a
Postgres database cluster:
1. **System roles** are roles necessary for the proper work of Postgres itself such as a replication role or the initial superuser role. The operator delegates creating such roles to Patroni and only establishes relevant secrets.
* **System roles** are roles necessary for the proper work of Postgres itself such as a replication role or the initial superuser role. The operator delegates creating such roles to Patroni and only establishes relevant secrets.
2. **Infrastructure roles** are roles for processes originating from external systems, e.g. monitoring robots. The operator creates such roles in all PG clusters it manages assuming k8s secrets with the relevant credentials exist beforehand.
* **Infrastructure roles** are roles for processes originating from external systems, e.g. monitoring robots. The operator creates such roles in all Postgres clusters it manages assuming that Kubernetes secrets with the relevant credentials exist beforehand.
3. **Per-cluster robot users** are also roles for processes originating from external systems but defined for an individual Postgres cluster in its manifest. A typical example is a role for connections from an application that uses the database.
* **Per-cluster robot users** are also roles for processes originating from external systems but defined for an individual Postgres cluster in its manifest. A typical example is a role for connections from an application that uses the database.
4. **Human users** originate from the Teams API that returns list of the team members given a team id. Operator differentiates between (a) product teams that own a particular Postgres cluster and are granted admin rights to maintain it, and (b) Postgres superuser teams that get the superuser access to all PG databases running in a k8s cluster for the purposes of maintaining and troubleshooting.
* **Human users** originate from the Teams API that returns a list of the team members given a team id. The operator differentiates between (a) product teams that own a particular Postgres cluster and are granted admin rights to maintain it, and (b) Postgres superuser teams that get the superuser access to all Postgres databases running in a Kubernetes cluster for the purposes of maintaining and troubleshooting.
## Understanding rolling update of Spilo pods
The operator logs reasons for a rolling update with the `info` level and a diff between the old and new StatefulSet specs with the `debug` level. To benefit from numerous escape characters in the latter log entry, view it in CLI with `echo -e`. Note that the resultant message will contain some noise because the `PodTemplate` used by the operator is yet to be updated with the default values used internally in Kubernetes.
The operator logs reasons for a rolling update with the `info` level and
a diff between the old and new StatefulSet specs with the `debug` level.
To read the latter log entry with the escaped characters rendered, view it
in CLI with `echo -e`. Note that the resultant message will contain some
noise because the `PodTemplate` used by the operator is yet to be updated
with the default values used internally in Kubernetes.

View File

@ -188,13 +188,13 @@ defaults to 4)
* /workers/$id/logs - log of the operations performed by a given worker
* /clusters/ - list of teams and clusters known to the operator
* /clusters/$team - list of clusters for the given team
* /cluster/$team/$clustername - detailed status of the cluster, including the
* /clusters/$team/$namespace/$clustername - detailed status of the cluster, including the
specifications for CRD, master and replica services, endpoints and
statefulsets, as well as any errors and the worker that cluster is assigned
to.
* /cluster/$team/$clustername/logs/ - logs of all operations performed to the
* /clusters/$team/$namespace/$clustername/logs/ - logs of all operations performed to the
cluster so far.
* /cluster/$team/$clustername/history/ - history of cluster changes triggered
* /clusters/$team/$namespace/$clustername/history/ - history of cluster changes triggered
by the changes of the manifest (shows the somewhat obscure diff and what
exactly has triggered the change)

11
docs/diagrams/Makefile Normal file
View File

@ -0,0 +1,11 @@
OBJ=$(patsubst %.tex, %.png, $(wildcard *.tex))
.PHONY: all
all: $(OBJ)
%.pdf: %.tex
lualatex $< -shell-escape $@
%.png: %.pdf
convert -flatten -density 300 $< -quality 90 $@

BIN
docs/diagrams/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

BIN
docs/diagrams/operator.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 253 KiB

101
docs/diagrams/operator.tex Normal file
View File

@ -0,0 +1,101 @@
\documentclass{article}
\usepackage{tikz}
\usepackage[graphics,tightpage,active]{preview}
\usetikzlibrary{arrows, shadows.blur, positioning, fit, calc, backgrounds}
\usepackage{lscape}
\pagenumbering{gobble}
\PreviewEnvironment{tikzpicture}
\PreviewEnvironment{equation}
\PreviewEnvironment{equation*}
\newlength{\imagewidth}
\newlength{\imagescale}
\pagestyle{empty}
\thispagestyle{empty}
\begin{document}
\begin{center}
\begin{tikzpicture}[
scale=0.5,transform shape,
font=\sffamily,
every matrix/.style={ampersand replacement=\&,column sep=2cm,row sep=2cm},
operator/.style={draw,solid,thick,circle,fill=red!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
component/.style={draw,solid,thick,rounded corners,fill=yellow!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
border/.style={draw,dashed,rounded corners,fill=gray!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
pod/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
service/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
endpoint/.style={draw,solid,thick,rounded corners,fill=blue!20, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
secret/.style={draw,solid,thick,rounded corners,fill=blue!20, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
pvc/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
label/.style={rectangle,inner sep=0,outer sep=0},
to/.style={->,>=stealth',shorten >=1pt,semithick,font=\sffamily\footnotesize},
every node/.style={align=center}]
% Position the nodes using a matrix layout
\matrix{
\& \node[component] (crd) {CRD}; \\
\& \node[operator] (operator) {Operator}; \\
\path
node[service] (service-master) {Master}
node[label, right of=service-master] (service-middle) {}
node[label, below of=service-middle] (services-label) {Services}
node[service, right=.5cm of service-master] (service-replica) {Replica}
node[border, behind path,
fit=(service-master)(service-replica)(services-label)
] (services) {};
\&
\node[component] (sts) {Statefulset}; \& \node[component] (pdb) {Pod Disruption Budget}; \\
\path
node[service] (master-endpoint) {Master}
node[service, right=.5cm of master-endpoint] (replica-endpoint) {Replica}
node[label, right of=master-endpoint] (endpoint-middle) {}
node[label, below of=endpoint-middle] (endpoint-label) {Endpoints}
node[border, behind path,
fit=(master-endpoint)(replica-endpoint)(endpoint-label)
] (endpoints) {}; \&
\node[component] (pod-template) {Pod Template}; \&
\node[border] (secrets) {
\begin{tikzpicture}[]
\node[secret] (users-secret) at (0, 0) {Users};
\node[secret] (robots-secret) at (2, 0) {Robots};
\node[secret] (standby-secret) at (4, 0) {Standby};
\end{tikzpicture} \\
Secrets
}; \\ \&
\path
node[pod] (replica1-pod) {Replica}
node[pod, left=.5cm of replica1-pod] (master-pod) {Master}
node[pod, right=.5cm of replica1-pod] (replica2-pod) {Replica}
node[label, below of=replica1-pod] (pod-label) {Pods}
node[border, behind path,
fit=(master-pod)(replica1-pod)(replica2-pod)(pod-label)
] (pods) {}; \\ \&
\path
node[pvc] (replica1-pvc) {Replica}
node[pvc, left=.5cm of replica1-pvc] (master-pvc) {Master}
node[pvc, right=.5cm of replica1-pvc] (replica2-pvc) {Replica}
node[label, below of=replica1-pvc] (pvc-label) {Persistent Volume Claims}
node[border, behind path,
fit=(master-pvc)(replica1-pvc)(replica2-pvc)(pvc-label)
] (pvcs) {}; \&
\\ \& \\
};
% Draw the arrows between the nodes and label them.
\draw[to] (crd) -- node[midway,above] {} node[midway,below] {} (operator);
\draw[to] (operator) -- node[midway,above] {} node[midway,below] {} (sts);
\draw[to] (operator) -- node[midway,above] {} node[midway,below] {} (secrets);
\draw[to] (operator) -| node[midway,above] {} node[midway,below] {} (pdb);
\draw[to] (service-master) -- node[midway,above] {} node[midway,below] {} (master-endpoint);
\draw[to] (service-replica) -- node[midway,above] {} node[midway,below] {} (replica-endpoint);
\draw[to] (master-pod) -- node[midway,above] {} node[midway,below] {} (master-pvc);
\draw[to] (replica1-pod) -- node[midway,above] {} node[midway,below] {} (replica1-pvc);
\draw[to] (replica2-pod) -- node[midway,above] {} node[midway,below] {} (replica2-pvc);
\draw[to] (operator) -| node[midway,above] {} node[midway,below] {} (services);
\draw[to] (sts) -- node[midway,above] {} node[midway,below] {} (pod-template);
\draw[to] (pod-template) -- node[midway,above] {} node[midway,below] {} (pods);
\end{tikzpicture}
\end{center}
\end{document}

BIN
docs/diagrams/pod.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 236 KiB

92
docs/diagrams/pod.tex Normal file
View File

@ -0,0 +1,92 @@
\documentclass{article}
\usepackage{tikz}
\usepackage[graphics,tightpage,active]{preview}
\usetikzlibrary{arrows, shadows.blur, positioning, fit, calc, backgrounds}
\usepackage{lscape}
\pagenumbering{gobble}
\PreviewEnvironment{tikzpicture}
\PreviewEnvironment{equation}
\PreviewEnvironment{equation*}
\newlength{\imagewidth}
\newlength{\imagescale}
\pagestyle{empty}
\thispagestyle{empty}
\begin{document}
\begin{center}
\begin{tikzpicture}[
scale=0.5,transform shape,
font=\sffamily,
every matrix/.style={ampersand replacement=\&,column sep=2cm,row sep=2cm},
pod/.style={draw,solid,thick,circle,fill=red!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
component/.style={draw,solid,thick,rounded corners,fill=yellow!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
border/.style={draw,dashed,rounded corners,fill=gray!20,inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
volume/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
sidecar/.style={draw,solid,thick,rounded corners,fill=blue!20, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
k8s-label/.style={draw,solid,thick,rounded corners,fill=blue!20, minimum width=1.5cm, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
affinity/.style={draw,solid,thick,rounded corners,fill=blue!20, minimum width=2cm, inner sep=.3cm, blur shadow={shadow blur steps=5,shadow blur extra rounding=1.3pt}},
label/.style={rectangle,inner sep=0,outer sep=0},
to/.style={->,>=stealth',shorten >=1pt,semithick,font=\sffamily\footnotesize},
every node/.style={align=center}]
% Position the nodes using a matrix layout
\matrix{
\path
node[k8s-label] (app-label) {App}
node[k8s-label, right=.25cm of app-label] (role-label) {Role}
node[k8s-label, right=.25cm of role-label] (custom-label) {Custom}
node[label, below of=role-label] (k8s-label-label) {K8S Labels}
node[border, behind path,
fit=(app-label)(role-label)(custom-label)(k8s-label-label)
] (k8s-labels) {}; \& \&
\path
node[affinity] (affinity) {Affinity}
node[label, right=.25cm of affinity] (affinity-middle) {}
node[affinity, right=.25cm of affinity-middle] (anti-affinity) {Anti-affinity}
node[label, below of=affinity-middle] (affinity-label) {Assigning to nodes}
node[border, behind path,
fit=(affinity)(anti-affinity)(affinity-label)
] (affinity) {}; \\
\& \node[pod] (pod) {Pod}; \& \\
\path
node[volume, minimum width={width("shm-volume")}] (data-volume) {Data}
node[volume, right=.25cm of data-volume, minimum width={width("shm-volume")}] (tokens-volume) {Tokens}
node[volume, right=.25cm of tokens-volume] (shm-volume) {/dev/shm}
node[label, below of=tokens-volume] (volumes-label) {Volumes}
node[border, behind path,
fit=(data-volume)(shm-volume)(tokens-volume)(volumes-label)
] (volumes) {}; \&
\node[component] (spilo) {Spilo}; \&
\node[sidecar] (scalyr) {Scalyr}; \& \\ \&
\path
node[component] (patroni) {Patroni}
node[component, below=.25cm of patroni] (postgres) {PostgreSQL}
node[border, behind path,
fit=(postgres)(patroni)
] (spilo-components) {}; \&
\path
node[sidecar] (custom-sidecar1) {User defined}
node[label, right=.25cm of custom-sidecar1] (sidecars-middle) {}
node[sidecar, right=.25cm of sidecars-middle] (custom-sidecar2) {User defined}
node[label, below of=sidecars-middle] (sidecars-label) {Custom sidecars}
node[border, behind path,
fit=(custom-sidecar1)(custom-sidecar2)(sidecars-label)
] (sidecars) {};
\\ \& \\
};
% Draw the arrows between the nodes and label them.
\draw[to] (pod) to [bend left=25] (volumes);
\draw[to] (pod) to [bend left=25] (k8s-labels);
\draw[to] (pod) to [bend right=25] (affinity);
\draw[to] (pod) to [bend right=25] (scalyr);
\draw[to] (pod) to [bend right=25] (sidecars);
\draw[to] (pod) -- node[midway,above] {} node[midway,below] {} (spilo);
\draw[to] (spilo) -- node[midway,above] {} node[midway,below] {} (spilo-components);
\end{tikzpicture}
\end{center}
\end{document}

64
docs/gsoc-2019/ideas.md Normal file
View File

@ -0,0 +1,64 @@
# Google Summer of Code 2019
## Application steps
1. Please carefully read the official [Google Summer of Code Student Guide](https://google.github.io/gsocguides/student/)
2. Join the #postgres-operator slack channel under [Postgres Slack](https://postgres-slack.herokuapp.com) to introduce yourself to the community and get quick feedback on your application.
3. Select a project from the list of ideas below or propose your own.
4. Write a proposal draft. Please open an issue with the label `gsoc2019_application` in the [operator repository](https://github.com/zalando-incubator/postgres-operator/issues) so that the community members can publicly review it. See proposal instructions below for details.
5. Submit proposal and the proof of enrollment before April 9 2019 18:00 UTC through the web site of the Program.
## Project ideas
### Place database pods into the "Guaranteed" Quality-of-Service class
* **Description**: Kubernetes runtime does not kill pods in this class as long as they stay within their resource limits, which is desirable for the DB pods serving production workloads. To be assigned to that class, a pod's resource requests must equal its limits. The task is to add an option such as `enableGuaranteedQoSClass` to the Postgres manifest and the operator configmap that forcibly rewrites pod resource requests to match the limits.
* **Recommended skills**: golang, basic Kubernetes abstractions
* **Difficulty**: moderate
* **Mentor(s)**: Felix Kunde [@FxKu](https://github.com/fxku), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov)
### Implement the kubectl plugin for the Postgres CustomResourceDefinition
* **Description**: [kubectl plugins](https://kubernetes.io/docs/tasks/extend-kubectl/kubectl-plugins/) enable extending the Kubernetes command-line client `kubectl` with commands to manage custom resources. The task is to design and implement a plugin for the `kubectl postgres` command,
that can enable, for example, correct deletion or major version upgrade of Postgres clusters.
* **Recommended skills**: golang, shell scripting, operational experience with Kubernetes
* **Difficulty**: moderate to medium, depending on the plugin design
* **Mentor(s)**: Felix Kunde [@FxKu](https://github.com/fxku), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov)
### Implement the openAPIV3Schema for the Postgres CRD
* **Description**: at present the operator validates a database manifest on its own.
It will be helpful to reject erroneous manifests before they reach the operator using the [native Kubernetes CRD validation](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/#validation). It is up to the student to decide whether to write the schema manually or to adopt existing [schema generator developed for the Prometheus project](https://github.com/ant31/crd-validation).
* **Recommended skills**: golang, JSON schema
* **Difficulty**: medium
* **Mentor(s)**: Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov)
* **Issue**: [#388](https://github.com/zalando-incubator/postgres-operator/issues/388)
### Design a solution for the local testing of the operator
* **Description**: The current way of testing is to run minikube, either manually or with some tooling around it like `/run-operator_locally.sh` or Vagrant. This has at least three problems:
First, minikube is a single node cluster, so it is unsuitable for testing vital functions such as pod migration between nodes. Second, minikube starts slowly; that prolongs local testing.
Third, every contributor needs to come up with their own solution for local testing. The task is to come up with a better option which will enable us to conveniently and uniformly run e2e tests locally / potentially in Travis CI.
A promising option is Kubernetes' own [kind](https://github.com/kubernetes-sigs/kind).
* **Recommended skills**: Docker, shell scripting, basic Kubernetes abstractions
* **Difficulty**: medium to hard depending on the selected design
* **Mentor(s)**: Dmitry Dolgov [@erthalion](https://github.com/erthalion), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov)
* **Issue**: [#475](https://github.com/zalando-incubator/postgres-operator/issues/475)
### Detach a Postgres cluster from the operator for maintenance
* **Description**: sometimes a Postgres cluster requires manual maintenance. During such maintenance the operator should ignore all the changes manually applied to the cluster.
Currently the only way to achieve this behavior is to shut down the operator altogether, for instance by scaling down the operator's own deployment to zero pods. That approach evidently affects all Postgres databases under the operator control and thus is highly undesirable in production Kubernetes clusters. It would be much better to be able to detach only the desired Postgres cluster from the operator for the time being and re-attach it again after maintenance.
* **Recommended skills**: golang, architecture of a Kubernetes operator
* **Difficulty**: hard - requires significant modification of the operator's internals and careful consideration of the corner cases.
* **Mentor(s)**: Dmitry Dolgov [@erthalion](https://github.com/erthalion), Sergey Dudoladov [@sdudoladov](https://github.com/sdudoladov)
* **Issue**: [#421](https://github.com/zalando-incubator/postgres-operator/issues/421)
### Propose your own idea
Feel free to come up with your own ideas. For inspiration,
see [our bug tracker](https://github.com/zalando-incubator/postgres-operator/issues),
the [official `CustomResourceDefinition` docs](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/)
and [other operators](https://github.com/operator-framework/awesome-operators).

View File

@ -51,7 +51,9 @@ Please, report any issues discovered to https://github.com/zalando-incubator/pos
## Talks
1. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni)
1. "PostgreSQL and Kubernetes: DBaaS without a vendor-lock" talk by Oleksii Kliukin, PostgreSQL Sessions 2018: [video](https://www.youtube.com/watch?v=q26U2rQcqMw) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-and-kubernetes-dbaas-without-a-vendor-lock)
2. "PostgreSQL High Availability on Kubernetes with Patroni" talk by Oleksii Kliukin, Atmosphere 2018: [video](https://www.youtube.com/watch?v=cFlwQOPPkeg) | [slides](https://speakerdeck.com/alexeyklyukin/postgresql-high-availability-on-kubernetes-with-patroni)
2. "Blue elephant on-demand: Postgres + Kubernetes" talk by Oleksii Kliukin and Jan Mussler, FOSDEM 2018: [video](https://fosdem.org/2018/schedule/event/blue_elephant_on_demand_postgres_kubernetes/) | [slides (pdf)](https://www.postgresql.eu/events/fosdem2018/sessions/session/1735/slides/59/FOSDEM%202018_%20Blue_Elephant_On_Demand.pdf)

View File

@ -35,6 +35,14 @@ Those parameters are grouped under the `metadata` top-level key.
namespace. Optional (if present, should match the namespace where the
manifest is applied).
* **labels**
if labels are matching one of the `inherited_labels` [configured in the
operator parameters](operator_parameters.md#kubernetes-resources),
they will automatically be added to all the objects (StatefulSet, Service,
Endpoints, etc.) that are created by the operator.
Labels that are set here but not listed as `inherited_labels` in the operator
parameters are ignored.
## Top-level parameters
Those are parameters grouped directly under the `spec` key in the manifest.
@ -97,6 +105,18 @@ Those are parameters grouped directly under the `spec` key in the manifest.
is taken from the `pod_priority_class_name` operator parameter, if not set
then the default priority class is taken. The priority class itself must be defined in advance.
* **enableShmVolume**
Start a database pod without limitations on shm memory. By default docker
limits `/dev/shm` to `64M` (see e.g. the [docker
issue](https://github.com/docker-library/postgres/issues/416)), which could be
not enough if PostgreSQL uses parallel workers heavily. If this option is
present and its value is `true`, a new tmpfs volume will be mounted to the
target database pod to remove this limitation. If it's not present, the decision
about mounting a volume will be made based on operator configuration
(`enable_shm_volume`, which is `true` by default). If it's present and its value
is `false`, then no volume will be mounted no matter how the operator was
configured (so you can override the operator configuration).
## Postgres parameters
Those parameters are grouped under the `postgresql` top-level key.
@ -112,6 +132,7 @@ Those parameters are grouped under the `postgresql` top-level key.
cluster. Optional (Spilo automatically sets reasonable defaults for
parameters like work_mem or max_connections).
## Patroni parameters
Those parameters are grouped under the `patroni` top-level key. See the [patroni

View File

@ -10,29 +10,37 @@ configuration.
configuration structure. There is an
[example](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/configmap.yaml)
* CRD-based configuration. The configuration is stored in the custom YAML
manifest, an instance of the custom resource definition (CRD) called
`OperatorConfiguration`. This CRD is registered by the operator
during the start when `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable is
set to a non-empty value. The CRD-based configuration is a regular YAML
document; non-scalar keys are simply represented in the usual YAML way. The
usage of the CRD-based configuration is triggered by setting the
`POSTGRES_OPERATOR_CONFIGURATION_OBJECT` variable, which should point to the
`postgresql-operator-configuration` object name in the operators namespace.
* CRD-based configuration. The configuration is stored in a custom YAML
manifest. The manifest is an instance of the custom resource definition (CRD) called
`OperatorConfiguration`. The operator registers this CRD
during the start and uses it for configuration if the [operator deployment manifest](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L21) sets the `POSTGRES_OPERATOR_CONFIGURATION_OBJECT` env variable to a non-empty value. The variable should point to the
`postgresql-operator-configuration` object in the operator's namespace.
The CRD-based configuration is a regular YAML
document; non-scalar keys are simply represented in the usual YAML way.
There are no default values built-in in the operator, each parameter that is
not supplied in the configuration receives an empty value. In order to
create your own configuration just copy the [default
one](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgresql-operator-default-configuration.yaml)
and change it.
CRD-based configuration is more natural and powerful then the one based on
To test the CRD-based configuration locally, use the following
```bash
kubectl create -f manifests/operator-service-account-rbac.yaml
kubectl create -f manifests/postgres-operator.yaml # set the env var as mentioned above
kubectl create -f manifests/postgresql-operator-default-configuration.yaml
kubectl get operatorconfigurations postgresql-operator-default-configuration -o yaml
```
Note that the operator first registers the definition of the CRD `OperatorConfiguration` and then waits for an instance of the CRD to be created. In between these two events the operator pod may be failing since it cannot fetch the not-yet-existing `OperatorConfiguration` instance.
The CRD-based configuration is more powerful than the one based on
ConfigMaps and should be used unless there is a compatibility requirement to
use an already existing configuration. Even in that case, it should be rather
straightforward to convert the configmap based configuration into the CRD-based
one and restart the operator. The ConfigMaps-based configuration will be
deprecated and subsequently removed in future releases.
Note that for the CRD-based configuration configuration groups below correspond
Note that for the CRD-based configuration groups of configuration options below correspond
to the non-leaf keys in the target YAML (i.e. for the Kubernetes resources the
key is `kubernetes`). The key is mentioned alongside the group description. The
ConfigMap-based configuration is flat and does not allow non-leaf keys.
@ -46,7 +54,6 @@ They will be deprecated and removed in the future.
Variable names are underscore-separated words.
## General
Those are top-level keys, containing both leaf keys and groups.
@ -165,6 +172,14 @@ configuration they are grouped under the `kubernetes` key.
list of `name:value` pairs for additional labels assigned to the cluster
objects. The default is `application:spilo`.
* **inherited_labels**
list of labels that can be inherited from the cluster manifest, and added to
each child object (`StatefulSet`, `Pod`, `Service` and `Endpoints`) created by
the operator.
Typical use case is to dynamically pass labels that are specific to a given
postgres cluster, in order to implement `NetworkPolicy`.
The default is empty.
* **cluster_name_label**
name of the label assigned to Kubernetes objects created by the operator that
indicates which cluster a given object belongs to. The default is
@ -198,6 +213,14 @@ configuration they are grouped under the `kubernetes` key.
that should be assigned to the Postgres pods. The priority class itself must be defined in advance.
Default is empty (use the default priority class).
* **enable_pod_antiaffinity**
toggles [pod anti affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/) on the Postgres pods, to avoid multiple pods
of the same Postgres cluster in the same topology, e.g. node. The default is `false`.
* **pod_antiaffinity_topology_key**
override
[topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#interlude-built-in-node-labels)
for pod anti affinity. The default is `kubernetes.io/hostname`.
## Kubernetes resource requests
@ -221,6 +244,17 @@ CRD-based configuration.
memory limits for the postgres containers, unless overridden by cluster-specific
settings. The default is `1Gi`.
* **set_memory_request_to_limit**
Set `memory_request` to `memory_limit` for all Postgres clusters (the default value is also increased). This prevents certain cases of memory overcommitment at the cost of overprovisioning memory and potential scheduling problems for containers with high memory limits due to the lack of memory on Kubernetes cluster nodes. This affects all containers created by the operator (Postgres, Scalyr sidecar, and other sidecars); to set resources for the operator's own container, change the [operator deployment manually](https://github.com/zalando-incubator/postgres-operator/blob/master/manifests/postgres-operator.yaml#L13). The default is `false`.
* **enable_shm_volume**
Instruct operator to start any new database pod without limitations on shm
memory. If this option is enabled, a new tmpfs volume will be mounted to the
target database pod to remove the shm memory limitation (see e.g. the [docker
issue](https://github.com/docker-library/postgres/issues/416)). This option
is global for an operator object, and can be overwritten by the `enableShmVolume`
parameter from the Postgres manifest. The default is `true`.
## Operator timeouts
This set of parameters define various timeouts related to some operator
@ -276,6 +310,11 @@ In the CRD-based configuration they are grouped under the `load_balancer` key.
cluster. Can be overridden by individual cluster settings. The default is
`false`.
* **custom_service_annotations**
when load balancing is enabled, LoadBalancer service is created and
this parameter takes service annotations that are applied to service.
Optional.
* **master_dns_name_format** defines the DNS name string template for the
master load balancer cluster. The default is
`{cluster}.{team}.{hostedzone}`, where `{cluster}` is replaced by the cluster
@ -290,12 +329,12 @@ In the CRD-based configuration they are grouped under the `load_balancer` key.
replaced with the hosted zone (the value of the `db_hosted_zone` parameter).
No other placeholders are allowed.
## AWS or GSC interaction
## AWS or GCP interaction
The options in this group configure operator interactions with non-Kubernetes
objects from AWS or Google cloud. They have no effect unless you are using
objects from Amazon Web Services (AWS) or Google Cloud Platform (GCP). They have no effect unless you are using
either. In the CRD-based configuration those options are grouped under the
`aws_or_gcp` key.
`aws_or_gcp` key. Note the GCP integration is not yet officially supported.
* **wal_s3_bucket**
S3 bucket to use for shipping WAL segments with WAL-E. A bucket has to be
@ -323,7 +362,7 @@ Options to aid debugging of the operator itself. Grouped under the `debug` key.
boolean parameter that toggles verbose debug logs from the operator. The
default is `true`.
* **enable_db_access**
* **enable_database_access**
boolean parameter that toggles the functionality of the operator that requires
access to the postgres database, i.e. creating databases and users. The default
is `true`.
@ -362,6 +401,9 @@ key.
role name to grant to team members created from the Teams API. The default is
`admin`, that role is created by Spilo as a `NOLOGIN` role.
* **enable_admin_role_for_users**
if `true`, the `team_admin_role` will have the rights to grant roles coming from PG manifests. Such roles will be created as in "CREATE ROLE 'role_from_manifest' ... ADMIN 'team_admin_role'". The default is `true`.
* **pam_role_name**
when set, the operator will add all team member roles to this group and add a
`pg_hba` line to authenticate members of that role via `pam`. The default is

View File

@ -57,12 +57,11 @@ $ psql -U postgres
Postgres operator allows defining roles to be created in the resulting database
cluster. It covers three use-cases:
* create application roles specific to the cluster described in the manifest:
`manifest roles`.
* create application roles that should be automatically created on every
cluster managed by the operator: `infrastructure roles`.
* automatically create users for every member of the team owning the database
cluster: `teams API roles`.
* `manifest roles`: create application roles specific to the cluster described in the manifest.
* `infrastructure roles`: create application roles that should be automatically created on every
cluster managed by the operator.
* `teams API roles`: automatically create users for every member of the team owning the database
cluster.
In the next sections, we will cover those use cases in more details.
@ -99,10 +98,13 @@ An infrastructure role is a role that should be present on every PostgreSQL
cluster managed by the operator. An example of such a role is a monitoring
user. There are two ways to define them:
* Exclusively via the infrastructure roles secret (specified by the
`infrastructure_roles_secret_name` parameter).
* With the infrastructure roles secret only
* With both the secret and the infrastructure role ConfigMap.
The role definition looks like this (values are base64 encoded):
### Infrastructure roles secret
The infrastructure roles secret is specified by the `infrastructure_roles_secret_name`
parameter. The role definition looks like this (values are base64 encoded):
```yaml
user1: ZGJ1c2Vy
@ -110,25 +112,29 @@ The role definition looks like this (values are base64 encoded):
inrole1: b3BlcmF0b3I=
```
A block above describes the infrastructure role 'dbuser' with the password
'secret' that is the member of the 'operator' role. For the following
The block above describes the infrastructure role 'dbuser' with password
'secret' that is a member of the 'operator' role. For the following
definitions one must increase the index, i.e. the next role will be defined as
'user2' and so on. Note that there is no way to specify role options (like
superuser or nologin) this way, and the resulting role will automatically be a
login role.
'user2' and so on. The resulting role will automatically be a login role.
* Via both the infrastructure roles secret and the infrastructure role
configmap (with the same name as the infrastructure roles secret).
Note that with definitions that solely use the infrastructure roles secret
there is no way to specify role options (like superuser or nologin) or role
memberships. This is where the ConfigMap comes into play.
The infrastructure roles secret should contain an entry with 'rolename:
rolepassword' for each role, and the role description should be specified in
the configmap. Below is the example:
### Secret plus ConfigMap
A [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/)
allows for defining more details regarding the infrastructure roles.
Therefore, one should use the new style that specifies infrastructure roles
using both the secret and a ConfigMap. The ConfigMap must have the same name as
the secret. The secret should contain an entry with 'rolename:rolepassword' for
each role.
```yaml
dbuser: c2VjcmV0
```
and the configmap definition for that user:
And the role description for that user should be specified in the ConfigMap.
```yaml
data:
@ -140,18 +146,13 @@ and the configmap definition for that user:
log_statement: all
```
Note that the definition above allows for more details than the one that relies
solely on the infrastructure role secret. In particular, one can allow
membership in multiple roles via the `inrole` array parameter, define role
flags via the `user_flags` list and supply per-role options through the
`db_parameters` dictionary. All those parameters are optional.
One can allow membership in multiple roles via the `inrole` array parameter,
define role flags via the `user_flags` list and supply per-role options through
the `db_parameters` dictionary. All those parameters are optional.
The definitions that solely use the infrastructure roles secret are more
limited and considered legacy ones; one should use the new style that specifies
infrastructure roles using both the secret and the configmap. You can mix both
in the infrastructure role secret, as long as your new-style definition can be
clearly distinguished from the old-style one (for instance, do not name
new-style roles`userN`).
Both definitions can be mixed in the infrastructure role secret, as long as
your new-style definition can be clearly distinguished from the old-style one
(for instance, do not name new-style roles `userN`).
Since an infrastructure role is created uniformly on all clusters managed by
the operator, it makes no sense to define it without the password. Such
@ -272,6 +273,32 @@ are always passed to sidecars:
The PostgreSQL volume is shared with sidecars and is mounted at `/home/postgres/pgdata`.
## InitContainers Support
Each cluster can specify arbitrary init containers to run. These containers can be
used to run custom actions before any normal and sidecar containers start.
An init container can be specified like this:
```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
name: acid-minimal-cluster
spec:
...
init_containers:
- name: "container-name"
image: "company/image:tag"
env:
- name: "ENV_VAR_NAME"
value: "any-k8s-env-things"
```
`init_containers` accepts full `v1.Container` definition.
## Increase volume size
PostgreSQL operator supports statefulset volume resize if you're using the

View File

@ -4,9 +4,13 @@ kind: postgresql
metadata:
name: acid-test-cluster
spec:
init_containers:
- name: date
image: busybox
command: [ "/bin/date" ]
teamId: "ACID"
volume:
size: 5Gi
size: 1Gi
numberOfInstances: 2
users: #Application/Robot users
zalando:
@ -19,6 +23,7 @@ spec:
databases:
foo: zalando
#Expert section
enableShmVolume: true
postgresql:
version: "10"
parameters:
@ -31,7 +36,7 @@ spec:
memory: 100Mi
limits:
cpu: 300m
memory: 3000Mi
memory: 300Mi
patroni:
initdb:
encoding: "UTF8"

View File

@ -10,14 +10,18 @@ data:
debug_logging: "true"
workers: "4"
docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29
docker_image: registry.opensource.zalan.do/acid/spilo-11:1.5-p4
pod_service_account_name: "zalando-postgres-operator"
secret_name_template: '{username}.{cluster}.credentials'
super_username: postgres
enable_teams_api: "false"
# custom_service_annotations:
# "keyx:valuez,keya:valuea"
# set_memory_request_to_limit: "true"
# postgres_superuser_teams: "postgres_superusers"
# enable_team_superuser: "false"
# team_admin_role: "admin"
# enable_admin_role_for_users: "true"
# teams_api_url: http://fake-teams-api.default.svc.cluster.local
# team_api_role_configuration: "log_statement:all"
# infrastructure_roles_secret_name: postgresql-infrastructure-roles

View File

@ -2,7 +2,7 @@ apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
name: acid-minimal-cluster
namespace: test # assumes namespace exists beforehand
namespace: default
spec:
teamId: "ACID"
volume:
@ -15,7 +15,8 @@ spec:
- createdb
# role for application foo
foo_user:
foo_user: []
#databases: name->owner
databases:

View File

@ -14,6 +14,7 @@ rules:
- acid.zalan.do
resources:
- postgresqls
- operatorconfigurations
verbs:
- "*"
- apiGroups:

View File

@ -12,9 +12,20 @@ spec:
serviceAccountName: zalando-postgres-operator
containers:
- name: postgres-operator
image: registry.opensource.zalan.do/acid/postgres-operator:v1.0.0
image: registry.opensource.zalan.do/acid/smoke-tested-postgres-operator:v1.0.0-37-g2422d72
imagePullPolicy: IfNotPresent
resources:
requests:
cpu: 500m
memory: 250Mi
limits:
cpu: 2000m
memory: 500Mi
env:
# provided additional ENV vars can overwrite individual config map entries
- name: CONFIG_MAP_NAME
value: "postgres-operator"
# In order to use the CRD OperatorConfiguration instead, uncomment these lines and comment out the two lines above
# - name: POSTGRES_OPERATOR_CONFIGURATION_OBJECT
# value: postgresql-operator-default-configuration

View File

@ -4,7 +4,7 @@ metadata:
name: postgresql-operator-default-configuration
configuration:
etcd_host: ""
docker_image: registry.opensource.zalan.do/acid/spilo-cdp-10:1.4-p29
docker_image: registry.opensource.zalan.do/acid/spilo-cdp-11:1.5-p42
workers: 4
min_instances: -1
max_instances: -1
@ -25,6 +25,9 @@ configuration:
pod_role_label: spilo-role
cluster_labels:
application: spilo
# inherited_labels:
# - application
# - app
cluster_name_label: cluster-name
# watched_namespace:""
# node_readiness_label: ""
@ -46,6 +49,9 @@ configuration:
load_balancer:
enable_master_load_balancer: false
enable_replica_load_balancer: false
# custom_service_annotations:
# keyx: valuex
# keyy: valuey
master_dns_name_format: "{cluster}.{team}.{hostedzone}"
replica_dns_name_format: "{cluster}-repl.{team}.{hostedzone}"
aws_or_gcp:

View File

@ -1,10 +1,7 @@
package v1
// ClusterStatusUnknown etc : status of a Postgres cluster known to the operator
const (
serviceNameMaxLength = 63
clusterNameMaxLength = serviceNameMaxLength - len("-repl")
serviceNameRegexString = `^[a-z]([-a-z0-9]*[a-z0-9])?$`
ClusterStatusUnknown PostgresStatus = ""
ClusterStatusCreating PostgresStatus = "Creating"
ClusterStatusUpdating PostgresStatus = "Updating"
@ -14,3 +11,9 @@ const (
ClusterStatusRunning PostgresStatus = "Running"
ClusterStatusInvalid PostgresStatus = "Invalid"
)
// Naming constraints for the services and clusters handled by this package.
const (
// serviceNameMaxLength is the maximum allowed length of a service name.
serviceNameMaxLength = 63
// clusterNameMaxLength reserves room for the "-repl" suffix, so that a cluster
// name with that suffix appended still fits into serviceNameMaxLength.
clusterNameMaxLength = serviceNameMaxLength - len("-repl")
// serviceNameRegexString matches names that start with a lowercase letter,
// continue with lowercase letters, digits or hyphens, and end with a
// lowercase letter or digit.
serviceNameRegexString = `^[a-z]([-a-z0-9]*[a-z0-9])?$`
)

View File

@ -6,6 +6,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// CRDResource* define names necessary for the k8s CRD API
const (
PostgresCRDResourceKind = "postgresql"
PostgresCRDResourcePlural = "postgresqls"
@ -39,6 +40,7 @@ func buildCRD(name, kind, plural, short string) *apiextv1beta1.CustomResourceDef
}
}
// PostgresCRD returns CustomResourceDefinition built from PostgresCRDResource
func PostgresCRD() *apiextv1beta1.CustomResourceDefinition {
return buildCRD(PostgresCRDResouceName,
PostgresCRDResourceKind,
@ -46,6 +48,7 @@ func PostgresCRD() *apiextv1beta1.CustomResourceDefinition {
PostgresCRDResourceShort)
}
// ConfigurationCRD returns CustomResourceDefinition built from OperatorConfigCRDResource
func ConfigurationCRD() *apiextv1beta1.CustomResourceDefinition {
return buildCRD(OperatorConfigCRDResourceName,
OperatorConfigCRDResouceKind,

View File

@ -1,6 +1,6 @@
// Package v1 is the v1 version of the API.
// +k8s:deepcopy-gen=package,register
// Package v1 is the v1 version of the API.
// +groupName=acid.zalan.do
package v1

View File

@ -104,6 +104,7 @@ func (p *Postgresql) UnmarshalJSON(data []byte) error {
return nil
}
// UnmarshalJSON converts a JSON byte slice into a Duration
func (d *Duration) UnmarshalJSON(b []byte) error {
var (
v interface{}

View File

@ -13,6 +13,8 @@ import (
// +genclient:onlyVerbs=get
// +genclient:noStatus
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// OperatorConfiguration defines the specification for the OperatorConfiguration.
type OperatorConfiguration struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata"`
@ -21,6 +23,8 @@ type OperatorConfiguration struct {
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// OperatorConfigurationList is used in the k8s API calls
type OperatorConfigurationList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata"`
@ -28,11 +32,13 @@ type OperatorConfigurationList struct {
Items []OperatorConfiguration `json:"items"`
}
// PostgresUsersConfiguration defines the system users of Postgres.
// Both fields are optional in the serialized form (omitempty).
type PostgresUsersConfiguration struct {
// SuperUsername is serialized as `super_username`.
SuperUsername string `json:"super_username,omitempty"`
// ReplicationUsername is serialized as `replication_username`;
// presumably the role used for replication — TODO confirm against the consumer.
ReplicationUsername string `json:"replication_username,omitempty"`
}
// KubernetesMetaConfiguration defines k8s conf required for all Postgres clusters and the operator itself
type KubernetesMetaConfiguration struct {
PodServiceAccountName string `json:"pod_service_account_name,omitempty"`
// TODO: change it to the proper json
@ -46,6 +52,7 @@ type KubernetesMetaConfiguration struct {
InfrastructureRolesSecretName spec.NamespacedName `json:"infrastructure_roles_secret_name,omitempty"`
PodRoleLabel string `json:"pod_role_label,omitempty"`
ClusterLabels map[string]string `json:"cluster_labels,omitempty"`
InheritedLabels []string `json:"inherited_labels,omitempty"`
ClusterNameLabel string `json:"cluster_name_label,omitempty"`
NodeReadinessLabel map[string]string `json:"node_readiness_label,omitempty"`
// TODO: use a proper toleration structure?
@ -53,8 +60,11 @@ type KubernetesMetaConfiguration struct {
// TODO: use namespacedname
PodEnvironmentConfigMap string `json:"pod_environment_configmap,omitempty"`
PodPriorityClassName string `json:"pod_priority_class_name,omitempty"`
EnablePodAntiAffinity bool `json:"enable_pod_antiaffinity" default:"false"`
// PodAntiAffinityTopologyKey overrides the topology key used for pod anti-affinity.
// It must carry a `json` tag like its sibling fields: encoding/json ignores a
// `name` tag, so the `pod_antiaffinity_topology_key` key would never be mapped.
PodAntiAffinityTopologyKey string `json:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
}
// PostgresPodResourcesDefaults defines the spec of default resources
type PostgresPodResourcesDefaults struct {
DefaultCPURequest string `json:"default_cpu_request,omitempty"`
DefaultMemoryRequest string `json:"default_memory_request,omitempty"`
@ -62,6 +72,7 @@ type PostgresPodResourcesDefaults struct {
DefaultMemoryLimit string `json:"default_memory_limit,omitempty"`
}
// OperatorTimeouts defines the timeout of ResourceCheck, PodWait, ReadyWait
type OperatorTimeouts struct {
ResourceCheckInterval Duration `json:"resource_check_interval,omitempty"`
ResourceCheckTimeout Duration `json:"resource_check_timeout,omitempty"`
@ -71,14 +82,18 @@ type OperatorTimeouts struct {
ReadyWaitTimeout Duration `json:"ready_wait_timeout,omitempty"`
}
// LoadBalancerConfiguration defines the LB configuration, i.e. the options
// grouped under the `load_balancer` key of the CRD-based operator configuration.
type LoadBalancerConfiguration struct {
// DbHostedZone is the hosted zone that replaces the `{hostedzone}`
// placeholder in the DNS name formats below.
DbHostedZone string `json:"db_hosted_zone,omitempty"`
// EnableMasterLoadBalancer is serialized as `enable_master_load_balancer`.
EnableMasterLoadBalancer bool `json:"enable_master_load_balancer,omitempty"`
// EnableReplicaLoadBalancer is serialized as `enable_replica_load_balancer`.
EnableReplicaLoadBalancer bool `json:"enable_replica_load_balancer,omitempty"`
// CustomServiceAnnotations holds annotations applied to the LoadBalancer
// service that is created when load balancing is enabled. Optional.
CustomServiceAnnotations map[string]string `json:"custom_service_annotations,omitempty"`
// MasterDNSNameFormat is the DNS name string template for the master load
// balancer, e.g. "{cluster}.{team}.{hostedzone}".
MasterDNSNameFormat config.StringTemplate `json:"master_dns_name_format,omitempty"`
// ReplicaDNSNameFormat is the DNS name string template for the replica load
// balancer, e.g. "{cluster}-repl.{team}.{hostedzone}".
ReplicaDNSNameFormat config.StringTemplate `json:"replica_dns_name_format,omitempty"`
}
// AWSGCPConfiguration defines the configuration for AWS
// TODO complete Google Cloud Platform (GCP) configuration
type AWSGCPConfiguration struct {
WALES3Bucket string `json:"wal_s3_bucket,omitempty"`
AWSRegion string `json:"aws_region,omitempty"`
@ -86,11 +101,13 @@ type AWSGCPConfiguration struct {
KubeIAMRole string `json:"kube_iam_role,omitempty"`
}
// OperatorDebugConfiguration defines options for the debug mode.
// Both switches are tagged `omitempty`, so a false value is omitted from the
// marshalled JSON.
type OperatorDebugConfiguration struct {
// DebugLogging maps to the "debug_logging" key.
DebugLogging bool `json:"debug_logging,omitempty"`
// EnableDBAccess maps to the "enable_database_access" key (note that the
// JSON name differs from the Go field name).
EnableDBAccess bool `json:"enable_database_access,omitempty"`
}
// TeamsAPIConfiguration defines the configuration of TeamsAPI
type TeamsAPIConfiguration struct {
EnableTeamsAPI bool `json:"enable_teams_api,omitempty"`
TeamsAPIUrl string `json:"teams_api_url,omitempty"`
@ -103,12 +120,14 @@ type TeamsAPIConfiguration struct {
PostgresSuperuserTeams []string `json:"postgres_superuser_teams,omitempty"`
}
// LoggingRESTAPIConfiguration defines the configuration of the logging REST API.
// All three fields are integers tagged `omitempty`, so a value of 0 is omitted
// from the marshalled JSON.
type LoggingRESTAPIConfiguration struct {
// APIPort maps to the "api_port" key.
APIPort int `json:"api_port,omitempty"`
// RingLogLines maps to the "ring_log_lines" key.
RingLogLines int `json:"ring_log_lines,omitempty"`
// ClusterHistoryEntries maps to the "cluster_history_entries" key.
ClusterHistoryEntries int `json:"cluster_history_entries,omitempty"`
}
// ScalyrConfiguration defines the configuration for ScalyrAPI
type ScalyrConfiguration struct {
ScalyrAPIKey string `json:"scalyr_api_key,omitempty"`
ScalyrImage string `json:"scalyr_image,omitempty"`
@ -119,6 +138,7 @@ type ScalyrConfiguration struct {
ScalyrMemoryLimit string `json:"scalyr_memory_limit,omitempty"`
}
// OperatorConfigurationData defines the operation config
type OperatorConfigurationData struct {
EtcdHost string `json:"etcd_host,omitempty"`
DockerImage string `json:"docker_image,omitempty"`
@ -131,6 +151,7 @@ type OperatorConfigurationData struct {
PostgresUsersConfiguration PostgresUsersConfiguration `json:"users"`
Kubernetes KubernetesMetaConfiguration `json:"kubernetes"`
PostgresPodResources PostgresPodResourcesDefaults `json:"postgres_pod_resources"`
SetMemoryRequestToLimit bool `json:"set_memory_request_to_limit,omitempty"`
Timeouts OperatorTimeouts `json:"timeouts"`
LoadBalancer LoadBalancerConfiguration `json:"load_balancer"`
AWSGCP AWSGCPConfiguration `json:"aws_or_gcp"`
@ -140,6 +161,7 @@ type OperatorConfigurationData struct {
Scalyr ScalyrConfiguration `json:"scalyr"`
}
// OperatorConfigurationUsers defines configuration for super user
type OperatorConfigurationUsers struct {
SuperUserName string `json:"superuser_name,omitempty"`
Replication string `json:"replication_user_name,omitempty"`
@ -147,4 +169,5 @@ type OperatorConfigurationUsers struct {
TeamAPIRoleConfiguration map[string]string `json:"team_api_role_configuration,omitempty"`
}
// Duration shortens this frequently used name: it is a named type whose
// underlying type is time.Duration, used by the interval and timeout fields of
// the configuration structs in this file.
type Duration time.Duration

View File

@ -9,7 +9,8 @@ import (
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
//Postgresql defines PostgreSQL Custom Resource Definition Object.
// Postgresql defines PostgreSQL Custom Resource Definition Object.
type Postgresql struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
@ -50,10 +51,13 @@ type PostgresSpec struct {
Databases map[string]string `json:"databases,omitempty"`
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
Sidecars []Sidecar `json:"sidecars,omitempty"`
InitContainers []v1.Container `json:"init_containers,omitempty"`
PodPriorityClassName string `json:"pod_priority_class_name,omitempty"`
ShmVolume *bool `json:"enableShmVolume,omitempty"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// PostgresqlList defines a list of PostgreSQL clusters.
type PostgresqlList struct {
metav1.TypeMeta `json:",inline"`
@ -90,8 +94,8 @@ type ResourceDescription struct {
// Resources describes requests and limits for the cluster resources.
// NOTE(review): as shown here the struct lists both ResourceRequest and
// ResourceRequests (each tagged "requests") and declares ResourceLimits twice;
// this looks like both sides of the ResourceRequest -> ResourceRequests rename.
// Confirm that only one pair of fields remains in the real file.
type Resources struct {
ResourceRequest ResourceDescription `json:"requests,omitempty"`
ResourceLimits ResourceDescription `json:"limits,omitempty"`
ResourceRequests ResourceDescription `json:"requests,omitempty"`
ResourceLimits ResourceDescription `json:"limits,omitempty"`
}
// Patroni contains Patroni-specific configuration

View File

@ -8,15 +8,20 @@ import (
"github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do"
)
// APIVersion is the version string shared by the `postgresql` and `operator` CRDs.
const APIVersion = "v1"
var (
// localSchemeBuilder and AddToScheme will stay in k8s.io/kubernetes.
// SchemeBuilder is an instance of runtime.SchemeBuilder, global for this package.
SchemeBuilder runtime.SchemeBuilder
// localSchemeBuilder points at the package-level SchemeBuilder above.
localSchemeBuilder = &SchemeBuilder
// NOTE(review): AddToScheme appears twice in this group with the same
// initializer; this looks like the old and new side of a comment-only change.
// Confirm that a single declaration remains in the real file.
AddToScheme = localSchemeBuilder.AddToScheme
// AddToScheme is the AddToScheme method value of localSchemeBuilder.
AddToScheme = localSchemeBuilder.AddToScheme
// SchemeGroupVersion combines acidzalando.GroupName with APIVersion.
SchemeGroupVersion = schema.GroupVersion{Group: acidzalando.GroupName, Version: APIVersion}
)

View File

@ -14,6 +14,7 @@ var (
serviceNameRegex = regexp.MustCompile(serviceNameRegexString)
)
// Clone convenience wrapper around DeepCopy
func (p *Postgresql) Clone() *Postgresql {
if p == nil {
return nil
@ -83,6 +84,7 @@ func validateCloneClusterDescription(clone *CloneDescription) error {
return nil
}
// Success of the current Status
func (status PostgresStatus) Success() bool {
return status != ClusterStatusAddFailed &&
status != ClusterStatusUpdateFailed &&

View File

@ -240,8 +240,8 @@ var unmarshalCluster = []struct {
Slots: map[string]map[string]string{"permanent_logical_1": {"type": "logical", "database": "foo", "plugin": "pgoutput"}},
},
Resources: Resources{
ResourceRequest: ResourceDescription{CPU: "10m", Memory: "50Mi"},
ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"},
ResourceRequests: ResourceDescription{CPU: "10m", Memory: "50Mi"},
ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"},
},
TeamID: "ACID",
@ -499,7 +499,7 @@ func TestMarshal(t *testing.T) {
t.Errorf("Marshal error: %v", err)
}
if !bytes.Equal(m, tt.marshal) {
t.Errorf("Marshal Postgresql expected: %q, got: %q", string(tt.marshal), string(m))
t.Errorf("Marshal Postgresql \nexpected: %q, \ngot: %q", string(tt.marshal), string(m))
}
}
}
@ -507,11 +507,11 @@ func TestMarshal(t *testing.T) {
// TestPostgresMeta verifies, for every expected object in unmarshalCluster,
// that the metadata accessors hand back the object's own embedded TypeMeta and
// ObjectMeta.
func TestPostgresMeta(t *testing.T) {
	for _, tt := range unmarshalCluster {
		if a := tt.out.GetObjectKind(); a != &tt.out.TypeMeta {
			t.Errorf("GetObjectKindMeta \nexpected: %v, \ngot: %v", tt.out.TypeMeta, a)
		}

		// The previous condition reported an error when DeepEqual was TRUE and
		// compared the accessor result with the ObjectMeta struct value, so it
		// could never flag a real mismatch. The accessor is expected to return
		// a pointer to the embedded ObjectMeta (mirroring the TypeMeta check
		// above), hence the comparison against its address and the negation.
		if a := tt.out.GetObjectMeta(); !reflect.DeepEqual(a, &tt.out.ObjectMeta) {
			t.Errorf("GetObjectMeta \nexpected: %v, \ngot: %v", tt.out.ObjectMeta, a)
		}
	}
}

View File

@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -442,6 +442,18 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.InitContainers != nil {
in, out := &in.InitContainers, &out.InitContainers
*out = make([]corev1.Container, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ShmVolume != nil {
in, out := &in.ShmVolume, &out.ShmVolume
*out = new(bool)
**out = **in
}
return
}
@ -573,7 +585,7 @@ func (in *ResourceDescription) DeepCopy() *ResourceDescription {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Resources) DeepCopyInto(out *Resources) {
*out = *in
out.ResourceRequest = in.ResourceRequest
out.ResourceRequests = in.ResourceRequests
out.ResourceLimits = in.ResourceLimits
return
}

View File

@ -12,7 +12,7 @@ import (
"github.com/sirupsen/logrus"
"k8s.io/api/apps/v1beta1"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policybeta1 "k8s.io/api/policy/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
@ -494,7 +494,7 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
defer func() {
if updateFailed {
c.setStatus(acidv1.ClusterStatusUpdateFailed)
} else if c.Status != acidv1.ClusterStatusRunning {
} else {
c.setStatus(acidv1.ClusterStatusRunning)
}
}()
@ -709,11 +709,16 @@ func (c *Cluster) initRobotUsers() error {
if err != nil {
return fmt.Errorf("invalid flags for user %q: %v", username, err)
}
adminRole := ""
if c.OpConfig.EnableAdminRoleForUsers {
adminRole = c.OpConfig.TeamAdminRole
}
newRole := spec.PgUser{
Origin: spec.RoleOriginManifest,
Name: username,
Password: util.RandomPassword(constants.PasswordLength),
Flags: flags,
Origin: spec.RoleOriginManifest,
Name: username,
Password: util.RandomPassword(constants.PasswordLength),
Flags: flags,
AdminRole: adminRole,
}
if currentRole, present := c.pgUsers[username]; present {
c.pgUsers[username] = c.resolveNameConflict(&currentRole, &newRole)
@ -872,7 +877,7 @@ func (c *Cluster) GetStatus() *ClusterStatus {
func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) error {
var err error
c.logger.Debugf("failing over from %q to %q", curMaster.Name, candidate)
c.logger.Debugf("switching over from %q to %q", curMaster.Name, candidate)
var wg sync.WaitGroup
@ -898,12 +903,12 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e
}()
if err = c.patroni.Switchover(curMaster, candidate.Name); err == nil {
c.logger.Debugf("successfully failed over from %q to %q", curMaster.Name, candidate)
c.logger.Debugf("successfully switched over from %q to %q", curMaster.Name, candidate)
if err = <-podLabelErr; err != nil {
err = fmt.Errorf("could not get master pod label: %v", err)
}
} else {
err = fmt.Errorf("could not failover: %v", err)
err = fmt.Errorf("could not switch over: %v", err)
}
// signal the role label waiting goroutine to close the shop and go home

View File

@ -18,6 +18,7 @@ import (
acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1"
"github.com/zalando-incubator/postgres-operator/pkg/spec"
"github.com/zalando-incubator/postgres-operator/pkg/util"
"github.com/zalando-incubator/postgres-operator/pkg/util/config"
"github.com/zalando-incubator/postgres-operator/pkg/util/constants"
"k8s.io/apimachinery/pkg/labels"
)
@ -92,18 +93,18 @@ func (c *Cluster) makeDefaultResources() acidv1.Resources {
defaultRequests := acidv1.ResourceDescription{CPU: config.DefaultCPURequest, Memory: config.DefaultMemoryRequest}
defaultLimits := acidv1.ResourceDescription{CPU: config.DefaultCPULimit, Memory: config.DefaultMemoryLimit}
return acidv1.Resources{ResourceRequest: defaultRequests, ResourceLimits: defaultLimits}
return acidv1.Resources{ResourceRequests: defaultRequests, ResourceLimits: defaultLimits}
}
func generateResourceRequirements(resources acidv1.Resources, defaultResources acidv1.Resources) (*v1.ResourceRequirements, error) {
var err error
specRequests := resources.ResourceRequest
specRequests := resources.ResourceRequests
specLimits := resources.ResourceLimits
result := v1.ResourceRequirements{}
result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequest)
result.Requests, err = fillResourceList(specRequests, defaultResources.ResourceRequests)
if err != nil {
return nil, fmt.Errorf("could not fill resource requests: %v", err)
}
@ -289,6 +290,26 @@ func nodeAffinity(nodeReadinessLabel map[string]string) *v1.Affinity {
}
}
// generatePodAffinity builds an Affinity whose required pod anti-affinity term
// selects pods by the given labels within topologyKey, so that pods of the
// same Postgres cluster do not share a topology domain (e.g. a node).
// When nodeAffinity is non-nil its NodeAffinity part is carried over into the
// result unchanged.
func generatePodAffinity(labels labels.Set, topologyKey string, nodeAffinity *v1.Affinity) *v1.Affinity {
	antiAffinityTerm := v1.PodAffinityTerm{
		LabelSelector: &metav1.LabelSelector{
			MatchLabels: labels,
		},
		TopologyKey: topologyKey,
	}

	affinity := &v1.Affinity{
		PodAntiAffinity: &v1.PodAntiAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{antiAffinityTerm},
		},
	}

	// Copying a nil NodeAffinity is the same as leaving the field unset, so a
	// single nil check on the outer pointer is sufficient.
	if nodeAffinity != nil {
		affinity.NodeAffinity = nodeAffinity.NodeAffinity
	}

	return affinity
}
func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]string) []v1.Toleration {
// allow to override tolerations by postgresql manifest
if len(*tolerationsSpec) > 0 {
@ -338,7 +359,6 @@ func generateSpiloContainer(
envVars []v1.EnvVar,
volumeMounts []v1.VolumeMount,
) *v1.Container {
privilegedMode := true
return &v1.Container{
Name: name,
@ -377,8 +397,8 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar,
resources, err := generateResourceRequirements(
makeResources(
sidecar.Resources.ResourceRequest.CPU,
sidecar.Resources.ResourceRequest.Memory,
sidecar.Resources.ResourceRequests.CPU,
sidecar.Resources.ResourceRequests.Memory,
sidecar.Resources.ResourceLimits.CPU,
sidecar.Resources.ResourceLimits.Memory,
),
@ -396,10 +416,21 @@ func generateSidecarContainers(sidecars []acidv1.Sidecar,
return nil, nil
}
// mountShmVolumeNeeded reports whether an shm volume should be mounted.
// An explicit setting in the PostgreSQL manifest (pgSpec.ShmVolume) takes
// precedence; only when it is absent does the operator configuration decide.
func mountShmVolumeNeeded(opConfig config.Config, pgSpec *acidv1.PostgresSpec) bool {
	manifestSetting := pgSpec.ShmVolume
	if manifestSetting == nil {
		return opConfig.ShmVolume
	}

	return *manifestSetting
}
func generatePodTemplate(
namespace string,
labels labels.Set,
spiloContainer *v1.Container,
initContainers []v1.Container,
sidecarContainers []v1.Container,
tolerationsSpec *[]v1.Toleration,
nodeAffinity *v1.Affinity,
@ -407,6 +438,9 @@ func generatePodTemplate(
podServiceAccountName string,
kubeIAMRole string,
priorityClassName string,
shmVolume bool,
podAntiAffinity bool,
podAntiAffinityTopologyKey string,
) (*v1.PodTemplateSpec, error) {
terminateGracePeriodSeconds := terminateGracePeriod
@ -417,10 +451,17 @@ func generatePodTemplate(
ServiceAccountName: podServiceAccountName,
TerminationGracePeriodSeconds: &terminateGracePeriodSeconds,
Containers: containers,
InitContainers: initContainers,
Tolerations: *tolerationsSpec,
}
if nodeAffinity != nil {
if shmVolume {
addShmVolume(&podSpec)
}
if podAntiAffinity {
podSpec.Affinity = generatePodAffinity(labels, podAntiAffinityTopologyKey, nodeAffinity)
} else if nodeAffinity != nil {
podSpec.Affinity = nodeAffinity
}
@ -475,6 +516,18 @@ func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration stri
Name: "PGUSER_SUPERUSER",
Value: c.OpConfig.SuperUsername,
},
{
Name: "KUBERNETES_SCOPE_LABEL",
Value: c.OpConfig.ClusterNameLabel,
},
{
Name: "KUBERNETES_ROLE_LABEL",
Value: c.OpConfig.PodRoleLabel,
},
{
Name: "KUBERNETES_LABELS",
Value: labels.Set(c.OpConfig.ClusterLabels).String(),
},
{
Name: "PGPASSWORD_SUPERUSER",
ValueFrom: &v1.EnvVarSource{
@ -629,7 +682,7 @@ func getBucketScopeSuffix(uid string) string {
func makeResources(cpuRequest, memoryRequest, cpuLimit, memoryLimit string) acidv1.Resources {
return acidv1.Resources{
ResourceRequest: acidv1.ResourceDescription{
ResourceRequests: acidv1.ResourceDescription{
CPU: cpuRequest,
Memory: memoryRequest,
},
@ -648,6 +701,61 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
podTemplate *v1.PodTemplateSpec
volumeClaimTemplate *v1.PersistentVolumeClaim
)
// Improve me. Please.
if c.OpConfig.SetMemoryRequestToLimit {
// controller adjusts the default memory request at operator startup
request := spec.Resources.ResourceRequests.Memory
if request == "" {
request = c.OpConfig.DefaultMemoryRequest
}
limit := spec.Resources.ResourceLimits.Memory
if limit == "" {
limit = c.OpConfig.DefaultMemoryLimit
}
isSmaller, err := util.RequestIsSmallerThanLimit(request, limit)
if err != nil {
return nil, err
}
if isSmaller {
c.logger.Warningf("The memory request of %v for the Postgres container is increased to match the memory limit of %v.", request, limit)
spec.Resources.ResourceRequests.Memory = limit
}
// controller adjusts the Scalyr sidecar request at operator startup
// as this sidecar is managed separately
// adjust sidecar containers defined for that particular cluster
for _, sidecar := range spec.Sidecars {
// TODO #413
sidecarRequest := sidecar.Resources.ResourceRequests.Memory
if request == "" {
request = c.OpConfig.DefaultMemoryRequest
}
sidecarLimit := sidecar.Resources.ResourceLimits.Memory
if limit == "" {
limit = c.OpConfig.DefaultMemoryLimit
}
isSmaller, err := util.RequestIsSmallerThanLimit(sidecarRequest, sidecarLimit)
if err != nil {
return nil, err
}
if isSmaller {
c.logger.Warningf("The memory request of %v for the %v sidecar container is increased to match the memory limit of %v.", sidecar.Resources.ResourceRequests.Memory, sidecar.Name, sidecar.Resources.ResourceLimits.Memory)
sidecar.Resources.ResourceRequests.Memory = sidecar.Resources.ResourceLimits.Memory
}
}
}
defaultResources := c.makeDefaultResources()
resourceRequirements, err := generateResourceRequirements(spec.Resources, defaultResources)
@ -674,8 +782,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
// generate environment variables for the spilo container
spiloEnvVars := deduplicateEnvVars(
c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone, customPodEnvVarsList),
c.containerName(), c.logger)
c.generateSpiloPodEnvVars(c.Postgresql.GetUID(), spiloConfiguration, &spec.Clone,
customPodEnvVarsList), c.containerName(), c.logger)
// pickup the docker image for the spilo container
effectiveDockerImage := util.Coalesce(spec.DockerImage, c.OpConfig.DockerImage)
@ -683,9 +791,15 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
volumeMounts := generateVolumeMounts()
// generate the spilo container
spiloContainer := generateSpiloContainer(c.containerName(), &effectiveDockerImage, resourceRequirements, spiloEnvVars, volumeMounts)
c.logger.Debugf("Generating Spilo container, environment variables: %v", spiloEnvVars)
spiloContainer := generateSpiloContainer(c.containerName(),
&effectiveDockerImage,
resourceRequirements,
spiloEnvVars,
volumeMounts,
)
// resolve conflicts between operator-global and per-cluster sidecards
// resolve conflicts between operator-global and per-cluster sidecars
sideCars := c.mergeSidecars(spec.Sidecars)
resourceRequirementsScalyrSidecar := makeResources(
@ -714,18 +828,22 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*v1beta1.State
tolerationSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration)
effectivePodPriorityClassName := util.Coalesce(spec.PodPriorityClassName, c.OpConfig.PodPriorityClassName)
// generate pod template for the statefulset, based on the spilo container and sidecards
// generate pod template for the statefulset, based on the spilo container and sidecars
if podTemplate, err = generatePodTemplate(
c.Namespace,
c.labelsSet(true),
spiloContainer,
spec.InitContainers,
sidecarContainers,
&tolerationSpec,
nodeAffinity(c.OpConfig.NodeReadinessLabel),
int64(c.OpConfig.PodTerminateGracePeriod.Seconds()),
c.OpConfig.PodServiceAccountName,
c.OpConfig.KubeIAMRole,
effectivePodPriorityClassName); err != nil {
effectivePodPriorityClassName,
mountShmVolumeNeeded(c.OpConfig, spec),
c.OpConfig.EnablePodAntiAffinity,
c.OpConfig.PodAntiAffinityTopologyKey); err != nil {
return nil, fmt.Errorf("could not generate pod template: %v", err)
}
@ -832,6 +950,32 @@ func (c *Cluster) getNumberOfInstances(spec *acidv1.PostgresSpec) int32 {
return newcur
}
// To avoid issues with limited /dev/shm inside docker environment, when
// PostgreSQL can't allocate enough of dsa segments from it, we can
// mount an extra memory volume
//
// see https://docs.okd.io/latest/dev_guide/shared_memory.html
//
// addShmVolume appends a memory-backed EmptyDir volume named
// constants.ShmVolumeName to podSpec.Volumes and mounts it at
// constants.ShmVolumePath in the Postgres container, i.e. the container at
// index constants.PostgresContainerIdx. podSpec is modified in place and must
// already contain that container.
func addShmVolume(podSpec *v1.PodSpec) {
	podSpec.Volumes = append(podSpec.Volumes, v1.Volume{
		Name: constants.ShmVolumeName,
		VolumeSource: v1.VolumeSource{
			EmptyDir: &v1.EmptyDirVolumeSource{
				Medium: "Memory",
			},
		},
	})

	// Read and write the mounts through the same container index; previously
	// the mounts were taken from the Postgres container but stored on
	// Containers[0], which is only correct while the two indices coincide.
	pgContainer := &podSpec.Containers[constants.PostgresContainerIdx]
	pgContainer.VolumeMounts = append(pgContainer.VolumeMounts, v1.VolumeMount{
		Name:      constants.ShmVolumeName,
		MountPath: constants.ShmVolumePath,
	})
}
func generatePersistentVolumeClaimTemplate(volumeSize, volumeStorageClass string) (*v1.PersistentVolumeClaim, error) {
var storageClassName *string
@ -959,7 +1103,7 @@ func (c *Cluster) generateService(role PostgresRole, spec *acidv1.PostgresSpec)
}
if role == Replica {
serviceSpec.Selector = c.roleLabelsSet(role)
serviceSpec.Selector = c.roleLabelsSet(false, role)
}
var annotations map[string]string
@ -982,6 +1126,13 @@ func (c *Cluster) generateService(role PostgresRole, spec *acidv1.PostgresSpec)
constants.ZalandoDNSNameAnnotation: dnsName,
constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue,
}
if len(c.OpConfig.CustomServiceAnnotations) != 0 {
c.logger.Debugf("There are custom annotations defined, creating them.")
for customAnnotationKey, customAnnotationValue := range c.OpConfig.CustomServiceAnnotations {
annotations[customAnnotationKey] = customAnnotationValue
}
}
} else if role == Replica {
// before PR #258, the replica service was only created if allocated a LB
// now we always create the service but warn if the LB is absent
@ -992,7 +1143,7 @@ func (c *Cluster) generateService(role PostgresRole, spec *acidv1.PostgresSpec)
ObjectMeta: metav1.ObjectMeta{
Name: c.serviceName(role),
Namespace: c.Namespace,
Labels: c.roleLabelsSet(role),
Labels: c.roleLabelsSet(true, role),
Annotations: annotations,
},
Spec: serviceSpec,
@ -1006,7 +1157,7 @@ func (c *Cluster) generateEndpoint(role PostgresRole, subsets []v1.EndpointSubse
ObjectMeta: metav1.ObjectMeta{
Name: c.endpointName(role),
Namespace: c.Namespace,
Labels: c.roleLabelsSet(role),
Labels: c.roleLabelsSet(true, role),
},
}
if len(subsets) > 0 {
@ -1070,7 +1221,7 @@ func (c *Cluster) generatePodDisruptionBudget() *policybeta1.PodDisruptionBudget
Spec: policybeta1.PodDisruptionBudgetSpec{
MinAvailable: &minAvailable,
Selector: &metav1.LabelSelector{
MatchLabels: c.roleLabelsSet(Master),
MatchLabels: c.roleLabelsSet(false, Master),
},
},
}

View File

@ -1,8 +1,11 @@
package cluster
import (
"k8s.io/api/core/v1"
acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1"
"github.com/zalando-incubator/postgres-operator/pkg/util/config"
"github.com/zalando-incubator/postgres-operator/pkg/util/constants"
"github.com/zalando-incubator/postgres-operator/pkg/util/k8sutil"
"testing"
)
@ -75,3 +78,54 @@ func TestCreateLoadBalancerLogic(t *testing.T) {
}
}
}
// TestShmVolume runs addShmVolume against an empty and a pre-populated pod
// spec and checks that the shm volume and its mount show up at the expected
// position (right after any entries that were already present).
func TestShmVolume(t *testing.T) {
	const testName = "TestShmVolume"

	cases := []struct {
		subTest string
		podSpec *v1.PodSpec
		shmPos  int
	}{
		{
			subTest: "empty PodSpec",
			podSpec: &v1.PodSpec{
				Volumes: []v1.Volume{},
				Containers: []v1.Container{
					{VolumeMounts: []v1.VolumeMount{}},
				},
			},
			shmPos: 0,
		},
		{
			subTest: "non empty PodSpec",
			podSpec: &v1.PodSpec{
				Volumes: []v1.Volume{{}},
				Containers: []v1.Container{
					{VolumeMounts: []v1.VolumeMount{{}}},
				},
			},
			shmPos: 1,
		},
	}

	for _, tc := range cases {
		addShmVolume(tc.podSpec)

		// Look up both names before asserting, so an out-of-range position
		// surfaces before any error is reported.
		gotVolume := tc.podSpec.Volumes[tc.shmPos].Name
		gotMount := tc.podSpec.Containers[0].VolumeMounts[tc.shmPos].Name

		if gotVolume != constants.ShmVolumeName {
			t.Errorf("%s %s: Expected volume %s was not created, have %s instead",
				testName, tc.subTest, constants.ShmVolumeName, gotVolume)
		}
		if gotMount != constants.ShmVolumeName {
			t.Errorf("%s %s: Expected mount %s was not created, have %s instead",
				testName, tc.subTest, constants.ShmVolumeName, gotMount)
		}
	}
}

View File

@ -4,7 +4,7 @@ import (
"fmt"
"math/rand"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/zalando-incubator/postgres-operator/pkg/spec"
@ -27,7 +27,7 @@ func (c *Cluster) listPods() ([]v1.Pod, error) {
func (c *Cluster) getRolePods(role PostgresRole) ([]v1.Pod, error) {
listOptions := metav1.ListOptions{
LabelSelector: c.roleLabelsSet(role).String(),
LabelSelector: c.roleLabelsSet(false, role).String(),
}
pods, err := c.KubeClient.Pods(c.Namespace).List(listOptions)
@ -77,11 +77,7 @@ func (c *Cluster) deletePod(podName spec.NamespacedName) error {
return err
}
if err := c.waitForPodDeletion(ch); err != nil {
return err
}
return nil
return c.waitForPodDeletion(ch)
}
func (c *Cluster) unregisterPodSubscriber(podName spec.NamespacedName) {
@ -122,7 +118,7 @@ func (c *Cluster) movePodFromEndOfLifeNode(pod *v1.Pod) (*v1.Pod, error) {
if eol, err = c.podIsEndOfLife(pod); err != nil {
return nil, fmt.Errorf("could not get node %q: %v", pod.Spec.NodeName, err)
} else if !eol {
c.logger.Infof("pod %q is already on a live node", podName)
c.logger.Infof("check failed: pod %q is already on a live node", podName)
return pod, nil
}
@ -162,7 +158,7 @@ func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) {
}
if len(replicas) == 0 {
c.logger.Warningf("no available master candidates, migration will cause longer downtime of the master instance")
c.logger.Warningf("no available master candidates, migration will cause longer downtime of Postgres cluster")
return nil, nil
}
@ -193,18 +189,18 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
return fmt.Errorf("could not get pod: %v", err)
}
c.logger.Infof("migrating master pod %q", podName)
c.logger.Infof("starting process to migrate master pod %q", podName)
if eol, err = c.podIsEndOfLife(oldMaster); err != nil {
return fmt.Errorf("could not get node %q: %v", oldMaster.Spec.NodeName, err)
}
if !eol {
c.logger.Debugf("pod is already on a live node")
c.logger.Debugf("no action needed: master pod is already on a live node")
return nil
}
if role := PostgresRole(oldMaster.Labels[c.OpConfig.PodRoleLabel]); role != Master {
c.logger.Warningf("pod %q is not a master", podName)
c.logger.Warningf("no action needed: pod %q is not the master (anymore)", podName)
return nil
}
// we must have a statefulset in the cluster for the migration to work
@ -219,10 +215,10 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
// We may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case.
if *c.Statefulset.Spec.Replicas > 1 {
if masterCandidatePod, err = c.masterCandidate(oldMaster.Spec.NodeName); err != nil {
return fmt.Errorf("could not get new master candidate: %v", err)
return fmt.Errorf("could not find suitable replica pod as candidate for failover: %v", err)
}
} else {
c.logger.Warningf("single master pod for cluster %q, migration will cause longer downtime of the master instance", c.clusterName())
c.logger.Warningf("migrating single pod cluster %q, this will cause downtime of the Postgres cluster until pod is back", c.clusterName())
}
// there are two cases for each postgres cluster that has its master pod on the node to migrate from:
@ -256,15 +252,15 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st
return fmt.Errorf("could not get pod: %v", err)
}
c.logger.Infof("migrating replica pod %q", podName)
c.logger.Infof("migrating replica pod %q to live node", podName)
if replicaPod.Spec.NodeName != fromNodeName {
c.logger.Infof("pod %q has already migrated to node %q", podName, replicaPod.Spec.NodeName)
c.logger.Infof("check failed: pod %q has already migrated to node %q", podName, replicaPod.Spec.NodeName)
return nil
}
if role := PostgresRole(replicaPod.Labels[c.OpConfig.PodRoleLabel]); role != Replica {
return fmt.Errorf("pod %q is not a replica", podName)
return fmt.Errorf("check failed: pod %q is not a replica", podName)
}
_, err = c.movePodFromEndOfLifeNode(replicaPod)
@ -296,7 +292,7 @@ func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) {
}
func (c *Cluster) recreatePods() error {
c.setProcessName("recreating pods")
c.setProcessName("starting to recreate pods")
ls := c.labelsSet(false)
namespace := c.Namespace
@ -337,10 +333,10 @@ func (c *Cluster) recreatePods() error {
// failover if we have not observed a master pod when re-creating former replicas.
if newMasterPod == nil && len(replicas) > 0 {
if err := c.Switchover(masterPod, masterCandidate(replicas)); err != nil {
c.logger.Warningf("could not perform failover: %v", err)
c.logger.Warningf("could not perform switch over: %v", err)
}
} else if newMasterPod == nil && len(replicas) == 0 {
c.logger.Warningf("cannot switch master role before re-creating the pod: no replicas")
c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas")
}
c.logger.Infof("recreating old master pod %q", util.NameFromMeta(masterPod.ObjectMeta))

View File

@ -437,7 +437,11 @@ func (c *Cluster) updateService(role PostgresRole, newService *v1.Service) error
func (c *Cluster) deleteService(role PostgresRole) error {
c.logger.Debugf("deleting service %s", role)
service := c.Services[role]
service, ok := c.Services[role]
if !ok {
c.logger.Debugf("No service for %s role was found, nothing to delete", role)
return nil
}
if err := c.KubeClient.Services(service.Namespace).Delete(service.Name, c.deleteOptions); err != nil {
return err

View File

@ -1,12 +1,13 @@
package cluster
import (
"time"
acidv1 "github.com/zalando-incubator/postgres-operator/pkg/apis/acid.zalan.do/v1"
"k8s.io/api/apps/v1beta1"
"k8s.io/api/core/v1"
policybeta1 "k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/types"
"time"
)
// PostgresRole describes role of the node
@ -20,6 +21,7 @@ const (
Replica PostgresRole = "replica"
)
// PodEventType represents the type of a pod-related event
type PodEventType string
// Possible values for the EventType

View File

@ -389,6 +389,19 @@ func (c *Cluster) labelsSet(shouldAddExtraLabels bool) labels.Set {
if shouldAddExtraLabels {
// enables filtering resources owned by a team
lbls["team"] = c.Postgresql.Spec.TeamID
// allow to inherit certain labels from the 'postgres' object
if spec, err := c.GetSpec(); err == nil {
for k, v := range spec.ObjectMeta.Labels {
for _, match := range c.OpConfig.InheritedLabels {
if k == match {
lbls[k] = v
}
}
}
} else {
c.logger.Warningf("could not get the list of InheritedLabels for cluster %q: %v", c.Name, err)
}
}
return labels.Set(lbls)
@ -398,8 +411,8 @@ func (c *Cluster) labelsSelector() *metav1.LabelSelector {
return &metav1.LabelSelector{MatchLabels: c.labelsSet(false), MatchExpressions: nil}
}
func (c *Cluster) roleLabelsSet(role PostgresRole) labels.Set {
lbls := c.labelsSet(false)
func (c *Cluster) roleLabelsSet(shouldAddExtraLabels bool, role PostgresRole) labels.Set {
lbls := c.labelsSet(shouldAddExtraLabels)
lbls[c.OpConfig.PodRoleLabel] = string(role)
return lbls
}
@ -460,6 +473,7 @@ func (c *Cluster) setSpec(newSpec *acidv1.Postgresql) {
c.specMu.Unlock()
}
// GetSpec returns a copy of the operator-side spec of a Postgres cluster in a thread-safe manner
func (c *Cluster) GetSpec() (*acidv1.Postgresql, error) {
c.specMu.RLock()
defer c.specMu.RUnlock()

View File

@ -110,6 +110,29 @@ func (c *Controller) initOperatorConfig() {
c.opConfig = config.NewFromMap(configMapData)
c.warnOnDeprecatedOperatorParameters()
// When set_memory_request_to_limit is enabled, raise the operator-wide default
// memory requests so that they are never smaller than the corresponding limits.
if c.opConfig.SetMemoryRequestToLimit {
// Postgres container defaults: compare the default request against the default limit.
isSmaller, err := util.RequestIsSmallerThanLimit(c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit)
if err != nil {
// an unparsable quantity in the operator configuration aborts initialization
panic(err)
}
if isSmaller {
c.logger.Warningf("The default memory request of %v for Postgres containers is increased to match the default memory limit of %v.", c.opConfig.DefaultMemoryRequest, c.opConfig.DefaultMemoryLimit)
c.opConfig.DefaultMemoryRequest = c.opConfig.DefaultMemoryLimit
}
// Scalyr sidecar: same comparison and adjustment for its request/limit pair.
isSmaller, err = util.RequestIsSmallerThanLimit(c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit)
if err != nil {
panic(err)
}
if isSmaller {
c.logger.Warningf("The memory request of %v for the Scalyr sidecar container is increased to match the memory limit of %v.", c.opConfig.ScalyrMemoryRequest, c.opConfig.ScalyrMemoryLimit)
c.opConfig.ScalyrMemoryRequest = c.opConfig.ScalyrMemoryLimit
}
// Only operator-level defaults are handled here;
// generateStatefulSet adjusts values for individual Postgres clusters
}
}
func (c *Controller) modifyConfigFromEnvironment() {

View File

@ -39,6 +39,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.PodServiceAccountName = fromCRD.Kubernetes.PodServiceAccountName
result.PodServiceAccountDefinition = fromCRD.Kubernetes.PodServiceAccountDefinition
result.PodServiceAccountRoleBindingDefinition = fromCRD.Kubernetes.PodServiceAccountRoleBindingDefinition
result.PodEnvironmentConfigMap = fromCRD.Kubernetes.PodEnvironmentConfigMap
result.PodTerminateGracePeriod = time.Duration(fromCRD.Kubernetes.PodTerminateGracePeriod)
result.WatchedNamespace = fromCRD.Kubernetes.WatchedNamespace
result.PDBNameFormat = fromCRD.Kubernetes.PDBNameFormat
@ -47,14 +48,19 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.InfrastructureRolesSecretName = fromCRD.Kubernetes.InfrastructureRolesSecretName
result.PodRoleLabel = fromCRD.Kubernetes.PodRoleLabel
result.ClusterLabels = fromCRD.Kubernetes.ClusterLabels
result.InheritedLabels = fromCRD.Kubernetes.InheritedLabels
result.ClusterNameLabel = fromCRD.Kubernetes.ClusterNameLabel
result.NodeReadinessLabel = fromCRD.Kubernetes.NodeReadinessLabel
result.PodPriorityClassName = fromCRD.Kubernetes.PodPriorityClassName
// pod anti-affinity settings are copied as-is from the Kubernetes section of the CRD
result.EnablePodAntiAffinity = fromCRD.Kubernetes.EnablePodAntiAffinity
result.PodAntiAffinityTopologyKey = fromCRD.Kubernetes.PodAntiAffinityTopologyKey
result.DefaultCPURequest = fromCRD.PostgresPodResources.DefaultCPURequest
result.DefaultMemoryRequest = fromCRD.PostgresPodResources.DefaultMemoryRequest
result.DefaultCPULimit = fromCRD.PostgresPodResources.DefaultCPULimit
result.DefaultMemoryLimit = fromCRD.PostgresPodResources.DefaultMemoryLimit
result.SetMemoryRequestToLimit = fromCRD.SetMemoryRequestToLimit
result.ResourceCheckInterval = time.Duration(fromCRD.Timeouts.ResourceCheckInterval)
result.ResourceCheckTimeout = time.Duration(fromCRD.Timeouts.ResourceCheckTimeout)
@ -66,6 +72,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.DbHostedZone = fromCRD.LoadBalancer.DbHostedZone
result.EnableMasterLoadBalancer = fromCRD.LoadBalancer.EnableMasterLoadBalancer
result.EnableReplicaLoadBalancer = fromCRD.LoadBalancer.EnableReplicaLoadBalancer
result.CustomServiceAnnotations = fromCRD.LoadBalancer.CustomServiceAnnotations
result.MasterDNSNameFormat = fromCRD.LoadBalancer.MasterDNSNameFormat
result.ReplicaDNSNameFormat = fromCRD.LoadBalancer.ReplicaDNSNameFormat

View File

@ -385,8 +385,14 @@ func (c *Controller) queueClusterEvent(informerOldSpec, informerNewSpec *acidv1.
if informerOldSpec != nil { //update, delete
uid = informerOldSpec.GetUID()
clusterName = util.NameFromMeta(informerOldSpec.ObjectMeta)
// user is fixing previously incorrect spec
if eventType == EventUpdate && informerNewSpec.Error == "" && informerOldSpec.Error != "" {
eventType = EventSync
}
// set current error to be one of the new spec if present
if informerNewSpec != nil {
clusterError = informerNewSpec.Error
} else {
clusterError = informerOldSpec.Error

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -1,5 +1,5 @@
/*
Copyright 2018 Compose, Zalando SE
Copyright 2019 Compose, Zalando SE
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -49,6 +49,7 @@ type PgUser struct {
Flags []string `yaml:"user_flags"`
MemberOf []string `yaml:"inrole"`
Parameters map[string]string `yaml:"db_parameters"`
AdminRole string `yaml:"admin_role"`
}
// PgUserMap maps user names to the definitions.
@ -125,6 +126,7 @@ func (n *NamespacedName) Decode(value string) error {
return n.DecodeWorker(value, GetOperatorNamespace())
}
// UnmarshalJSON converts a byte slice to NamespacedName
func (n *NamespacedName) UnmarshalJSON(data []byte) error {
result := NamespacedName{}
var tmp string

View File

@ -27,6 +27,7 @@ type Resources struct {
PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"`
PodPriorityClassName string `name:"pod_priority_class_name"`
ClusterLabels map[string]string `name:"cluster_labels" default:"application:spilo"`
InheritedLabels []string `name:"inherited_labels" default:""`
ClusterNameLabel string `name:"cluster_name_label" default:"cluster-name"`
PodRoleLabel string `name:"pod_role_label" default:"spilo-role"`
PodToleration map[string]string `name:"toleration" default:""`
@ -38,6 +39,7 @@ type Resources struct {
NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""`
MaxInstances int32 `name:"max_instances" default:"-1"`
MinInstances int32 `name:"min_instances" default:"-1"`
ShmVolume bool `name:"enable_shm_volume" default:"true"`
}
// Auth describes authentication specific configuration parameters
@ -89,8 +91,12 @@ type Config struct {
EnableTeamsAPI bool `name:"enable_teams_api" default:"true"`
EnableTeamSuperuser bool `name:"enable_team_superuser" default:"false"`
TeamAdminRole string `name:"team_admin_role" default:"admin"`
EnableAdminRoleForUsers bool `name:"enable_admin_role_for_users" default:"true"`
EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"`
EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"`
CustomServiceAnnotations map[string]string `name:"custom_service_annotations"`
EnablePodAntiAffinity bool `name:"enable_pod_antiaffinity" default:"false"`
PodAntiAffinityTopologyKey string `name:"pod_antiaffinity_topology_key" default:"kubernetes.io/hostname"`
// deprecated and kept for backward compatibility
EnableLoadBalancer *bool `name:"enable_load_balancer"`
MasterDNSNameFormat StringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"`
@ -104,6 +110,7 @@ type Config struct {
PodTerminateGracePeriod time.Duration `name:"pod_terminate_grace_period" default:"5m"`
ProtectedRoles []string `name:"protected_role_names" default:"admin"`
PostgresSuperuserTeams []string `name:"postgres_superuser_teams" default:""`
// SetMemoryRequestToLimit makes the operator raise memory requests up to the memory limits when enabled.
SetMemoryRequestToLimit bool `name:"set_memory_request_to_limit" default:"false"`
}
// MustMarshal marshals the config or panics

View File

@ -19,6 +19,7 @@ type fieldInfo struct {
Field reflect.Value
}
// StringTemplate is a string-based type (a defined type, not a true alias) that holds a format template
type StringTemplate string
func decoderFrom(field reflect.Value) (d decoder) {
@ -221,12 +222,14 @@ func getMapPairsFromString(value string) (pairs []string, err error) {
return
}
// Decode stores value in the receiver after converting it to a StringTemplate.
// It always returns a nil error.
func (f *StringTemplate) Decode(value string) error {
	converted := StringTemplate(value)
	*f = converted

	return nil
}
// Format formatted string from StringTemplate
func (f *StringTemplate) Format(a ...string) string {
res := string(*f)
@ -237,6 +240,7 @@ func (f *StringTemplate) Format(a ...string) string {
return res
}
// MarshalJSON encodes the template as a plain JSON string.
func (f StringTemplate) MarshalJSON() ([]byte, error) {
	plain := string(f)

	return json.Marshal(plain)
}

View File

@ -5,6 +5,7 @@ import "time"
// General kubernetes-related constants
const (
PostgresContainerName = "postgres"
PostgresContainerIdx = 0
K8sAPIPath = "/apis"
StatefulsetDeletionInterval = 1 * time.Second
StatefulsetDeletionTimeout = 30 * time.Second

View File

@ -10,4 +10,7 @@ const (
PostgresConnectRetryTimeout = 2 * time.Minute
PostgresConnectTimeout = 15 * time.Second
ShmVolumeName = "dshm"
ShmVolumePath = "/dev/shm"
)

View File

@ -17,8 +17,10 @@ type Ticker struct {
ticker *time.Ticker
}
// Stop halts the wrapped time.Ticker by delegating to its Stop method.
func (t *Ticker) Stop() {
	t.ticker.Stop()
}
// Tick blocks until a value is received from the wrapped ticker's channel C;
// the received value itself is discarded.
func (t *Ticker) Tick() {
	<-t.ticker.C
}
// Retry is a wrapper around RetryWorker that provides a real RetryTicker

View File

@ -43,6 +43,7 @@ type httpClient interface {
Do(req *http.Request) (*http.Response, error)
}
// Interface to the TeamsAPIClient. It declares the single TeamInfo lookup method.
// NOTE(review): presumably this exists so callers can substitute another implementation — confirm.
type Interface interface {
// TeamInfo takes a team ID and a token and returns a pointer to the Team or an error.
TeamInfo(teamID, token string) (tm *Team, err error)
}

View File

@ -5,9 +5,10 @@ import (
"fmt"
"strings"
"reflect"
"github.com/zalando-incubator/postgres-operator/pkg/spec"
"github.com/zalando-incubator/postgres-operator/pkg/util"
"reflect"
)
const (
@ -19,6 +20,7 @@ const (
doBlockStmt = `SET LOCAL synchronous_commit = 'local'; DO $$ BEGIN %s; END;$$;`
passwordTemplate = "ENCRYPTED PASSWORD '%s'"
inRoleTemplate = `IN ROLE %s`
adminTemplate = `ADMIN %s`
)
// DefaultUserSyncStrategy implements a user sync strategy that merges already existing database users
@ -113,6 +115,9 @@ func (strategy DefaultUserSyncStrategy) createPgUser(user spec.PgUser, db *sql.D
if len(user.MemberOf) > 0 {
userFlags = append(userFlags, fmt.Sprintf(inRoleTemplate, quoteMemberList(user)))
}
if user.AdminRole != "" {
userFlags = append(userFlags, fmt.Sprintf(adminTemplate, user.AdminRole))
}
if user.Password == "" {
userPassword = "PASSWORD NULL"

View File

@ -3,12 +3,14 @@ package util
import (
"crypto/md5" // #nosec we need it to for PostgreSQL md5 passwords
"encoding/hex"
"fmt"
"math/rand"
"regexp"
"strings"
"time"
"github.com/motomux/pretty"
resource "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/zalando-incubator/postgres-operator/pkg/spec"
@ -127,3 +129,19 @@ func Coalesce(val, defaultVal string) string {
}
return val
}
// RequestIsSmallerThanLimit reports whether the quantity given in requestStr is
// strictly smaller than the quantity given in limitStr. Both strings are parsed
// with resource.ParseQuantity; if either one cannot be parsed, false and a
// descriptive error are returned.
func RequestIsSmallerThanLimit(requestStr, limitStr string) (bool, error) {
	request, err := resource.ParseQuantity(requestStr)
	if err != nil {
		return false, fmt.Errorf("could not parse memory request %v : %v", requestStr, err)
	}

	limit, err := resource.ParseQuantity(limitStr)
	if err != nil {
		return false, fmt.Errorf("could not parse memory limit %v : %v", limitStr, err)
	}

	// Cmp yields a negative value only when request < limit
	return request.Cmp(limit) < 0, nil
}

View File

@ -69,6 +69,17 @@ var substringMatch = []struct {
{regexp.MustCompile(`aaaa (\d+) bbbb`), "aaaa 123 bbbb", nil},
}
// requestIsSmallerThanLimitTests lists request/limit pairs together with the
// result expected from RequestIsSmallerThanLimit for each pair.
var requestIsSmallerThanLimitTests = []struct {
	request string
	limit   string
	out     bool
}{
	{request: "1G", limit: "2G", out: true},
	// G is 1000^3 bytes, Gi is 1024^3 bytes
	{request: "1G", limit: "1Gi", out: true},
	{request: "1024Mi", limit: "1G", out: false},
	// 1e9 bytes == 1G
	{request: "1e9", limit: "1G", out: false},
}
func TestRandomPassword(t *testing.T) {
const pwdLength = 10
pwd := RandomPassword(pwdLength)
@ -143,3 +154,15 @@ func TestMapContains(t *testing.T) {
}
}
}
// TestRequestIsSmallerThanLimit runs RequestIsSmallerThanLimit over every entry
// of requestIsSmallerThanLimitTests and compares the result with the expected
// value. Failure messages name the offending request/limit pair.
func TestRequestIsSmallerThanLimit(t *testing.T) {
	for _, tt := range requestIsSmallerThanLimitTests {
		res, err := RequestIsSmallerThanLimit(tt.request, tt.limit)
		if err != nil {
			t.Errorf("RequestIsSmallerThanLimit(%q, %q) returned unexpected error: %#v", tt.request, tt.limit, err)
			// the boolean result carries no meaning when an error is returned,
			// so do not report a second, misleading mismatch for the same case
			continue
		}
		if res != tt.out {
			t.Errorf("RequestIsSmallerThanLimit(%q, %q) expected: %#v, got: %#v", tt.request, tt.limit, tt.out, res)
		}
	}
}

View File

@ -3,6 +3,11 @@
# Deploy a Postgres operator to a minikube aka local Kubernetes cluster
# Optionally re-build the operator binary beforehand to test local changes
# Known limitations:
# 1) minikube provides a single-node k8s cluster. That is, you will not be able to test functions like pod
# migration between multiple nodes locally
# 2) this script configures the operator via configmap, not the operator CRD
# enable unofficial bash strict mode
set -o errexit
@ -13,6 +18,7 @@ IFS=$'\n\t'
readonly PATH_TO_LOCAL_OPERATOR_MANIFEST="/tmp/local-postgres-operator-manifest.yaml"
readonly PATH_TO_PORT_FORWARED_KUBECTL_PID="/tmp/kubectl-port-forward.pid"
readonly PATH_TO_THE_PG_CLUSTER_MANIFEST="/tmp/minimal-postgres-manifest.yaml"
readonly LOCAL_PORT="8080"
readonly OPERATOR_PORT="8080"
@ -37,18 +43,16 @@ function retry(){
return 1
}
function display_help(){
echo "Usage: $0 [ -r | --rebuild-operator ] [ -h | --help ]"
echo "Usage: $0 [ -r | --rebuild-operator ] [ -h | --help ] [ -n | --deploy-new-operator-image ] [ -t | --deploy-pg-to-namespace-test ]"
}
function clean_up(){
echo "==== CLEAN UP PREVIOUS RUN ==== "
local status
status=$(minikube status --format "{{.MinikubeStatus}}" || true)
status=$(minikube status --format "{{.Host}}" || true)
if [[ "$status" = "Running" ]] || [[ "$status" = "Stopped" ]]; then
echo "Delete the existing local cluster so that we can cleanly apply resources from scratch..."
@ -56,7 +60,7 @@ function clean_up(){
fi
if [[ -e "$PATH_TO_LOCAL_OPERATOR_MANIFEST" ]]; then
rm --verbose "$PATH_TO_LOCAL_OPERATOR_MANIFEST"
rm -v "$PATH_TO_LOCAL_OPERATOR_MANIFEST"
fi
# the kubectl process does the port-forwarding between operator and local ports
@ -70,7 +74,7 @@ function clean_up(){
if kill "$pid" > /dev/null 2>&1; then
echo "Kill the kubectl process responsible for port forwarding for minikube so that we can re-use the same ports for forwarding later..."
fi
rm --verbose "$PATH_TO_PORT_FORWARED_KUBECTL_PID"
rm -v "$PATH_TO_PORT_FORWARED_KUBECTL_PID"
fi
}
@ -121,9 +125,9 @@ function deploy_self_built_image() {
# update the tag in the postgres operator conf
# since the image with this tag already exists on the machine,
# docker should not attempt to fetch it from the registry due to imagePullPolicy
sed --expression "s/\(image\:.*\:\).*$/\1$TAG/" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST"
sed -e "s/\(image\:.*\:\).*$/\1$TAG/; s/smoke-tested-//" manifests/postgres-operator.yaml > "$PATH_TO_LOCAL_OPERATOR_MANIFEST"
retry "kubectl create -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource"
retry "kubectl apply -f \"$PATH_TO_LOCAL_OPERATOR_MANIFEST\"" "attempt to create $PATH_TO_LOCAL_OPERATOR_MANIFEST resource"
}
@ -139,17 +143,18 @@ function start_operator(){
retry "kubectl create -f manifests/\"$file\"" "attempt to create $file resource"
done
cp manifests/postgres-operator.yaml $PATH_TO_LOCAL_OPERATOR_MANIFEST
if [[ "$should_build_custom_operator" = true ]]; then # set in main()
deploy_self_built_image
else
retry "kubectl create -f manifests/postgres-operator.yaml" "attempt to create /postgres-operator.yaml resource"
retry "kubectl create -f ${PATH_TO_LOCAL_OPERATOR_MANIFEST}" "attempt to create ${PATH_TO_LOCAL_OPERATOR_MANIFEST} resource"
fi
local -r msg="Wait for the postgresql custom resource definition to register..."
local -r cmd="kubectl get crd | grep --quiet 'postgresqls.acid.zalan.do'"
retry "$cmd" "$msg "
kubectl create -f manifests/minimal-postgres-manifest.yaml
}
@ -186,16 +191,38 @@ function check_health(){
}
# Place a copy of the minimal Postgres manifest at $PATH_TO_THE_PG_CLUSTER_MANIFEST
# and submit it with kubectl. When should_deploy_pg_to_namespace_test (set in main())
# is true, the copy targets the "test" namespace instead of "default".
function submit_postgresql_manifest(){

    echo "==== SUBMIT MINIMAL POSTGRES MANIFEST ==== "

    local namespace="default"

    # compare against the literal instead of executing the variable's value as a command
    if [[ "${should_deploy_pg_to_namespace_test:-false}" = true ]]; then
        namespace="test"

        # the script runs with errexit, so a plain 'create' would abort if the namespace already exists
        if ! kubectl get namespace "$namespace" > /dev/null 2>&1; then
            kubectl create namespace "$namespace"
        fi

        # write the rewritten manifest via redirection: in-place editing flags
        # differ between sed implementations
        sed -e 's/namespace: default/namespace: test/' manifests/minimal-postgres-manifest.yaml > "$PATH_TO_THE_PG_CLUSTER_MANIFEST"
    else
        cp manifests/minimal-postgres-manifest.yaml "$PATH_TO_THE_PG_CLUSTER_MANIFEST"
    fi

    kubectl create -f "$PATH_TO_THE_PG_CLUSTER_MANIFEST"
    echo "The operator will create the PG cluster with minimal manifest $PATH_TO_THE_PG_CLUSTER_MANIFEST in the ${namespace} namespace"

}
function main(){
if ! [[ $(basename "$PWD") == "postgres-operator" ]]; then
echo "Please execute the script only from the root directory of the Postgres opepator repo."
echo "Please execute the script only from the root directory of the Postgres operator repo."
exit 1
fi
trap "echo 'If you observe issues with minikube VM not starting/not proceeding, consider deleting the .minikube dir and/or rebooting before re-running the script'" EXIT
local should_build_custom_operator=false # used in start_operator()
local should_build_custom_operator=false
local should_deploy_pg_to_namespace_test=false
local should_replace_operator_image=false
while true
do
# if the 1st param is unset, use the empty string as a default value
@ -204,19 +231,32 @@ function main(){
display_help
exit 0
;;
-r | --rebuild-operator)
-r | --rebuild-operator) # with minikube restart
should_build_custom_operator=true
break
;;
-n | --deploy-new-operator-image) # without minikube restart that takes minutes
should_replace_operator_image=true
break
;;
-t | --deploy-pg-to-namespace-test) # to test multi-namespace support locally
should_deploy_pg_to_namespace_test=true
break
;;
*) break
;;
esac
done
if ${should_replace_operator_image}; then
deploy_self_built_image
exit 0
fi
clean_up
start_minikube
kubectl create namespace test
start_operator
submit_postgresql_manifest
forward_ports
check_health