diff --git a/.github/workflows/publish_ghcr_image.yaml b/.github/workflows/publish_ghcr_image.yaml new file mode 100644 index 000000000..356e62601 --- /dev/null +++ b/.github/workflows/publish_ghcr_image.yaml @@ -0,0 +1,56 @@ +name: Publish multiarch postgres-operator image on ghcr.io + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +on: + push: + tags: + - '*' +jobs: + publish: + name: Build, test and push image + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - uses: actions/setup-go@v2 + with: + go-version: "^1.18.9" + + - name: Run unit tests + run: make deps mocks test + + - name: Define image name + id: image + run: | + IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${GITHUB_REF/refs\/tags\//}" + echo "NAME=$IMAGE" >> $GITHUB_OUTPUT + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to GHCR + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push multiarch image to ghcr + uses: docker/build-push-action@v3 + with: + context: . 
+ file: docker/Dockerfile + push: true + build-args: BASE_IMAGE=alpine:3.15 + tags: "${{ steps.image.outputs.NAME }}" + platforms: linux/amd64,linux/arm64 diff --git a/.github/workflows/run_e2e.yaml b/.github/workflows/run_e2e.yaml index cdfcf9b2b..64f91dbe7 100644 --- a/.github/workflows/run_e2e.yaml +++ b/.github/workflows/run_e2e.yaml @@ -14,13 +14,11 @@ jobs: - uses: actions/checkout@v1 - uses: actions/setup-go@v2 with: - go-version: "^1.17.4" + go-version: "^1.18.9" - name: Make dependencies run: make deps mocks - name: Code generation run: make codegen - - name: Compile - run: make linux - name: Run unit tests run: make test - name: Run end-2-end tests diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 13810d9d2..98b5dc3d0 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-go@v2 with: - go-version: "^1.17.4" + go-version: "^1.18.9" - name: Make dependencies run: make deps mocks - name: Compile diff --git a/.gitignore b/.gitignore index 1f2395f35..081eb5fba 100644 --- a/.gitignore +++ b/.gitignore @@ -95,6 +95,7 @@ coverage.xml # e2e tests e2e/manifests +e2e/tls # Translations *.mo diff --git a/Makefile b/Makefile index 792c43075..754382b5a 100644 --- a/Makefile +++ b/Makefile @@ -60,17 +60,13 @@ linux: ${SOURCES} macos: ${SOURCES} GOOS=darwin GOARCH=amd64 CGO_ENABLED=${CGO_ENABLED} go build -o build/macos/${BINARY} ${BUILD_FLAGS} -ldflags "$(LDFLAGS)" $^ -docker-context: scm-source.json linux - mkdir -p docker/build/ - cp build/linux/${BINARY} scm-source.json docker/build/ - -docker: ${DOCKERDIR}/${DOCKERFILE} docker-context +docker: ${DOCKERDIR}/${DOCKERFILE} scm-source.json echo `(env)` echo "Tag ${TAG}" echo "Version ${VERSION}" echo "CDP tag ${CDP_TAG}" echo "git describe $(shell git describe --tags --always --dirty)" - cd "${DOCKERDIR}" && docker build --rm -t 
"$(IMAGE):$(TAG)$(CDP_TAG)$(DEBUG_FRESH)$(DEBUG_POSTFIX)" -f "${DOCKERFILE}" . + docker build --rm -t "$(IMAGE):$(TAG)$(CDP_TAG)$(DEBUG_FRESH)$(DEBUG_POSTFIX)" -f "${DOCKERDIR}/${DOCKERFILE}" --build-arg VERSION="${VERSION}" . indocker-race: docker run --rm -v "${GOPATH}":"${GOPATH}" -e GOPATH="${GOPATH}" -e RACE=1 -w ${PWD} golang:1.18.9 bash -c "make linux" diff --git a/README.md b/README.md index 660665ea3..f49e312ef 100644 --- a/README.md +++ b/README.md @@ -53,17 +53,13 @@ pipelines with no access to Kubernetes API directly, promoting infrastructure as The Postgres Operator has been developed at Zalando and is being used in production for over three years. -## Using Spilo 12 images or lower +## Supported Postgres & K8s versions -If you are already using the Postgres operator in older version with a Spilo 12 Docker image you need to be aware of the changes for the backup path. -We introduce the major version into the backup path to smoothen the [major version upgrade](docs/administrator.md#minor-and-major-version-upgrade) that is now supported. - -The new operator configuration can set a compatibility flag *enable_spilo_wal_path_compat* to make Spilo look for wal segments in the current path but also old format paths. -This comes at potential performance costs and should be disabled after a few days. 
- -The newest Spilo image is: `ghcr.io/zalando/spilo-15:2.1-p9` - -The last Spilo 12 image is: `registry.opensource.zalan.do/acid/spilo-12:1.6-p5` +| Release | Postgres versions | K8s versions | Golang | +| :-------- | :---------------: | :---------------: | :-----: | +| v1.9.* | 10 → 15 | 1.25+ | 1.18.9 | +| v1.8.* | 9.5 → 14 | 1.20 → 1.24 | 1.17.4 | +| v1.7.1 | 9.5 → 14 | 1.20 → 1.24 | 1.16.9 | ## Getting started diff --git a/charts/postgres-operator-ui/templates/deployment.yaml b/charts/postgres-operator-ui/templates/deployment.yaml index 23eb750a7..89f013248 100644 --- a/charts/postgres-operator-ui/templates/deployment.yaml +++ b/charts/postgres-operator-ui/templates/deployment.yaml @@ -46,7 +46,7 @@ spec: {{- toYaml .Values.resources | nindent 12 }} env: - name: "APP_URL" - value: "http://localhost:8081" + value: {{ .Values.envs.appUrl | quote }} - name: "OPERATOR_API_URL" value: {{ .Values.envs.operatorApiUrl | quote }} - name: "OPERATOR_CLUSTER_NAME_LABEL" diff --git a/charts/postgres-operator-ui/values.yaml b/charts/postgres-operator-ui/values.yaml index 31b925c73..55d6dee88 100644 --- a/charts/postgres-operator-ui/values.yaml +++ b/charts/postgres-operator-ui/values.yaml @@ -41,6 +41,7 @@ resources: envs: # IMPORTANT: While operator chart and UI chart are independent, this is the interface between # UI and operator API. Insert the service name of the operator API here! 
+ appUrl: "http://localhost:8081" operatorApiUrl: "http://postgres-operator:8080" operatorClusterNameLabel: "cluster-name" resourcesVisible: "False" diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index e01a5f997..d523e3b17 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -637,7 +637,7 @@ spec: default: "pooler" connection_pooler_image: type: string - default: "registry.opensource.zalan.do/acid/pgbouncer:master-26" + default: "registry.opensource.zalan.do/acid/pgbouncer:master-27" connection_pooler_max_db_connections: type: integer default: 60 @@ -670,7 +670,7 @@ spec: patroni: type: object properties: - failsafe_mode: + enable_patroni_failsafe_mode: type: boolean default: false status: diff --git a/charts/postgres-operator/templates/configmap.yaml b/charts/postgres-operator/templates/configmap.yaml index 094652a21..471f1aee4 100644 --- a/charts/postgres-operator/templates/configmap.yaml +++ b/charts/postgres-operator/templates/configmap.yaml @@ -26,4 +26,5 @@ data: {{- include "flattenValuesForConfigMap" .Values.configLoggingRestApi | indent 2 }} {{- include "flattenValuesForConfigMap" .Values.configTeamsApi | indent 2 }} {{- include "flattenValuesForConfigMap" .Values.configConnectionPooler | indent 2 }} +{{- include "flattenValuesForConfigMap" .Values.configPatroni | indent 2 }} {{- end }} diff --git a/charts/postgres-operator/templates/operatorconfiguration.yaml b/charts/postgres-operator/templates/operatorconfiguration.yaml index ef4674d94..6d3b0eb83 100644 --- a/charts/postgres-operator/templates/operatorconfiguration.yaml +++ b/charts/postgres-operator/templates/operatorconfiguration.yaml @@ -40,4 +40,6 @@ configuration: {{ toYaml .Values.configLoggingRestApi | indent 4 }} connection_pooler: {{ toYaml .Values.configConnectionPooler | indent 4 }} + patroni: +{{ toYaml .Values.configPatroni 
| indent 4 }} {{- end }} diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index bca269b0a..f6b6b2491 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -348,7 +348,7 @@ configLogicalBackup: # logical_backup_memory_request: "" # image for pods of the logical backup job (example runs pg_dumpall) - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.8.0" + logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.9.0" # path of google cloud service account json file # logical_backup_google_application_credentials: "" @@ -416,7 +416,7 @@ configConnectionPooler: # db user for pooler to use connection_pooler_user: "pooler" # docker image - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-26" + connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-27" # max db connections the pooler should hold connection_pooler_max_db_connections: 60 # default pooling mode @@ -431,7 +431,7 @@ configConnectionPooler: configPatroni: # enable Patroni DCS failsafe_mode feature - failsafe_mode: false + enable_patroni_failsafe_mode: false # Zalando's internal CDC stream feature enableStreams: false diff --git a/docker/Dockerfile b/docker/Dockerfile index becfcf308..bad0dc71b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,11 +1,24 @@ -FROM registry.opensource.zalan.do/library/alpine-3.15:latest +ARG BASE_IMAGE=registry.opensource.zalan.do/library/alpine-3.15:latest +ARG VERSION=latest + +FROM ubuntu:20.04 as builder + +ARG VERSION + +COPY . 
/go/src/github.com/zalando/postgres-operator +WORKDIR /go/src/github.com/zalando/postgres-operator + +ENV OPERATOR_LDFLAGS="-X=main.version=${VERSION}" +RUN bash docker/build_operator.sh + +FROM ${BASE_IMAGE} LABEL maintainer="Team ACID @ Zalando " # We need root certificates to deal with teams api over https RUN apk --no-cache add curl RUN apk --no-cache add ca-certificates -COPY build/* / +COPY --from=builder /go/src/github.com/zalando/postgres-operator/build/* / RUN addgroup -g 1000 pgo RUN adduser -D -u 1000 -G pgo -g 'Postgres Operator' pgo diff --git a/docker/build_operator.sh b/docker/build_operator.sh new file mode 100644 index 000000000..9f812da04 --- /dev/null +++ b/docker/build_operator.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +export DEBIAN_FRONTEND=noninteractive + +arch=$(dpkg --print-architecture) + +set -ex + +# Install dependencies + +apt-get update +apt-get install -y wget + +( + cd /tmp + wget -q "https://storage.googleapis.com/golang/go1.18.9.linux-${arch}.tar.gz" -O go.tar.gz + tar -xf go.tar.gz + mv go /usr/local + ln -s /usr/local/go/bin/go /usr/bin/go + go version +) + +# Build + +export PATH="$PATH:$HOME/go/bin" +export GOPATH="$HOME/go" +mkdir -p build +cp scm-source.json build/ + +GO111MODULE=on go mod vendor +CGO_ENABLED=0 go build -o build/postgres-operator -v -ldflags "$OPERATOR_LDFLAGS" cmd/main.go diff --git a/docker/logical-backup/dump.sh b/docker/logical-backup/dump.sh index 178577ced..3d2f60911 100755 --- a/docker/logical-backup/dump.sh +++ b/docker/logical-backup/dump.sh @@ -12,14 +12,18 @@ DUMP_SIZE_COEFF=5 ERRORCOUNT=0 TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) +KUBERNETES_SERVICE_PORT=${KUBERNETES_SERVICE_PORT:-443} if [ "$KUBERNETES_SERVICE_HOST" != "${KUBERNETES_SERVICE_HOST#*[0-9].[0-9]}" ]; then - echo "IPv4" - K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1 + echo "IPv4" + K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1 elif [ 
"$KUBERNETES_SERVICE_HOST" != "${KUBERNETES_SERVICE_HOST#*:[0-9a-fA-F]}" ]; then - echo "IPv6" - K8S_API_URL=https://[$KUBERNETES_SERVICE_HOST]:$KUBERNETES_SERVICE_PORT/api/v1 + echo "IPv6" + K8S_API_URL=https://[$KUBERNETES_SERVICE_HOST]:$KUBERNETES_SERVICE_PORT/api/v1 +elif [ -n "$KUBERNETES_SERVICE_HOST" ]; then + echo "Hostname" + K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1 else - echo "Unrecognized IP format '$KUBERNETES_SERVICE_HOST'" + echo "KUBERNETES_SERVICE_HOST was not set" fi echo "API Endpoint: ${K8S_API_URL}" CERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt @@ -59,42 +63,42 @@ function aws_delete_objects { export -f aws_delete_objects function aws_delete_outdated { - if [[ -z "$LOGICAL_BACKUP_S3_RETENTION_TIME" ]] ; then - echo "no retention time configured: skip cleanup of outdated backups" - return 0 - fi - - # define cutoff date for outdated backups (day precision) - cutoff_date=$(date -d "$LOGICAL_BACKUP_S3_RETENTION_TIME ago" +%F) - - # mimic bucket setup from Spilo - prefix="spilo/"$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX"/logical_backups/" - - args=( - "--no-paginate" - "--output=text" - "--prefix=$prefix" - "--bucket=$LOGICAL_BACKUP_S3_BUCKET" - ) - - [[ ! -z "$LOGICAL_BACKUP_S3_ENDPOINT" ]] && args+=("--endpoint-url=$LOGICAL_BACKUP_S3_ENDPOINT") - [[ ! 
-z "$LOGICAL_BACKUP_S3_REGION" ]] && args+=("--region=$LOGICAL_BACKUP_S3_REGION") - - # list objects older than the cutoff date - aws s3api list-objects "${args[@]}" --query="Contents[?LastModified<='$cutoff_date'].[Key]" > /tmp/outdated-backups - - # spare the last backup - sed -i '$d' /tmp/outdated-backups - - count=$(wc -l < /tmp/outdated-backups) - if [[ $count == 0 ]] ; then - echo "no outdated backups to delete" + if [[ -z "$LOGICAL_BACKUP_S3_RETENTION_TIME" ]] ; then + echo "no retention time configured: skip cleanup of outdated backups" return 0 - fi - echo "deleting $count outdated backups created before $cutoff_date" + fi - # deleted outdated files in batches with 100 at a time - tr '\n' '\0' < /tmp/outdated-backups | xargs -0 -P1 -n100 bash -c 'aws_delete_objects "$@"' _ + # define cutoff date for outdated backups (day precision) + cutoff_date=$(date -d "$LOGICAL_BACKUP_S3_RETENTION_TIME ago" +%F) + + # mimic bucket setup from Spilo + prefix="spilo/"$SCOPE$LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX"/logical_backups/" + + args=( + "--no-paginate" + "--output=text" + "--prefix=$prefix" + "--bucket=$LOGICAL_BACKUP_S3_BUCKET" + ) + + [[ ! -z "$LOGICAL_BACKUP_S3_ENDPOINT" ]] && args+=("--endpoint-url=$LOGICAL_BACKUP_S3_ENDPOINT") + [[ ! 
-z "$LOGICAL_BACKUP_S3_REGION" ]] && args+=("--region=$LOGICAL_BACKUP_S3_REGION") + + # list objects older than the cutoff date + aws s3api list-objects "${args[@]}" --query="Contents[?LastModified<='$cutoff_date'].[Key]" > /tmp/outdated-backups + + # spare the last backup + sed -i '$d' /tmp/outdated-backups + + count=$(wc -l < /tmp/outdated-backups) + if [[ $count == 0 ]] ; then + echo "no outdated backups to delete" + return 0 + fi + echo "deleting $count outdated backups created before $cutoff_date" + + # deleted outdated files in batches with 100 at a time + tr '\n' '\0' < /tmp/outdated-backups | xargs -0 -P1 -n100 bash -c 'aws_delete_objects "$@"' _ } function aws_upload { @@ -137,14 +141,14 @@ function get_pods { declare -r SELECTOR="$1" curl "${K8S_API_URL}/namespaces/${POD_NAMESPACE}/pods?$SELECTOR" \ - --cacert $CERT \ - -H "Authorization: Bearer ${TOKEN}" | jq .items[].status.podIP -r + --cacert $CERT \ + -H "Authorization: Bearer ${TOKEN}" | jq .items[].status.podIP -r } function get_current_pod { curl "${K8S_API_URL}/namespaces/${POD_NAMESPACE}/pods?fieldSelector=metadata.name%3D${HOSTNAME}" \ - --cacert $CERT \ - -H "Authorization: Bearer ${TOKEN}" + --cacert $CERT \ + -H "Authorization: Bearer ${TOKEN}" } declare -a search_strategy=( diff --git a/docs/administrator.md b/docs/administrator.md index 57c2c4574..c44d08f90 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -993,7 +993,81 @@ with `USE_WALG_BACKUP: "true"`. ### Google Cloud Platform setup -To configure the operator on GCP these prerequisites that are needed: +When using GCP, there are two authentication methods to allow the postgres +cluster to access buckets to write WAL-E logs: Workload Identity (recommended) +or using a GCP Service Account Key (legacy). + +#### Workload Identity setup + +To configure the operator on GCP using Workload Identity these prerequisites are +needed. 
+ +* [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) enabled on the GKE cluster where the operator will be deployed +* A GCP service account with the proper IAM setup to access the GCS bucket for the WAL-E logs +* An IAM policy granting the Kubernetes service account the + `roles/iam.workloadIdentityUser` role on the GCP service account, e.g.: +```bash +gcloud iam service-accounts add-iam-policy-binding GSA_NAME@PROJECT_ID.iam.gserviceaccount.com \ + --role roles/iam.workloadIdentityUser \ + --member "serviceAccount:PROJECT_ID.svc.id.goog[NAMESPACE/postgres-pod-custom]" +``` + +The configuration parameters that we will be using are: + +* `wal_gs_bucket` + +1. Create a custom Kubernetes service account to be used by Patroni running on +the postgres cluster pods. This service account should include an annotation +with the email address of the Google IAM service account used to communicate +with the GCS bucket, e.g. + +```yml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: postgres-pod-custom + namespace: NAMESPACE + annotations: + iam.gke.io/gcp-service-account: GSA_NAME@PROJECT_ID.iam.gserviceaccount.com +``` + +2. Specify the new custom service account in your [operator parameters](./reference/operator_parameters.md) + +If using manual deployment or kustomize, this is done by setting +`pod_service_account_name` in your configuration file specified in the +[postgres-operator deployment](../manifests/postgres-operator.yaml#L37) + +If deploying the operator [using Helm](./quickstart.md#helm-chart), this can +be specified in the chart's values file, e.g.: + +```yml +... 
+aws_or_gcp: + # additional_secret_mount: "" + # additional_secret_mount_path: "" + # aws_region: eu-central-1 + # kube_iam_role: "" + # log_s3_bucket: "" + # wal_s3_bucket: "" + wal_gs_bucket: "postgres-backups-bucket-28302F2" # name of bucket on where to save the WAL-E logs + # gcp_credentials: "" +... +``` + +Continue to shared steps below. + +#### GCP Service Account Key setup + +To configure the operator on GCP using a GCP service account key these +prerequisites are needed. * A service account with the proper IAM setup to access the GCS bucket for the WAL-E logs * The credentials file for the service account. @@ -1037,7 +1111,10 @@ aws_or_gcp: ... ``` -3. Setup pod environment configmap that instructs the operator to use WAL-G, +Once you have set up authentication using one of the two methods above, continue +with the remaining shared steps: + +1. Setup pod environment configmap that instructs the operator to use WAL-G, instead of WAL-E, for backup and restore. ```yml apiVersion: v1 @@ -1052,7 +1129,7 @@ data: CLONE_USE_WALG_RESTORE: "true" ``` -4. Then provide this configmap in postgres-operator settings: +2. Then provide this configmap in postgres-operator settings: ```yml ... 
# namespaced name of the ConfigMap with environment variables to populate on every pod diff --git a/docs/quickstart.md b/docs/quickstart.md index 00ba2f3aa..f080bd567 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -216,7 +216,7 @@ Non-encrypted connections are rejected by default, so set the SSL mode to require: ```bash -export PGPASSWORD=$(kubectl get secret postgres.acid-minimal-cluster.credentials -o 'jsonpath={.data.password}' | base64 -d) +export PGPASSWORD=$(kubectl get secret postgres.acid-minimal-cluster.credentials.postgresql.acid.zalan.do -o 'jsonpath={.data.password}' | base64 -d) export PGSSLMODE=require psql -U postgres ``` diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 8cca890c8..8caf55b94 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -118,7 +118,7 @@ These parameters are grouped directly under the `spec` key in the manifest. a map of usernames to user flags for the users that should be created in the cluster by the operator. User flags are a list, allowed elements are `SUPERUSER`, `REPLICATION`, `INHERIT`, `LOGIN`, `NOLOGIN`, `CREATEROLE`, - `CREATEDB`, `BYPASSURL`. A login user is created by default unless NOLOGIN is + `CREATEDB`, `BYPASSRLS`. A login user is created by default unless NOLOGIN is specified, in which case the operator creates a role. One can specify empty flags by providing a JSON empty array '*[]*'. If the config option `enable_cross_namespace_secret` is enabled you can specify the namespace in @@ -334,7 +334,12 @@ explanation of `ttl` and `loop_wait` parameters. Patroni `synchronous_node_count` parameter value. Note, this option is only available for Spilo images with Patroni 2.0+. The default is set to `1`. Optional. * **failsafe_mode** - Patroni `failsafe_mode` parameter value. If enabled, allows Patroni to cope with DCS outages and avoid leader demotion. Note, this option is currently not included in any Patroni release. 
The default is set to `false`. Optional. + Patroni `failsafe_mode` parameter value. If enabled, Patroni will cope + with DCS outages by avoiding leader demotion. See the Patroni documentation + [here](https://patroni.readthedocs.io/en/master/dcs_failsafe_mode.html) for more details. + This feature is available since Patroni 3.0.0, so check that the container + image in use ships a Patroni version that includes it. The + default is set to `false`. Optional. ## Postgres container resources @@ -541,7 +546,9 @@ for both master and replica pooler services (if `enableReplicaConnectionPooler` ## Custom TLS certificates -Those parameters are grouped under the `tls` top-level key. +Those parameters are grouped under the `tls` top-level key. Note, you have to +define `spiloFSGroup` in the Postgres cluster manifest or `spilo_fsgroup` in +the global configuration before adding the `tls` section. * **secretName** By setting the `secretName` value, the cluster will switch to load the given diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 198870d77..29d66aee9 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -549,6 +549,19 @@ CRD-based configuration. hard memory minimum what we consider to be required to properly run Postgres clusters with Patroni on Kubernetes. The default is `250Mi`. +## Patroni options + +Parameters configuring Patroni. In the CRD-based configuration they are grouped +under the `patroni` key. + +* **enable_patroni_failsafe_mode** + If enabled, Patroni copes with DCS outages by avoiding leader demotion. + See the Patroni documentation [here](https://patroni.readthedocs.io/en/master/dcs_failsafe_mode.html) for more details. + This feature is available since Patroni 3.0.0, so check that the container image + in use ships a Patroni version that includes it. 
It can also be + enabled per cluster with the `failsafe_mode` flag under the `patroni` section + in the manifest. The default for the global config option is set to `false`. + ## Operator timeouts This set of parameters define various timeouts related to some operator diff --git a/docs/user.md b/docs/user.md index fa82e3344..8506b0acd 100644 --- a/docs/user.md +++ b/docs/user.md @@ -1197,14 +1197,19 @@ don't know the value, use `103` which is the GID from the default Spilo image OpenShift allocates the users and groups dynamically (based on scc), and their range is different in every namespace. Due to this dynamic behaviour, it's not trivial to know at deploy time the uid/gid of the user in the cluster. -Therefore, instead of using a global `spilo_fsgroup` setting, use the -`spiloFSGroup` field per Postgres cluster. +Therefore, instead of using the global `spilo_fsgroup` setting in the operator +configuration, use the `spiloFSGroup` field in each Postgres cluster manifest. + +For testing purposes, you can generate a self-signed certificate with openssl: +```sh +openssl req -x509 -nodes -newkey rsa:2048 -keyout tls.key -out tls.crt -subj "/CN=acid.zalan.do" +``` Upload the cert as a kubernetes secret: ```sh kubectl create secret tls pg-tls \ - --key pg-tls.key \ - --cert pg-tls.crt + --key tls.key \ + --cert tls.crt ``` When doing client auth, CA can come optionally from the same secret: @@ -1231,8 +1236,7 @@ spec: Optionally, the CA can be provided by a different secret: ```sh -kubectl create secret generic pg-tls-ca \ - --from-file=ca.crt=ca.crt +kubectl create secret generic pg-tls-ca --from-file=ca.crt=ca.crt ``` Then configure the postgres resource with the TLS secret: @@ -1255,3 +1259,16 @@ Alternatively, it is also possible to use Certificate rotation is handled in the Spilo image which checks every 5 minutes if the certificates have changed and reloads postgres accordingly. 
+ +### TLS certificates for connection pooler + +By default, the pgBouncer image generates its own TLS certificate like Spilo. +When the `tls` section is specified in the manifest it will be used for the +connection pooler pod(s) as well. The security context options are hard-coded +to `runAsUser: 100` and `runAsGroup: 101`. The `fsGroup` will be the same +as for Spilo. + +As of now, the operator does not sync the pooler deployment automatically +which means that changes in the pod template are not caught. You need to +toggle `enableConnectionPooler` to set environment variables, volumes, secret +mounts and securityContext required for TLS support in the pooler pod. diff --git a/e2e/Makefile b/e2e/Makefile index 9b1b5ea11..017f5d345 100644 --- a/e2e/Makefile +++ b/e2e/Makefile @@ -29,10 +29,12 @@ default: tools clean: rm -rf manifests + rm -rf tls copy: clean mkdir manifests cp -r ../manifests . + mkdir tls docker: scm-source.json docker build -t "$(IMAGE):$(TAG)" . diff --git a/e2e/run.sh b/e2e/run.sh index 12581a26a..ecef56d08 100755 --- a/e2e/run.sh +++ b/e2e/run.sh @@ -55,6 +55,10 @@ function set_kind_api_server_ip(){ sed -i "s/server.*$/server: https:\/\/$kind_api_server/g" "${kubeconfig_path}" } +function generate_certificate(){ + openssl req -x509 -nodes -newkey rsa:2048 -keyout tls/tls.key -out tls/tls.crt -subj "/CN=acid.zalan.do" +} + function run_tests(){ echo "Running tests... 
image: ${e2e_test_runner_image}" # tests modify files in ./manifests, so we mount a copy of this directory done by the e2e Makefile @@ -62,6 +66,7 @@ function run_tests(){ docker run --rm --network=host -e "TERM=xterm-256color" \ --mount type=bind,source="$(readlink -f ${kubeconfig_path})",target=/root/.kube/config \ --mount type=bind,source="$(readlink -f manifests)",target=/manifests \ + --mount type=bind,source="$(readlink -f tls)",target=/tls \ --mount type=bind,source="$(readlink -f tests)",target=/tests \ --mount type=bind,source="$(readlink -f exec.sh)",target=/exec.sh \ --mount type=bind,source="$(readlink -f scripts)",target=/scripts \ @@ -82,6 +87,7 @@ function main(){ [[ ! -f ${kubeconfig_path} ]] && start_kind load_operator_image set_kind_api_server_ip + generate_certificate shift run_tests $@ diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py index 82fed4c0b..3d687f49a 100644 --- a/e2e/tests/k8s_api.py +++ b/e2e/tests/k8s_api.py @@ -156,6 +156,26 @@ class K8s: while not get_services(): time.sleep(self.RETRY_TIMEOUT_SEC) + def count_pods_with_volume_mount(self, mount_name, labels, namespace='default'): + pod_count = 0 + pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items + for pod in pods: + for mount in pod.spec.containers[0].volume_mounts or []: + if mount.name == mount_name: + pod_count += 1 + + return pod_count + + def count_pods_with_env_variable(self, env_variable_key, labels, namespace='default'): + pod_count = 0 + pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items + for pod in pods: + for env in pod.spec.containers[0].env or []: + if env.name == env_variable_key: + pod_count += 1 + + return pod_count + def count_pods_with_rolling_update_flag(self, labels, namespace='default'): pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items return len(list(filter(lambda x: "zalando-postgres-operator-rolling-update-required" in x.metadata.annotations, pods))) @@ -241,6 
+261,18 @@ class K8s: def patch_pod(self, data, pod_name, namespace="default"): self.api.core_v1.patch_namespaced_pod(pod_name, namespace, data) + def create_tls_secret_with_kubectl(self, secret_name): + return subprocess.run( + ["kubectl", "create", "secret", "tls", secret_name, "--key=tls/tls.key", "--cert=tls/tls.crt"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + def create_tls_ca_secret_with_kubectl(self, secret_name): + return subprocess.run( + ["kubectl", "create", "secret", "generic", secret_name, "--from-file=ca.crt=tls/ca.crt"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + def create_with_kubectl(self, path): return subprocess.run( ["kubectl", "apply", "-f", path], diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index d28cd6241..ed04fab61 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -622,6 +622,49 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), 1, "Secret not created for user in namespace") + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_custom_ssl_certificate(self): + ''' + Test if spilo uses a custom SSL certificate + ''' + + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + tls_secret = "pg-tls" + + # get nodes of master and replica(s) (expected target of new master) + _, replica_nodes = k8s.get_pg_nodes(cluster_label) + self.assertNotEqual(replica_nodes, []) + + try: + # create secret containing ssl certificate + result = self.k8s.create_tls_secret_with_kubectl(tls_secret) + print("stdout: {}, stderr: {}".format(result.stdout, result.stderr)) + + # enable custom TLS: set spiloFSGroup and reference the TLS secret + pg_patch_tls = { + "spec": { + "spiloFSGroup": 103, + "tls": { + "secretName": tls_secret + } + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", 
pg_patch_tls) + + # wait for switched over + k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + + self.eventuallyEqual(lambda: k8s.count_pods_with_env_variable("SSL_CERTIFICATE_FILE", cluster_label), 2, "TLS env variable SSL_CERTIFICATE_FILE missing in Spilo pods") + self.eventuallyEqual(lambda: k8s.count_pods_with_env_variable("SSL_PRIVATE_KEY_FILE", cluster_label), 2, "TLS env variable SSL_PRIVATE_KEY_FILE missing in Spilo pods") + self.eventuallyEqual(lambda: k8s.count_pods_with_volume_mount(tls_secret, cluster_label), 2, "TLS volume mount missing in Spilo pods") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_enable_disable_connection_pooler(self): ''' @@ -653,6 +696,11 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), 2, "No pooler service found") self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), 1, "Pooler secret not created") + # TLS still enabled so check existing env variables and volume mounts + self.eventuallyEqual(lambda: k8s.count_pods_with_env_variable("CONNECTION_POOLER_CLIENT_TLS_CRT", pooler_label), 4, "TLS env variable CONNECTION_POOLER_CLIENT_TLS_CRT missing in pooler pods") + self.eventuallyEqual(lambda: k8s.count_pods_with_env_variable("CONNECTION_POOLER_CLIENT_TLS_KEY", pooler_label), 4, "TLS env variable CONNECTION_POOLER_CLIENT_TLS_KEY missing in pooler pods") + self.eventuallyEqual(lambda: k8s.count_pods_with_volume_mount("pg-tls", pooler_label), 4, "TLS volume mount missing in pooler pods") + k8s.api.custom_objects_api.patch_namespaced_custom_object( 'acid.zalan.do', 'v1', 'default', 'postgresqls', 'acid-minimal-cluster', diff --git a/go.mod b/go.mod index 39c76a9c5..38cf2dabb 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( 
github.com/PuerkitoBio/purell v1.1.1 // indirect github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/emicklei/go-restful v2.9.5+incompatible // indirect + github.com/emicklei/go-restful v2.16.0+incompatible // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect github.com/go-logr/logr v1.2.0 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect @@ -49,11 +49,11 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/spf13/pflag v1.0.5 // indirect golang.org/x/mod v0.6.0 // indirect - golang.org/x/net v0.1.0 // indirect + golang.org/x/net v0.7.0 // indirect golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f // indirect - golang.org/x/sys v0.1.0 // indirect - golang.org/x/term v0.1.0 // indirect - golang.org/x/text v0.4.0 // indirect + golang.org/x/sys v0.5.0 // indirect + golang.org/x/term v0.5.0 // indirect + golang.org/x/text v0.7.0 // indirect golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect golang.org/x/tools v0.2.0 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/go.sum b/go.sum index 385d31cdb..f925cc505 100644 --- a/go.sum +++ b/go.sum @@ -113,8 +113,9 @@ github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25Kn github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153 h1:yUdfgN0XgIJw7foRItutHYUIhlcKzcSf5vDpdhQAKTc= github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful v2.9.5+incompatible h1:spTtZBk5DYEvbxMVutUuTyh1Ao2r4iyvLdACqsl/Ljk= github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/emicklei/go-restful v2.16.0+incompatible h1:rgqiKNjTnFQA6kkhFe16D8epTksy9HQ1MyrbDXSdYhM= 
+github.com/emicklei/go-restful v2.16.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -590,8 +591,8 @@ golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= -golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -680,12 +681,12 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= 
-golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw= -golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -695,8 +696,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= -golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/kubectl-pg/README.md b/kubectl-pg/README.md index 78102cf9c..8213d4ff5 100644 --- a/kubectl-pg/README.md +++ b/kubectl-pg/README.md @@ -60,7 +60,7 @@ Use `--namespace` or `-n` flag if your cluster is in a different namespace to wh ```kubectl pg add-user USER01 -p CREATEDB,LOGIN -c acid-minimal-cluster``` -Privileges can only be [SUPERUSER, REPLICATION, INHERIT, LOGIN, NOLOGIN, CREATEROLE, CREATEDB, BYPASSURL] +Privileges can only be [SUPERUSER, REPLICATION, INHERIT, LOGIN, NOLOGIN, CREATEROLE, CREATEDB, BYPASSRLS] Note: By default, a LOGIN user is created (unless NOLOGIN is specified). ## Adding databases to an existing cluster diff --git a/kubectl-pg/cmd/addUser.go b/kubectl-pg/cmd/addUser.go index 288af0836..602adb51d 100644 --- a/kubectl-pg/cmd/addUser.go +++ b/kubectl-pg/cmd/addUser.go @@ -35,7 +35,7 @@ import ( "k8s.io/apimachinery/pkg/types" ) -var allowedPrivileges = []string{"SUPERUSER", "REPLICATION", "INHERIT", "LOGIN", "NOLOGIN", "CREATEROLE", "CREATEDB", "BYPASSURL"} +var allowedPrivileges = []string{"SUPERUSER", "REPLICATION", "INHERIT", "LOGIN", "NOLOGIN", "CREATEROLE", "CREATEDB", "BYPASSRLS"} // addUserCmd represents the addUser command var addUserCmd = &cobra.Command{ diff --git a/kubectl-pg/go.mod b/kubectl-pg/go.mod index 711f1b90e..c09b4ef1c 100644 --- a/kubectl-pg/go.mod +++ b/kubectl-pg/go.mod @@ -5,7 +5,7 @@ go 1.18 require ( github.com/spf13/cobra v1.2.1 github.com/spf13/viper v1.9.0 - github.com/zalando/postgres-operator v1.8.2 + github.com/zalando/postgres-operator v1.9.0 k8s.io/api v0.23.5 k8s.io/apiextensions-apiserver v0.23.5 k8s.io/apimachinery v0.23.5 @@ -18,7 +18,7 @@ require ( github.com/go-logr/logr v1.2.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.2 // indirect - github.com/google/go-cmp v0.5.6 // indirect + github.com/google/go-cmp v0.5.8 // indirect github.com/google/gofuzz v1.1.0 // indirect github.com/googleapis/gnostic 
v0.5.5 // indirect github.com/hashicorp/hcl v1.0.0 // indirect @@ -39,12 +39,12 @@ require ( github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.2.0 // indirect - golang.org/x/crypto v0.0.0-20211202192323-5770296d904e // indirect - golang.org/x/net v0.0.0-20211209124913-491a49abca63 // indirect + golang.org/x/crypto v0.1.0 // indirect + golang.org/x/net v0.7.0 // indirect golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f // indirect - golang.org/x/sys v0.0.0-20211124211545-fe61309f8881 // indirect - golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b // indirect - golang.org/x/text v0.3.7 // indirect + golang.org/x/sys v0.5.0 // indirect + golang.org/x/term v0.5.0 // indirect + golang.org/x/text v0.7.0 // indirect golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.27.1 // indirect diff --git a/kubectl-pg/go.sum b/kubectl-pg/go.sum index 9853eabdf..0cc4a395f 100644 --- a/kubectl-pg/go.sum +++ b/kubectl-pg/go.sum @@ -216,8 +216,9 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= github.com/google/gofuzz 
v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -480,8 +481,8 @@ github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/zalando/postgres-operator v1.8.2 h1:3FW3j2gXua1MSeE+NiSvB8cxM7k7fyoun46G1v++CCA= -github.com/zalando/postgres-operator v1.8.2/go.mod h1:f7AXk8LO/tWFdW4myPJZCwMueGg6fI4RqTuOA0BefZE= +github.com/zalando/postgres-operator v1.9.0 h1:8Ab/zSXkpzBT+G6EQvQGC30DXCV7aDX27bfSTf99W7c= +github.com/zalando/postgres-operator v1.9.0/go.mod h1:9AUgDFbuNeJXxkehPLvUSIqNg+Yn99M7bLvF2C36gzQ= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= @@ -529,8 +530,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20211202192323-5770296d904e h1:MUP6MR3rJ7Gk9LEia0LP2ytiH6MuCfs7qYz+47jGdD8= -golang.org/x/crypto v0.0.0-20211202192323-5770296d904e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= +golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp 
v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -614,8 +615,9 @@ golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20211209124913-491a49abca63 h1:iocB37TsdFuN6IBRZ+ry36wrkoV51/tl5vOWqkcPGvY= golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -717,11 +719,12 @@ golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211124211545-fe61309f8881 h1:TyHqChC80pFkXWraUUf6RuB5IqFdQieMLwwCJokV2pc= -golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 
h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b h1:9zKuko04nR4gjZ4+DNjHqRlAJqbJETHwiNKDqTfOjfE= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -730,8 +733,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index e2fb21504..73f2f00ed 100644 --- 
a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -17,7 +17,7 @@ data: # connection_pooler_default_cpu_request: "500m" # connection_pooler_default_memory_limit: 100Mi # connection_pooler_default_memory_request: 100Mi - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-26" + connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-27" # connection_pooler_max_db_connections: 60 # connection_pooler_mode: "transaction" # connection_pooler_number_of_instances: 2 @@ -47,7 +47,7 @@ data: enable_master_load_balancer: "false" enable_master_pooler_load_balancer: "false" enable_password_rotation: "false" - # enable_patroni_failsafe_mode: "false" + enable_patroni_failsafe_mode: "false" enable_pgversion_env_var: "true" # enable_pod_antiaffinity: "false" # enable_pod_disruption_budget: "true" diff --git a/manifests/minimal-fake-pooler-deployment.yaml b/manifests/minimal-fake-pooler-deployment.yaml index b05f4f4ca..53332bad2 100644 --- a/manifests/minimal-fake-pooler-deployment.yaml +++ b/manifests/minimal-fake-pooler-deployment.yaml @@ -23,7 +23,7 @@ spec: serviceAccountName: postgres-operator containers: - name: postgres-operator - image: registry.opensource.zalan.do/acid/pgbouncer:master-26 + image: registry.opensource.zalan.do/acid/pgbouncer:master-27 imagePullPolicy: IfNotPresent resources: requests: diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 8582c866a..f5778a199 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -635,7 +635,7 @@ spec: default: "pooler" connection_pooler_image: type: string - default: "registry.opensource.zalan.do/acid/pgbouncer:master-26" + default: "registry.opensource.zalan.do/acid/pgbouncer:master-27" connection_pooler_max_db_connections: type: integer default: 60 @@ -668,7 +668,7 @@ spec: patroni: type: object properties: - failsafe_mode: + enable_patroni_failsafe_mode: type: boolean 
default: false status: diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 2e475910c..82e56516d 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -203,11 +203,11 @@ configuration: connection_pooler_default_cpu_request: "500m" connection_pooler_default_memory_limit: 100Mi connection_pooler_default_memory_request: 100Mi - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-26" + connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-27" # connection_pooler_max_db_connections: 60 connection_pooler_mode: "transaction" connection_pooler_number_of_instances: 2 # connection_pooler_schema: "pooler" # connection_pooler_user: "pooler" - # patroni: - # failsafe_mode: "false" + patroni: + enable_patroni_failsafe_mode: false diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index b82aa30b6..558a03f0f 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -1483,7 +1483,7 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ "patroni": { Type: "object", Properties: map[string]apiextv1.JSONSchemaProps{ - "failsafe_mode": { + "enable_patroni_failsafe_mode": { Type: "boolean", }, }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 4ff5ee81e..d966aa1aa 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -240,7 +240,7 @@ type OperatorLogicalBackupConfiguration struct { // PatroniConfiguration defines configuration for Patroni type PatroniConfiguration struct { - FailsafeMode *bool `json:"failsafe_mode,omitempty"` + FailsafeMode *bool `json:"enable_patroni_failsafe_mode,omitempty"` } // OperatorConfigurationData 
defines the operation config diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 590fe6564..29c321efb 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -241,12 +241,11 @@ func (c *Cluster) initUsers() error { } // Create creates the new kubernetes objects associated with the cluster. -func (c *Cluster) Create() error { +func (c *Cluster) Create() (err error) { c.mu.Lock() defer c.mu.Unlock() - var ( - err error + var ( service *v1.Service ep *v1.Endpoints ss *appsv1.StatefulSet @@ -370,16 +369,22 @@ func (c *Cluster) Create() error { // something fails, report warning c.createConnectionPooler(c.installLookupFunction) + // remember slots to detect deletion from manifest + for slotName, desiredSlot := range c.Spec.Patroni.Slots { + c.replicationSlots[slotName] = desiredSlot + } + if len(c.Spec.Streams) > 0 { + // creating streams requires syncing the statefulset first + err = c.syncStatefulSet() + if err != nil { + return fmt.Errorf("could not sync statefulset: %v", err) + } if err = c.syncStreams(); err != nil { c.logger.Errorf("could not create streams: %v", err) } } - for slotName, desiredSlot := range c.Spec.Patroni.Slots { - c.replicationSlots[slotName] = desiredSlot - } - return nil } diff --git a/pkg/cluster/connection_pooler.go b/pkg/cluster/connection_pooler.go index 761644e13..d66a39b26 100644 --- a/pkg/cluster/connection_pooler.go +++ b/pkg/cluster/connection_pooler.go @@ -3,7 +3,6 @@ package cluster import ( "context" "fmt" - "path/filepath" "strings" "time" @@ -25,6 +24,9 @@ import ( "github.com/zalando/postgres-operator/pkg/util/retryutil" ) +var poolerRunAsUser = int64(100) +var poolerRunAsGroup = int64(101) + // ConnectionPoolerObjects K8s objects that are belong to connection pooler type ConnectionPoolerObjects struct { Deployment *appsv1.Deployment @@ -261,6 +263,10 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) ( makeDefaultConnectionPoolerResources(&c.OpConfig), 
connectionPoolerContainer) + if err != nil { + return nil, fmt.Errorf("could not generate resource requirements: %v", err) + } + effectiveDockerImage := util.Coalesce( connectionPoolerSpec.DockerImage, c.OpConfig.ConnectionPooler.Image) @@ -269,10 +275,6 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) ( connectionPoolerSpec.Schema, c.OpConfig.ConnectionPooler.Schema) - if err != nil { - return nil, fmt.Errorf("could not generate resource requirements: %v", err) - } - secretSelector := func(key string) *v1.SecretKeySelector { effectiveUser := util.Coalesce( connectionPoolerSpec.User, @@ -344,49 +346,53 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) ( // 2. Reference the secret in a volume // 3. Mount the volume to the container at /tls var poolerVolumes []v1.Volume + var volumeMounts []v1.VolumeMount if spec.TLS != nil && spec.TLS.SecretName != "" { - // Env vars - crtFile := spec.TLS.CertificateFile - keyFile := spec.TLS.PrivateKeyFile - if crtFile == "" { - crtFile = "tls.crt" - } - if keyFile == "" { - crtFile = "tls.key" - } + getPoolerTLSEnv := func(k string) string { + keyName := "" + switch k { + case "tls.crt": + keyName = "CONNECTION_POOLER_CLIENT_TLS_CRT" + case "tls.key": + keyName = "CONNECTION_POOLER_CLIENT_TLS_KEY" + case "tls.ca": + keyName = "CONNECTION_POOLER_CLIENT_CA_FILE" + default: + panic(fmt.Sprintf("TLS env key for pooler unknown %s", k)) + } - envVars = append( - envVars, - v1.EnvVar{ - Name: "CONNECTION_POOLER_CLIENT_TLS_CRT", Value: filepath.Join("/tls", crtFile), - }, - v1.EnvVar{ - Name: "CONNECTION_POOLER_CLIENT_TLS_KEY", Value: filepath.Join("/tls", keyFile), - }, - ) - - // Volume - mode := int32(0640) - volume := v1.Volume{ - Name: "tls", - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: spec.TLS.SecretName, - DefaultMode: &mode, - }, - }, + return keyName + } + tlsEnv, tlsVolumes := generateTlsMounts(spec, getPoolerTLSEnv) + envVars = 
append(envVars, tlsEnv...) + for _, vol := range tlsVolumes { + poolerVolumes = append(poolerVolumes, v1.Volume{ + Name: vol.Name, + VolumeSource: vol.VolumeSource, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: vol.Name, + MountPath: vol.MountPath, + }) } - poolerVolumes = append(poolerVolumes, volume) - - // Mount - poolerContainer.VolumeMounts = []v1.VolumeMount{{ - Name: "tls", - MountPath: "/tls", - }} } poolerContainer.Env = envVars + poolerContainer.VolumeMounts = volumeMounts tolerationsSpec := tolerations(&spec.Tolerations, c.OpConfig.PodToleration) + securityContext := v1.PodSecurityContext{} + + // determine the User, Group and FSGroup for the pooler pod + securityContext.RunAsUser = &poolerRunAsUser + securityContext.RunAsGroup = &poolerRunAsGroup + + effectiveFSGroup := c.OpConfig.Resources.SpiloFSGroup + if spec.SpiloFSGroup != nil { + effectiveFSGroup = spec.SpiloFSGroup + } + if effectiveFSGroup != nil { + securityContext.FSGroup = effectiveFSGroup + } podTemplate := &v1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ @@ -399,6 +405,7 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) ( Containers: []v1.Container{poolerContainer}, Tolerations: tolerationsSpec, Volumes: poolerVolumes, + SecurityContext: &securityContext, }, } diff --git a/pkg/cluster/connection_pooler_test.go b/pkg/cluster/connection_pooler_test.go index 13718ca06..25cc40ef1 100644 --- a/pkg/cluster/connection_pooler_test.go +++ b/pkg/cluster/connection_pooler_test.go @@ -1,6 +1,7 @@ package cluster import ( + "context" "errors" "fmt" "strings" @@ -11,6 +12,7 @@ import ( fakeacidv1 "github.com/zalando/postgres-operator/pkg/generated/clientset/versioned/fake" "github.com/zalando/postgres-operator/pkg/util" "github.com/zalando/postgres-operator/pkg/util/config" + "github.com/zalando/postgres-operator/pkg/util/constants" "github.com/zalando/postgres-operator/pkg/util/k8sutil" appsv1 "k8s.io/api/apps/v1" @@ -19,6 +21,19 @@ import ( 
"k8s.io/client-go/kubernetes/fake" ) +func newFakeK8sPoolerTestClient() (k8sutil.KubernetesClient, *fake.Clientset) { + acidClientSet := fakeacidv1.NewSimpleClientset() + clientSet := fake.NewSimpleClientset() + + return k8sutil.KubernetesClient{ + PodsGetter: clientSet.CoreV1(), + PostgresqlsGetter: acidClientSet.AcidV1(), + StatefulSetsGetter: clientSet.AppsV1(), + DeploymentsGetter: clientSet.AppsV1(), + ServicesGetter: clientSet.CoreV1(), + }, clientSet +} + func mockInstallLookupFunction(schema string, user string) error { return nil } @@ -919,6 +934,122 @@ func testServiceSelector(cluster *Cluster, service *v1.Service, role PostgresRol return nil } +func TestPoolerTLS(t *testing.T) { + client, _ := newFakeK8sPoolerTestClient() + clusterName := "acid-test-cluster" + namespace := "default" + tlsSecretName := "my-secret" + spiloFSGroup := int64(103) + defaultMode := int32(0640) + mountPath := "/tls" + + pg := acidv1.Postgresql{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: namespace, + }, + Spec: acidv1.PostgresSpec{ + TeamID: "myapp", NumberOfInstances: 1, + EnableConnectionPooler: util.True(), + Resources: &acidv1.Resources{ + ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, + ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, + }, + Volume: acidv1.Volume{ + Size: "1G", + }, + TLS: &acidv1.TLSDescription{ + SecretName: tlsSecretName, CAFile: "ca.crt"}, + AdditionalVolumes: []acidv1.AdditionalVolume{ + acidv1.AdditionalVolume{ + Name: tlsSecretName, + MountPath: mountPath, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: tlsSecretName, + DefaultMode: &defaultMode, + }, + }, + }, + }, + }, + } + + var cluster = New( + Config{ + OpConfig: config.Config{ + PodManagementPolicy: "ordered_ready", + ProtectedRoles: []string{"admin"}, + Auth: config.Auth{ + SuperUsername: superUserName, + ReplicationUsername: replicationUserName, + }, + Resources: config.Resources{ + 
ClusterLabels: map[string]string{"application": "spilo"}, + ClusterNameLabel: "cluster-name", + DefaultCPURequest: "300m", + DefaultCPULimit: "300m", + DefaultMemoryRequest: "300Mi", + DefaultMemoryLimit: "300Mi", + PodRoleLabel: "spilo-role", + SpiloFSGroup: &spiloFSGroup, + }, + ConnectionPooler: config.ConnectionPooler{ + ConnectionPoolerDefaultCPURequest: "100m", + ConnectionPoolerDefaultCPULimit: "100m", + ConnectionPoolerDefaultMemoryRequest: "100Mi", + ConnectionPoolerDefaultMemoryLimit: "100Mi", + }, + }, + }, client, pg, logger, eventRecorder) + + // create a statefulset + _, err := cluster.createStatefulSet() + assert.NoError(t, err) + + // create pooler resources + cluster.ConnectionPooler = map[PostgresRole]*ConnectionPoolerObjects{} + cluster.ConnectionPooler[Master] = &ConnectionPoolerObjects{ + Deployment: nil, + Service: nil, + Name: cluster.connectionPoolerName(Master), + ClusterName: clusterName, + Namespace: namespace, + LookupFunction: false, + Role: Master, + } + + _, err = cluster.syncConnectionPoolerWorker(nil, &pg, Master) + assert.NoError(t, err) + + deploy, err := client.Deployments(namespace).Get(context.TODO(), cluster.connectionPoolerName(Master), metav1.GetOptions{}) + assert.NoError(t, err) + + fsGroup := int64(103) + assert.Equal(t, &fsGroup, deploy.Spec.Template.Spec.SecurityContext.FSGroup, "has a default FSGroup assigned") + + volume := v1.Volume{ + Name: "my-secret", + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: "my-secret", + DefaultMode: &defaultMode, + }, + }, + } + assert.Contains(t, deploy.Spec.Template.Spec.Volumes, volume, "the pod gets a secret volume") + + poolerContainer := deploy.Spec.Template.Spec.Containers[constants.ConnectionPoolerContainer] + assert.Contains(t, poolerContainer.VolumeMounts, v1.VolumeMount{ + MountPath: "/tls", + Name: "my-secret", + }, "the volume gets mounted in /tls") + + assert.Contains(t, poolerContainer.Env, v1.EnvVar{Name: 
"CONNECTION_POOLER_CLIENT_TLS_CRT", Value: "/tls/tls.crt"}) + assert.Contains(t, poolerContainer.Env, v1.EnvVar{Name: "CONNECTION_POOLER_CLIENT_TLS_KEY", Value: "/tls/tls.key"}) + assert.Contains(t, poolerContainer.Env, v1.EnvVar{Name: "CONNECTION_POOLER_CLIENT_CA_FILE", Value: "/tls/ca.crt"}) +} + func TestConnectionPoolerServiceSpec(t *testing.T) { testName := "Test connection pooler service spec generation" var cluster = New( diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index c652608a4..8be32f09c 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1288,57 +1288,26 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef // configure TLS with a custom secret volume if spec.TLS != nil && spec.TLS.SecretName != "" { - // this is combined with the FSGroup in the section above - // to give read access to the postgres user - defaultMode := int32(0640) - mountPath := "/tls" - additionalVolumes = append(additionalVolumes, acidv1.AdditionalVolume{ - Name: spec.TLS.SecretName, - MountPath: mountPath, - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: spec.TLS.SecretName, - DefaultMode: &defaultMode, - }, - }, - }) - - // use the same filenames as Secret resources by default - certFile := ensurePath(spec.TLS.CertificateFile, mountPath, "tls.crt") - privateKeyFile := ensurePath(spec.TLS.PrivateKeyFile, mountPath, "tls.key") - spiloEnvVars = appendEnvVars( - spiloEnvVars, - v1.EnvVar{Name: "SSL_CERTIFICATE_FILE", Value: certFile}, - v1.EnvVar{Name: "SSL_PRIVATE_KEY_FILE", Value: privateKeyFile}, - ) - - if spec.TLS.CAFile != "" { - // support scenario when the ca.crt resides in a different secret, diff path - mountPathCA := mountPath - if spec.TLS.CASecretName != "" { - mountPathCA = mountPath + "ca" + getSpiloTLSEnv := func(k string) string { + keyName := "" + switch k { + case "tls.crt": + keyName = "SSL_CERTIFICATE_FILE" + case "tls.key": + keyName = "SSL_PRIVATE_KEY_FILE" + case 
"tls.ca": + keyName = "SSL_CA_FILE" + default: + panic(fmt.Sprintf("TLS env key unknown %s", k)) } - caFile := ensurePath(spec.TLS.CAFile, mountPathCA, "") - spiloEnvVars = appendEnvVars( - spiloEnvVars, - v1.EnvVar{Name: "SSL_CA_FILE", Value: caFile}, - ) - - // the ca file from CASecretName secret takes priority - if spec.TLS.CASecretName != "" { - additionalVolumes = append(additionalVolumes, acidv1.AdditionalVolume{ - Name: spec.TLS.CASecretName, - MountPath: mountPathCA, - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: spec.TLS.CASecretName, - DefaultMode: &defaultMode, - }, - }, - }) - } + return keyName } + tlsEnv, tlsVolumes := generateTlsMounts(spec, getSpiloTLSEnv) + for _, env := range tlsEnv { + spiloEnvVars = appendEnvVars(spiloEnvVars, env) + } + additionalVolumes = append(additionalVolumes, tlsVolumes...) } // generate the spilo container @@ -1492,6 +1461,59 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef return statefulSet, nil } +func generateTlsMounts(spec *acidv1.PostgresSpec, tlsEnv func(key string) string) ([]v1.EnvVar, []acidv1.AdditionalVolume) { + // this is combined with the FSGroup in the section above + // to give read access to the postgres user + defaultMode := int32(0640) + mountPath := "/tls" + env := make([]v1.EnvVar, 0) + volumes := make([]acidv1.AdditionalVolume, 0) + + volumes = append(volumes, acidv1.AdditionalVolume{ + Name: spec.TLS.SecretName, + MountPath: mountPath, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: spec.TLS.SecretName, + DefaultMode: &defaultMode, + }, + }, + }) + + // use the same filenames as Secret resources by default + certFile := ensurePath(spec.TLS.CertificateFile, mountPath, "tls.crt") + privateKeyFile := ensurePath(spec.TLS.PrivateKeyFile, mountPath, "tls.key") + env = append(env, v1.EnvVar{Name: tlsEnv("tls.crt"), Value: certFile}) + env = append(env, v1.EnvVar{Name: tlsEnv("tls.key"), Value: 
privateKeyFile}) + + if spec.TLS.CAFile != "" { + // support scenario when the ca.crt resides in a different secret, diff path + mountPathCA := mountPath + if spec.TLS.CASecretName != "" { + mountPathCA = mountPath + "ca" + } + + caFile := ensurePath(spec.TLS.CAFile, mountPathCA, "") + env = append(env, v1.EnvVar{Name: tlsEnv("tls.ca"), Value: caFile}) + + // the ca file from CASecretName secret takes priority + if spec.TLS.CASecretName != "" { + volumes = append(volumes, acidv1.AdditionalVolume{ + Name: spec.TLS.CASecretName, + MountPath: mountPathCA, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: spec.TLS.CASecretName, + DefaultMode: &defaultMode, + }, + }, + }) + } + } + + return env, volumes +} + func (c *Cluster) generatePodAnnotations(spec *acidv1.PostgresSpec) map[string]string { annotations := make(map[string]string) for k, v := range c.OpConfig.CustomPodAnnotations { @@ -1766,7 +1788,7 @@ func (c *Cluster) generatePersistentVolumeClaimTemplate(volumeSize, volumeStorag } func (c *Cluster) generateUserSecrets() map[string]*v1.Secret { - secrets := make(map[string]*v1.Secret, len(c.pgUsers)) + secrets := make(map[string]*v1.Secret, len(c.pgUsers)+len(c.systemUsers)) namespace := c.Namespace for username, pgUser := range c.pgUsers { //Skip users with no password i.e. 
human users (they'll be authenticated using pam) @@ -2346,8 +2368,6 @@ func (c *Cluster) generateLogicalBackupPodEnvVars() []v1.EnvVar { envVars = append(envVars, v1.EnvVar{Name: "AWS_SECRET_ACCESS_KEY", Value: c.OpConfig.LogicalBackup.LogicalBackupS3SecretAccessKey}) } - c.logger.Debugf("Generated logical backup env vars") - c.logger.Debugf("%v", envVars) return envVars } diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 098fdc057..582e3cb47 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -469,10 +469,24 @@ func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, e func() (bool, error) { var err error members, err = c.patroni.GetClusterMembers(master) - if err != nil { return false, err } + + // look for SyncStandby candidates (which also implies pod is in running state) + for _, member := range members { + if PostgresRole(member.Role) == SyncStandby { + syncCandidates = append(syncCandidates, member) + } + } + + // if synchronous mode is enabled and no SyncStandby was found + // return false for retry - cannot failover with no sync candidate + if c.Spec.Patroni.SynchronousMode && len(syncCandidates) == 0 { + c.logger.Warnf("no sync standby found - retrying fetching cluster members") + return false, nil + } + return true, nil }, ) @@ -480,28 +494,26 @@ func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, e return spec.NamespacedName{}, fmt.Errorf("failed to get Patroni cluster members: %s", err) } - for _, member := range members { - if PostgresRole(member.Role) != Leader && PostgresRole(member.Role) != StandbyLeader && member.State == "running" { - candidates = append(candidates, member) - if PostgresRole(member.Role) == SyncStandby { - syncCandidates = append(syncCandidates, member) - } - } - } - // pick candidate with lowest lag - // if sync_standby replicas were found assume synchronous_mode is enabled and ignore other candidates list if len(syncCandidates) > 0 { sort.Slice(syncCandidates, 
func(i, j int) bool { return syncCandidates[i].Lag < syncCandidates[j].Lag }) return spec.NamespacedName{Namespace: master.Namespace, Name: syncCandidates[0].Name}, nil - } - if len(candidates) > 0 { - sort.Slice(candidates, func(i, j int) bool { - return candidates[i].Lag < candidates[j].Lag - }) - return spec.NamespacedName{Namespace: master.Namespace, Name: candidates[0].Name}, nil + } else { + // in asynchronous mode find running replicas + for _, member := range members { + if PostgresRole(member.Role) != Leader && PostgresRole(member.Role) != StandbyLeader && member.State == "running" { + candidates = append(candidates, member) + } + } + + if len(candidates) > 0 { + sort.Slice(candidates, func(i, j int) bool { + return candidates[i].Lag < candidates[j].Lag + }) + return spec.NamespacedName{Namespace: master.Namespace, Name: candidates[0].Name}, nil + } } return spec.NamespacedName{}, fmt.Errorf("no switchover candidate found") diff --git a/pkg/cluster/pod_test.go b/pkg/cluster/pod_test.go index 068145312..6a642387e 100644 --- a/pkg/cluster/pod_test.go +++ b/pkg/cluster/pod_test.go @@ -36,30 +36,42 @@ func TestGetSwitchoverCandidate(t *testing.T) { tests := []struct { subtest string clusterJson string + syncModeEnabled bool expectedCandidate spec.NamespacedName expectedError error }{ { subtest: "choose sync_standby over replica", clusterJson: `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "sync_standby", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 0}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": "http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 0}]}`, + syncModeEnabled: true, expectedCandidate: 
spec.NamespacedName{Namespace: namespace, Name: "acid-test-cluster-1"}, expectedError: nil, }, + { + subtest: "no running sync_standby available", + clusterJson: `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "replica", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 0}]}`, + syncModeEnabled: true, + expectedCandidate: spec.NamespacedName{}, + expectedError: fmt.Errorf("failed to get Patroni cluster members: unexpected end of JSON input"), + }, { subtest: "choose replica with lowest lag", clusterJson: `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "replica", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 5}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": "http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 2}]}`, + syncModeEnabled: false, expectedCandidate: spec.NamespacedName{Namespace: namespace, Name: "acid-test-cluster-2"}, expectedError: nil, }, { subtest: "choose first replica when lag is equal evrywhere", clusterJson: `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 1}, {"name": "acid-test-cluster-1", "role": "replica", "state": "running", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 1, "lag": 5}, {"name": "acid-test-cluster-2", "role": "replica", "state": "running", "api_url": 
"http://192.168.100.3:8008/patroni", "host": "192.168.100.3", "port": 5432, "timeline": 1, "lag": 5}]}`, + syncModeEnabled: false, expectedCandidate: spec.NamespacedName{Namespace: namespace, Name: "acid-test-cluster-1"}, expectedError: nil, }, { subtest: "no running replica available", clusterJson: `{"members": [{"name": "acid-test-cluster-0", "role": "leader", "state": "running", "api_url": "http://192.168.100.1:8008/patroni", "host": "192.168.100.1", "port": 5432, "timeline": 2}, {"name": "acid-test-cluster-1", "role": "replica", "state": "starting", "api_url": "http://192.168.100.2:8008/patroni", "host": "192.168.100.2", "port": 5432, "timeline": 2}]}`, + syncModeEnabled: false, expectedCandidate: spec.NamespacedName{}, expectedError: fmt.Errorf("no switchover candidate found"), }, @@ -81,6 +93,7 @@ func TestGetSwitchoverCandidate(t *testing.T) { cluster.patroni = p mockMasterPod := newMockPod("192.168.100.1") mockMasterPod.Namespace = namespace + cluster.Spec.Patroni.SynchronousMode = tt.syncModeEnabled candidate, err := cluster.getSwitchoverCandidate(mockMasterPod) if err != nil && err.Error() != tt.expectedError.Error() { diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index ecdda6998..bd31271f4 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -413,6 +413,7 @@ func (c *Cluster) syncStatefulSet() error { } // sync Patroni config + c.logger.Debug("syncing Patroni config") if configPatched, restartPrimaryFirst, restartWait, err = c.syncPatroniConfig(pods, c.Spec.Patroni, requiredPgParameters); err != nil { c.logger.Warningf("Patroni config updated? %v - errors during config sync: %v", configPatched, err) isSafeToRecreatePods = false @@ -457,7 +458,7 @@ func (c *Cluster) syncPatroniConfig(pods []v1.Pod, requiredPatroniConfig acidv1. 
// get Postgres config, compare with manifest and update via Patroni PATCH endpoint if it differs for i, pod := range pods { podName := util.NameFromMeta(pods[i].ObjectMeta) - effectivePatroniConfig, effectivePgParameters, err = c.patroni.GetConfig(&pod) + effectivePatroniConfig, effectivePgParameters, err = c.getPatroniConfig(&pod) if err != nil { errors = append(errors, fmt.Sprintf("could not get Postgres config from pod %s: %v", podName, err)) continue @@ -641,18 +642,18 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, effectiv } // check if specified slots exist in config and if they differ for slotName, desiredSlot := range desiredPatroniConfig.Slots { - if effectiveSlot, exists := effectivePatroniConfig.Slots[slotName]; exists { - if reflect.DeepEqual(desiredSlot, effectiveSlot) { - continue - } - } - slotsToSet[slotName] = desiredSlot // only add slots specified in manifest to c.replicationSlots for manifestSlotName, _ := range c.Spec.Patroni.Slots { if manifestSlotName == slotName { c.replicationSlots[slotName] = desiredSlot } } + if effectiveSlot, exists := effectivePatroniConfig.Slots[slotName]; exists { + if reflect.DeepEqual(desiredSlot, effectiveSlot) { + continue + } + } + slotsToSet[slotName] = desiredSlot } if len(slotsToSet) > 0 { configToSet["slots"] = slotsToSet @@ -1050,7 +1051,7 @@ DBUSERS: func (c *Cluster) syncDatabases() error { c.setProcessName("syncing databases") - + errors := make([]string, 0) createDatabases := make(map[string]string) alterOwnerDatabases := make(map[string]string) preparedDatabases := make([]string, 0) @@ -1096,12 +1097,12 @@ func (c *Cluster) syncDatabases() error { for databaseName, owner := range createDatabases { if err = c.executeCreateDatabase(databaseName, owner); err != nil { - return err + errors = append(errors, err.Error()) } } for databaseName, owner := range alterOwnerDatabases { if err = c.executeAlterDatabaseOwner(databaseName, owner); err != nil { - return err + errors = 
append(errors, err.Error()) } } @@ -1117,24 +1118,32 @@ func (c *Cluster) syncDatabases() error { // set default privileges for prepared database for _, preparedDatabase := range preparedDatabases { if err := c.initDbConnWithName(preparedDatabase); err != nil { - return fmt.Errorf("could not init database connection to %s", preparedDatabase) + errors = append(errors, fmt.Sprintf("could not init database connection to %s", preparedDatabase)) + continue } for _, owner := range c.getOwnerRoles(preparedDatabase, c.Spec.PreparedDatabases[preparedDatabase].DefaultUsers) { if err = c.execAlterGlobalDefaultPrivileges(owner, preparedDatabase); err != nil { - return err + errors = append(errors, err.Error()) } } } + if len(errors) > 0 { + return fmt.Errorf("error(s) while syncing databases: %v", strings.Join(errors, `', '`)) + } + return nil } func (c *Cluster) syncPreparedDatabases() error { c.setProcessName("syncing prepared databases") + errors := make([]string, 0) + for preparedDbName, preparedDB := range c.Spec.PreparedDatabases { if err := c.initDbConnWithName(preparedDbName); err != nil { - return fmt.Errorf("could not init connection to database %s: %v", preparedDbName, err) + errors = append(errors, fmt.Sprintf("could not init connection to database %s: %v", preparedDbName, err)) + continue } c.logger.Debugf("syncing prepared database %q", preparedDbName) @@ -1144,12 +1153,13 @@ func (c *Cluster) syncPreparedDatabases() error { preparedSchemas = map[string]acidv1.PreparedSchema{"data": {DefaultRoles: util.True()}} } if err := c.syncPreparedSchemas(preparedDbName, preparedSchemas); err != nil { - return err + errors = append(errors, err.Error()) + continue } // install extensions if err := c.syncExtensions(preparedDB.Extensions); err != nil { - return err + errors = append(errors, err.Error()) } if err := c.closeDbConn(); err != nil { @@ -1157,11 +1167,16 @@ func (c *Cluster) syncPreparedDatabases() error { } } + if len(errors) > 0 { + return fmt.Errorf("error(s) 
while syncing prepared databases: %v", strings.Join(errors, `', '`)) + } + return nil } func (c *Cluster) syncPreparedSchemas(databaseName string, preparedSchemas map[string]acidv1.PreparedSchema) error { c.setProcessName("syncing prepared schemas") + errors := make([]string, 0) currentSchemas, err := c.getSchemas() if err != nil { @@ -1184,17 +1199,21 @@ func (c *Cluster) syncPreparedSchemas(databaseName string, preparedSchemas map[s owner = dbOwner } if err = c.executeCreateDatabaseSchema(databaseName, schemaName, dbOwner, owner); err != nil { - return err + errors = append(errors, err.Error()) } } } + if len(errors) > 0 { + return fmt.Errorf("error(s) while syncing schemas of prepared databases: %v", strings.Join(errors, `', '`)) + } + return nil } func (c *Cluster) syncExtensions(extensions map[string]string) error { c.setProcessName("syncing database extensions") - + errors := make([]string, 0) createExtensions := make(map[string]string) alterExtensions := make(map[string]string) @@ -1214,15 +1233,19 @@ func (c *Cluster) syncExtensions(extensions map[string]string) error { for extName, schema := range createExtensions { if err = c.executeCreateExtension(extName, schema); err != nil { - return err + errors = append(errors, err.Error()) } } for extName, schema := range alterExtensions { if err = c.executeAlterExtension(extName, schema); err != nil { - return err + errors = append(errors, err.Error()) } } + if len(errors) > 0 { + return fmt.Errorf("error(s) while syncing database extensions: %v", strings.Join(errors, `', '`)) + } + return nil } diff --git a/pkg/controller/util.go b/pkg/controller/util.go index bca8082f6..bd1e65d02 100644 --- a/pkg/controller/util.go +++ b/pkg/controller/util.go @@ -415,6 +415,7 @@ func (c *Controller) postgresTeamAdd(obj interface{}) { pgTeam, ok := obj.(*acidv1.PostgresTeam) if !ok { c.logger.Errorf("could not cast to PostgresTeam spec") + return } c.logger.Debugf("PostgreTeam %q added. 
Reloading postgres team CRDs and overwriting cached map", pgTeam.Name) c.loadPostgresTeams() @@ -424,6 +425,7 @@ func (c *Controller) postgresTeamUpdate(prev, obj interface{}) { pgTeam, ok := obj.(*acidv1.PostgresTeam) if !ok { c.logger.Errorf("could not cast to PostgresTeam spec") + return } c.logger.Debugf("PostgreTeam %q updated. Reloading postgres team CRDs and overwriting cached map", pgTeam.Name) c.loadPostgresTeams() diff --git a/run_operator_locally.sh b/run_operator_locally.sh index 9e3e082da..600cc2f60 100755 --- a/run_operator_locally.sh +++ b/run_operator_locally.sh @@ -30,8 +30,8 @@ function retry(){ local -r retry_cmd="$1" local -r retry_msg="$2" - # times out after 1 minute - for i in {1..20}; do + # Time out after three minutes. + for i in {1..60}; do if eval "$retry_cmd"; then return 0 fi @@ -165,11 +165,63 @@ function forward_ports(){ local operator_pod operator_pod=$(kubectl get pod -l name=postgres-operator -o jsonpath={.items..metadata.name}) - # runs in the background to keep current terminal responsive - # stdout redirect removes the info message about forwarded ports; the message sometimes garbles the cli prompt - kubectl port-forward "$operator_pod" "$LOCAL_PORT":"$OPERATOR_PORT" &> /dev/null & + # Spawn `kubectl port-forward` in the background to keep current terminal + # responsive. Hide stdout because otherwise there is a note about each TCP + # connection. Do not hide stderr so port-forward setup errors can be + # debugged. Sometimes the port-forward setup fails because expected k8s + # state isn't achieved yet. Try to detect that case and then run the + # command again (in a finite loop). + for _attempt in {1..20}; do + # Delay between retry attempts. First attempt should already be + # delayed. + echo "soon: invoke kubectl port-forward command (attempt $_attempt)" + sleep 5 - echo $! 
> "$PATH_TO_PORT_FORWARED_KUBECTL_PID" + # With the --pod-running-timeout=4s argument the process is expected + # to terminate within about that time if the pod isn't ready yet. + kubectl port-forward --pod-running-timeout=4s "$operator_pod" "$LOCAL_PORT":"$OPERATOR_PORT" 1> /dev/null & + _kubectl_pid=$! + _pf_success=true + + # A successful `kubectl port-forward` setup can pragmatically be + # detected with a time-based criterion: it is a long-running process if + # successfully set up. If it does not terminate within deadline then + # consider the setup successful. Overall, observe the process for + # roughly 7 seconds. If it terminates before that it's certainly an + # error. If it did not terminate within that time frame then consider + # setup successful. + for ib in {1..7}; do + sleep 1 + # Portable and non-blocking test: is process still running? + if kill -s 0 -- "${_kubectl_pid}" >/dev/null 2>&1; then + echo "port-forward process is still running" + else + # port-forward process seems to have terminated, reap zombie + set +e + # `wait` is now expected to be non-blocking, and exits with the + # exit code of pid (first arg). + wait $_kubectl_pid + _kubectl_rc=$? + set -e + echo "port-forward process terminated with exit code ${_kubectl_rc}" + _pf_success=false + break + fi + done + + if [ ${_pf_success} = true ]; then + echo "port-forward setup seems successful. leave retry loop." + break + fi + + done + + if [ "${_pf_success}" = false ]; then + echo "port-forward setup failed after retrying. exit." 
+ exit 1 + fi + + echo "${_kubectl_pid}" > "$PATH_TO_PORT_FORWARED_KUBECTL_PID" } diff --git a/ui/app/src/postgresql.tag.pug b/ui/app/src/postgresql.tag.pug index 1091d32fa..960c3bd07 100644 --- a/ui/app/src/postgresql.tag.pug +++ b/ui/app/src/postgresql.tag.pug @@ -94,7 +94,7 @@ postgresql .alert.alert-success(if='{ progress.masterLabel }') PostgreSQL master available, label is attached .alert.alert-success(if='{ progress.masterLabel && progress.dnsName }') PostgreSQL ready: { progress.dnsName } - .alert.alert-success(if='{ progress.pooler }') Connection pooler deployment created + .alert.alert-success(if='{ progress.pooler && this.progress.postgresqlManifest.spec.enableConnectionPooler }') Pooler ready: { progress.poolerDnsName } .col-lg-3 help-general(config='{ opts.config }') @@ -110,6 +110,7 @@ postgresql this.progress = {} this.progress.requestStatus = 'OK' + this.progress.pooler = false this.pollProgressTimer = false @@ -126,7 +127,6 @@ postgresql jQuery.get( './postgresqls/' + this.cluster_path, ).done(data => { - this.progress.pooler = false this.progress.postgresql = true this.progress.postgresqlManifest = data // copy status as we delete later for edit @@ -169,8 +169,22 @@ postgresql } if (this.progress.poolerEnabled == true) { - jQuery.get('./pooler/' + this.cluster_path).done(data => { + jQuery.get( + './pooler/' + this.cluster_path, + ).done(data => { this.progress.pooler = {"url": ""} + jQuery.get( + './services/' + this.cluster_path + "-pooler", + ).done(data => { + if (data.metadata && data.metadata.annotations && 'zalando.org/dnsname' in data.metadata.annotations) { + this.progress.poolerDnsName = data.metadata.annotations['zalando.org/dnsname'] + } else if (data.metadata && data.metadata.annotations && 'external-dns.alpha.kubernetes.io/hostname' in data.metadata.annotations) { + this.progress.poolerDnsName = data.metadata.annotations['external-dns.alpha.kubernetes.io/hostname'] + } else { + this.progress.poolerDnsName = data.metadata.name + '.' 
+ data.metadata.namespace + } + this.update() + }) this.update() }) } diff --git a/ui/operator_ui/main.py b/ui/operator_ui/main.py index 0399f14f8..bec441dca 100644 --- a/ui/operator_ui/main.py +++ b/ui/operator_ui/main.py @@ -311,7 +311,7 @@ def send_js(path): @app.route('/') @authorize def index(): - return render_template('index.html', google_analytics=GOOGLE_ANALYTICS) + return render_template('index.html', google_analytics=GOOGLE_ANALYTICS, app_url=APP_URL) DEFAULT_UI_CONFIG = { diff --git a/ui/operator_ui/templates/index.html b/ui/operator_ui/templates/index.html index 7307c8a3a..021c5c75d 100644 --- a/ui/operator_ui/templates/index.html +++ b/ui/operator_ui/templates/index.html @@ -2,6 +2,7 @@ + PostgreSQL Operator UI diff --git a/ui/requirements.txt b/ui/requirements.txt index 790bc6cdd..75ac95fd1 100644 --- a/ui/requirements.txt +++ b/ui/requirements.txt @@ -12,4 +12,4 @@ kubernetes==11.0.0 requests==2.28.2 stups-tokens>=1.1.19 wal_e==1.1.1 -werkzeug==2.2.2 +werkzeug==2.2.3