diff --git a/.github/workflows/publish_ghcr_image.yaml b/.github/workflows/publish_ghcr_image.yaml index 3cead3503..2425e39b3 100644 --- a/.github/workflows/publish_ghcr_image.yaml +++ b/.github/workflows/publish_ghcr_image.yaml @@ -34,6 +34,12 @@ jobs: OPERATOR_IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${GITHUB_REF/refs\/tags\//}" echo "OPERATOR_IMAGE=$OPERATOR_IMAGE" >> $GITHUB_OUTPUT + - name: Define pooler image name + id: image_pooler + run: | + POOLER_IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/pgbouncer:${GITHUB_REF/refs\/tags\//}" + echo "POOLER_IMAGE=$POOLER_IMAGE" >> $GITHUB_OUTPUT + - name: Define UI image name id: image_ui run: | @@ -69,6 +75,15 @@ jobs: tags: "${{ steps.image.outputs.OPERATOR_IMAGE }}" platforms: linux/amd64,linux/arm64 + - name: Build and push multiarch pooler image to ghcr + uses: docker/build-push-action@v3 + with: + context: pooler + push: true + build-args: BASE_IMAGE=alpine:3.22 + tags: "${{ steps.image_pooler.outputs.POOLER_IMAGE }}" + platforms: linux/amd64,linux/arm64 + - name: Build and push multiarch ui image to ghcr uses: docker/build-push-action@v3 with: diff --git a/Makefile b/Makefile index 02c9c73f5..c1becbc99 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: clean local test linux macos mocks docker push e2e +.PHONY: clean local test linux macos mocks docker pooler push e2e BINARY ?= postgres-operator BUILD_FLAGS ?= -v @@ -49,6 +49,7 @@ endif PATH := $(GOPATH)/bin:$(PATH) SHELL := env PATH="$(PATH)" $(SHELL) IMAGE_TAG := $(IMAGE):$(TAG)$(CDP_TAG)$(DEBUG_FRESH)$(DEBUG_POSTFIX) +POOLER_TAG := $(IMAGE)/pgbouncer:$(TAG)$(CDP_TAG)$(DEBUG_FRESH)$(DEBUG_POSTFIX) default: local @@ -78,6 +79,9 @@ $(GENERATED_CRDS): $(GENERATED) local: ${SOURCES} $(GENERATED_CRDS) CGO_ENABLED=${CGO_ENABLED} go build -o build/${BINARY} $(LOCAL_BUILD_FLAGS) -ldflags "$(LDFLAGS)" $(SOURCES) +wasm: ${SOURCES} $(GENERATED_CRDS) + GOOS=wasip1 GOARCH=wasm CGO_ENABLED=${CGO_ENABLED} go build -o build/${BINARY}.wasm ${BUILD_FLAGS} -ldflags "$(LDFLAGS)" $(SOURCES) + linux: ${SOURCES} $(GENERATED_CRDS) GOOS=linux GOARCH=amd64 CGO_ENABLED=${CGO_ENABLED} go build -o build/linux/${BINARY} ${BUILD_FLAGS} -ldflags "$(LDFLAGS)" $(SOURCES) @@ -92,6 +96,9 @@ docker: $(GENERATED_CRDS) ${DOCKERDIR}/${DOCKERFILE} echo "git describe $(shell git describe --tags --always --dirty)" docker build --rm -t "$(IMAGE_TAG)" -f "${DOCKERDIR}/${DOCKERFILE}" --build-arg VERSION="${VERSION}" --build-arg BASE_IMAGE="${BASE_IMAGE}" . +pooler: + cd pooler; docker build --rm -t "$(POOLER_TAG)" --build-arg VERSION="${VERSION}" --build-arg BASE_IMAGE="${BASE_IMAGE}" . 
+ indocker-race: docker run --rm -v "${GOPATH}":"${GOPATH}" -e GOPATH="${GOPATH}" -e RACE=1 -w ${PWD} golang:1.25.3 bash -c "make linux" @@ -110,5 +117,5 @@ test: mocks $(GENERATED) $(GENERATED_CRDS) codegen: $(GENERATED) -e2e: docker # build operator image to be tested +e2e: docker pooler # build operator and pooler images to be tested cd e2e; make e2etest diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index cb4b7a335..80ef38d25 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -672,7 +672,7 @@ spec: default: "pooler" connection_pooler_image: type: string - default: "registry.opensource.zalan.do/acid/pgbouncer:master-32" + default: "ghcr.io/zalando/postgres-operator/pgbouncer:latest" connection_pooler_max_db_connections: type: integer default: 60 diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index dfec76b6b..a1f4fa94c 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -443,7 +443,7 @@ configConnectionPooler: # db user for pooler to use connection_pooler_user: "pooler" # docker image - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-32" + connection_pooler_image: "ghcr.io/zalando/postgres-operator/pgbouncer:latest" # max db connections the pooler should hold connection_pooler_max_db_connections: 60 # default pooling mode diff --git a/delivery.yaml b/delivery.yaml index 933e72733..ac1ed90c6 100644 --- a/delivery.yaml +++ b/delivery.yaml @@ -42,6 +42,33 @@ pipeline: -f docker/Dockerfile \ --push . + - id: build-pooler + env: + <<: *BUILD_ENV + type: script + vm_config: + type: linux + + commands: + - desc: Build image + cmd: | + cd pooler + if [ -z ${CDP_SOURCE_BRANCH} ]; then + IMAGE=${MULTI_ARCH_REGISTRY}/pgbouncer + else + IMAGE=${MULTI_ARCH_REGISTRY}/pgbouncer-test + fi + + docker buildx create --config /etc/cdp-buildkitd.toml --driver-opt network=host --bootstrap --use + docker buildx build --platform "linux/amd64,linux/arm64" \ + --build-arg BASE_IMAGE="${ALPINE_BASE_IMAGE}" \ + -t "${IMAGE}:${CDP_BUILD_VERSION}" \ + --push . + + if [ -z ${CDP_SOURCE_BRANCH} ]; then + cdp-promote-image ${IMAGE}:${CDP_BUILD_VERSION} + fi + - id: build-operator-ui env: <<: *BUILD_ENV diff --git a/docs/administrator.md b/docs/administrator.md index b7880b183..e854775ce 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -891,15 +891,13 @@ cluster manifest. In the case any of these variables are omitted from the manifest, the operator configuration settings `enable_master_load_balancer` and `enable_replica_load_balancer` apply. Note that the operator settings affect all Postgresql services running in all namespaces watched by the operator. -If load balancing is enabled two default annotations will be applied to its -services: +If load balancing is enabled the following default annotation will be applied to +its services: - `external-dns.alpha.kubernetes.io/hostname` with the value defined by the operator configs `master_dns_name_format` and `replica_dns_name_format`. This value can't be overwritten. If any changing in its value is needed, it MUST be done changing the DNS format operator config parameters; and -- `service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout` with - a default value of "3600". 
There are multiple options to specify service annotations that will be merged with each other and override in the following order (where latter take diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 6dd775069..bd2ca7cf6 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -900,6 +900,19 @@ grouped under the `logical_backup` key. * **logical_backup_cronjob_environment_secret** Reference to a Kubernetes secret, which keys will be added as environment variables to the cronjob. Default: "" +The following environment variables can be passed to the logical backup +cronjob via `logical_backup_cronjob_environment_secret` to control +connectivity checks before the backup starts: + +* **LOGICAL_BACKUP_CONNECT_RETRIES** + Number of times to retry connecting to the target PostgreSQL pod before + giving up. This is useful when NetworkPolicy enforcement introduces a + short delay before a newly-created pod's IP is allowed through ingress + rules on the destination node. Default: "10" + +* **LOGICAL_BACKUP_CONNECT_RETRY_DELAY** + Delay in seconds between connectivity retries. Default: "2" + ## Debugging the operator Options to aid debugging of the operator itself. Grouped under the `debug` key. @@ -1062,7 +1075,7 @@ operator being able to provide some reasonable defaults. * **connection_pooler_image** Docker image to use for connection pooler deployment. - Default: "registry.opensource.zalan.do/acid/pgbouncer" + Default: "ghcr.io/zalando/postgres-operator/pgbouncer:latest" * **connection_pooler_max_db_connections** How many connections the pooler can max hold. This value is divided among the diff --git a/e2e/exec_into_env.sh b/e2e/exec_into_env.sh index a46efecbd..4d017bc35 100755 --- a/e2e/exec_into_env.sh +++ b/e2e/exec_into_env.sh @@ -3,6 +3,7 @@ export cluster_name="postgres-operator-e2e-tests" export kubeconfig_path="/tmp/kind-config-${cluster_name}" export operator_image="ghcr.io/zalando/postgres-operator:latest" +export pooler_image="ghcr.io/zalando/postgres-operator/pgbouncer:latest" export e2e_test_runner_image="ghcr.io/zalando/postgres-operator-e2e-tests-runner:latest" docker run -it --entrypoint /bin/bash --network=host -e "TERM=xterm-256color" \ @@ -11,4 +12,5 @@ docker run -it --entrypoint /bin/bash --network=host -e "TERM=xterm-256color" \ --mount type=bind,source="$(readlink -f tests)",target=/tests \ --mount type=bind,source="$(readlink -f exec.sh)",target=/exec.sh \ --mount type=bind,source="$(readlink -f scripts)",target=/scripts \ - -e OPERATOR_IMAGE="${operator_image}" "${e2e_test_runner_image}" + -e OPERATOR_IMAGE="${operator_image}" -e POOLER_IMAGE="${pooler_image}" \ + "${e2e_test_runner_image}" diff --git a/e2e/run.sh b/e2e/run.sh index f74158240..c5daf81f6 100755 --- a/e2e/run.sh +++ b/e2e/run.sh @@ -26,17 +26,33 @@ echo "Kubeconfig path: ${kubeconfig_path}" function pull_images(){ operator_tag=$(git describe --tags --always --dirty) - image_name="ghcr.io/zalando/postgres-operator:${operator_tag}" - if [[ -z $(docker images -q "${image_name}") ]] - then - if ! 
docker pull "${image_name}" - then - echo "Failed to pull operator image: ${image_name}" - exit 1 + components=("postgres-operator" "pooler") + image_urls=("ghcr.io/zalando/postgres-operator:${operator_tag}" "ghcr.io/zalando/postgres-operator/pgbouncer:${operator_tag}") + + for i in "${!components[@]}"; do + component="${components[$i]}" + image="${image_urls[$i]}" + + if [[ -z $(docker images -q "$image") ]]; then + echo "Pulling $component image: $image" + if ! docker pull "$image"; then + echo "Failed to pull $component image: $image" + exit 1 + fi + else + echo "$component image already exists: $image" fi - fi - operator_image="${image_name}" - echo "Using operator image: ${operator_image}" + + # Set variables for later use + if [[ "$component" == "postgres-operator" ]]; then + operator_image="$image" + elif [[ "$component" == "pooler" ]]; then + pooler_image="$image" + fi + done + + echo "Using operator image: $operator_image" + echo "Using pooler image: $pooler_image" } function start_kind(){ @@ -55,10 +71,11 @@ function start_kind(){ kind load docker-image "${spilo_image}" --name ${cluster_name} } -function load_operator_image() { - echo "Loading operator image" +function load_operator_images() { + echo "Loading operator images" export KUBECONFIG="${kubeconfig_path}" kind load docker-image "${operator_image}" --name ${cluster_name} + kind load docker-image "${pooler_image}" --name ${cluster_name} } function set_kind_api_server_ip(){ @@ -85,7 +102,8 @@ function run_tests(){ --mount type=bind,source="$(readlink -f tests)",target=/tests \ --mount type=bind,source="$(readlink -f exec.sh)",target=/exec.sh \ --mount type=bind,source="$(readlink -f scripts)",target=/scripts \ - -e OPERATOR_IMAGE="${operator_image}" "${e2e_test_runner_image}" ${E2E_TEST_CASE-} $@ + -e OPERATOR_IMAGE="${operator_image}" -e POOLER_IMAGE="${pooler_image}" \ + "${e2e_test_runner_image}" ${E2E_TEST_CASE-} $@ } function cleanup(){ @@ -100,7 +118,7 @@ function main(){ [[ -z ${NOCLEANUP-} ]] && trap "cleanup" QUIT TERM EXIT pull_images [[ ! 
-f ${kubeconfig_path} ]] && start_kind - load_operator_image + load_operator_images set_kind_api_server_ip generate_certificate diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 70145f3e4..159c3cc79 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -116,6 +116,7 @@ class EndToEndTestCase(unittest.TestCase): configmap["data"]["workers"] = "1" configmap["data"]["docker_image"] = SPILO_CURRENT configmap["data"]["major_version_upgrade_mode"] = "full" + configmap["data"]["connection_pooler_image"] = os.environ['POOLER_IMAGE'] with open("manifests/configmap.yaml", 'w') as f: yaml.dump(configmap, f, Dumper=yaml.Dumper) @@ -698,7 +699,7 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), 2, "No pooler pods found") self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), 2, "No pooler replica pods found") self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), 2, "No pooler service found") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), 1, "Pooler secret not created") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), 3, "Not all pooler secrets found") # TLS still enabled so check existing env variables and volume mounts self.eventuallyEqual(lambda: k8s.count_pods_with_env_variable("CONNECTION_POOLER_CLIENT_TLS_CRT", pooler_label), 4, "TLS env variable CONNECTION_POOLER_CLIENT_TLS_CRT missing in pooler pods") @@ -724,14 +725,12 @@ class EndToEndTestCase(unittest.TestCase): master_annotations = { "external-dns.alpha.kubernetes.io/hostname": "acid-minimal-cluster-pooler.default.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", } self.eventuallyTrue(lambda: k8s.check_service_annotations( master_pooler_label+","+pooler_label, master_annotations), "Wrong annotations") replica_annotations = { "external-dns.alpha.kubernetes.io/hostname": "acid-minimal-cluster-pooler-repl.default.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", } self.eventuallyTrue(lambda: k8s.check_service_annotations( replica_pooler_label+","+pooler_label, replica_annotations), "Wrong annotations") @@ -758,7 +757,7 @@ class EndToEndTestCase(unittest.TestCase): self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), 1, "No pooler service found") self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), - 1, "Secret not created") + 2, "Not all pooler secrets created") # Turn off only replica connection pooler k8s.api.custom_objects_api.patch_namespaced_custom_object( @@ -786,7 +785,7 @@ class EndToEndTestCase(unittest.TestCase): 'ClusterIP', "Expected LoadBalancer service type for master, found {}") self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), - 1, "Secret not created") + 2, "Not all pooler secrets created") # scale up connection pooler deployment k8s.api.custom_objects_api.patch_namespaced_custom_object( @@ -821,8 +820,8 @@ class EndToEndTestCase(unittest.TestCase): 0, "Pooler pods not scaled down") self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), 0, "Pooler service not removed") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label('application=spilo,cluster-name=acid-minimal-cluster'), - 4, "Secrets not deleted") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), + 0, "Not all pooler secrets deleted") # Verify that all the databases have 
pooler schema installed. # Do this via psql, since otherwise we need to deal with diff --git a/go.mod b/go.mod index 898b4e65f..e60566166 100644 --- a/go.mod +++ b/go.mod @@ -6,11 +6,11 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/aws/aws-sdk-go v1.55.8 github.com/golang/mock v1.6.0 - github.com/lib/pq v1.10.9 + github.com/lib/pq v1.11.2 github.com/motomux/pretty v0.0.0-20161209205251-b2aad2c9a95d github.com/pkg/errors v0.9.1 github.com/r3labs/diff v1.1.0 - github.com/sirupsen/logrus v1.9.3 + github.com/sirupsen/logrus v1.9.4 github.com/stretchr/testify v1.11.1 golang.org/x/crypto v0.45.0 gopkg.in/yaml.v2 v2.4.0 diff --git a/go.sum b/go.sum index c34c1123b..4cd35bf35 100644 --- a/go.sum +++ b/go.sum @@ -71,8 +71,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= -github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/lib/pq v1.11.2 h1:x6gxUeu39V0BHZiugWe8LXZYZ+Utk7hSJGThs8sdzfs= +github.com/lib/pq v1.11.2/go.mod h1:/p+8NSbOcwzAEI7wiMXFlgydTwcgTr3OSKMsD2BitpA= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= @@ -111,8 +111,8 @@ github.com/r3labs/diff v1.1.0/go.mod h1:7WjXasNzi0vJetRcB/RqNl5dlIsmXcTTLmF5IoH6 github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= +github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= @@ -122,7 +122,6 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -166,7 +165,6 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= diff --git a/hack/update-codegen.sh b/hack/update-codegen.sh index 9d43bc512..1363c2786 100755 --- a/hack/update-codegen.sh +++ b/hack/update-codegen.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright 2017 The Kubernetes Authors. # diff --git a/logical-backup/dump.sh b/logical-backup/dump.sh index a250670a6..7833de399 100755 --- a/logical-backup/dump.sh +++ b/logical-backup/dump.sh @@ -183,6 +183,25 @@ function get_master_pod { get_pods "labelSelector=${CLUSTER_NAME_LABEL}%3D${SCOPE},spilo-role%3Dmaster" | tee | head -n 1 } +# Wait for TCP connectivity to the target PostgreSQL pod. +# When NetworkPolicy is enforced via iptables, a newly-created pod's IP may not +# yet be present in the destination node's ingress allow lists, causing +# cross-node connections to be rejected until the next policy sync. +function wait_for_pg { + local retries=${LOGICAL_BACKUP_CONNECT_RETRIES:-10} + local delay=${LOGICAL_BACKUP_CONNECT_RETRY_DELAY:-2} + local i + for (( i=1; i<=retries; i++ )); do + if "$PG_BIN"/pg_isready -h "$PGHOST" -p "${PGPORT:-5432}" -q 2>/dev/null; then + return 0 + fi + echo "waiting for $PGHOST:${PGPORT:-5432} to become reachable (attempt $i/$retries)..." + sleep "$delay" + done + echo "ERROR: $PGHOST:${PGPORT:-5432} not reachable after $((retries * delay))s" + return 1 +} + CURRENT_NODENAME=$(get_current_pod | jq .items[].spec.nodeName --raw-output) export CURRENT_NODENAME @@ -197,6 +216,8 @@ for search in "${search_strategy[@]}"; do done +wait_for_pg + set -x if [ "$LOGICAL_BACKUP_PROVIDER" == "az" ]; then dump | compress > /tmp/azure-backup.sql.gz diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 571a4171b..1096e0265 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -17,7 +17,7 @@ data: connection_pooler_default_cpu_request: "500m" connection_pooler_default_memory_limit: 100Mi connection_pooler_default_memory_request: 100Mi - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-32" + connection_pooler_image: "ghcr.io/zalando/postgres-operator/pgbouncer:latest" connection_pooler_max_db_connections: "60" connection_pooler_mode: "transaction" connection_pooler_number_of_instances: "2" diff --git a/manifests/minimal-fake-pooler-deployment.yaml b/manifests/minimal-fake-pooler-deployment.yaml index 59a32ad0b..13f2cc68f 100644 --- a/manifests/minimal-fake-pooler-deployment.yaml +++ b/manifests/minimal-fake-pooler-deployment.yaml @@ -23,7 +23,7 @@ spec: serviceAccountName: postgres-operator containers: - name: postgres-operator - image: registry.opensource.zalan.do/acid/pgbouncer:master-32 + image: ghcr.io/zalando/postgres-operator/pgbouncer:latest imagePullPolicy: IfNotPresent resources: requests: diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 3be545b65..b5044b467 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -670,7 +670,7 @@ spec: default: 
"pooler" connection_pooler_image: type: string - default: "registry.opensource.zalan.do/acid/pgbouncer:master-32" + default: "ghcr.io/zalando/postgres-operator/pgbouncer:latest" connection_pooler_max_db_connections: type: integer default: 60 diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 1c6a0e34a..13dfd6977 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -218,7 +218,7 @@ configuration: connection_pooler_default_cpu_request: "500m" connection_pooler_default_memory_limit: 100Mi connection_pooler_default_memory_request: 100Mi - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-32" + connection_pooler_image: "ghcr.io/zalando/postgres-operator/pgbouncer:latest" # connection_pooler_max_db_connections: 60 connection_pooler_mode: "transaction" connection_pooler_number_of_instances: 2 diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go index c7181dbbc..8046943d4 100644 --- a/pkg/cluster/cluster_test.go +++ b/pkg/cluster/cluster_test.go @@ -680,8 +680,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: make(map[string]string), serviceAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", }, }, { @@ -702,8 +701,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: make(map[string]string), serviceAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", }, }, { @@ -714,8 +712,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: make(map[string]string), serviceAnnotations: map[string]string{"foo": "bar"}, expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", "foo": "bar", }, }, @@ -737,8 +734,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: map[string]string{"foo": "bar"}, serviceAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", "foo": "bar", }, }, @@ -780,8 +776,7 @@ func TestServiceAnnotations(t *testing.T) { "external-dns.alpha.kubernetes.io/hostname": "wrong.external-dns-name.example.com", }, expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", - 
"service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", }, }, { @@ -792,8 +787,7 @@ func TestServiceAnnotations(t *testing.T) { serviceAnnotations: make(map[string]string), operatorAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg.test.db.example.com,test-stg.acid.db.example.com", }, }, { @@ -835,8 +829,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: make(map[string]string), serviceAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", }, }, { @@ -857,8 +850,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: make(map[string]string), serviceAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", }, }, { @@ -869,8 +861,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: make(map[string]string), serviceAnnotations: map[string]string{"foo": "bar"}, expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", "foo": "bar", }, }, @@ -892,8 +883,7 @@ func TestServiceAnnotations(t *testing.T) { operatorAnnotations: map[string]string{"foo": "bar"}, serviceAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", "foo": "bar", }, }, @@ -935,8 +925,7 @@ func TestServiceAnnotations(t *testing.T) { "external-dns.alpha.kubernetes.io/hostname": "wrong.external-dns-name.example.com", }, expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", }, }, { @@ -947,8 +936,7 @@ func TestServiceAnnotations(t *testing.T) { serviceAnnotations: make(map[string]string), operatorAnnotations: make(map[string]string), expect: map[string]string{ - "external-dns.alpha.kubernetes.io/hostname": 
"acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", - "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600", + "external-dns.alpha.kubernetes.io/hostname": "acid-test-stg-repl.test.db.example.com,test-stg-repl.acid.db.example.com", }, }, { @@ -1377,7 +1365,6 @@ func TestCompareServices(t *testing.T) { serviceWithOwnerReference := newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeClusterIP, []string{"128.141.0.0/16", "137.138.0.0/16"}, @@ -1406,7 +1393,6 @@ func TestCompareServices(t *testing.T) { current: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeClusterIP, []string{"128.141.0.0/16", "137.138.0.0/16"}, @@ -1414,7 +1400,6 @@ func TestCompareServices(t *testing.T) { new: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeClusterIP, []string{"128.141.0.0/16", "137.138.0.0/16"}, @@ -1426,7 +1411,6 @@ func TestCompareServices(t *testing.T) { current: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeClusterIP, []string{"128.141.0.0/16", "137.138.0.0/16"}, @@ -1434,7 +1418,6 @@ func TestCompareServices(t *testing.T) { new: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeLoadBalancer, []string{"128.141.0.0/16", "137.138.0.0/16"}, @@ -1447,7 +1430,6 @@ func TestCompareServices(t *testing.T) { current: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeLoadBalancer, []string{"128.141.0.0/16", "137.138.0.0/16"}, @@ -1455,7 +1437,6 @@ func TestCompareServices(t *testing.T) { new: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeLoadBalancer, []string{"185.249.56.0/22"}, @@ -1468,7 +1449,6 @@ func TestCompareServices(t *testing.T) { current: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeLoadBalancer, []string{"128.141.0.0/16", "137.138.0.0/16"}, @@ -1476,7 +1456,6 @@ func TestCompareServices(t *testing.T) { new: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeLoadBalancer, []string{}, @@ -1489,7 +1468,6 @@ func TestCompareServices(t *testing.T) { current: newService( map[string]string{ constants.ZalandoDNSNameAnnotation: "clstr.acid.zalan.do", - constants.ElbTimeoutAnnotationName: constants.ElbTimeoutAnnotationValue, }, v1.ServiceTypeClusterIP, []string{"128.141.0.0/16", "137.138.0.0/16"}, diff --git a/pkg/cluster/connection_pooler.go b/pkg/cluster/connection_pooler.go index ac4ce67d8..97f7a3076 100644 --- a/pkg/cluster/connection_pooler.go +++ 
b/pkg/cluster/connection_pooler.go @@ -31,6 +31,7 @@ var poolerRunAsGroup = int64(101) // ConnectionPoolerObjects K8s objects that are belong to connection pooler type ConnectionPoolerObjects struct { + AuthSecret *v1.Secret Deployment *appsv1.Deployment Service *v1.Service Name string @@ -167,6 +168,38 @@ func (c *Cluster) createConnectionPooler(LookupFunction InstallFunction) (SyncRe return reason, nil } +func (c *Cluster) generateUserlist() string { + var sb strings.Builder + + poolerAdminUser := c.systemUsers[constants.ConnectionPoolerUserKeyName] + fmt.Fprintf(&sb, "\"%s\" \"%s\"\n", poolerAdminUser.Name, poolerAdminUser.Password) + + for roleName, infraRole := range c.InfrastructureRoles { + if infraRole.Password != "" { + fmt.Fprintf(&sb, "\"%s\" \"%s\"\n", roleName, infraRole.Password) + } + } + + return sb.String() +} + +func (c *Cluster) generateConnectionPoolerAuthSecret(connectionPooler *ConnectionPoolerObjects) *v1.Secret { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Labels: c.connectionPoolerLabels(connectionPooler.Role, true).MatchLabels, + Name: fmt.Sprintf("%s-userlist", connectionPooler.Name), + Namespace: connectionPooler.Namespace, + Annotations: c.annotationsSet(nil), + OwnerReferences: c.ownerReferences(), + }, + Type: v1.SecretTypeOpaque, + // Secret data must be bytes. Kubernetes handles the encoding. + StringData: map[string]string{ + "userlist.txt": c.generateUserlist(), + }, + } +} + // Generate pool size related environment variables. // // MAX_DB_CONN would specify the global maximum for connections to a target @@ -320,6 +353,18 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) ( } envVars = append(envVars, c.getConnectionPoolerEnvVars()...) + infraRolesList := make([]string, 0) + for infraRoleName := range c.InfrastructureRoles { + infraRolesList = append(infraRolesList, infraRoleName) + } + + if len(infraRolesList) > 0 { + envVars = append(envVars, v1.EnvVar{ + Name: "INFRASTRUCTURE_ROLES", + Value: strings.Join(infraRolesList, ","), + }) + } + poolerContainer := v1.Container{ Name: connectionPoolerContainer, Image: effectiveDockerImage, @@ -343,12 +388,29 @@ func (c *Cluster) generateConnectionPoolerPodTemplate(role PostgresRole) ( }, } + var poolerVolumes []v1.Volume + var volumeMounts []v1.VolumeMount + + // mount secret volume with userlist.txt for pgBouncer to authenticate users + poolerVolumes = append(poolerVolumes, v1.Volume{ + Name: fmt.Sprintf("%s-userlist-volume", c.connectionPoolerName(role)), + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: fmt.Sprintf("%s-userlist", c.connectionPoolerName(role)), + }, + }, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: fmt.Sprintf("%s-userlist-volume", c.connectionPoolerName(role)), + MountPath: "/etc/pgbouncer/userlist.txt", + SubPath: "userlist.txt", + ReadOnly: true, + }) + // If the cluster has custom TLS certificates configured, we do the following: // 1. Add environment variables to tell pgBouncer where to find the TLS certificates // 2. Reference the secret in a volume // 3. 
Mount the volume to the container at /tls - var poolerVolumes []v1.Volume - var volumeMounts []v1.VolumeMount if spec.TLS != nil && spec.TLS.SecretName != "" { getPoolerTLSEnv := func(k string) string { keyName := "" @@ -533,10 +595,6 @@ func (c *Cluster) generatePoolerServiceAnnotations(role PostgresRole, spec *acid annotations := c.getCustomServiceAnnotations(role, spec) if c.shouldCreateLoadBalancerForPoolerService(role, spec) { - // set ELB Timeout annotation with default value - if _, ok := annotations[constants.ElbTimeoutAnnotationName]; !ok { - annotations[constants.ElbTimeoutAnnotationName] = constants.ElbTimeoutAnnotationValue - } // -repl suffix will be added by replicaDNSName clusterNameWithPoolerSuffix := c.connectionPoolerName(Master) if role == Master { @@ -639,12 +697,31 @@ func (c *Cluster) deleteConnectionPooler(role PostgresRole) (err error) { c.logger.Infof("connection pooler service %s has been deleted for role %s", service.Name, role) } + // Repeat the same for the auth secret + authSecret := c.ConnectionPooler[role].AuthSecret + if authSecret == nil { + c.logger.Debug("no connection pooler auth secret to delete") + } else { + err := c.KubeClient. + Secrets(c.Namespace). + Delete(context.TODO(), authSecret.Name, metav1.DeleteOptions{}) + + if k8sutil.ResourceNotFound(err) { + c.logger.Debugf("connection pooler auth secret %s for role %s has already been deleted", authSecret.Name, role) + } else if err != nil { + return fmt.Errorf("could not delete connection pooler auth secret: %v", err) + } + + c.logger.Infof("connection pooler auth secret %s has been deleted for role %s", authSecret.Name, role) + } + + c.ConnectionPooler[role].AuthSecret = nil c.ConnectionPooler[role].Deployment = nil c.ConnectionPooler[role].Service = nil return nil } -// delete connection pooler +// delete connection pooler secret func (c *Cluster) deleteConnectionPoolerSecret() (err error) { // Repeat the same for the secret object secretName := c.credentialSecretName(c.OpConfig.ConnectionPooler.User) @@ -660,6 +737,7 @@ func (c *Cluster) deleteConnectionPoolerSecret() (err error) { return fmt.Errorf("could not delete pooler secret: %v", err) } } + return nil } @@ -912,7 +990,7 @@ func (c *Cluster) syncConnectionPooler(oldSpec, newSpec *acidv1.Postgresql, Look // in this case also do not forget to install lookup function // skip installation in standby clusters, since they are read-only - if !c.ConnectionPooler[role].LookupFunction && c.Spec.StandbyCluster == nil { + if !c.ConnectionPooler[role].LookupFunction && !isStandbyCluster(&newSpec.Spec) { connectionPooler := c.Spec.ConnectionPooler specSchema := "" specUser := "" @@ -975,11 +1053,42 @@ func (c *Cluster) syncConnectionPoolerWorker(oldSpec, newSpec *acidv1.Postgresql pods []v1.Pod service *v1.Service newService *v1.Service + authSecret *v1.Secret + newAuthSecret *v1.Secret err error ) updatedPodAnnotations := map[string]*string{} syncReason := make([]string, 0) + + // create extra secret for connection pooler authentication + newAuthSecret = c.generateConnectionPoolerAuthSecret(c.ConnectionPooler[role]) + if authSecret, err = c.KubeClient.Secrets(c.Namespace).Get(context.TODO(), fmt.Sprintf("%s-userlist", c.connectionPoolerName(role)), metav1.GetOptions{}); err == nil { + c.ConnectionPooler[role].AuthSecret = authSecret + // make sure existing annotations are preserved + newAuthSecret.Annotations = c.annotationsSet(authSecret.Annotations) + authSecret, err = c.KubeClient.Secrets(authSecret.Namespace).Update(context.TODO(), newAuthSecret, 
metav1.UpdateOptions{}) + if err != nil { + return NoSync, fmt.Errorf("could not update connection pooler auth secret: %v", err) + } + c.ConnectionPooler[role].AuthSecret = authSecret + } else if !k8sutil.ResourceNotFound(err) { + return NoSync, fmt.Errorf("could not get auth secret for connection pooler to sync: %v", err) + } + + if k8sutil.ResourceNotFound(err) { + c.logger.Warningf("auth secret %s for connection pooler is not found, create it", fmt.Sprintf("%s-userlist", c.connectionPoolerName(role))) + authSecret, err = c.KubeClient. + Secrets(newAuthSecret.Namespace). + Create(context.TODO(), newAuthSecret, metav1.CreateOptions{}) + + if err != nil { + return NoSync, err + } + c.ConnectionPooler[role].AuthSecret = authSecret + } + + // next the pooler deployment deployment, err = c.KubeClient. Deployments(c.Namespace). Get(context.TODO(), c.connectionPoolerName(role), metav1.GetOptions{}) diff --git a/pkg/cluster/connection_pooler_test.go b/pkg/cluster/connection_pooler_test.go index 78d1c2527..23213520f 100644 --- a/pkg/cluster/connection_pooler_test.go +++ b/pkg/cluster/connection_pooler_test.go @@ -30,6 +30,7 @@ func newFakeK8sPoolerTestClient() (k8sutil.KubernetesClient, *fake.Clientset) { StatefulSetsGetter: clientSet.AppsV1(), DeploymentsGetter: clientSet.AppsV1(), ServicesGetter: clientSet.CoreV1(), + SecretsGetter: clientSet.CoreV1(), }, clientSet } @@ -803,6 +804,7 @@ func TestConnectionPoolerDeploymentSpec(t *testing.T) { } cluster.ConnectionPooler = map[PostgresRole]*ConnectionPoolerObjects{ Master: { + AuthSecret: nil, Deployment: nil, Service: nil, LookupFunction: true, @@ -1019,6 +1021,7 @@ func TestPoolerTLS(t *testing.T) { // create pooler resources cluster.ConnectionPooler = map[PostgresRole]*ConnectionPoolerObjects{} cluster.ConnectionPooler[Master] = &ConnectionPoolerObjects{ + AuthSecret: nil, Deployment: nil, Service: nil, Name: cluster.connectionPoolerName(Master), @@ -1089,12 +1092,14 @@ func TestConnectionPoolerServiceSpec(t *testing.T) { } cluster.ConnectionPooler = map[PostgresRole]*ConnectionPoolerObjects{ Master: { + AuthSecret: nil, Deployment: nil, Service: nil, LookupFunction: false, Role: Master, }, Replica: { + AuthSecret: nil, Deployment: nil, Service: nil, LookupFunction: false, diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 724986dbc..2eb867f06 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -818,9 +818,6 @@ func (c *Cluster) generatePodTemplate( sidecarContainers []v1.Container, sharePgSocketWithSidecars *bool, tolerationsSpec *[]v1.Toleration, - spiloRunAsUser *int64, - spiloRunAsGroup *int64, - spiloFSGroup *int64, nodeAffinity *v1.Affinity, schedulerName *string, terminateGracePeriod int64, @@ -839,18 +836,22 @@ func (c *Cluster) generatePodTemplate( terminateGracePeriodSeconds := terminateGracePeriod containers := []v1.Container{*spiloContainer} containers = append(containers, sidecarContainers...) 
- securityContext := v1.PodSecurityContext{} - - if spiloRunAsUser != nil { - securityContext.RunAsUser = spiloRunAsUser + securityContext := v1.PodSecurityContext{ + RunAsUser: c.OpConfig.Resources.SpiloRunAsUser, + RunAsGroup: c.OpConfig.Resources.SpiloRunAsGroup, + FSGroup: c.OpConfig.Resources.SpiloFSGroup, } - if spiloRunAsGroup != nil { - securityContext.RunAsGroup = spiloRunAsGroup + if c.Spec.SpiloRunAsUser != nil { + securityContext.RunAsUser = c.Spec.SpiloRunAsUser } - if spiloFSGroup != nil { - securityContext.FSGroup = spiloFSGroup + if c.Spec.SpiloRunAsGroup != nil { + securityContext.RunAsGroup = c.Spec.SpiloRunAsGroup + } + + if c.Spec.SpiloFSGroup != nil { + securityContext.FSGroup = c.Spec.SpiloFSGroup } podSpec := v1.PodSpec{ @@ -1352,22 +1353,6 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef // pickup the docker image for the spilo container effectiveDockerImage := util.Coalesce(spec.DockerImage, c.OpConfig.DockerImage) - // determine the User, Group and FSGroup for the spilo pod - effectiveRunAsUser := c.OpConfig.Resources.SpiloRunAsUser - if spec.SpiloRunAsUser != nil { - effectiveRunAsUser = spec.SpiloRunAsUser - } - - effectiveRunAsGroup := c.OpConfig.Resources.SpiloRunAsGroup - if spec.SpiloRunAsGroup != nil { - effectiveRunAsGroup = spec.SpiloRunAsGroup - } - - effectiveFSGroup := c.OpConfig.Resources.SpiloFSGroup - if spec.SpiloFSGroup != nil { - effectiveFSGroup = spec.SpiloFSGroup - } - volumeMounts := generateVolumeMounts(spec.Volume) // configure TLS with a custom secret volume @@ -1485,9 +1470,6 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef sidecarContainers, c.OpConfig.SharePgSocketWithSidecars, &tolerationSpec, - effectiveRunAsUser, - effectiveRunAsGroup, - effectiveFSGroup, c.nodeAffinity(c.OpConfig.NodeReadinessLabel, spec.NodeAffinity), spec.SchedulerName, int64(c.OpConfig.PodTerminateGracePeriod.Seconds()), @@ -2047,11 +2029,6 @@ func (c *Cluster) generateServiceAnnotations(role PostgresRole, spec *acidv1.Pos if c.shouldCreateLoadBalancerForService(role, spec) { dnsName := c.dnsName(role) - // Just set ELB Timeout annotation with default value, if it does not - // have a custom value - if _, ok := annotations[constants.ElbTimeoutAnnotationName]; !ok { - annotations[constants.ElbTimeoutAnnotationName] = constants.ElbTimeoutAnnotationValue - } // External DNS name annotation is not customizable annotations[constants.ZalandoDNSNameAnnotation] = dnsName } @@ -2379,9 +2356,6 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) { []v1.Container{}, util.False(), &tolerationsSpec, - nil, - nil, - nil, c.nodeAffinity(c.OpConfig.NodeReadinessLabel, nil), nil, int64(c.OpConfig.PodTerminateGracePeriod.Seconds()), diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index 04f6476a6..62481c7e3 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -2967,6 +2967,7 @@ func newLBFakeClient() (k8sutil.KubernetesClient, *fake.Clientset) { DeploymentsGetter: clientSet.AppsV1(), PodsGetter: clientSet.CoreV1(), ServicesGetter: clientSet.CoreV1(), + SecretsGetter: clientSet.CoreV1(), }, clientSet } diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 959bacb54..6658ba414 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -376,6 +376,36 @@ func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error) return memberData, nil } +// podIsNotRunning returns true if a pod is known to be in a non-running state, +// e.g. 
stuck in CreateContainerConfigError, CrashLoopBackOff, ImagePullBackOff, etc. +// Pods with no status information are not considered non-running, as they may +// simply not have reported status yet. +func podIsNotRunning(pod *v1.Pod) bool { + if pod.Status.Phase == "" { + // No status reported yet — don't treat as non-running + return false + } + if pod.Status.Phase != v1.PodRunning { + return true + } + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil || cs.State.Terminated != nil { + return true + } + } + return false +} + +// allPodsRunning returns true only if every pod in the list is in a healthy running state. +func (c *Cluster) allPodsRunning(pods []v1.Pod) bool { + for i := range pods { + if podIsNotRunning(&pods[i]) { + return false + } + } + return true +} + func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { stopCh := make(chan struct{}) ch := c.registerPodSubscriber(podName) @@ -444,7 +474,8 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp // switchover if // 1. we have not observed a new master pod when re-creating former replicas // 2. we know possible switchover targets even when no replicas were recreated - if newMasterPod == nil && len(replicas) > 0 { + // 3. the master pod is actually running (can't switchover a dead master) + if newMasterPod == nil && len(replicas) > 0 && !podIsNotRunning(masterPod) { masterCandidate, err := c.getSwitchoverCandidate(masterPod) if err != nil { // do not recreate master now so it will keep the update flag and switchover will be retried on next sync @@ -455,6 +486,9 @@ func (c *Cluster) recreatePods(pods []v1.Pod, switchoverCandidates []spec.Namesp } } else if newMasterPod == nil && len(replicas) == 0 { c.logger.Warningf("cannot perform switch over before re-creating the pod: no replicas") + } else if podIsNotRunning(masterPod) { + c.logger.Warningf("master pod %q is not running, skipping switchover and recreating directly", + util.NameFromMeta(masterPod.ObjectMeta)) } c.logger.Infof("recreating old master pod %q", util.NameFromMeta(masterPod.ObjectMeta)) diff --git a/pkg/cluster/pod_test.go b/pkg/cluster/pod_test.go index 6816b4d7a..6ab3f9207 100644 --- a/pkg/cluster/pod_test.go +++ b/pkg/cluster/pod_test.go @@ -15,6 +15,7 @@ import ( "github.com/zalando/postgres-operator/pkg/util/config" "github.com/zalando/postgres-operator/pkg/util/k8sutil" "github.com/zalando/postgres-operator/pkg/util/patroni" + v1 "k8s.io/api/core/v1" ) func TestGetSwitchoverCandidate(t *testing.T) { @@ -112,3 +113,302 @@ func TestGetSwitchoverCandidate(t *testing.T) { } } } + +func TestPodIsNotRunning(t *testing.T) { + tests := []struct { + subtest string + pod v1.Pod + expected bool + }{ + { + subtest: "pod with no status reported yet", + pod: v1.Pod{ + Status: v1.PodStatus{}, + }, + expected: false, + }, + { + subtest: "pod running with all containers ready", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "pod in pending phase", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + expected: true, + }, + { + subtest: "pod running but container in CreateContainerConfigError", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: 
"CreateContainerConfigError", + Message: `secret "some-secret" not found`, + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod running but container in CrashLoopBackOff", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CrashLoopBackOff", + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod running but container terminated", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Terminated: &v1.ContainerStateTerminated{ + ExitCode: 137, + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod running with mixed container states - one healthy one broken", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CreateContainerConfigError", + }, + }, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "pod in failed phase", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodFailed, + }, + }, + expected: true, + }, + { + subtest: "pod running with multiple healthy containers", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + { + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "pod running with ImagePullBackOff", + pod: v1.Pod{ + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "ImagePullBackOff", + }, + }, + }, + }, + }, + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.subtest, func(t *testing.T) { + result := podIsNotRunning(&tt.pod) + if result != tt.expected { + t.Errorf("podIsNotRunning() = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestAllPodsRunning(t *testing.T) { + client, _ := newFakeK8sSyncClient() + + var cluster = New( + Config{ + OpConfig: config.Config{ + Resources: config.Resources{ + ClusterLabels: map[string]string{"application": "spilo"}, + ClusterNameLabel: "cluster-name", + PodRoleLabel: "spilo-role", + }, + }, + }, client, acidv1.Postgresql{}, logger, eventRecorder) + + tests := []struct { + subtest string + pods []v1.Pod + expected bool + }{ + { + subtest: "all pods running", + pods: []v1.Pod{ + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + {State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + }, + }, + }, + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + {State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + }, + }, + }, + }, + expected: true, + }, + { + subtest: "one pod not running", + pods: []v1.Pod{ + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + {State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}, + }, + }, + }, + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: 
"CreateContainerConfigError", + }, + }, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "all pods not running", + pods: []v1.Pod{ + { + Status: v1.PodStatus{ + Phase: v1.PodPending, + }, + }, + { + Status: v1.PodStatus{ + Phase: v1.PodRunning, + ContainerStatuses: []v1.ContainerStatus{ + { + State: v1.ContainerState{ + Waiting: &v1.ContainerStateWaiting{ + Reason: "CrashLoopBackOff", + }, + }, + }, + }, + }, + }, + }, + expected: false, + }, + { + subtest: "empty pod list", + pods: []v1.Pod{}, + expected: true, + }, + { + subtest: "pods with no status reported yet", + pods: []v1.Pod{ + { + Status: v1.PodStatus{}, + }, + { + Status: v1.PodStatus{}, + }, + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.subtest, func(t *testing.T) { + result := cluster.allPodsRunning(tt.pods) + if result != tt.expected { + t.Errorf("allPodsRunning() = %v, expected %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 3fa9e9783..7c478477a 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -719,14 +719,26 @@ func (c *Cluster) syncStatefulSet() error { if configPatched, restartPrimaryFirst, restartWait, err = c.syncPatroniConfig(pods, c.Spec.Patroni, requiredPgParameters); err != nil { c.logger.Warningf("Patroni config updated? %v - errors during config sync: %v", configPatched, err) postponeReasons = append(postponeReasons, "errors during Patroni config sync") - isSafeToRecreatePods = false + // Only mark unsafe if all pods are running. If some pods are not running, + // Patroni API errors are expected and should not block pod recreation, + // which is the only way to fix non-running pods. + if c.allPodsRunning(pods) { + isSafeToRecreatePods = false + } else { + c.logger.Warningf("ignoring Patroni config sync errors because some pods are not running") + } } // restart Postgres where it is still pending if err = c.restartInstances(pods, restartWait, restartPrimaryFirst); err != nil { c.logger.Errorf("errors while restarting Postgres in pods via Patroni API: %v", err) postponeReasons = append(postponeReasons, "errors while restarting Postgres via Patroni API") - isSafeToRecreatePods = false + // Same logic: don't let unreachable non-running pods block recreation. 
+ if c.allPodsRunning(pods) { + isSafeToRecreatePods = false + } else { + c.logger.Warningf("ignoring Patroni restart errors because some pods are not running") + } } // if we get here we also need to re-create the pods (either leftovers from the old diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 0a458618b..4df8a8bd2 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -275,7 +275,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.ConnectionPooler.Image = util.Coalesce( fromCRD.ConnectionPooler.Image, - "registry.opensource.zalan.do/acid/pgbouncer") + "ghcr.io/zalando/postgres-operator/pgbouncer:latest") result.ConnectionPooler.Mode = util.Coalesce( fromCRD.ConnectionPooler.Mode, diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 914d7a180..796594a89 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -155,7 +155,7 @@ type ConnectionPooler struct { NumberOfInstances *int32 `name:"connection_pooler_number_of_instances" default:"2"` Schema string `name:"connection_pooler_schema" default:"pooler"` User string `name:"connection_pooler_user" default:"pooler"` - Image string `name:"connection_pooler_image" default:"registry.opensource.zalan.do/acid/pgbouncer"` + Image string `name:"connection_pooler_image" default:"ghcr.io/zalando/postgres-operator/pgbouncer:latest"` Mode string `name:"connection_pooler_mode" default:"transaction"` MaxDBConnections *int32 `name:"connection_pooler_max_db_connections" default:"60"` ConnectionPoolerDefaultCPURequest string `name:"connection_pooler_default_cpu_request"` diff --git a/pkg/util/constants/annotations.go b/pkg/util/constants/annotations.go index fc5a84fa5..0330ddcb8 100644 --- a/pkg/util/constants/annotations.go +++ b/pkg/util/constants/annotations.go @@ -3,8 +3,6 @@ package constants // Names and values in Kubernetes annotation for services, statefulsets and volumes const ( ZalandoDNSNameAnnotation = "external-dns.alpha.kubernetes.io/hostname" - ElbTimeoutAnnotationName = "service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout" - ElbTimeoutAnnotationValue = "3600" KubeIAmAnnotation = "iam.amazonaws.com/role" VolumeStorateProvisionerAnnotation = "pv.kubernetes.io/provisioned-by" PostgresqlControllerAnnotationKey = "acid.zalan.do/controller" diff --git a/pooler/Dockerfile b/pooler/Dockerfile new file mode 100644 index 000000000..e7836e0f2 --- /dev/null +++ b/pooler/Dockerfile @@ -0,0 +1,54 @@ +ARG BASE_IMAGE=alpine:3.22 +FROM ${BASE_IMAGE} AS build_stage + +RUN apk add -U --no-cache \ + autoconf \ + automake \ + curl \ + gcc \ + libc-dev \ + libevent \ + libevent-dev \ + libtool \ + make \ + openssl-dev \ + pkgconfig \ + git + +WORKDIR /src + +RUN git clone --single-branch --depth 1 https://github.com/pgbouncer/pgbouncer.git . 
&& \ + git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) + +RUN git submodule init && git submodule update + +RUN ./autogen.sh && \ + ./configure --prefix=/pgbouncer --with-libevent=/usr/lib && \ + sed -i '/dist_man_MANS/d' Makefile && \ + make && \ + make install + +FROM ${BASE_IMAGE} + +RUN apk -U upgrade --no-cache \ + && apk --no-cache add bash c-ares ca-certificates gettext libevent openssl postgresql-client + +RUN addgroup -g 101 -S pgbouncer && \ + adduser -u 100 -S pgbouncer -G pgbouncer && \ + mkdir -p /etc/pgbouncer /var/log/pgbouncer /var/run/pgbouncer /etc/ssl/certs + +COPY --from=build_stage /pgbouncer/bin/pgbouncer /bin/pgbouncer +COPY pgbouncer.ini.tmpl /etc/pgbouncer/ +COPY entrypoint.sh /entrypoint.sh + +RUN chown -R pgbouncer:pgbouncer \ + /var/log/pgbouncer \ + /var/run/pgbouncer \ + /etc/pgbouncer \ + /etc/ssl/certs \ + && chmod +x /entrypoint.sh + +USER pgbouncer:pgbouncer +WORKDIR /etc/pgbouncer + +ENTRYPOINT ["/bin/sh", "/entrypoint.sh"] diff --git a/pooler/entrypoint.sh b/pooler/entrypoint.sh new file mode 100755 index 000000000..326d63fbe --- /dev/null +++ b/pooler/entrypoint.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -ex + +if [ -z "${CONNECTION_POOLER_CLIENT_TLS_CRT}" ]; then + openssl req -nodes -new -x509 -subj /CN=spilo.dummy.org \ + -keyout /etc/ssl/certs/pgbouncer.key \ + -out /etc/ssl/certs/pgbouncer.crt +else + ln -s ${CONNECTION_POOLER_CLIENT_TLS_CRT} /etc/ssl/certs/pgbouncer.crt + ln -s ${CONNECTION_POOLER_CLIENT_TLS_KEY} /etc/ssl/certs/pgbouncer.key + if [ ! -z "${CONNECTION_POOLER_CLIENT_CA_FILE}" ]; then + ln -s ${CONNECTION_POOLER_CLIENT_CA_FILE} /etc/ssl/certs/ca.crt + fi +fi + +envsubst < /etc/pgbouncer/pgbouncer.ini.tmpl > /etc/pgbouncer/pgbouncer.ini + +exec /bin/pgbouncer /etc/pgbouncer/pgbouncer.ini diff --git a/pooler/pgbouncer.ini.tmpl b/pooler/pgbouncer.ini.tmpl new file mode 100644 index 000000000..c26cf1453 --- /dev/null +++ b/pooler/pgbouncer.ini.tmpl @@ -0,0 +1,70 @@ +# vim: set ft=dosini: + +[databases] +* = host=$PGHOST port=$PGPORT auth_user=$PGUSER +postgres = host=$PGHOST port=$PGPORT auth_user=$PGUSER + +[pgbouncer] +pool_mode = $CONNECTION_POOLER_MODE +listen_port = $CONNECTION_POOLER_PORT +listen_addr = * +admin_users = $PGUSER +stats_users = $INFRASTRUCTURE_ROLES +auth_dbname = postgres +auth_file = /etc/pgbouncer/userlist.txt +auth_query = SELECT * FROM $PGSCHEMA.user_lookup($1) +auth_type = md5 +logfile = /var/log/pgbouncer/pgbouncer.log +pidfile = /var/run/pgbouncer/pgbouncer.pid + +server_tls_sslmode = require +server_tls_ca_file = /etc/ssl/certs/pgbouncer.crt +server_tls_protocols = secure +client_tls_sslmode = require +client_tls_key_file = /etc/ssl/certs/pgbouncer.key +client_tls_cert_file = /etc/ssl/certs/pgbouncer.crt + +log_connections = 0 +log_disconnections = 0 + +# Number of prepared statements to cache on a server connection (zero value +# disables support of prepared statements). +max_prepared_statements = 200 + +# How many server connections to allow per user/database pair. +default_pool_size = $CONNECTION_POOLER_DEFAULT_SIZE + +# Add more server connections to pool if below this number. Improves behavior +# when usual load comes suddenly back after period of total inactivity. +# +# NOTE: This value is per pool, i.e. a pair of (db, user), not a global one. +# Which means on the higher level it has to be calculated from the max allowed +# database connections and number of databases and users. 
diff --git a/ui/operator_ui/spiloutils.py b/ui/operator_ui/spiloutils.py
index 6a2f03bb2..8d2b73967 100644
--- a/ui/operator_ui/spiloutils.py
+++ b/ui/operator_ui/spiloutils.py
@@ -321,11 +321,18 @@ def read_basebackups(
     suffix = '' if uid == 'base' else '/' + uid
     backups = []
 
+    # Reuse a single S3 client configured with AWS_ENDPOINT so that MinIO and
+    # other S3-compatible backends are used for the list and get calls as well.
+    # The previous plain client('s3') fell back to the default AWS endpoint and
+    # returned no data when a custom endpoint was configured; read_stored_clusters
+    # and read_versions already pass endpoint_url=AWS_ENDPOINT (#3078).
+    s3_client = client('s3', endpoint_url=AWS_ENDPOINT)
+
     for vp in postgresql_versions:
         backup_prefix = f'{prefix}{pg_cluster}{suffix}/wal/{vp}/basebackups_005/'
         logger.info(f"{bucket}/{backup_prefix}")
 
-        paginator = client('s3').get_paginator('list_objects_v2')
+        paginator = s3_client.get_paginator('list_objects_v2')
         pages = paginator.paginate(Bucket=bucket, Prefix=backup_prefix)
 
         for page in pages:
@@ -334,7 +341,7 @@ def read_basebackups(
             if not key.endswith("backup_stop_sentinel.json"):
                 continue
 
-            response = client('s3').get_object(Bucket=bucket, Key=key)
+            response = s3_client.get_object(Bucket=bucket, Key=key)
             backup_info = loads(response["Body"].read().decode("utf-8"))
 
             last_modified = response["LastModified"].astimezone(timezone.utc).isoformat()
diff --git a/ui/requirements.txt b/ui/requirements.txt
index 2e43ccb0e..ace18641d 100644
--- a/ui/requirements.txt
+++ b/ui/requirements.txt
@@ -11,4 +11,4 @@ kubernetes==11.0.0
 python-json-logger==2.0.7
 requests==2.32.4
 stups-tokens>=1.1.19
-werkzeug==3.1.5
+werkzeug==3.1.6