From e3b39a5cbeafcadae5c9535f573a277e5ff779b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Inge=20Bols=C3=B8?= Date: Sat, 5 Oct 2019 10:10:02 +0200 Subject: [PATCH 1/7] document configmap variant of inherited_labels (#678) * document configmap variant of inherited_labels and remove application label from cluster example since we will get application:spilo by default --- charts/postgres-operator/values-crd.yaml | 2 +- charts/postgres-operator/values.yaml | 2 +- docs/administrator.md | 12 ++++++++++++ manifests/complete-postgres-manifest.yaml | 2 ++ manifests/configmap.yaml | 2 +- .../postgresql-operator-default-configuration.yaml | 2 +- 6 files changed, 18 insertions(+), 4 deletions(-) diff --git a/charts/postgres-operator/values-crd.yaml b/charts/postgres-operator/values-crd.yaml index 5a8fe951a..2728b245c 100644 --- a/charts/postgres-operator/values-crd.yaml +++ b/charts/postgres-operator/values-crd.yaml @@ -66,7 +66,7 @@ configKubernetes: # list of labels that can be inherited from the cluster manifest # inherited_labels: # - application - # - app + # - environment # timeout for successful migration of master pods from unschedulable node # master_pod_move_timeout: 20m diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index a3d4edf95..a14c8ab92 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -62,7 +62,7 @@ configKubernetes: # infrastructure_roles_secret_name: postgresql-infrastructure-roles # list of labels that can be inherited from the cluster manifest - # inherited_labels: "" + # inherited_labels: application,environment # timeout for successful migration of master pods from unschedulable node # master_pod_move_timeout: 20m diff --git a/docs/administrator.md b/docs/administrator.md index 5eaf3ff71..e7ac1bd7b 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -200,6 +200,18 @@ Postgres cluster, in order to identify its child objects. 
The typical use case is to add labels that identifies the `Pods` created by the operator, in order to implement fine-controlled `NetworkPolicies`. +**postgres-operator ConfigMap** + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgres-operator +data: + inherited_labels: application,environment + ... +``` + **OperatorConfiguration** ```yaml diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 2c600fc3e..d1bd471db 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -2,6 +2,8 @@ apiVersion: "acid.zalan.do/v1" kind: postgresql metadata: name: acid-test-cluster +# labels: +# environment: demo spec: dockerImage: registry.opensource.zalan.do/acid/spilo-11:1.6-p1 initContainers: diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 9ee80016c..f9223f3fe 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -31,7 +31,7 @@ data: enable_teams_api: "false" # etcd_host: "" # infrastructure_roles_secret_name: postgresql-infrastructure-roles - # inherited_labels: "" + # inherited_labels: application,environment # kube_iam_role: "" # log_s3_bucket: "" # logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 5ad156e3c..ad4d028c3 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -27,7 +27,7 @@ configuration: # infrastructure_roles_secret_name: "" # inherited_labels: # - application - # - app + # - environment # node_readiness_label: "" oauth_token_secret_name: postgresql-operator pdb_name_format: "postgres-{cluster}-pdb" From 647a4d3023d4dd13e9d88e2bf7242237623b58bf Mon Sep 17 00:00:00 2001 From: Dmitry Dolgov <9erthalion6@gmail.com> Date: Fri, 11 Oct 2019 11:06:14 +0200 Subject: [PATCH 
2/7] Remove service accounts cache (#685) For optimization purposes the operator was creating a cache map to remember whether the service account and role binding had been deployed to a namespace. This could lead to a problem when a namespace was deleted, since this cache was not synchronized. For the sake of correctness, remove the cache and check every time whether the required service account and RBAC are present. In the normal case this introduces an overhead of two API calls per event (one to get a service account, one to get a role binding), which should not be a problem unless proven otherwise. --- pkg/controller/controller.go | 1 - pkg/controller/postgresql.go | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index a492a85e2..9162ce27d 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -58,7 +58,6 @@ type Controller struct { PodServiceAccount *v1.ServiceAccount PodServiceAccountRoleBinding *rbacv1beta1.RoleBinding - namespacesWithDefinedRBAC sync.Map } // NewController creates a new controller diff --git a/pkg/controller/postgresql.go b/pkg/controller/postgresql.go index 5b76af12f..8e8f9ae85 100644 --- a/pkg/controller/postgresql.go +++ b/pkg/controller/postgresql.go @@ -493,17 +493,16 @@ func (c *Controller) postgresqlDelete(obj interface{}) { } /* - Ensures the pod service account and role bindings exists in a namespace before a PG cluster is created there so that a user does not have to deploy these credentials manually. - StatefulSets require the service account to create pods; Patroni requires relevant RBAC bindings to access endpoints. + Ensures the pod service account and role bindings exists in a namespace + before a PG cluster is created there so that a user does not have to deploy + these credentials manually. StatefulSets require the service account to + create pods; Patroni requires relevant RBAC bindings to access endpoints. 
The operator does not sync accounts/role bindings after creation. */ func (c *Controller) submitRBACCredentials(event ClusterEvent) error { namespace := event.NewSpec.GetNamespace() - if _, ok := c.namespacesWithDefinedRBAC.Load(namespace); ok { - return nil - } if err := c.createPodServiceAccount(namespace); err != nil { return fmt.Errorf("could not create pod service account %v : %v", c.opConfig.PodServiceAccountName, err) @@ -512,7 +511,6 @@ func (c *Controller) submitRBACCredentials(event ClusterEvent) error { if err := c.createRoleBindings(namespace); err != nil { return fmt.Errorf("could not create role binding %v : %v", c.PodServiceAccountRoleBinding.Name, err) } - c.namespacesWithDefinedRBAC.Store(namespace, true) return nil } From bb855fd9bc4a18f8b725141ea01184cf5e80c745 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 11 Oct 2019 16:07:13 +0200 Subject: [PATCH 3/7] fetch cluster_name_label from configuration (#684) --- docker/logical-backup/dump.sh | 43 ++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/docker/logical-backup/dump.sh b/docker/logical-backup/dump.sh index dcbd7d334..c3cf5ba67 100755 --- a/docker/logical-backup/dump.sh +++ b/docker/logical-backup/dump.sh @@ -14,8 +14,9 @@ PG_BIN=$PG_DIR/$PG_VERSION/bin DUMP_SIZE_COEFF=5 TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) -K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT/api/v1 +K8S_API_URL=https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT CERT=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt +CLUSTER_NAME_LABEL=cluster-name function estimate_size { "$PG_BIN"/psql -tqAc "${ALL_DB_SIZE_QUERY}" @@ -48,33 +49,63 @@ function aws_upload { function get_pods { declare -r SELECTOR="$1" - curl "${K8S_API_URL}/namespaces/${POD_NAMESPACE}/pods?$SELECTOR" \ + curl "${K8S_API_URL}/api/v1/namespaces/${POD_NAMESPACE}/pods?$SELECTOR" \ --cacert $CERT \ -H "Authorization: Bearer ${TOKEN}" | jq 
.items[].status.podIP -r } function get_current_pod { - curl "${K8S_API_URL}/namespaces/${POD_NAMESPACE}/pods?fieldSelector=metadata.name%3D${HOSTNAME}" \ + curl "${K8S_API_URL}/api/v1/namespaces/${POD_NAMESPACE}/pods?fieldSelector=metadata.name%3D${HOSTNAME}" \ --cacert $CERT \ -H "Authorization: Bearer ${TOKEN}" } declare -a search_strategy=( + get_cluster_name_label list_all_replica_pods_current_node list_all_replica_pods_any_node get_master_pod ) +function get_config_resource() { + curl "${K8S_API_URL}/apis/apps/v1/namespaces/default/deployments/postgres-operator" \ + --cacert $CERT \ + -H "Authorization: Bearer ${TOKEN}" | jq '.spec.template.spec.containers[0].env[] | select(.name == "$1") | .value' +} + +function get_cluster_name_label { + local config + local clustername + + config=$(get_config_resource "CONFIG_MAP_NAME") + if [ -n "$config" ]; then + clustername=$(curl "${K8S_API_URL}/api/v1/namespaces/default/configmaps/${config}" \ + --cacert $CERT \ + -H "Authorization: Bearer ${TOKEN}" | jq '.data.cluster_name_label') + else + config=$(get_config_resource "POSTGRES_OPERATOR_CONFIGURATION_OBJECT") + if [ -n "$config" ]; then + clustername=$(curl "${K8S_API_URL}/apis/acid.zalan.do/v1/namespaces/default/operatorconfigurations/${config}" \ + --cacert $CERT \ + -H "Authorization: Bearer ${TOKEN}" | jq '.configuration.kubernetes.cluster_name_label') + fi + fi + + if [ -n "$clustername" ]; then + CLUSTER_NAME_LABEL=${clustername} + fi; +} + function list_all_replica_pods_current_node { - get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dreplica&fieldSelector=spec.nodeName%3D${CURRENT_NODENAME}" | head -n 1 + get_pods "labelSelector=${CLUSTER_NAME_LABEL}%3D${SCOPE},spilo-role%3Dreplica&fieldSelector=spec.nodeName%3D${CURRENT_NODENAME}" | head -n 1 } function list_all_replica_pods_any_node { - get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dreplica" | head -n 1 + get_pods "labelSelector=${CLUSTER_NAME_LABEL}%3D${SCOPE},spilo-role%3Dreplica" | head 
-n 1 } function get_master_pod { - get_pods "labelSelector=version%3D${SCOPE},spilo-role%3Dmaster" | head -n 1 + get_pods "labelSelector=${CLUSTER_NAME_LABEL}%3D${SCOPE},spilo-role%3Dmaster" | head -n 1 } CURRENT_NODENAME=$(get_current_pod | jq .items[].spec.nodeName --raw-output) From 8d103ee9f95aeba47c42b9d90f3d64d0134f6754 Mon Sep 17 00:00:00 2001 From: anikin-aa Date: Tue, 15 Oct 2019 15:08:06 +0300 Subject: [PATCH 4/7] Update quickstart.md (#687) Fix values-crd.yaml incorrect link --- docs/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index b8b97c94e..968adfde2 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -80,7 +80,7 @@ is prepended to the operator resource's names. Use `--name zalando` to match with the default service account name as older operator versions do not support custom names for service accounts. To use -CRD-based configuration you need to specify the [values-crd yaml file](../charts/values-crd.yaml). +CRD-based configuration you need to specify the [values-crd yaml file](../charts/postgres-operator/values-crd.yaml). 
```bash # 1) initialize helm From 6e682fd6b5707efa90e8c40684914436e457de78 Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 18 Oct 2019 10:41:56 -0400 Subject: [PATCH 5/7] Fixing spelling mistake in delete PVC function name (#691) --- pkg/cluster/resources.go | 2 +- pkg/cluster/volumes.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index 5d181ba1f..23ac3f348 100644 --- a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -329,7 +329,7 @@ func (c *Cluster) deleteStatefulSet() error { return fmt.Errorf("could not delete pods: %v", err) } - if err := c.deletePersistenVolumeClaims(); err != nil { + if err := c.deletePersistentVolumeClaims(); err != nil { return fmt.Errorf("could not delete PersistentVolumeClaims: %v", err) } diff --git a/pkg/cluster/volumes.go b/pkg/cluster/volumes.go index 57f8ca7e3..d92ae6258 100644 --- a/pkg/cluster/volumes.go +++ b/pkg/cluster/volumes.go @@ -30,7 +30,7 @@ func (c *Cluster) listPersistentVolumeClaims() ([]v1.PersistentVolumeClaim, erro return pvcs.Items, nil } -func (c *Cluster) deletePersistenVolumeClaims() error { +func (c *Cluster) deletePersistentVolumeClaims() error { c.logger.Debugln("deleting PVCs") pvcs, err := c.listPersistentVolumeClaims() if err != nil { From b738283f6fb31087141b0ae5c958e455d83c1116 Mon Sep 17 00:00:00 2001 From: Emre Hasegeli Date: Wed, 23 Oct 2019 10:22:23 +0100 Subject: [PATCH 6/7] charts: Add pods/exec permission (#694) --- charts/postgres-operator/templates/clusterrole.yaml | 6 ++++++ manifests/operator-service-account-rbac.yaml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/charts/postgres-operator/templates/clusterrole.yaml b/charts/postgres-operator/templates/clusterrole.yaml index a00ea3ab7..f8550a539 100644 --- a/charts/postgres-operator/templates/clusterrole.yaml +++ b/charts/postgres-operator/templates/clusterrole.yaml @@ -87,6 +87,12 @@ rules: - list - watch - patch +- apiGroups: + - "" + resources: 
+ - pods/exec + verbs: + - create - apiGroups: - "" resources: diff --git a/manifests/operator-service-account-rbac.yaml b/manifests/operator-service-account-rbac.yaml index bca1128f4..e95fe320b 100644 --- a/manifests/operator-service-account-rbac.yaml +++ b/manifests/operator-service-account-rbac.yaml @@ -88,6 +88,12 @@ rules: - list - watch - patch +- apiGroups: + - "" + resources: + - pods/exec + verbs: + - create - apiGroups: - "" resources: From aab4d511428e25e7364909cd96d612b72670fd18 Mon Sep 17 00:00:00 2001 From: Yujun Zhang Date: Mon, 28 Oct 2019 23:33:16 +0800 Subject: [PATCH 7/7] Fix bad formatting in docs (#637) List requires a preceding empty line --- docs/administrator.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/administrator.md b/docs/administrator.md index e7ac1bd7b..ab5368e7d 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -184,6 +184,7 @@ parameter of the PDB is set to `1` which prevents killing masters in single-node clusters and/or the last remaining running instance in a multi-node cluster. The PDB is only relaxed in two scenarios: + * If a cluster is scaled down to `0` instances (e.g. for draining nodes) * If the PDB is disabled in the configuration (`enable_pod_disruption_budget`) @@ -387,7 +388,7 @@ manifest. Notes: backup via `pg_dumpall` and upload of compressed and encrypted results to an S3 bucket; the default image ``registry.opensource.zalan.do/acid/logical-backup`` is the same image built with the Zalando-internal CI pipeline. `pg_dumpall` -requires a `superuser` access to a DB and runs on the replica when possible. +requires a `superuser` access to a DB and runs on the replica when possible. 2. Due to the [limitation of K8s cron jobs](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) it is highly advisable to set up additional monitoring for this feature; such