From 168b679506db5d9e3ce2ce748d593beefe3a6f19 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Thu, 7 Jan 2021 10:38:07 +0100 Subject: [PATCH 1/5] add a prefix for the name of a logical backup job (#1287) * add a prefix for the name of a logical backup job Co-authored-by: Sergey Dudoladov --- charts/postgres-operator/values.yaml | 8 +++-- docs/reference/operator_parameters.md | 33 ++++++++++--------- manifests/operatorconfiguration.crd.yaml | 2 ++ ...gresql-operator-default-configuration.yaml | 1 + .../v1/operator_configuration_type.go | 1 + pkg/cluster/k8sres.go | 2 +- pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + 8 files changed, 30 insertions(+), 19 deletions(-) diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 39fdb9929..5683013e5 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -242,17 +242,18 @@ configLogicalBackup: logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v.1.6.0" # path of google cloud service account json file # logical_backup_google_application_credentials: "" - + # prefix for the backup job name + logical_backup_job_prefix: "logical-backup-" # storage provider - either "s3" or "gcs" logical_backup_provider: "s3" # S3 Access Key ID logical_backup_s3_access_key_id: "" # S3 bucket to store backup results logical_backup_s3_bucket: "my-bucket-url" - # S3 region of bucket - logical_backup_s3_region: "" # S3 endpoint url when not using AWS logical_backup_s3_endpoint: "" + # S3 region of bucket + logical_backup_s3_region: "" # S3 Secret Access Key logical_backup_s3_secret_access_key: "" # S3 server side encryption @@ -260,6 +261,7 @@ configLogicalBackup: # backup schedule in the cron format logical_backup_schedule: "30 00 * * *" + # automate creation of human users with teams API service configTeamsApi: # team_admin_role will have the rights to grant roles coming from PG manifests diff --git 
a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 01e5c4039..76a3cefd4 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -551,11 +551,6 @@ These parameters configure a K8s cron job managed by the operator to produce Postgres logical backups. In the CRD-based configuration those parameters are grouped under the `logical_backup` key. -* **logical_backup_schedule** - Backup schedule in the cron format. Please take the - [reference schedule format](https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#schedule) - into account. Default: "30 00 \* \* \*" - * **logical_backup_docker_image** An image for pods of the logical backup job. The [example image](../../docker/logical-backup/Dockerfile) runs `pg_dumpall` on a replica if possible and uploads compressed results to @@ -563,32 +558,40 @@ grouped under the `logical_backup` key. The default image is the same image built with the Zalando-internal CI pipeline. Default: "registry.opensource.zalan.do/acid/logical-backup" +* **logical_backup_google_application_credentials** + Specifies the path of the google cloud service account json file. Default is empty. + +* **logical_backup_job_prefix** + The prefix to be prepended to the name of a k8s CronJob running the backups. Beware the prefix counts towards the name length restrictions imposed by k8s. Empty string is a legitimate value. Operator does not do the actual renaming: It simply creates the job with the new prefix. You will have to delete the old cron job manually. Default: "logical-backup-". + * **logical_backup_provider** Specifies the storage provider to which the backup should be uploaded (`s3` or `gcs`). Default: "s3" +* **logical_backup_s3_access_key_id** + When set, value will be in AWS_ACCESS_KEY_ID env variable. The Default is empty. + * **logical_backup_s3_bucket** S3 bucket to store backup results. The bucket has to be present and accessible by Postgres pods. 
Default: empty. +* **logical_backup_s3_endpoint** + When using non-AWS S3 storage, endpoint can be set as a ENV variable. The default is empty. + * **logical_backup_s3_region** Specifies the region of the bucket which is required with some non-AWS S3 storage services. The default is empty. -* **logical_backup_s3_endpoint** - When using non-AWS S3 storage, endpoint can be set as a ENV variable. The default is empty. +* **logical_backup_s3_secret_access_key** + When set, value will be in AWS_SECRET_ACCESS_KEY env variable. The Default is empty. * **logical_backup_s3_sse** Specify server side encryption that S3 storage is using. If empty string is specified, no argument will be passed to `aws s3` command. Default: "AES256". -* **logical_backup_s3_access_key_id** - When set, value will be in AWS_ACCESS_KEY_ID env variable. The Default is empty. - -* **logical_backup_s3_secret_access_key** - When set, value will be in AWS_SECRET_ACCESS_KEY env variable. The Default is empty. - -* **logical_backup_google_application_credentials** - Specifies the path of the google cloud service account json file. Default is empty. +* **logical_backup_schedule** + Backup schedule in the cron format. Please take the + [reference schedule format](https://kubernetes.io/docs/tasks/job/automated-tasks-with-cron-jobs/#schedule) + into account. 
Default: "30 00 \* \* \*" ## Debugging the operator diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 50405a8cc..d52608c15 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -317,6 +317,8 @@ spec: type: string logical_backup_google_application_credentials: type: string + logical_backup_job_prefix: + type: string logical_backup_provider: type: string logical_backup_s3_access_key_id: diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 69e53daeb..f011feca7 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -117,6 +117,7 @@ configuration: logical_backup: logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v.1.6.0" # logical_backup_google_application_credentials: "" + logical_backup_job_prefix: "logical-backup-" logical_backup_provider: "s3" # logical_backup_s3_access_key_id: "" logical_backup_s3_bucket: "my-bucket-url" diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 9e5d01040..b55bfa492 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -196,6 +196,7 @@ type OperatorLogicalBackupConfiguration struct { S3SecretAccessKey string `json:"logical_backup_s3_secret_access_key,omitempty"` S3SSE string `json:"logical_backup_s3_sse,omitempty"` GoogleApplicationCredentials string `json:"logical_backup_google_application_credentials,omitempty"` + JobPrefix string `json:"logical_backup_job_prefix,omitempty"` } // OperatorConfigurationData defines the operation config diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 6b47b37f6..6b1af045f 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ 
-2079,7 +2079,7 @@ func (c *Cluster) generateLogicalBackupPodEnvVars() []v1.EnvVar { // getLogicalBackupJobName returns the name; the job itself may not exists func (c *Cluster) getLogicalBackupJobName() (jobName string) { - return "logical-backup-" + c.clusterName().Name + return c.OpConfig.LogicalBackupJobPrefix + c.clusterName().Name } // Return an array of ownerReferences to make an arbitraty object dependent on diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index a55dab2d8..faf1a4908 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -154,6 +154,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.LogicalBackupS3SecretAccessKey = fromCRD.LogicalBackup.S3SecretAccessKey result.LogicalBackupS3SSE = fromCRD.LogicalBackup.S3SSE result.LogicalBackupGoogleApplicationCredentials = fromCRD.LogicalBackup.GoogleApplicationCredentials + result.LogicalBackupJobPrefix = fromCRD.LogicalBackup.JobPrefix // debug config result.DebugLogging = fromCRD.OperatorDebug.DebugLogging diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 62cfdec57..142aa2be9 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -121,6 +121,7 @@ type LogicalBackup struct { LogicalBackupS3SecretAccessKey string `name:"logical_backup_s3_secret_access_key" default:""` LogicalBackupS3SSE string `name:"logical_backup_s3_sse" default:""` LogicalBackupGoogleApplicationCredentials string `name:"logical_backup_google_application_credentials" default:""` + LogicalBackupJobPrefix string `name:"logical_backup_job_prefix" default:"logical-backup-"` } // Operator options for connection pooler From 260714b657c6deab43a64ee78c8362d67caf69a2 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Thu, 7 Jan 2021 15:16:09 +0100 Subject: [PATCH 2/5] fix typo in pooler env variable (#1294) --- ui/operator_ui/spiloutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/ui/operator_ui/spiloutils.py b/ui/operator_ui/spiloutils.py index 6b1d394a1..34ca42718 100644 --- a/ui/operator_ui/spiloutils.py +++ b/ui/operator_ui/spiloutils.py @@ -21,7 +21,7 @@ AWS_ENDPOINT = getenv('AWS_ENDPOINT') OPERATOR_CLUSTER_NAME_LABEL = getenv('OPERATOR_CLUSTER_NAME_LABEL', 'cluster-name') COMMON_CLUSTER_LABEL = getenv('COMMON_CLUSTER_LABEL', '{"application":"spilo"}') -COMMON_POOLER_LABEL = getenv('COMMONG_POOLER_LABEL', '{"application":"db-connection-pooler"}') +COMMON_POOLER_LABEL = getenv('COMMON_POOLER_LABEL', '{"application":"db-connection-pooler"}') logger.info("Common Cluster Label: {}".format(COMMON_CLUSTER_LABEL)) logger.info("Common Pooler Label: {}".format(COMMON_POOLER_LABEL)) From 646c28e9c931d2dbeba73b2654289a99e2ab3579 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 8 Jan 2021 12:00:23 +0100 Subject: [PATCH 3/5] [UI] on read_pods filter only spilo pods (#1297) --- ui/operator_ui/spiloutils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ui/operator_ui/spiloutils.py b/ui/operator_ui/spiloutils.py index 34ca42718..26113bd54 100644 --- a/ui/operator_ui/spiloutils.py +++ b/ui/operator_ui/spiloutils.py @@ -107,6 +107,12 @@ def encode_labels(label_selector): ]) +def cluster_labels(spilo_cluster): + labels = COMMON_CLUSTER_LABEL + labels[OPERATOR_CLUSTER_NAME_LABEL] = spilo_cluster + return labels + + def kubernetes_url( resource_type, namespace='default', @@ -151,7 +157,7 @@ def read_pods(cluster, namespace, spilo_cluster): cluster=cluster, resource_type='pods', namespace=namespace, - label_selector={OPERATOR_CLUSTER_NAME_LABEL: spilo_cluster}, + label_selector=cluster_labels(spilo_cluster), ) From 9d94e018ffc903ba50959ad140a2345bf3c30bd5 Mon Sep 17 00:00:00 2001 From: Pavel Tumik Date: Fri, 8 Jan 2021 03:30:28 -0800 Subject: [PATCH 4/5] fix incorrect tag for logical backup docker image (#1295) --- charts/postgres-operator/values.yaml | 2 +- manifests/postgresql-operator-default-configuration.yaml | 2 
+- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 5683013e5..ebfd49252 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -239,7 +239,7 @@ configAwsOrGcp: # configure K8s cron job managed by the operator configLogicalBackup: # image for pods of the logical backup job (example runs pg_dumpall) - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v.1.6.0" + logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" # path of google cloud service account json file # logical_backup_google_application_credentials: "" # prefix for the backup job name diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index f011feca7..96394976d 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -115,7 +115,7 @@ configuration: # wal_gs_bucket: "" # wal_s3_bucket: "" logical_backup: - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v.1.6.0" + logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" # logical_backup_google_application_credentials: "" logical_backup_job_prefix: "logical-backup-" logical_backup_provider: "s3" From b7f4cde541ba959d1519adf544bad39672a303bf Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Fri, 8 Jan 2021 15:08:44 +0100 Subject: [PATCH 5/5] wrap getting Patroni state into retry (#1293) Retry calls to Patroni API to get cluster state Co-authored-by: Sergey Dudoladov --- pkg/cluster/pod.go | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index a13eb479c..cf43de9a7 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -4,6 +4,7 @@ import ( "context" "fmt" 
"math/rand" + "time" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -11,6 +12,7 @@ import ( "github.com/zalando/postgres-operator/pkg/spec" "github.com/zalando/postgres-operator/pkg/util" + "github.com/zalando/postgres-operator/pkg/util/retryutil" ) func (c *Cluster) listPods() ([]v1.Pod, error) { @@ -309,7 +311,23 @@ func (c *Cluster) isSafeToRecreatePods(pods *v1.PodList) bool { } for _, pod := range pods.Items { - state, err := c.patroni.GetPatroniMemberState(&pod) + + var state string + + err := retryutil.Retry(1*time.Second, 5*time.Second, + func() (bool, error) { + + var err error + + state, err = c.patroni.GetPatroniMemberState(&pod) + + if err != nil { + return false, err + } + return true, nil + }, + ) + if err != nil { c.logger.Errorf("failed to get Patroni state for pod: %s", err) return false