From 5e1d86e31e9bf04d7069c075b10068858c292fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20G=C3=B3mez?= Date: Mon, 16 Apr 2018 18:23:41 +0200 Subject: [PATCH 01/21] Fix clone timestamp key in example manifest (#276) It was set to `endTimestamp`, but it should be `timestamp`. --- manifests/complete-postgres-manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 68bda81cf..2f929bc2c 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -49,7 +49,7 @@ spec: # with an empty/absent timestamp, clone from an existing alive cluster using pg_basebackup # clone: # cluster: "acid-batman" - # endTimestamp: "2017-12-19T12:40:33+01:00" # timezone required (offset relative to UTC, see RFC 3339 section 5.6) + # timestamp: "2017-12-19T12:40:33+01:00" # timezone required (offset relative to UTC, see RFC 3339 section 5.6) maintenanceWindows: - 01:00-06:00 #UTC - Sat:00:00-04:00 From 214ae04aa7692d51fbed1a2e10f56bb5b03d4c4d Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Wed, 18 Apr 2018 16:20:20 +0200 Subject: [PATCH 02/21] Deploy service account for pod creation on demand --- pkg/cluster/cluster.go | 5 +++++ pkg/cluster/sync.go | 34 ++++++++++++++++++++++++++++++++++ pkg/controller/controller.go | 20 ++++++++++++++++++++ pkg/util/config/config.go | 35 ++++++++++++++++++++--------------- 4 files changed, 79 insertions(+), 15 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index bfb8e35d7..0bcaf09f0 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -256,6 +256,11 @@ func (c *Cluster) Create() error { } c.logger.Infof("pod disruption budget %q has been successfully created", util.NameFromMeta(pdb.ObjectMeta)) + if err = c.syncPodServiceAccounts(); err != nil { + return fmt.Errorf("could not sync pod service accounts: %v", err) + } + c.logger.Infof("pod service accounts have been successfully synced") + if c.Statefulset != nil { return fmt.Errorf("statefulset already exists in the cluster") } diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 5a77e658b..133ea50fc 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -44,6 +44,12 @@ func (c *Cluster) Sync(newSpec *spec.Postgresql) (err error) { return } + c.logger.Debugf("syncing service accounts") + if err = c.syncPodServiceAccounts(); err != nil { + err = fmt.Errorf("could not sync service accounts: %v", err) + return + } + c.logger.Debugf("syncing services") if err = c.syncServices(); err != nil { err = fmt.Errorf("could not sync services: %v", err) @@ -103,6 +109,34 @@ func (c *Cluster) syncServices() error { return nil } +/* + Ensures the service account required by StatefulSets to create pods exists in all namespaces watched by the operator. +*/ +func (c *Cluster) syncPodServiceAccounts() error { + + podServiceAccount := c.Config.OpConfig.PodServiceAccountName + c.setProcessName("syncing pod service account in the watched namespaces") + + _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccount, metav1.GetOptions{}) + + if err != nil { + c.logger.Warnf("the pod service account %q is absent from the namespace %q. Stateful sets in the namespace are unable to create pods.", podServiceAccount, c.Namespace) + + c.OpConfig.PodServiceAccount.SetNamespace(c.Namespace) + + _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(c.OpConfig.PodServiceAccount) + if err != nil { + c.logger.Warnf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccount, c.Namespace, err) + } else { + c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccount, c.Namespace) + } + } else { + c.logger.Infof("successfully found the service account %q used to create pods to the namespace %q", podServiceAccount, c.Namespace) + } + + return err +} + func (c *Cluster) syncService(role PostgresRole) error { c.setProcessName("syncing %s service", role) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index d19da5b84..4c47c72b0 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -8,6 +8,7 @@ import ( "github.com/Sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/pkg/api/v1" "k8s.io/client-go/tools/cache" @@ -113,11 +114,30 @@ func (c *Controller) initOperatorConfig() { if scalyrAPIKey != "" { c.opConfig.ScalyrAPIKey = scalyrAPIKey } + +} + +func (c *Controller) initPodServiceAccount() { + // re-uses k8s internal parsing. See k8s client-go issue #193 for explanation + decode := scheme.Codecs.UniversalDeserializer().Decode + obj, groupVersionKind, err := decode([]byte(c.opConfig.PodServiceAccountDefinition), nil, nil) + + switch { + case err != nil: + panic(fmt.Errorf("Unable to parse pod service account definiton from the operator config map: %v", err)) + case groupVersionKind.Kind != "ServiceAccount": + panic(fmt.Errorf("pod service account definiton in the operator config map defines another type of resource: %v", groupVersionKind.Kind)) + default: + c.opConfig.PodServiceAccount = obj.(*v1.ServiceAccount) + } + + // actual service accounts are deployed lazily at the time of cluster creation or sync } func (c *Controller) initController() { c.initClients() c.initOperatorConfig() + c.initPodServiceAccount() c.initSharedInformers() diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 0e653ce0b..15a4738b4 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -8,6 +8,7 @@ import ( "fmt" "github.com/zalando-incubator/postgres-operator/pkg/spec" + "k8s.io/client-go/pkg/api/v1" ) // CRD describes CustomResourceDefinition specific configuration parameters @@ -67,21 +68,25 @@ type Config struct { Resources Auth Scalyr - WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' - EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` - DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` - ServiceAccountName string `name:"service_account_name" default:"operator"` - DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` - EtcdScope string `name:"etcd_scope" default:"service"` - WALES3Bucket string `name:"wal_s3_bucket"` - KubeIAMRole string `name:"kube_iam_role"` - DebugLogging bool `name:"debug_logging" default:"true"` - EnableDBAccess bool `name:"enable_database_access" default:"true"` - EnableTeamsAPI bool `name:"enable_teams_api" default:"true"` - EnableTeamSuperuser bool `name:"enable_team_superuser" default:"false"` - TeamAdminRole string `name:"team_admin_role" default:"admin"` - EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"` - EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"` + PodServiceAccount *v1.ServiceAccount + WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' + EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` + DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` + // re-use one account for both Spilo pods and the operator; this grants extra privileges to pods + ServiceAccountName string `name:"service_account_name" default:"operator"` + PodServiceAccountName string `name:"pod_service_account_name" default:"operator"` + PodServiceAccountDefinition string `name:"pod_service_account_definition" default:"apiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: operator\n"` + DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` + EtcdScope string `name:"etcd_scope" default:"service"` + WALES3Bucket string `name:"wal_s3_bucket"` + KubeIAMRole string `name:"kube_iam_role"` + DebugLogging bool `name:"debug_logging" default:"true"` + EnableDBAccess bool `name:"enable_database_access" default:"true"` + EnableTeamsAPI bool `name:"enable_teams_api" default:"true"` + EnableTeamSuperuser bool `name:"enable_team_superuser" default:"false"` + TeamAdminRole string `name:"team_admin_role" default:"admin"` + EnableMasterLoadBalancer bool `name:"enable_master_load_balancer" default:"true"` + EnableReplicaLoadBalancer bool `name:"enable_replica_load_balancer" default:"false"` // deprecated and kept for backward compatibility EnableLoadBalancer *bool `name:"enable_load_balancer" default:"true"` MasterDNSNameFormat stringTemplate `name:"master_dns_name_format" default:"{cluster}.{team}.{hostedzone}"` From 23f893647c18664a5fdc1220cf5e2795d275a339 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Thu, 19 Apr 2018 15:48:58 +0200 Subject: [PATCH 03/21] Remove sync of pod service accounts --- pkg/cluster/cluster.go | 32 +++++++++++++++++++++++++++++++- pkg/cluster/sync.go | 34 ---------------------------------- pkg/controller/controller.go | 2 +- 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 0bcaf09f0..389af8e8c 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -194,6 +194,36 @@ func (c *Cluster) initUsers() error { return nil } +/* + Ensures the service account required by StatefulSets to create pods exists in a namespace before a PG cluster is created there so that a user does not have to deploy the account manually. + + The operator does not sync these accounts. +*/ +func (c *Cluster) createPodServiceAccounts() error { + + podServiceAccount := c.Config.OpConfig.PodServiceAccountName + c.setProcessName("creating pod service account in the watched namespaces") + + _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccount, metav1.GetOptions{}) + + if err != nil { + c.logger.Warnf("the pod service account %q is absent from the namespace %q. Stateful sets in the namespace are unable to create pods.", podServiceAccount, c.Namespace) + + c.OpConfig.PodServiceAccount.SetNamespace(c.Namespace) + + _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(c.OpConfig.PodServiceAccount) + if err != nil { + c.logger.Warnf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccount, c.Namespace, err) + } else { + c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccount, c.Namespace) + } + } else { + c.logger.Infof("successfully found the service account %q used to create pods to the namespace %q", podServiceAccount, c.Namespace) + } + + return err +} + // Create creates the new kubernetes objects associated with the cluster. func (c *Cluster) Create() error { c.mu.Lock() @@ -256,7 +286,7 @@ func (c *Cluster) Create() error { } c.logger.Infof("pod disruption budget %q has been successfully created", util.NameFromMeta(pdb.ObjectMeta)) - if err = c.syncPodServiceAccounts(); err != nil { + if err = c.createPodServiceAccounts(); err != nil { return fmt.Errorf("could not sync pod service accounts: %v", err) } c.logger.Infof("pod service accounts have been successfully synced") diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 133ea50fc..5a77e658b 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -44,12 +44,6 @@ func (c *Cluster) Sync(newSpec *spec.Postgresql) (err error) { return } - c.logger.Debugf("syncing service accounts") - if err = c.syncPodServiceAccounts(); err != nil { - err = fmt.Errorf("could not sync service accounts: %v", err) - return - } - c.logger.Debugf("syncing services") if err = c.syncServices(); err != nil { err = fmt.Errorf("could not sync services: %v", err) @@ -109,34 +103,6 @@ func (c *Cluster) syncServices() error { return nil } -/* - Ensures the service account required by StatefulSets to create pods exists in all namespaces watched by the operator. -*/ -func (c *Cluster) syncPodServiceAccounts() error { - - podServiceAccount := c.Config.OpConfig.PodServiceAccountName - c.setProcessName("syncing pod service account in the watched namespaces") - - _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccount, metav1.GetOptions{}) - - if err != nil { - c.logger.Warnf("the pod service account %q is absent from the namespace %q. Stateful sets in the namespace are unable to create pods.", podServiceAccount, c.Namespace) - - c.OpConfig.PodServiceAccount.SetNamespace(c.Namespace) - - _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(c.OpConfig.PodServiceAccount) - if err != nil { - c.logger.Warnf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccount, c.Namespace, err) - } else { - c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccount, c.Namespace) - } - } else { - c.logger.Infof("successfully found the service account %q used to create pods to the namespace %q", podServiceAccount, c.Namespace) - } - - return err -} - func (c *Cluster) syncService(role PostgresRole) error { c.setProcessName("syncing %s service", role) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 4c47c72b0..4a12948de 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -131,7 +131,7 @@ func (c *Controller) initPodServiceAccount() { c.opConfig.PodServiceAccount = obj.(*v1.ServiceAccount) } - // actual service accounts are deployed lazily at the time of cluster creation or sync + // actual service accounts are deployed at the time of Postgres/Spilo cluster creation } func (c *Controller) initController() { From 2f3d63a66349edaa7218444cdcec582bfc68f7a4 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Thu, 19 Apr 2018 16:11:34 +0200 Subject: [PATCH 04/21] Document desired behaviour --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9bd663c60..a60fc2a55 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ By default, the operator watches the namespace it is deployed to. You can change Note that for an operator to manage pods in the watched namespace, the operator's service account (as specified in the operator deployment manifest) has to have appropriate privileges to access the watched namespace. The operator may not be able to function in the case it watches all namespaces but lacks access rights to any of them (except Kubernetes system namespaces like `kube-system`). The reason is that for multiple namespaces operations such as 'list pods' execute at the cluster scope and fail at the first violation of access rights. -The watched namespace also needs to have a (possibly different) service account in the case database pods need to talk to the Kubernetes API (e.g. when using Kubernetes-native configuration of Patroni). +The watched namespace also needs to have a (possibly different) service account in the case database pods need to talk to the Kubernetes API (e.g. when using Kubernetes-native configuration of Patroni). The operator checks that the `pod_service_account_name` exists in the target namespace, and, if not, deploys there the `pod_service_account_definition`. In this definition, the operator overwrites the account's name to match `pod_service_account_name` and the namespace to match the target namespace. The operator performs **no** further syncing of this account. ### Create ConfigMap From a5a65e93f4704d2e4479d73f7a8ab4c676c9723d Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Thu, 19 Apr 2018 16:15:52 +0200 Subject: [PATCH 05/21] Name service account consistenly --- pkg/controller/controller.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 4a12948de..b50dc43ad 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -129,6 +129,8 @@ func (c *Controller) initPodServiceAccount() { panic(fmt.Errorf("pod service account definiton in the operator config map defines another type of resource: %v", groupVersionKind.Kind)) default: c.opConfig.PodServiceAccount = obj.(*v1.ServiceAccount) + // ensure consistent naming of the account + c.opConfig.PodServiceAccount.Name = c.opConfig.PodServiceAccountName } // actual service accounts are deployed at the time of Postgres/Spilo cluster creation From bd51d2922b23ee34e1f8266f363a492a69e51b9e Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Fri, 20 Apr 2018 13:05:05 +0200 Subject: [PATCH 06/21] Turn ServiceAccount into struct value to avoid race conditon during account creation --- pkg/cluster/cluster.go | 18 ++++++++++-------- pkg/controller/controller.go | 2 +- pkg/util/config/config.go | 8 ++++---- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 389af8e8c..4a5db1331 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -197,28 +197,30 @@ func (c *Cluster) initUsers() error { /* Ensures the service account required by StatefulSets to create pods exists in a namespace before a PG cluster is created there so that a user does not have to deploy the account manually. - The operator does not sync these accounts. + The operator does not sync these accounts after creation. */ func (c *Cluster) createPodServiceAccounts() error { - podServiceAccount := c.Config.OpConfig.PodServiceAccountName + podServiceAccountName := c.Config.OpConfig.PodServiceAccountName c.setProcessName("creating pod service account in the watched namespaces") - _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccount, metav1.GetOptions{}) + _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccountName, metav1.GetOptions{}) if err != nil { - c.logger.Warnf("the pod service account %q is absent from the namespace %q. Stateful sets in the namespace are unable to create pods.", podServiceAccount, c.Namespace) + c.logger.Warnf("the pod service account %q is absent from the namespace %q. Stateful sets in the namespace are unable to create pods.", podServiceAccountName, c.Namespace) + // when created, each Cluster struct gets a separate copy of OpConfig + // including the nested PodServiceAccount struct, so no race condition here c.OpConfig.PodServiceAccount.SetNamespace(c.Namespace) - _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(c.OpConfig.PodServiceAccount) + _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(&c.OpConfig.PodServiceAccount) if err != nil { - c.logger.Warnf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccount, c.Namespace, err) + c.logger.Warnf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccountName, c.Namespace, err) } else { - c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccount, c.Namespace) + c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccountName, c.Namespace) } } else { - c.logger.Infof("successfully found the service account %q used to create pods to the namespace %q", podServiceAccount, c.Namespace) + c.logger.Infof("successfully found the service account %q used to create pods to the namespace %q", podServiceAccountName, c.Namespace) } return err diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index b50dc43ad..ab22f1359 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -128,7 +128,7 @@ func (c *Controller) initPodServiceAccount() { case groupVersionKind.Kind != "ServiceAccount": panic(fmt.Errorf("pod service account definiton in the operator config map defines another type of resource: %v", groupVersionKind.Kind)) default: - c.opConfig.PodServiceAccount = obj.(*v1.ServiceAccount) + c.opConfig.PodServiceAccount = *obj.(*v1.ServiceAccount) // ensure consistent naming of the account c.opConfig.PodServiceAccount.Name = c.opConfig.PodServiceAccountName } diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 15a4738b4..b3392065d 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -68,10 +68,10 @@ type Config struct { Resources Auth Scalyr - PodServiceAccount *v1.ServiceAccount - WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' - EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` - DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` + PodServiceAccount v1.ServiceAccount // has to be struct value, not a pointer + WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' + EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` + DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` // re-use one account for both Spilo pods and the operator; this grants extra privileges to pods ServiceAccountName string `name:"service_account_name" default:"operator"` PodServiceAccountName string `name:"pod_service_account_name" default:"operator"` From 5daf0a41723dcb78cee06e99983b0dc18e04436b Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Fri, 20 Apr 2018 14:20:38 +0200 Subject: [PATCH 07/21] Fix error reporting during pod service account creation --- pkg/cluster/cluster.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 4a5db1331..661fe5399 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -202,12 +202,12 @@ func (c *Cluster) initUsers() error { func (c *Cluster) createPodServiceAccounts() error { podServiceAccountName := c.Config.OpConfig.PodServiceAccountName - c.setProcessName("creating pod service account in the watched namespaces") + c.setProcessName(fmt.Sprintf("creating pod service account in the namespace %v", c.Namespace)) _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccountName, metav1.GetOptions{}) if err != nil { - c.logger.Warnf("the pod service account %q is absent from the namespace %q. Stateful sets in the namespace are unable to create pods.", podServiceAccountName, c.Namespace) + c.logger.Warnf("the pod service account %q cannot be retrieved in the namespace %q. Stateful sets in the namespace may be unable to create pods. Error: %v", podServiceAccountName, c.Namespace, err) // when created, each Cluster struct gets a separate copy of OpConfig // including the nested PodServiceAccount struct, so no race condition here @@ -215,15 +215,16 @@ func (c *Cluster) createPodServiceAccounts() error { _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(&c.OpConfig.PodServiceAccount) if err != nil { - c.logger.Warnf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccountName, c.Namespace, err) - } else { - c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccountName, c.Namespace) + return fmt.Errorf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccountName, c.Namespace, err) } + + c.logger.Infof("successfully deployed the pod service account %q to the %q namespace", podServiceAccountName, c.Namespace) + } else { c.logger.Infof("successfully found the service account %q used to create pods to the namespace %q", podServiceAccountName, c.Namespace) } - return err + return nil } // Create creates the new kubernetes objects associated with the cluster. @@ -289,7 +290,7 @@ func (c *Cluster) Create() error { c.logger.Infof("pod disruption budget %q has been successfully created", util.NameFromMeta(pdb.ObjectMeta)) if err = c.createPodServiceAccounts(); err != nil { - return fmt.Errorf("could not sync pod service accounts: %v", err) + return fmt.Errorf("could not create pod service account %v : %v", c.OpConfig.PodServiceAccountName, err) } c.logger.Infof("pod service accounts have been successfully synced") From a88416e6ea891bda5e13f5f14976138af1975e61 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Mon, 23 Apr 2018 14:28:00 +0200 Subject: [PATCH 08/21] Include default service account for pods into README.md --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a60fc2a55..a76905303 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,16 @@ By default, the operator watches the namespace it is deployed to. You can change Note that for an operator to manage pods in the watched namespace, the operator's service account (as specified in the operator deployment manifest) has to have appropriate privileges to access the watched namespace. The operator may not be able to function in the case it watches all namespaces but lacks access rights to any of them (except Kubernetes system namespaces like `kube-system`). The reason is that for multiple namespaces operations such as 'list pods' execute at the cluster scope and fail at the first violation of access rights. -The watched namespace also needs to have a (possibly different) service account in the case database pods need to talk to the Kubernetes API (e.g. when using Kubernetes-native configuration of Patroni). The operator checks that the `pod_service_account_name` exists in the target namespace, and, if not, deploys there the `pod_service_account_definition`. In this definition, the operator overwrites the account's name to match `pod_service_account_name` and the namespace to match the target namespace. The operator performs **no** further syncing of this account. +The watched namespace also needs to have a (possibly different) service account in the case database pods need to talk to the Kubernetes API (e.g. when using Kubernetes-native configuration of Patroni). The operator checks that the `pod_service_account_name` exists in the target namespace, and, if not, deploys there the `pod_service_account_definition` from the operator [`Config`](pkg/util/config/config.go) with the default value of: + +```yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: operator +``` + + In this definition, the operator overwrites the account's name to match `pod_service_account_name` and the `default` namespace to match the target namespace. The operator performs **no** further syncing of this account. ### Create ConfigMap From c31c76281c61168650d81b5f6e937b4136d50250 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Mon, 23 Apr 2018 14:38:20 +0200 Subject: [PATCH 09/21] Make operator unaware of its own service account --- manifests/configmap.yaml | 1 - pkg/cluster/k8sres.go | 2 +- pkg/util/config/config.go | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 7d5c742c8..f3b79e67b 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -7,7 +7,6 @@ data: # if neither is set or evaluates to the empty string, listen to the operator's own namespace # if set to the "*", listen to all namespaces # watched_namespace: development - service_account_name: operator cluster_labels: application:spilo cluster_name_label: version pod_role_label: spilo-role diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index cf16bb39a..2b1460643 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -435,7 +435,7 @@ func (c *Cluster) generatePodTemplate( terminateGracePeriodSeconds := int64(c.OpConfig.PodTerminateGracePeriod.Seconds()) podSpec := v1.PodSpec{ - ServiceAccountName: c.OpConfig.ServiceAccountName, + ServiceAccountName: c.OpConfig.PodServiceAccountName, TerminationGracePeriodSeconds: &terminateGracePeriodSeconds, Containers: []v1.Container{container}, Tolerations: c.tolerations(tolerationsSpec), diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index b3392065d..bb810665b 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -73,7 +73,6 @@ type Config struct { EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` // re-use one account for both Spilo pods and the operator; this grants extra privileges to pods - ServiceAccountName string `name:"service_account_name" default:"operator"` PodServiceAccountName string `name:"pod_service_account_name" default:"operator"` PodServiceAccountDefinition string `name:"pod_service_account_definition" default:"apiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: operator\n"` DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` From bc8b950da43e64e33855b293ccc2cf371c44e90c Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Mon, 23 Apr 2018 16:31:53 +0200 Subject: [PATCH 10/21] Tolerate issues of the Teams API --- pkg/cluster/util.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/cluster/util.go b/pkg/cluster/util.go index 4a4f4e04a..1220aeb86 100644 --- a/pkg/cluster/util.go +++ b/pkg/cluster/util.go @@ -212,12 +212,14 @@ func (c *Cluster) getTeamMembers() ([]string, error) { token, err := c.oauthTokenGetter.getOAuthToken() if err != nil { - return []string{}, fmt.Errorf("could not get oauth token: %v", err) + c.logger.Warnf("could not get oauth token to authenticate to team service API, returning empty list of team members: %v", err) + return []string{}, nil } teamInfo, err := c.teamsAPIClient.TeamInfo(c.Spec.TeamID, token) if err != nil { - return nil, fmt.Errorf("could not get team info: %v", err) + c.logger.Warnf("could not get team info, returning empty list of team members: %v", err) + return []string{}, nil } return teamInfo.Members, nil From 485ec4b8ea5621da16d73af63e1182e81198c986 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Tue, 24 Apr 2018 15:13:08 +0200 Subject: [PATCH 11/21] Move service account to Controller --- pkg/cluster/cluster.go | 10 ++++++---- pkg/controller/controller.go | 6 ++++-- pkg/controller/util.go | 1 + pkg/util/config/config.go | 9 ++++----- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 661fe5399..6e2999290 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -42,6 +42,7 @@ type Config struct { OpConfig config.Config RestConfig *rest.Config InfrastructureRoles map[string]spec.PgUser // inherited from the controller + PodServiceAccount *v1.ServiceAccount } type kubeResources struct { @@ -209,11 +210,12 @@ func (c *Cluster) createPodServiceAccounts() error { if err != nil { c.logger.Warnf("the pod service account %q cannot be retrieved in the namespace %q. Stateful sets in the namespace may be unable to create pods. Error: %v", podServiceAccountName, c.Namespace, err) - // when created, each Cluster struct gets a separate copy of OpConfig - // including the nested PodServiceAccount struct, so no race condition here - c.OpConfig.PodServiceAccount.SetNamespace(c.Namespace) + // get a separate copy of service account + // to prevent a race condition when setting a namespace for many clusters + sa := *c.PodServiceAccount + sa.SetNamespace(c.Namespace) - _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(&c.OpConfig.PodServiceAccount) + _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(&sa) if err != nil { return fmt.Errorf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccountName, c.Namespace, err) } diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index ab22f1359..2a9c26d8b 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -51,6 +51,8 @@ type Controller struct { lastClusterSyncTime int64 workerLogs map[uint32]ringlog.RingLogger + + PodServiceAccount *v1.ServiceAccount } // NewController creates a new controller @@ -128,9 +130,9 @@ func (c *Controller) initPodServiceAccount() { case groupVersionKind.Kind != "ServiceAccount": panic(fmt.Errorf("pod service account definiton in the operator config map defines another type of resource: %v", groupVersionKind.Kind)) default: - c.opConfig.PodServiceAccount = *obj.(*v1.ServiceAccount) + c.PodServiceAccount = obj.(*v1.ServiceAccount) // ensure consistent naming of the account - c.opConfig.PodServiceAccount.Name = c.opConfig.PodServiceAccountName + c.PodServiceAccount.Name = c.opConfig.PodServiceAccountName } // actual service accounts are deployed at the time of Postgres/Spilo cluster creation diff --git a/pkg/controller/util.go b/pkg/controller/util.go index 69124c111..5e46e93eb 100644 --- a/pkg/controller/util.go +++ b/pkg/controller/util.go @@ -26,6 +26,7 @@ func (c *Controller) makeClusterConfig() cluster.Config { RestConfig: c.config.RestConfig, OpConfig: config.Copy(c.opConfig), InfrastructureRoles: infrastructureRoles, + PodServiceAccount: c.PodServiceAccount, } } diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index bb810665b..72731442c 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -8,7 +8,6 @@ import ( "fmt" "github.com/zalando-incubator/postgres-operator/pkg/spec" - "k8s.io/client-go/pkg/api/v1" ) // CRD describes CustomResourceDefinition specific configuration parameters @@ -68,10 +67,10 @@ type Config struct { Resources Auth Scalyr - PodServiceAccount v1.ServiceAccount // has to be struct value, not a pointer - WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' - EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` - DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` + + WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' + EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` + DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` // re-use one account for both Spilo pods and the operator; this grants extra privileges to pods PodServiceAccountName string `name:"pod_service_account_name" default:"operator"` PodServiceAccountDefinition string `name:"pod_service_account_definition" default:"apiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: operator\n"` From 3d0ab40d64b3b37e646f2e04053e844ac298a5c7 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Tue, 24 Apr 2018 15:30:15 +0200 Subject: [PATCH 12/21] Explicitly warn on account name mismatch --- pkg/controller/controller.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 2a9c26d8b..406a29429 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -131,8 +131,10 @@ func (c *Controller) initPodServiceAccount() { panic(fmt.Errorf("pod service account definiton in the operator config map defines another type of resource: %v", groupVersionKind.Kind)) default: c.PodServiceAccount = obj.(*v1.ServiceAccount) - // ensure consistent naming of the account - c.PodServiceAccount.Name = c.opConfig.PodServiceAccountName + if c.PodServiceAccount.Name != c.opConfig.PodServiceAccountName { + c.logger.Warnf("in the operator config map, the pod service account name %v does not match the name %v given in the account definition; using the former for consistency", c.opConfig.PodServiceAccountName, c.PodServiceAccount.Name) + c.PodServiceAccount.Name = c.opConfig.PodServiceAccountName + } } // actual service accounts are deployed at the time of Postgres/Spilo cluster creation From e3f7fac4431f3ab6ab9ca38cbb94fa433a5f0e85 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Tue, 24 Apr 2018 15:41:28 +0200 Subject: [PATCH 13/21] Comment on the default value for pod service account name --- pkg/util/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 72731442c..f38dc9b1c 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -71,7 +71,7 @@ type Config struct { WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` - // re-use one account for both Spilo pods and the operator; this grants extra privileges to pods + // default name `operator` enables backward compatibility with the older ServiceAccountName field PodServiceAccountName string `name:"pod_service_account_name" default:"operator"` PodServiceAccountDefinition string `name:"pod_service_account_definition" default:"apiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: operator\n"` DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` From d99b553ec157df226f5da9cfbcf692f77c6ec857 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Wed, 25 Apr 2018 12:35:16 +0200 Subject: [PATCH 14/21] Convert default account definiton into JSON --- pkg/cluster/cluster.go | 4 +--- pkg/controller/controller.go | 12 ++++++++++++ pkg/util/config/config.go | 5 +++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 6e2999290..478940963 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -208,13 +208,11 @@ func (c *Cluster) createPodServiceAccounts() error { _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccountName, metav1.GetOptions{}) if err != nil { - c.logger.Warnf("the pod service account %q cannot be retrieved in the namespace %q. Stateful sets in the namespace may be unable to create pods. Error: %v", podServiceAccountName, c.Namespace, err) + c.logger.Infof("the pod service account %q cannot be retrieved in the namespace %q; stateful sets in the namespace may be unable to create pods. Trying to deploy the account.", podServiceAccountName, c.Namespace) // get a separate copy of service account // to prevent a race condition when setting a namespace for many clusters sa := *c.PodServiceAccount - sa.SetNamespace(c.Namespace) - _, err = c.KubeClient.ServiceAccounts(c.Namespace).Create(&sa) if err != nil { return fmt.Errorf("cannot deploy the pod service account %q defined in the config map to the %q namespace: %v", podServiceAccountName, c.Namespace, err) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 406a29429..144c0fb4c 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -120,6 +120,17 @@ func (c *Controller) initOperatorConfig() { } func (c *Controller) initPodServiceAccount() { + + if c.opConfig.PodServiceAccountDefinition == "" { + c.opConfig.PodServiceAccountDefinition = ` + { "apiVersion": "v1", + "kind": "ServiceAccount", + "metadata": { + "name": "operator" + } + }` + } + // re-uses k8s internal parsing. See k8s client-go issue #193 for explanation decode := scheme.Codecs.UniversalDeserializer().Decode obj, groupVersionKind, err := decode([]byte(c.opConfig.PodServiceAccountDefinition), nil, nil) @@ -134,6 +145,7 @@ func (c *Controller) initPodServiceAccount() { if c.PodServiceAccount.Name != c.opConfig.PodServiceAccountName { c.logger.Warnf("in the operator config map, the pod service account name %v does not match the name %v given in the account definition; using the former for consistency", c.opConfig.PodServiceAccountName, c.PodServiceAccount.Name) c.PodServiceAccount.Name = c.opConfig.PodServiceAccountName + c.PodServiceAccount.Namespace = "" } } diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index f38dc9b1c..b101c6f08 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -72,8 +72,9 @@ type Config struct { EtcdHost string `name:"etcd_host" default:"etcd-client.default.svc.cluster.local:2379"` DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spiloprivate-9.6:1.2-p4"` // default name `operator` enables backward compatibility with the older ServiceAccountName field - PodServiceAccountName string `name:"pod_service_account_name" default:"operator"` - PodServiceAccountDefinition string `name:"pod_service_account_definition" default:"apiVersion: v1\nkind: ServiceAccount\nmetadata:\n name: operator\n"` + PodServiceAccountName string `name:"pod_service_account_name" default:"operator"` + // value of this string must be valid JSON or YAML; see initPodServiceAccount + PodServiceAccountDefinition string `name:"pod_service_account_definition" default:""` DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` EtcdScope string `name:"etcd_scope" default:"service"` WALES3Bucket string `name:"wal_s3_bucket"` From 4255e702bc876bdcde5d1c36d5f98378153a4efb Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Wed, 25 Apr 2018 13:57:24 +0200 Subject: [PATCH 15/21] Always empty account's namespace after parsing --- pkg/controller/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 144c0fb4c..7b309a547 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -145,8 +145,8 @@ func (c *Controller) initPodServiceAccount() { if c.PodServiceAccount.Name != c.opConfig.PodServiceAccountName { c.logger.Warnf("in the operator config map, the pod service account name %v does not match the name %v given in the account definition; using the former for consistency", c.opConfig.PodServiceAccountName, c.PodServiceAccount.Name) c.PodServiceAccount.Name = c.opConfig.PodServiceAccountName - c.PodServiceAccount.Namespace = "" } + c.PodServiceAccount.Namespace = "" } // actual service accounts are deployed at the time of Postgres/Spilo cluster creation From 1b718fd4c20e9ae1bb0aaaf585b3860fc3b614f5 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Thu, 26 Apr 2018 13:47:25 +0200 Subject: [PATCH 16/21] Minor improvemets in reporting service account creation --- pkg/cluster/cluster.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 478940963..b6251eafc 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -203,12 +203,13 @@ func (c *Cluster) initUsers() error { func (c *Cluster) createPodServiceAccounts() error { podServiceAccountName := c.Config.OpConfig.PodServiceAccountName - c.setProcessName(fmt.Sprintf("creating pod service account in the namespace %v", c.Namespace)) - _, err := c.KubeClient.ServiceAccounts(c.Namespace).Get(podServiceAccountName, metav1.GetOptions{}) if err != nil { - c.logger.Infof("the pod service account %q cannot be retrieved in the namespace %q; stateful sets in the namespace may be unable to create pods. Trying to deploy the account.", podServiceAccountName, c.Namespace) + + c.setProcessName(fmt.Sprintf("creating pod service account in the namespace %v", c.Namespace)) + + c.logger.Infof("the pod service account %q cannot be retrieved in the namespace %q. Trying to deploy the account.", podServiceAccountName, c.Namespace) // get a separate copy of service account // to prevent a race condition when setting a namespace for many clusters From 37caa3f60b9a92aa2f94e65510fbf687130e04eb Mon Sep 17 00:00:00 2001 From: Oleksii Kliukin Date: Fri, 27 Apr 2018 12:35:25 +0200 Subject: [PATCH 17/21] Fix a bug with syncing services Avoid showing "there is no service in the cluster" when syncing a service for the cluster if the operator has been restarted after the cluster had been created. --- pkg/cluster/sync.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go index 5a77e658b..5559d07b1 100644 --- a/pkg/cluster/sync.go +++ b/pkg/cluster/sync.go @@ -108,11 +108,10 @@ func (c *Cluster) syncService(role PostgresRole) error { svc, err := c.KubeClient.Services(c.Namespace).Get(c.serviceName(role), metav1.GetOptions{}) if err == nil { - + c.Services[role] = svc desiredSvc := c.generateService(role, &c.Spec) match, reason := k8sutil.SameService(svc, desiredSvc) if match { - c.Services[role] = svc return nil } c.logServiceChanges(role, svc, desiredSvc, false, reason) From c45219bafa55c032ec772cfc3563392e2bf59d5b Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Wed, 2 May 2018 12:52:42 +0200 Subject: [PATCH 18/21] Set up an S3 bucket for the postgres daily logs --- pkg/cluster/k8sres.go | 12 +++++++++--- pkg/util/config/config.go | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 2b1460643..1c0d5c707 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -360,10 +360,16 @@ func (c *Cluster) generatePodTemplate( } if c.OpConfig.WALES3Bucket != "" { envVars = append(envVars, v1.EnvVar{Name: "WAL_S3_BUCKET", Value: c.OpConfig.WALES3Bucket}) - envVars = append(envVars, v1.EnvVar{Name: "WAL_BUCKET_SCOPE_SUFFIX", Value: getWALBucketScopeSuffix(string(uid))}) + envVars = append(envVars, v1.EnvVar{Name: "WAL_BUCKET_SCOPE_SUFFIX", Value: getBucketScopeSuffix(string(uid))}) envVars = append(envVars, v1.EnvVar{Name: "WAL_BUCKET_SCOPE_PREFIX", Value: ""}) } + if c.OpConfig.PgDailyLogS3Bucket != "" { + envVars = append(envVars, v1.EnvVar{Name: "PG_DAILY_LOG_S3_BUCKET", Value: c.OpConfig.PgDailyLogS3Bucket}) + envVars = append(envVars, v1.EnvVar{Name: "PG_DAILY_LOG_BUCKET_SCOPE_SUFFIX", Value: getBucketScopeSuffix(string(uid))}) + envVars = append(envVars, v1.EnvVar{Name: "PG_DAILY_LOG_BUCKET_SCOPE_PREFIX", Value: ""}) + } + if c.patroniUsesKubernetes() { envVars = append(envVars, v1.EnvVar{Name: "DCS_ENABLE_KUBERNETES_API", Value: "true"}) } else { @@ -504,7 +510,7 @@ func (c *Cluster) generatePodTemplate( return &template } -func getWALBucketScopeSuffix(uid string) string { +func getBucketScopeSuffix(uid string) string { if uid != "" { return fmt.Sprintf("/%s", uid) } @@ -819,7 +825,7 @@ func (c *Cluster) generateCloneEnvironment(description *spec.CloneDescription) [ result = append(result, v1.EnvVar{Name: "CLONE_METHOD", Value: "CLONE_WITH_WALE"}) result = append(result, v1.EnvVar{Name: "CLONE_WAL_S3_BUCKET", Value: c.OpConfig.WALES3Bucket}) result = append(result, v1.EnvVar{Name: "CLONE_TARGET_TIME", Value: description.EndTimestamp}) - result = append(result, v1.EnvVar{Name: "CLONE_WAL_BUCKET_SCOPE_SUFFIX", Value: getWALBucketScopeSuffix(description.Uid)}) + result = append(result, v1.EnvVar{Name: "CLONE_WAL_BUCKET_SCOPE_SUFFIX", Value: getBucketScopeSuffix(description.Uid)}) result = append(result, v1.EnvVar{Name: "CLONE_WAL_BUCKET_SCOPE_PREFIX", Value: ""}) } diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index b101c6f08..08bcc7b32 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -78,6 +78,7 @@ type Config struct { DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` EtcdScope string `name:"etcd_scope" default:"service"` WALES3Bucket string `name:"wal_s3_bucket"` + PgDailyLogS3Bucket string `name:"pg_daily_log_s3_bucket"` KubeIAMRole string `name:"kube_iam_role"` DebugLogging bool `name:"debug_logging" default:"true"` EnableDBAccess bool `name:"enable_database_access" default:"true"` From 59ded0c212818fb03716f8f1b111e7b9d037d4b4 Mon Sep 17 00:00:00 2001 From: Sergey Dudoladov Date: Wed, 2 May 2018 14:05:57 +0200 Subject: [PATCH 19/21] Shorten bucket name --- pkg/cluster/k8sres.go | 8 ++++---- pkg/util/config/config.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 1c0d5c707..4e500de29 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -364,10 +364,10 @@ func (c *Cluster) generatePodTemplate( envVars = append(envVars, v1.EnvVar{Name: "WAL_BUCKET_SCOPE_PREFIX", Value: ""}) } - if c.OpConfig.PgDailyLogS3Bucket != "" { - envVars = append(envVars, v1.EnvVar{Name: "PG_DAILY_LOG_S3_BUCKET", Value: c.OpConfig.PgDailyLogS3Bucket}) - envVars = append(envVars, v1.EnvVar{Name: "PG_DAILY_LOG_BUCKET_SCOPE_SUFFIX", Value: getBucketScopeSuffix(string(uid))}) - envVars = append(envVars, v1.EnvVar{Name: "PG_DAILY_LOG_BUCKET_SCOPE_PREFIX", Value: ""}) + if c.OpConfig.LogS3Bucket != "" { + envVars = append(envVars, v1.EnvVar{Name: "LOG_S3_BUCKET", Value: c.OpConfig.LogS3Bucket}) + envVars = append(envVars, v1.EnvVar{Name: "LOG_BUCKET_SCOPE_SUFFIX", Value: getBucketScopeSuffix(string(uid))}) + envVars = append(envVars, v1.EnvVar{Name: "LOG_BUCKET_SCOPE_PREFIX", Value: ""}) } if c.patroniUsesKubernetes() { diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 08bcc7b32..aed9accd3 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -78,7 +78,7 @@ type Config struct { DbHostedZone string `name:"db_hosted_zone" default:"db.example.com"` EtcdScope string `name:"etcd_scope" default:"service"` WALES3Bucket string `name:"wal_s3_bucket"` - PgDailyLogS3Bucket string `name:"pg_daily_log_s3_bucket"` + LogS3Bucket string `name:"log_s3_bucket"` KubeIAMRole string `name:"kube_iam_role"` DebugLogging bool `name:"debug_logging" default:"true"` EnableDBAccess bool `name:"enable_database_access" default:"true"` From fe47f9ebeadd54639913296735158b42d17ee012 Mon Sep 17 00:00:00 2001 From: Oleksii Kliukin Date: Thu, 3 May 2018 10:20:24 +0200 Subject: [PATCH 20/21] Improve the pod moving behavior during the Kubernetes cluster upgrade. (#281) * Improve the pod moving behavior during the Kubernetes cluster upgrade. Fix an issue of not waiting for at least one replica to become ready (if the Statefulset indicates there are replicas) when moving the master pod off the decomissioned node. Resolves the first part of #279. Small fixes to error messages. * Eliminate a race condition during the swithover. When the operator initiates the failover (switchover) that fails and then retries it for a second time it may happen that the previous waitForPodChannel is still active. As a result, the operator subscribes to the former master pod two times, causing a panic. The problem was that the original code didn't bother to cancel the waitForPodLalbel for the new master pod in the case when the failover fails. This commit fixes it by adding a stop channel to that function. Code review by @zerg-junior --- pkg/cluster/cluster.go | 8 +++-- pkg/cluster/cluster_test.go | 4 +-- pkg/cluster/k8sres.go | 2 +- pkg/cluster/pod.go | 28 +++++++++++++----- pkg/cluster/util.go | 58 +++++++++++++++++++++++++++---------- 5 files changed, 71 insertions(+), 29 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index b6251eafc..2260f0e96 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -858,6 +858,7 @@ func (c *Cluster) GetStatus() *spec.ClusterStatus { // ManualFailover does manual failover to a candidate pod func (c *Cluster) ManualFailover(curMaster *v1.Pod, candidate spec.NamespacedName) error { c.logger.Debugf("failing over from %q to %q", curMaster.Name, candidate) + podLabelErr := make(chan error) stopCh := make(chan struct{}) defer close(podLabelErr) @@ -868,11 +869,12 @@ func (c *Cluster) ManualFailover(curMaster *v1.Pod, candidate spec.NamespacedNam role := Master - _, err := c.waitForPodLabel(ch, &role) - select { case <-stopCh: - case podLabelErr <- err: + case podLabelErr <- func() error { + _, err := c.waitForPodLabel(ch, stopCh, &role) + return err + }(): } }() diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go index 823d3baf9..34f64e655 100644 --- a/pkg/cluster/cluster_test.go +++ b/pkg/cluster/cluster_test.go @@ -34,8 +34,8 @@ func TestInitRobotUsers(t *testing.T) { { manifestUsers: map[string]spec.UserFlags{"foo": {"superuser", "createdb"}}, infraRoles: map[string]spec.PgUser{"foo": {Origin: spec.RoleOriginInfrastructure, Name: "foo", Password: "bar"}}, - result: map[string]spec.PgUser{"foo": {Origin: spec.RoleOriginInfrastructure, Name: "foo", Password: "bar"}}, - err: nil, + result: map[string]spec.PgUser{"foo": {Origin: spec.RoleOriginInfrastructure, Name: "foo", Password: "bar"}}, + err: nil, }, { manifestUsers: map[string]spec.UserFlags{"!fooBar": {"superuser", "createdb"}}, diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 4e500de29..18350f526 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -707,7 +707,7 @@ func (c *Cluster) shouldCreateLoadBalancerForService(role PostgresRole, spec *sp // `enable_load_balancer`` governs LB for a master service // there is no equivalent deprecated operator option for the replica LB if c.OpConfig.EnableLoadBalancer != nil { - c.logger.Debugf("The operator configmap sets the deprecated `enable_load_balancer` param. Consider using the `enable_master_load_balancer` or `enable_replica_load_balancer` instead.", c.Name) + c.logger.Debugf("The operator configmap sets the deprecated `enable_load_balancer` param. Consider using the `enable_master_load_balancer` or `enable_replica_load_balancer` instead.") return *c.OpConfig.EnableLoadBalancer } diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 28960d9c1..432597f7f 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -149,13 +149,19 @@ func (c *Cluster) movePodFromEndOfLifeNode(pod *v1.Pod) (*v1.Pod, error) { } func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) { + + // Wait until at least one replica pod will come up + if err := c.waitForAnyReplicaLabelReady(); err != nil { + c.logger.Warningf("could not find at least one ready replica: %v", err) + } + replicas, err := c.getRolePods(Replica) if err != nil { return nil, fmt.Errorf("could not get replica pods: %v", err) } if len(replicas) == 0 { - c.logger.Warningf("single master pod for cluster %q, migration will cause disruption of the service") + c.logger.Warningf("no available master candidates, migration will cause longer downtime of the master instance") return nil, nil } @@ -168,12 +174,16 @@ func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) { } } } - c.logger.Debug("no available master candidates on live nodes") + c.logger.Warningf("no available master candidates on live nodes") return &replicas[rand.Intn(len(replicas))], nil } // MigrateMasterPod migrates master pod via failover to a replica func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error { + var ( + masterCandidatePod *v1.Pod + ) + oldMaster, err := c.KubeClient.Pods(podName.Namespace).Get(podName.Name, metav1.GetOptions{}) if err != nil { @@ -193,10 +203,13 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error { c.logger.Warningf("pod %q is not a master", podName) return nil } - - masterCandidatePod, err := c.masterCandidate(oldMaster.Spec.NodeName) - if err != nil { - return fmt.Errorf("could not get new master candidate: %v", err) + if *c.Statefulset.Spec.Replicas == 1 { + c.logger.Warningf("single master pod for cluster %q, migration will cause longer downtime of the master instance", c.clusterName()) + } else { + masterCandidatePod, err = c.masterCandidate(oldMaster.Spec.NodeName) + if err != nil { + return fmt.Errorf("could not get new master candidate: %v", err) + } } // there are two cases for each postgres cluster that has its master pod on the node to migrate from: @@ -250,6 +263,7 @@ func (c *Cluster) MigrateReplicaPod(podName spec.NamespacedName, fromNodeName st func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { ch := c.registerPodSubscriber(podName) defer c.unregisterPodSubscriber(podName) + stopChan := make(chan struct{}) if err := c.KubeClient.Pods(podName.Namespace).Delete(podName.Name, c.deleteOptions); err != nil { return nil, fmt.Errorf("could not delete pod: %v", err) @@ -258,7 +272,7 @@ func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) { if err := c.waitForPodDeletion(ch); err != nil { return nil, err } - if pod, err := c.waitForPodLabel(ch, nil); err != nil { + if pod, err := c.waitForPodLabel(ch, stopChan, nil); err != nil { return nil, err } else { c.logger.Infof("pod %q has been recreated", podName) diff --git a/pkg/cluster/util.go b/pkg/cluster/util.go index 1220aeb86..b72835311 100644 --- a/pkg/cluster/util.go +++ b/pkg/cluster/util.go @@ -225,7 +225,7 @@ func (c *Cluster) getTeamMembers() ([]string, error) { return teamInfo.Members, nil } -func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent, role *PostgresRole) (*v1.Pod, error) { +func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent, stopChan chan struct{}, role *PostgresRole) (*v1.Pod, error) { timeout := time.After(c.OpConfig.PodLabelWaitTimeout) for { select { @@ -241,6 +241,8 @@ func (c *Cluster) waitForPodLabel(podEvents chan spec.PodEvent, role *PostgresRo } case <-timeout: return nil, fmt.Errorf("pod label wait timeout") + case <-stopChan: + return nil, fmt.Errorf("pod label wait cancelled") } } } @@ -278,7 +280,10 @@ func (c *Cluster) waitStatefulsetReady() error { }) } -func (c *Cluster) waitPodLabelsReady() error { +func (c *Cluster) _waitPodLabelsReady(anyReplica bool) error { + var ( + podsNumber int + ) ls := c.labelsSet(false) namespace := c.Namespace @@ -295,35 +300,56 @@ func (c *Cluster) waitPodLabelsReady() error { c.OpConfig.PodRoleLabel: string(Replica), }).String(), } - pods, err := c.KubeClient.Pods(namespace).List(listOptions) - if err != nil { - return err + podsNumber = 1 + if !anyReplica { + pods, err := c.KubeClient.Pods(namespace).List(listOptions) + if err != nil { + return err + } + podsNumber = len(pods.Items) + c.logger.Debugf("Waiting for %d pods to become ready", podsNumber) + } else { + c.logger.Debugf("Waiting for any replica pod to become ready") } - podsNumber := len(pods.Items) - err = retryutil.Retry(c.OpConfig.ResourceCheckInterval, c.OpConfig.ResourceCheckTimeout, + err := retryutil.Retry(c.OpConfig.ResourceCheckInterval, c.OpConfig.ResourceCheckTimeout, func() (bool, error) { - masterPods, err2 := c.KubeClient.Pods(namespace).List(masterListOption) - if err2 != nil { - return false, err2 + masterCount := 0 + if !anyReplica { + masterPods, err2 := c.KubeClient.Pods(namespace).List(masterListOption) + if err2 != nil { + return false, err2 + } + if len(masterPods.Items) > 1 { + return false, fmt.Errorf("too many masters (%d pods with the master label found)", + len(masterPods.Items)) + } + masterCount = len(masterPods.Items) } replicaPods, err2 := c.KubeClient.Pods(namespace).List(replicaListOption) if err2 != nil { return false, err2 } - if len(masterPods.Items) > 1 { - return false, fmt.Errorf("too many masters") - } - if len(replicaPods.Items) == podsNumber { + replicaCount := len(replicaPods.Items) + if anyReplica && replicaCount > 0 { + c.logger.Debugf("Found %d running replica pods", replicaCount) return true, nil } - return len(masterPods.Items)+len(replicaPods.Items) == podsNumber, nil + return masterCount+replicaCount >= podsNumber, nil }) return err } +func (c *Cluster) waitForAnyReplicaLabelReady() error { + return c._waitPodLabelsReady(true) +} + +func (c *Cluster) waitForAllPodsLabelReady() error { + return c._waitPodLabelsReady(false) +} + func (c *Cluster) waitStatefulsetPodsReady() error { c.setProcessName("waiting for the pods of the statefulset") // TODO: wait for the first Pod only @@ -332,7 +358,7 @@ func (c *Cluster) waitStatefulsetPodsReady() error { } // TODO: wait only for master - if err := c.waitPodLabelsReady(); err != nil { + if err := c.waitForAllPodsLabelReady(); err != nil { return fmt.Errorf("pod labels error: %v", err) } From 4c8dfd7e206241815846dd043582960fa47506f4 Mon Sep 17 00:00:00 2001 From: Oleksii Kliukin Date: Thu, 3 May 2018 10:21:37 +0200 Subject: [PATCH 21/21] Remove the check for the clone cluster name. (#270) * Sanity checks for the cluster name, improve tests. - check that the normal and clone cluster name complies with the valid service name. For clone cluster, only do it if clone timestamp is not set; with a clone timestamp set, the clone name points to the S3 bucket - add tests and improve existing ones, making sure we don't call Error() method for an empty error, as well as that we don't miss cases where expected error is not empty, but actual call to be tested does not return an error. Code review by @zerg-junior and @Jan-M --- pkg/spec/postgresql.go | 53 ++++++++++++++++------ pkg/spec/postgresql_test.go | 88 ++++++++++++++++++++++++++----------- 2 files changed, 102 insertions(+), 39 deletions(-) diff --git a/pkg/spec/postgresql.go b/pkg/spec/postgresql.go index 7dac7992b..c7b5df902 100644 --- a/pkg/spec/postgresql.go +++ b/pkg/spec/postgresql.go @@ -3,6 +3,7 @@ package spec import ( "encoding/json" "fmt" + "regexp" "strings" "time" @@ -76,6 +77,12 @@ const ( ClusterStatusInvalid PostgresStatus = "Invalid" ) +const ( + serviceNameMaxLength = 63 + clusterNameMaxLength = serviceNameMaxLength - len("-repl") + serviceNameRegexString = `^[a-z]([-a-z0-9]*[a-z0-9])?$` +) + // Postgresql defines PostgreSQL Custom Resource Definition Object. type Postgresql struct { metav1.TypeMeta `json:",inline"` @@ -126,7 +133,10 @@ type PostgresqlList struct { Items []Postgresql `json:"items"` } -var weekdays = map[string]int{"Sun": 0, "Mon": 1, "Tue": 2, "Wed": 3, "Thu": 4, "Fri": 5, "Sat": 6} +var ( + weekdays = map[string]int{"Sun": 0, "Mon": 1, "Tue": 2, "Wed": 3, "Thu": 4, "Fri": 5, "Sat": 6} + serviceNameRegex = regexp.MustCompile(serviceNameRegexString) +) func parseTime(s string) (time.Time, error) { parts := strings.Split(s, ":") @@ -225,10 +235,31 @@ func extractClusterName(clusterName string, teamName string) (string, error) { if strings.ToLower(clusterName[:teamNameLen+1]) != strings.ToLower(teamName)+"-" { return "", fmt.Errorf("name must match {TEAM}-{NAME} format") } + if len(clusterName) > clusterNameMaxLength { + return "", fmt.Errorf("name cannot be longer than %d characters", clusterNameMaxLength) + } + if !serviceNameRegex.MatchString(clusterName) { + return "", fmt.Errorf("name must confirm to DNS-1035, regex used for validation is %q", + serviceNameRegexString) + } return clusterName[teamNameLen+1:], nil } +func validateCloneClusterDescription(clone *CloneDescription) error { + // when cloning from the basebackup (no end timestamp) check that the cluster name is a valid service name + if clone.ClusterName != "" && clone.EndTimestamp == "" { + if !serviceNameRegex.MatchString(clone.ClusterName) { + return fmt.Errorf("clone cluster name must confirm to DNS-1035, regex used for validation is %q", + serviceNameRegexString) + } + if len(clone.ClusterName) > serviceNameMaxLength { + return fmt.Errorf("clone cluster name must be no longer than %d characters", serviceNameMaxLength) + } + } + return nil +} + type postgresqlListCopy PostgresqlList type postgresqlCopy Postgresql @@ -252,22 +283,16 @@ func (p *Postgresql) UnmarshalJSON(data []byte) error { } tmp2 := Postgresql(tmp) - clusterName, err := extractClusterName(tmp2.ObjectMeta.Name, tmp2.Spec.TeamID) - if err == nil { - tmp2.Spec.ClusterName = clusterName - } else { + if clusterName, err := extractClusterName(tmp2.ObjectMeta.Name, tmp2.Spec.TeamID); err != nil { tmp2.Error = err tmp2.Status = ClusterStatusInvalid + } else if err := validateCloneClusterDescription(&tmp2.Spec.Clone); err != nil { + tmp2.Error = err + tmp2.Status = ClusterStatusInvalid + } else { + tmp2.Spec.ClusterName = clusterName } - // The assumption below is that a cluster to clone, if any, belongs to the same team - if tmp2.Spec.Clone.ClusterName != "" { - _, err := extractClusterName(tmp2.Spec.Clone.ClusterName, tmp2.Spec.TeamID) - if err != nil { - tmp2.Error = fmt.Errorf("%s for the cluster to clone", err) - tmp2.Spec.Clone = CloneDescription{} - tmp2.Status = ClusterStatusInvalid - } - } + *p = tmp2 return nil diff --git a/pkg/spec/postgresql_test.go b/pkg/spec/postgresql_test.go index 091334e8e..07251edeb 100644 --- a/pkg/spec/postgresql_test.go +++ b/pkg/spec/postgresql_test.go @@ -43,7 +43,10 @@ var clusterNames = []struct { {"acid-test", "acid", "test", nil}, {"test-my-name", "test", "my-name", nil}, {"my-team-another-test", "my-team", "another-test", nil}, - {"------strange-team-cluster", "-----", "strange-team-cluster", nil}, + {"------strange-team-cluster", "-----", "strange-team-cluster", + errors.New(`name must confirm to DNS-1035, regex used for validation is "^[a-z]([-a-z0-9]*[a-z0-9])?$"`)}, + {"fooobar-fooobarfooobarfooobarfooobarfooobarfooobarfooobarfooobar", "fooobar", "", + errors.New("name cannot be longer than 58 characters")}, {"acid-test", "test", "", errors.New("name must match {TEAM}-{NAME} format")}, {"-test", "", "", errors.New("team name is empty")}, {"-test", "-", "", errors.New("name must match {TEAM}-{NAME} format")}, @@ -51,6 +54,18 @@ var clusterNames = []struct { {"-", "-", "", errors.New("name is too short")}, } +var cloneClusterDescriptions = []struct { + in *CloneDescription + err error +}{ + {&CloneDescription{"foo+bar", "", "NotEmpty"}, nil}, + {&CloneDescription{"foo+bar", "", ""}, + errors.New(`clone cluster name must confirm to DNS-1035, regex used for validation is "^[a-z]([-a-z0-9]*[a-z0-9])?$"`)}, + {&CloneDescription{"foobar123456789012345678901234567890123456789012345678901234567890", "", ""}, + errors.New("clone cluster name must be no longer than 63 characters")}, + {&CloneDescription{"foobar", "", ""}, nil}, +} + var maintenanceWindows = []struct { in []byte out MaintenanceWindow @@ -279,14 +294,15 @@ var unmarshalCluster = []struct { Name: "acid-testcluster1", }, Spec: PostgresSpec{ - TeamID: "acid", - Clone: CloneDescription{}, + TeamID: "acid", + Clone: CloneDescription{ + ClusterName: "team-batman", + }, ClusterName: "testcluster1", }, - Status: ClusterStatusInvalid, - Error: errors.New("name must match {TEAM}-{NAME} format for the cluster to clone"), + Error: nil, }, - marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), err: nil}, + marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{"cluster":"team-batman"}}}`), err: nil}, {[]byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1"`), Postgresql{}, []byte{}, @@ -350,11 +366,12 @@ func TestParseTime(t *testing.T) { for _, tt := range parseTimeTests { aTime, err := parseTime(tt.in) if err != nil { - if err.Error() != tt.err.Error() { + if tt.err == nil || err.Error() != tt.err.Error() { t.Errorf("ParseTime expected error: %v, got: %v", tt.err, err) } - continue + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } if aTime != tt.out { @@ -367,11 +384,12 @@ func TestWeekdayTime(t *testing.T) { for _, tt := range parseWeekdayTests { aTime, err := parseWeekday(tt.in) if err != nil { - if err.Error() != tt.err.Error() { + if tt.err == nil || err.Error() != tt.err.Error() { t.Errorf("ParseWeekday expected error: %v, got: %v", tt.err, err) } - continue + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } if aTime != tt.out { @@ -383,9 +401,13 @@ func TestWeekdayTime(t *testing.T) { func TestClusterName(t *testing.T) { for _, tt := range clusterNames { name, err := extractClusterName(tt.in, tt.inTeam) - if err != nil && err.Error() != tt.err.Error() { - t.Errorf("extractClusterName expected error: %v, got: %v", tt.err, err) + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("extractClusterName expected error: %v, got: %v", tt.err, err) + } continue + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } if name != tt.clusterName { t.Errorf("Expected cluserName: %q, got: %q", tt.clusterName, name) @@ -393,17 +415,29 @@ func TestClusterName(t *testing.T) { } } +func TestCloneClusterDescription(t *testing.T) { + for _, tt := range cloneClusterDescriptions { + if err := validateCloneClusterDescription(tt.in); err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("testCloneClusterDescription expected error: %v, got: %v", tt.err, err) + } + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) + } + } +} + func TestUnmarshalMaintenanceWindow(t *testing.T) { for _, tt := range maintenanceWindows { var m MaintenanceWindow err := m.UnmarshalJSON(tt.in) - if err != nil && err.Error() != tt.err.Error() { - t.Errorf("MaintenanceWindow unmarshal expected error: %v, got %v", tt.err, err) - continue - } - if tt.err != nil && err == nil { - t.Errorf("Expected error") + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("MaintenanceWindow unmarshal expected error: %v, got %v", tt.err, err) + } continue + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } if !reflect.DeepEqual(m, tt.out) { @@ -421,7 +455,6 @@ func TestMarshalMaintenanceWindow(t *testing.T) { s, err := tt.out.MarshalJSON() if err != nil { t.Errorf("Marshal Error: %v", err) - continue } if !bytes.Equal(s, tt.in) { @@ -435,11 +468,12 @@ func TestPostgresUnmarshal(t *testing.T) { var cluster Postgresql err := cluster.UnmarshalJSON(tt.in) if err != nil { - if err.Error() != tt.err.Error() { + if tt.err == nil || err.Error() != tt.err.Error() { t.Errorf("Unmarshal expected error: %v, got: %v", tt.err, err) } - continue + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } if !reflect.DeepEqual(cluster, tt.out) { @@ -457,7 +491,6 @@ func TestMarshal(t *testing.T) { m, err := json.Marshal(tt.out) if err != nil { t.Errorf("Marshal error: %v", err) - continue } if !bytes.Equal(m, tt.marshal) { t.Errorf("Marshal Postgresql expected: %q, got: %q", string(tt.marshal), string(m)) @@ -481,10 +514,15 @@ func TestUnmarshalPostgresList(t *testing.T) { for _, tt := range postgresqlList { var list PostgresqlList err := list.UnmarshalJSON(tt.in) - if err != nil && err.Error() != tt.err.Error() { - t.Errorf("PostgresqlList unmarshal expected error: %v, got: %v", tt.err, err) - return + if err != nil { + if tt.err == nil || err.Error() != tt.err.Error() { + t.Errorf("PostgresqlList unmarshal expected error: %v, got: %v", tt.err, err) + } + continue + } else if tt.err != nil { + t.Errorf("Expected error: %v", tt.err) } + if !reflect.DeepEqual(list, tt.out) { t.Errorf("Postgresql list unmarshall expected: %#v, got: %#v", tt.out, list) }