From 8b85935a7a37522593ceab18181b22868cc9cac5 Mon Sep 17 00:00:00 2001 From: Oleksii Kliukin Date: Fri, 8 Sep 2017 16:47:03 +0200 Subject: [PATCH] Allow cloning clusters from the operator. (#90) Allow cloning clusters from the operator. The changes add a new JSON node `clone` with possible values `cluster` and `timestamp`. `cluster` is mandatory, and setting a non-empty `timestamp` triggers wal-e point in time recovery. Spilo and Patroni do the whole heavy-lifting, the operator just defines certain variables and gathers some data about how to connect to the host to clone or the target S3 bucket. As a minor change, set the image pull policy to IfNotPresent instead of Always to simplify local testing. Change the default replication username to standby. --- manifests/configmap.yaml | 2 +- pkg/cluster/k8sres.go | 66 ++++++++++++++++++++++++++++++++++--- pkg/cluster/util.go | 6 +++- pkg/spec/postgresql.go | 18 +++++++++- pkg/spec/postgresql_test.go | 34 ++++++++++++++++--- pkg/util/config/config.go | 2 +- 6 files changed, 115 insertions(+), 13 deletions(-) diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index b04f5947e..25205b180 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -21,7 +21,7 @@ data: pod_label_wait_timeout: 10m ready_wait_interval: 3s ready_wait_timeout: 30s - replication_username: replication + replication_username: standby resource_check_interval: 3s resource_check_timeout: 10m resync_period: 5m diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 76e0c0fa4..0046f468b 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -226,7 +226,10 @@ PATRONI_INITDB_PARAMS: return string(result) } -func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequirements, pgParameters *spec.PostgresqlParam, patroniParameters *spec.Patroni) *v1.PodTemplateSpec { +func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequirements, + pgParameters *spec.PostgresqlParam, + patroniParameters *spec.Patroni, + cloneDescription *spec.CloneDescription) *v1.PodTemplateSpec { spiloConfiguration := c.generateSpiloJSONConfiguration(pgParameters, patroniParameters) envVars := []v1.EnvVar{ @@ -301,11 +304,17 @@ func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequireme if c.OpConfig.WALES3Bucket != "" { envVars = append(envVars, v1.EnvVar{Name: "WAL_S3_BUCKET", Value: c.OpConfig.WALES3Bucket}) } + if cloneDescription.ClusterName != "" { + cloneVars := c.generateCloneEnvironment(cloneDescription) + for _, v := range cloneVars { + envVars = append(envVars, v) + } + } privilegedMode := bool(true) container := v1.Container{ Name: c.containerName(), Image: c.OpConfig.DockerImage, - ImagePullPolicy: v1.PullAlways, + ImagePullPolicy: v1.PullIfNotPresent, Resources: *resourceRequirements, Ports: []v1.ContainerPort{ { @@ -357,13 +366,13 @@ func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequireme func (c *Cluster) generateStatefulSet(spec spec.PostgresSpec) (*v1beta1.StatefulSet, error) { resourceRequirements, err := c.resourceRequirements(spec.Resources) if err != nil { - return nil, err + return nil, fmt.Errorf("could not generate resource requirements: %v", err) } - podTemplate := c.generatePodTemplate(resourceRequirements, &spec.PostgresqlParam, &spec.Patroni) + podTemplate := c.generatePodTemplate(resourceRequirements, &spec.PostgresqlParam, &spec.Patroni, &spec.Clone) volumeClaimTemplate, err := generatePersistentVolumeClaimTemplate(spec.Volume.Size, spec.Volume.StorageClass) if err != nil { - return nil, err + return nil, fmt.Errorf("could not generate volume claim template: %v", err) } statefulSet := &v1beta1.StatefulSet{ @@ -523,3 +532,50 @@ func (c *Cluster) generateMasterEndpoints(subsets []v1.EndpointSubset) *v1.Endpo return endpoints } + +func (c *Cluster) generateCloneEnvironment(description *spec.CloneDescription) []v1.EnvVar { + result := make([]v1.EnvVar, 0) + if description.ClusterName == "" { + return result + } + cluster := description.ClusterName + result = append(result, v1.EnvVar{Name: "CLONE_SCOPE", Value: cluster}) + if description.EndTimestamp == "" { + // cloning with basebackup, make a connection string to the cluster to clone from + host, port := c.getClusterServiceConnectionParameters(cluster) + // TODO: make some/all of those constants + result = append(result, v1.EnvVar{Name: "CLONE_METHOD", Value: "CLONE_WITH_BASEBACKUP"}) + result = append(result, v1.EnvVar{Name: "CLONE_HOST", Value: host}) + result = append(result, v1.EnvVar{Name: "CLONE_PORT", Value: port}) + // TODO: assume replication user name is the same for all clusters, fetch it from secrets otherwise + result = append(result, v1.EnvVar{Name: "CLONE_USER", Value: c.OpConfig.ReplicationUsername}) + result = append(result, + v1.EnvVar{Name: "CLONE_PASSWORD", + ValueFrom: &v1.EnvVarSource{ + SecretKeyRef: &v1.SecretKeySelector{ + LocalObjectReference: v1.LocalObjectReference{ + Name: c.credentialSecretNameForCluster(c.OpConfig.ReplicationUsername, + description.ClusterName), + }, + Key: "password", + }, + }, + }) + } else { + // cloning with S3, find out the bucket to clone + clone_wal_s3_bucket := c.OpConfig.WALES3Bucket + result = append(result, v1.EnvVar{Name: "CLONE_METHOD", Value: "CLONE_WITH_WALE"}) + result = append(result, v1.EnvVar{Name: "CLONE_WAL_S3_BUCKET", Value: clone_wal_s3_bucket}) + result = append(result, v1.EnvVar{Name: "CLONE_TARGET_TIME", Value: description.EndTimestamp}) + } + return result +} + +// getClusterServiceConnectionParameters fetches cluster host name and port +// TODO: perhaps we need to query the service (i.e. if non-standard port is used?) +// TODO: handle clusters in different namespaces +func (c *Cluster) getClusterServiceConnectionParameters(clusterName string) (host string, port string) { + host = clusterName + port = "5432" + return +} diff --git a/pkg/cluster/util.go b/pkg/cluster/util.go index b490a583d..207e2743a 100644 --- a/pkg/cluster/util.go +++ b/pkg/cluster/util.go @@ -304,11 +304,15 @@ func (c *Cluster) replicaDNSName() string { } func (c *Cluster) credentialSecretName(username string) string { + return c.credentialSecretNameForCluster(username, c.Name) +} + +func (c *Cluster) credentialSecretNameForCluster(username string, clusterName string) string { // secret must consist of lower case alphanumeric characters, '-' or '.', // and must start and end with an alphanumeric character return fmt.Sprintf(constants.UserSecretTemplate, strings.Replace(username, "_", "-", -1), - c.Name) + clusterName) } func (c *Cluster) podSpiloRole(pod *v1.Pod) string { diff --git a/pkg/spec/postgresql.go b/pkg/spec/postgresql.go index 8598c66cd..1e3962caf 100644 --- a/pkg/spec/postgresql.go +++ b/pkg/spec/postgresql.go @@ -51,6 +51,12 @@ type Patroni struct { MaximumLagOnFailover float32 `json:"maximum_lag_on_failover"` // float32 because https://github.com/kubernetes/kubernetes/issues/30213 } +// CloneDescription describes which cluster the new should clone and up to which point in time +type CloneDescription struct { + ClusterName string `json:"cluster,omitempty"` + EndTimestamp string `json:"timestamp,omitempty"` +} + type userFlags []string // PostgresStatus contains status of the PostgreSQL cluster (running, creation failed etc.) @@ -86,12 +92,13 @@ type PostgresSpec struct { TeamID string `json:"teamId"` AllowedSourceRanges []string `json:"allowedSourceRanges"` - // EnableLoadBalancer is a pointer, since it is importat to know if that parameters is omitted from the manifest + // EnableLoadBalancer is a pointer, since it is important to know if that parameters is omitted from the manifest UseLoadBalancer *bool `json:"useLoadBalancer,omitempty"` ReplicaLoadBalancer bool `json:"replicaLoadBalancer,omitempty"` NumberOfInstances int32 `json:"numberOfInstances"` Users map[string]userFlags `json:"users"` MaintenanceWindows []MaintenanceWindow `json:"maintenanceWindows,omitempty"` + Clone CloneDescription `json:"clone"` ClusterName string `json:"-"` } @@ -236,6 +243,15 @@ func (p *Postgresql) UnmarshalJSON(data []byte) error { tmp2.Error = err tmp2.Status = ClusterStatusInvalid } + // The assumption below is that a cluster to clone, if any, belongs to the same team + if tmp2.Spec.Clone.ClusterName != "" { + _, err := extractClusterName(tmp2.Spec.Clone.ClusterName, tmp2.Spec.TeamID) + if err != nil { + tmp2.Error = fmt.Errorf("%s for the cluster to clone", err) + tmp2.Spec.Clone = CloneDescription{} + tmp2.Status = ClusterStatusInvalid + } + } *p = tmp2 return nil diff --git a/pkg/spec/postgresql_test.go b/pkg/spec/postgresql_test.go index 7c67bf0a9..07a040ee1 100644 --- a/pkg/spec/postgresql_test.go +++ b/pkg/spec/postgresql_test.go @@ -119,7 +119,7 @@ var unmarshalCluster = []struct { Field: "teamId", }, }, - []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"","allowedSourceRanges":null,"numberOfInstances":0,"users":null},"status":"Invalid"}`), nil}, + []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), nil}, {[]byte(`{ "kind": "Postgresql", "apiVersion": "acid.zalan.do/v1", @@ -160,6 +160,9 @@ var unmarshalCluster = []struct { "memory": "3000Mi" } }, + "clone" : { + "cluster": "acid-batman" + }, "patroni": { "initdb": { "encoding": "UTF8", @@ -219,6 +222,7 @@ var unmarshalCluster = []struct { ResourceRequest: ResourceDescription{CPU: "10m", Memory: "50Mi"}, ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"}, }, + TeamID: "ACID", AllowedSourceRanges: []string{"127.0.0.1/32"}, NumberOfInstances: 2, @@ -241,11 +245,14 @@ var unmarshalCluster = []struct { EndTime: mustParseTime("05:15"), }, }, + Clone: CloneDescription{ + ClusterName: "acid-batman", + }, ClusterName: "testcluster1", }, Error: nil, }, - []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"9.6","parameters":{"log_statement":"all","max_connections":"10","shared_buffers":"32MB"}},"volume":{"size":"5Gi","storageClass":"SSD"},"patroni":{"initdb":{"data-checksums":"true","encoding":"UTF8","locale":"en_US.UTF-8"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"],"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432},"resources":{"requests":{"cpu":"10m","memory":"50Mi"},"limits":{"cpu":"300m","memory":"3000Mi"}},"teamId":"ACID","allowedSourceRanges":["127.0.0.1/32"],"numberOfInstances":2,"users":{"zalando":["superuser","createdb"]},"maintenanceWindows":["Mon:01:00-06:00","Sat:00:00-04:00","05:00-05:15"]}}`), nil}, + []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"9.6","parameters":{"log_statement":"all","max_connections":"10","shared_buffers":"32MB"}},"volume":{"size":"5Gi","storageClass":"SSD"},"patroni":{"initdb":{"data-checksums":"true","encoding":"UTF8","locale":"en_US.UTF-8"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"],"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432},"resources":{"requests":{"cpu":"10m","memory":"50Mi"},"limits":{"cpu":"300m","memory":"3000Mi"}},"teamId":"ACID","allowedSourceRanges":["127.0.0.1/32"],"numberOfInstances":2,"users":{"zalando":["superuser","createdb"]},"maintenanceWindows":["Mon:01:00-06:00","Sat:00:00-04:00","05:00-05:15"],"clone":{"cluster":"acid-batman"}}}`), nil}, { []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "teapot-testcluster1"}, "spec": {"teamId": "acid"}}`), Postgresql{ @@ -260,12 +267,31 @@ var unmarshalCluster = []struct { Status: ClusterStatusInvalid, Error: errors.New("name must match {TEAM}-{NAME} format"), }, - []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"teapot-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null},"status":"Invalid"}`), nil}, + []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"teapot-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), nil}, + { + in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": "acid", "clone": {"cluster": "team-batman"}}}`), + out: Postgresql{ + TypeMeta: metav1.TypeMeta{ + Kind: "Postgresql", + APIVersion: "acid.zalan.do/v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "acid-testcluster1", + }, + Spec: PostgresSpec{ + TeamID: "acid", + Clone: CloneDescription{}, + ClusterName: "testcluster1", + }, + Status: ClusterStatusInvalid, + Error: errors.New("name must match {TEAM}-{NAME} format for the cluster to clone"), + }, + marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), err: nil}, {[]byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1"`), Postgresql{}, []byte{}, errors.New("unexpected end of JSON input")}, - {[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster","creationTimestamp":qaz},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null},"status":"Invalid"}`), + {[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster","creationTimestamp":qaz},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), Postgresql{}, []byte{}, errors.New("invalid character 'q' looking for beginning of value")}} diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index db15cc046..3a9a52f55 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -38,7 +38,7 @@ type Auth struct { OAuthTokenSecretName spec.NamespacedName `name:"oauth_token_secret_name" default:"postgresql-operator"` InfrastructureRolesSecretName spec.NamespacedName `name:"infrastructure_roles_secret_name"` SuperUsername string `name:"super_username" default:"postgres"` - ReplicationUsername string `name:"replication_username" default:"replication"` + ReplicationUsername string `name:"replication_username" default:"standby"` } // Config describes operator config