Allow cloning clusters from the operator. (#90)

Allow cloning clusters from the operator.

The changes add a new JSON node `clone` with possible values `cluster`
and `timestamp`. `cluster` is mandatory, and setting a non-empty
`timestamp` triggers wal-e point in time recovery. Spilo and Patroni do
the whole heavy-lifting, the operator just defines certain variables and
gathers some data about how to connect to the host to clone or the
target S3 bucket.

As a minor change, set the image pull policy to IfNotPresent instead
of Always to simplify local testing.

Change the default replication username to standby.
This commit is contained in:
Oleksii Kliukin 2017-09-08 16:47:03 +02:00 committed by GitHub
parent a0a9e8f849
commit 8b85935a7a
6 changed files with 115 additions and 13 deletions

View File

@ -21,7 +21,7 @@ data:
pod_label_wait_timeout: 10m
ready_wait_interval: 3s
ready_wait_timeout: 30s
replication_username: replication
replication_username: standby
resource_check_interval: 3s
resource_check_timeout: 10m
resync_period: 5m

View File

@ -226,7 +226,10 @@ PATRONI_INITDB_PARAMS:
return string(result)
}
func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequirements, pgParameters *spec.PostgresqlParam, patroniParameters *spec.Patroni) *v1.PodTemplateSpec {
func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequirements,
pgParameters *spec.PostgresqlParam,
patroniParameters *spec.Patroni,
cloneDescription *spec.CloneDescription) *v1.PodTemplateSpec {
spiloConfiguration := c.generateSpiloJSONConfiguration(pgParameters, patroniParameters)
envVars := []v1.EnvVar{
@ -301,11 +304,17 @@ func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequireme
if c.OpConfig.WALES3Bucket != "" {
envVars = append(envVars, v1.EnvVar{Name: "WAL_S3_BUCKET", Value: c.OpConfig.WALES3Bucket})
}
if cloneDescription.ClusterName != "" {
cloneVars := c.generateCloneEnvironment(cloneDescription)
for _, v := range cloneVars {
envVars = append(envVars, v)
}
}
privilegedMode := bool(true)
container := v1.Container{
Name: c.containerName(),
Image: c.OpConfig.DockerImage,
ImagePullPolicy: v1.PullAlways,
ImagePullPolicy: v1.PullIfNotPresent,
Resources: *resourceRequirements,
Ports: []v1.ContainerPort{
{
@ -357,13 +366,13 @@ func (c *Cluster) generatePodTemplate(resourceRequirements *v1.ResourceRequireme
func (c *Cluster) generateStatefulSet(spec spec.PostgresSpec) (*v1beta1.StatefulSet, error) {
resourceRequirements, err := c.resourceRequirements(spec.Resources)
if err != nil {
return nil, err
return nil, fmt.Errorf("could not generate resource requirements: %v", err)
}
podTemplate := c.generatePodTemplate(resourceRequirements, &spec.PostgresqlParam, &spec.Patroni)
podTemplate := c.generatePodTemplate(resourceRequirements, &spec.PostgresqlParam, &spec.Patroni, &spec.Clone)
volumeClaimTemplate, err := generatePersistentVolumeClaimTemplate(spec.Volume.Size, spec.Volume.StorageClass)
if err != nil {
return nil, err
return nil, fmt.Errorf("could not generate volume claim template: %v", err)
}
statefulSet := &v1beta1.StatefulSet{
@ -523,3 +532,50 @@ func (c *Cluster) generateMasterEndpoints(subsets []v1.EndpointSubset) *v1.Endpo
return endpoints
}
func (c *Cluster) generateCloneEnvironment(description *spec.CloneDescription) []v1.EnvVar {
result := make([]v1.EnvVar, 0)
if description.ClusterName == "" {
return result
}
cluster := description.ClusterName
result = append(result, v1.EnvVar{Name: "CLONE_SCOPE", Value: cluster})
if description.EndTimestamp == "" {
// cloning with basebackup, make a connection string to the cluster to clone from
host, port := c.getClusterServiceConnectionParameters(cluster)
// TODO: make some/all of those constants
result = append(result, v1.EnvVar{Name: "CLONE_METHOD", Value: "CLONE_WITH_BASEBACKUP"})
result = append(result, v1.EnvVar{Name: "CLONE_HOST", Value: host})
result = append(result, v1.EnvVar{Name: "CLONE_PORT", Value: port})
// TODO: assume replication user name is the same for all clusters, fetch it from secrets otherwise
result = append(result, v1.EnvVar{Name: "CLONE_USER", Value: c.OpConfig.ReplicationUsername})
result = append(result,
v1.EnvVar{Name: "CLONE_PASSWORD",
ValueFrom: &v1.EnvVarSource{
SecretKeyRef: &v1.SecretKeySelector{
LocalObjectReference: v1.LocalObjectReference{
Name: c.credentialSecretNameForCluster(c.OpConfig.ReplicationUsername,
description.ClusterName),
},
Key: "password",
},
},
})
} else {
// cloning with S3, find out the bucket to clone
clone_wal_s3_bucket := c.OpConfig.WALES3Bucket
result = append(result, v1.EnvVar{Name: "CLONE_METHOD", Value: "CLONE_WITH_WALE"})
result = append(result, v1.EnvVar{Name: "CLONE_WAL_S3_BUCKET", Value: clone_wal_s3_bucket})
result = append(result, v1.EnvVar{Name: "CLONE_TARGET_TIME", Value: description.EndTimestamp})
}
return result
}
// getClusterServiceConnectionParameters fetches cluster host name and port
// TODO: perhaps we need to query the service (i.e. if non-standard port is used?)
// TODO: handle clusters in different namespaces
func (c *Cluster) getClusterServiceConnectionParameters(clusterName string) (host string, port string) {
host = clusterName
port = "5432"
return
}

View File

@ -304,11 +304,15 @@ func (c *Cluster) replicaDNSName() string {
}
func (c *Cluster) credentialSecretName(username string) string {
return c.credentialSecretNameForCluster(username, c.Name)
}
func (c *Cluster) credentialSecretNameForCluster(username string, clusterName string) string {
// secret must consist of lower case alphanumeric characters, '-' or '.',
// and must start and end with an alphanumeric character
return fmt.Sprintf(constants.UserSecretTemplate,
strings.Replace(username, "_", "-", -1),
c.Name)
clusterName)
}
func (c *Cluster) podSpiloRole(pod *v1.Pod) string {

View File

@ -51,6 +51,12 @@ type Patroni struct {
MaximumLagOnFailover float32 `json:"maximum_lag_on_failover"` // float32 because https://github.com/kubernetes/kubernetes/issues/30213
}
// CloneDescription describes which cluster the new should clone and up to which point in time
type CloneDescription struct {
ClusterName string `json:"cluster,omitempty"`
EndTimestamp string `json:"timestamp,omitempty"`
}
type userFlags []string
// PostgresStatus contains status of the PostgreSQL cluster (running, creation failed etc.)
@ -86,12 +92,13 @@ type PostgresSpec struct {
TeamID string `json:"teamId"`
AllowedSourceRanges []string `json:"allowedSourceRanges"`
// EnableLoadBalancer is a pointer, since it is importat to know if that parameters is omitted from the manifest
// EnableLoadBalancer is a pointer, since it is important to know if that parameters is omitted from the manifest
UseLoadBalancer *bool `json:"useLoadBalancer,omitempty"`
ReplicaLoadBalancer bool `json:"replicaLoadBalancer,omitempty"`
NumberOfInstances int32 `json:"numberOfInstances"`
Users map[string]userFlags `json:"users"`
MaintenanceWindows []MaintenanceWindow `json:"maintenanceWindows,omitempty"`
Clone CloneDescription `json:"clone"`
ClusterName string `json:"-"`
}
@ -236,6 +243,15 @@ func (p *Postgresql) UnmarshalJSON(data []byte) error {
tmp2.Error = err
tmp2.Status = ClusterStatusInvalid
}
// The assumption below is that a cluster to clone, if any, belongs to the same team
if tmp2.Spec.Clone.ClusterName != "" {
_, err := extractClusterName(tmp2.Spec.Clone.ClusterName, tmp2.Spec.TeamID)
if err != nil {
tmp2.Error = fmt.Errorf("%s for the cluster to clone", err)
tmp2.Spec.Clone = CloneDescription{}
tmp2.Status = ClusterStatusInvalid
}
}
*p = tmp2
return nil

View File

@ -119,7 +119,7 @@ var unmarshalCluster = []struct {
Field: "teamId",
},
},
[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"","allowedSourceRanges":null,"numberOfInstances":0,"users":null},"status":"Invalid"}`), nil},
[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), nil},
{[]byte(`{
"kind": "Postgresql",
"apiVersion": "acid.zalan.do/v1",
@ -160,6 +160,9 @@ var unmarshalCluster = []struct {
"memory": "3000Mi"
}
},
"clone" : {
"cluster": "acid-batman"
},
"patroni": {
"initdb": {
"encoding": "UTF8",
@ -219,6 +222,7 @@ var unmarshalCluster = []struct {
ResourceRequest: ResourceDescription{CPU: "10m", Memory: "50Mi"},
ResourceLimits: ResourceDescription{CPU: "300m", Memory: "3000Mi"},
},
TeamID: "ACID",
AllowedSourceRanges: []string{"127.0.0.1/32"},
NumberOfInstances: 2,
@ -241,11 +245,14 @@ var unmarshalCluster = []struct {
EndTime: mustParseTime("05:15"),
},
},
Clone: CloneDescription{
ClusterName: "acid-batman",
},
ClusterName: "testcluster1",
},
Error: nil,
},
[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"9.6","parameters":{"log_statement":"all","max_connections":"10","shared_buffers":"32MB"}},"volume":{"size":"5Gi","storageClass":"SSD"},"patroni":{"initdb":{"data-checksums":"true","encoding":"UTF8","locale":"en_US.UTF-8"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"],"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432},"resources":{"requests":{"cpu":"10m","memory":"50Mi"},"limits":{"cpu":"300m","memory":"3000Mi"}},"teamId":"ACID","allowedSourceRanges":["127.0.0.1/32"],"numberOfInstances":2,"users":{"zalando":["superuser","createdb"]},"maintenanceWindows":["Mon:01:00-06:00","Sat:00:00-04:00","05:00-05:15"]}}`), nil},
[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"9.6","parameters":{"log_statement":"all","max_connections":"10","shared_buffers":"32MB"}},"volume":{"size":"5Gi","storageClass":"SSD"},"patroni":{"initdb":{"data-checksums":"true","encoding":"UTF8","locale":"en_US.UTF-8"},"pg_hba":["hostssl all all 0.0.0.0/0 md5","host all all 0.0.0.0/0 md5"],"ttl":30,"loop_wait":10,"retry_timeout":10,"maximum_lag_on_failover":33554432},"resources":{"requests":{"cpu":"10m","memory":"50Mi"},"limits":{"cpu":"300m","memory":"3000Mi"}},"teamId":"ACID","allowedSourceRanges":["127.0.0.1/32"],"numberOfInstances":2,"users":{"zalando":["superuser","createdb"]},"maintenanceWindows":["Mon:01:00-06:00","Sat:00:00-04:00","05:00-05:15"],"clone":{"cluster":"acid-batman"}}}`), nil},
{
[]byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "teapot-testcluster1"}, "spec": {"teamId": "acid"}}`),
Postgresql{
@ -260,12 +267,31 @@ var unmarshalCluster = []struct {
Status: ClusterStatusInvalid,
Error: errors.New("name must match {TEAM}-{NAME} format"),
},
[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"teapot-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null},"status":"Invalid"}`), nil},
[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"teapot-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), nil},
{
in: []byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1","metadata": {"name": "acid-testcluster1"}, "spec": {"teamId": "acid", "clone": {"cluster": "team-batman"}}}`),
out: Postgresql{
TypeMeta: metav1.TypeMeta{
Kind: "Postgresql",
APIVersion: "acid.zalan.do/v1",
},
ObjectMeta: metav1.ObjectMeta{
Name: "acid-testcluster1",
},
Spec: PostgresSpec{
TeamID: "acid",
Clone: CloneDescription{},
ClusterName: "testcluster1",
},
Status: ClusterStatusInvalid,
Error: errors.New("name must match {TEAM}-{NAME} format for the cluster to clone"),
},
marshal: []byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster1","creationTimestamp":null},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`), err: nil},
{[]byte(`{"kind": "Postgresql","apiVersion": "acid.zalan.do/v1"`),
Postgresql{},
[]byte{},
errors.New("unexpected end of JSON input")},
{[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster","creationTimestamp":qaz},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null},"status":"Invalid"}`),
{[]byte(`{"kind":"Postgresql","apiVersion":"acid.zalan.do/v1","metadata":{"name":"acid-testcluster","creationTimestamp":qaz},"spec":{"postgresql":{"version":"","parameters":null},"volume":{"size":"","storageClass":""},"patroni":{"initdb":null,"pg_hba":null,"ttl":0,"loop_wait":0,"retry_timeout":0,"maximum_lag_on_failover":0},"resources":{"requests":{"cpu":"","memory":""},"limits":{"cpu":"","memory":""}},"teamId":"acid","allowedSourceRanges":null,"numberOfInstances":0,"users":null,"clone":{}},"status":"Invalid"}`),
Postgresql{},
[]byte{},
errors.New("invalid character 'q' looking for beginning of value")}}

View File

@ -38,7 +38,7 @@ type Auth struct {
OAuthTokenSecretName spec.NamespacedName `name:"oauth_token_secret_name" default:"postgresql-operator"`
InfrastructureRolesSecretName spec.NamespacedName `name:"infrastructure_roles_secret_name"`
SuperUsername string `name:"super_username" default:"postgres"`
ReplicationUsername string `name:"replication_username" default:"replication"`
ReplicationUsername string `name:"replication_username" default:"standby"`
}
// Config describes operator config