introduce applicationId for separate stream CRDs

This commit is contained in:
Felix Kunde 2021-12-08 17:11:18 +01:00
parent 96a2da1fca
commit 74ee530a6c
8 changed files with 116 additions and 83 deletions

View File

@ -475,9 +475,12 @@ spec:
items: items:
type: object type: object
required: required:
- applicationId
- database - database
- tables - tables
properties: properties:
applicationId:
type: string
batchSize: batchSize:
type: integer type: integer
database: database:

View File

@ -522,9 +522,17 @@ Those parameters are grouped under the `tls` top-level key.
This section enables change data capture (CDC) streams e.g. into Zalando's This section enables change data capture (CDC) streams e.g. into Zalando's
distributed event broker [Nakadi](https://nakadi.io/). Parameters grouped distributed event broker [Nakadi](https://nakadi.io/). Parameters grouped
under the `streams` top-level key will be used by the operator to create a under the `streams` top-level key will be used by the operator to create
CRD for Zalando's internal CDC operator named like the Postgres cluster. custom resources for Zalando's internal CDC operator. Each stream object can
Each stream object can have the following properties: have the following properties:
* **applicationId**
The name of the application to which the database and CDC belong. For each
set of streams with a distinct `applicationId`, a separate stream CR as well
as a separate logical replication slot will be created. This means there can
be different streams in the same database, and streams with the same
`applicationId` are bundled in one stream CR. The stream CR is named after
the Postgres cluster with a "-<applicationId>" suffix. Required.
* **database** * **database**
Name of the database from where events will be published via Postgres' Name of the database from where events will be published via Postgres'

View File

@ -3,6 +3,7 @@ kind: postgresql
metadata: metadata:
name: acid-test-cluster name: acid-test-cluster
# labels: # labels:
# application: test-app
# environment: demo # environment: demo
# annotations: # annotations:
# "acid.zalan.do/controller": "second-operator" # "acid.zalan.do/controller": "second-operator"
@ -198,15 +199,16 @@ spec:
# Enables change data capture streams for defined database tables # Enables change data capture streams for defined database tables
# streams: # streams:
# - database: foo # - applicationId: test-app
# database: foo
# tables: # tables:
# data.ta: # data.tab_a:
# eventType: event_type_a # eventType: event_type_a
# data.tb: # data.tab_b:
# eventType: event_type_b # eventType: event_type_b
# idColumn: tb_id # idColumn: tb_id
# payloadColumn: tb_payload # payloadColumn: tb_payload
# # Optional. Filter ignores events before a certain txnId and lsn. Can be used to skip bad events # # Optional. Filter ignores events before a certain txnId and lsn. Can be used to skip bad events
# filter: # filter:
# data.ta: "[?(@.source.txId > 500 && @.source.lsn > 123456)]" # data.tab_a: "[?(@.source.txId > 500 && @.source.lsn > 123456)]"
# batchSize: 1000 # batchSize: 1000

View File

@ -473,9 +473,12 @@ spec:
items: items:
type: object type: object
required: required:
- applicationId
- database - database
- tables - tables
properties: properties:
applicationId:
type: string
batchSize: batchSize:
type: integer type: integer
database: database:

View File

@ -664,8 +664,11 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{
Items: &apiextv1.JSONSchemaPropsOrArray{ Items: &apiextv1.JSONSchemaPropsOrArray{
Schema: &apiextv1.JSONSchemaProps{ Schema: &apiextv1.JSONSchemaProps{
Type: "object", Type: "object",
Required: []string{"database", "tables"}, Required: []string{"applicationId", "database", "tables"},
Properties: map[string]apiextv1.JSONSchemaProps{ Properties: map[string]apiextv1.JSONSchemaProps{
"applicationId": {
Type: "string",
},
"batchSize": { "batchSize": {
Type: "integer", Type: "integer",
}, },

View File

@ -230,10 +230,11 @@ type ConnectionPooler struct {
} }
type Stream struct { type Stream struct {
Database string `json:"database"` ApplicationId string `json:"applicationId"`
Tables map[string]StreamTable `json:"tables"` Database string `json:"database"`
Filter map[string]string `json:"filter,omitempty"` Tables map[string]StreamTable `json:"tables"`
BatchSize uint32 `json:"batchSize,omitempty"` Filter map[string]string `json:"filter,omitempty"`
BatchSize uint32 `json:"batchSize,omitempty"`
} }
type StreamTable struct { type StreamTable struct {

View File

@ -14,16 +14,24 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
func (c *Cluster) createStreams() error { func (c *Cluster) createStreams(appId string) {
c.setProcessName("creating streams") c.setProcessName("creating streams")
fes := c.generateFabricEventStream() var (
_, err := c.KubeClient.FabricEventStreams(c.Namespace).Create(context.TODO(), fes, metav1.CreateOptions{}) fes *zalandov1alpha1.FabricEventStream
if err != nil { err error
return fmt.Errorf("could not create event stream custom resource: %v", err) )
}
return nil msg := "could not create event stream custom resource with applicationId %s: %v"
fes = c.generateFabricEventStream(appId)
if err != nil {
c.logger.Warningf(msg, appId, err)
}
_, err = c.KubeClient.FabricEventStreams(c.Namespace).Create(context.TODO(), fes, metav1.CreateOptions{})
if err != nil {
c.logger.Warningf(msg, appId, err)
}
} }
func (c *Cluster) updateStreams(newEventStreams *zalandov1alpha1.FabricEventStream) error { func (c *Cluster) updateStreams(newEventStreams *zalandov1alpha1.FabricEventStream) error {
@ -54,21 +62,34 @@ func (c *Cluster) deleteStreams() error {
return nil return nil
} }
// gatherApplicationIds returns the distinct ApplicationId values found in
// streams, in order of first appearance. The result is never nil; it is an
// empty slice when streams has no elements.
func gatherApplicationIds(streams []acidv1.Stream) []string {
	distinct := []string{}
	for i := range streams {
		id := streams[i].ApplicationId
		if util.SliceContains(distinct, id) {
			// already recorded by an earlier stream
			continue
		}
		distinct = append(distinct, id)
	}
	return distinct
}
func (c *Cluster) syncPostgresConfig() error { func (c *Cluster) syncPostgresConfig() error {
slots := make(map[string]map[string]string)
desiredPatroniConfig := c.Spec.Patroni desiredPatroniConfig := c.Spec.Patroni
slots := desiredPatroniConfig.Slots if len(desiredPatroniConfig.Slots) > 0 {
slots = desiredPatroniConfig.Slots
}
for _, stream := range c.Spec.Streams { for _, stream := range c.Spec.Streams {
slotName := c.getLogicalReplicationSlot(stream.Database) slot := map[string]string{
"database": stream.Database,
if slotName == "" { "plugin": "wal2json",
slot := map[string]string{ "type": "logical",
"database": stream.Database, }
"plugin": "wal2json", slotName := constants.EventStreamSourceSlotPrefix + "_" + stream.Database + "_" + stream.ApplicationId
"type": "logical", if _, exists := slots[slotName]; !exists {
} slots[slotName] = slot
slots[constants.EventStreamSourceSlotPrefix+"_"+stream.Database] = slot
} }
} }
@ -107,16 +128,13 @@ func (c *Cluster) syncPostgresConfig() error {
return nil return nil
} }
func (c *Cluster) generateFabricEventStream() *zalandov1alpha1.FabricEventStream { func (c *Cluster) generateFabricEventStream(appId string) *zalandov1alpha1.FabricEventStream {
var applicationId string
eventStreams := make([]zalandov1alpha1.EventStream, 0) eventStreams := make([]zalandov1alpha1.EventStream, 0)
// take application label from manifest
if spec, err := c.GetSpec(); err == nil {
applicationId = spec.ObjectMeta.Labels["application"]
}
for _, stream := range c.Spec.Streams { for _, stream := range c.Spec.Streams {
if stream.ApplicationId != appId {
continue
}
for tableName, table := range stream.Tables { for tableName, table := range stream.Tables {
streamSource := c.getEventStreamSource(stream, tableName, table.IdColumn) streamSource := c.getEventStreamSource(stream, tableName, table.IdColumn)
streamFlow := getEventStreamFlow(stream, table.PayloadColumn) streamFlow := getEventStreamFlow(stream, table.PayloadColumn)
@ -135,14 +153,14 @@ func (c *Cluster) generateFabricEventStream() *zalandov1alpha1.FabricEventStream
APIVersion: "zalando.org/v1alpha1", APIVersion: "zalando.org/v1alpha1",
}, },
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: c.Name, Name: c.Name + "-" + appId,
Namespace: c.Namespace, Namespace: c.Namespace,
Annotations: c.AnnotationsToPropagate(c.annotationsSet(nil)), Annotations: c.AnnotationsToPropagate(c.annotationsSet(nil)),
// make cluster StatefulSet the owner (like with connection pooler objects) // make cluster StatefulSet the owner (like with connection pooler objects)
OwnerReferences: c.ownerReferences(), OwnerReferences: c.ownerReferences(),
}, },
Spec: zalandov1alpha1.FabricEventStreamSpec{ Spec: zalandov1alpha1.FabricEventStreamSpec{
ApplicationId: applicationId, ApplicationId: appId,
EventStreams: eventStreams, EventStreams: eventStreams,
}, },
} }
@ -156,7 +174,10 @@ func (c *Cluster) getEventStreamSource(stream acidv1.Stream, tableName, idColumn
Schema: schema, Schema: schema,
EventStreamTable: getOutboxTable(table, idColumn), EventStreamTable: getOutboxTable(table, idColumn),
Filter: streamFilter, Filter: streamFilter,
Connection: c.getStreamConnection(stream.Database, constants.EventStreamSourceSlotPrefix+constants.UserRoleNameSuffix), Connection: c.getStreamConnection(
stream.Database,
constants.EventStreamSourceSlotPrefix+constants.UserRoleNameSuffix,
stream.ApplicationId),
} }
} }
@ -193,10 +214,10 @@ func getOutboxTable(tableName, idColumn string) zalandov1alpha1.EventStreamTable
} }
} }
func (c *Cluster) getStreamConnection(database, user string) zalandov1alpha1.Connection { func (c *Cluster) getStreamConnection(database, user, appId string) zalandov1alpha1.Connection {
return zalandov1alpha1.Connection{ return zalandov1alpha1.Connection{
Url: fmt.Sprintf("jdbc:postgresql://%s.%s/%s?user=%s&ssl=true&sslmode=require", c.Name, c.Namespace, database, user), Url: fmt.Sprintf("jdbc:postgresql://%s.%s/%s?user=%s&ssl=true&sslmode=require", c.Name, c.Namespace, database, user),
SlotName: c.getLogicalReplicationSlot(database), SlotName: constants.EventStreamSourceSlotPrefix + "_" + database + "_" + appId,
DBAuth: zalandov1alpha1.DBAuth{ DBAuth: zalandov1alpha1.DBAuth{
Type: constants.EventStreamSourceAuthType, Type: constants.EventStreamSourceAuthType,
Name: c.credentialSecretNameForCluster(user, c.Name), Name: c.credentialSecretNameForCluster(user, c.Name),
@ -206,16 +227,6 @@ func (c *Cluster) getStreamConnection(database, user string) zalandov1alpha1.Con
} }
} }
// getLogicalReplicationSlot looks through the slots configured under
// c.Spec.Patroni.Slots for one whose "type" is "logical", whose "database"
// equals the given database and whose "plugin" is "wal2json", and returns the
// name of the first such slot encountered during iteration. If no configured
// slot matches, it returns the default name built from
// constants.EventStreamSourceSlotPrefix, "_" and the database.
func (c *Cluster) getLogicalReplicationSlot(database string) string {
	for name, cfg := range c.Spec.Patroni.Slots {
		if cfg["type"] != "logical" {
			continue
		}
		if cfg["database"] != database {
			continue
		}
		if cfg["plugin"] != "wal2json" {
			continue
		}
		return name
	}

	return constants.EventStreamSourceSlotPrefix + "_" + database
}
func (c *Cluster) syncStreams() error { func (c *Cluster) syncStreams() error {
_, err := c.KubeClient.CustomResourceDefinitions().Get(context.TODO(), constants.EventStreamSourceCRDName, metav1.GetOptions{}) _, err := c.KubeClient.CustomResourceDefinitions().Get(context.TODO(), constants.EventStreamSourceCRDName, metav1.GetOptions{})
@ -241,25 +252,26 @@ func (c *Cluster) createOrUpdateStreams() error {
return fmt.Errorf("could not update Postgres config for event streaming: %v", err) return fmt.Errorf("could not update Postgres config for event streaming: %v", err)
} }
effectiveStreams, err := c.KubeClient.FabricEventStreams(c.Namespace).Get(context.TODO(), c.Name, metav1.GetOptions{}) appIds := gatherApplicationIds(c.Spec.Streams)
if err != nil { for _, appId := range appIds {
if !k8sutil.ResourceNotFound(err) { fesName := c.Name + "-" + appId
return fmt.Errorf("error during reading of event streams: %v", err) effectiveStreams, err := c.KubeClient.FabricEventStreams(c.Namespace).Get(context.TODO(), fesName, metav1.GetOptions{})
}
c.logger.Infof("event streams do not exist, create it")
err := c.createStreams()
if err != nil { if err != nil {
return fmt.Errorf("event streams creation failed: %v", err) if !k8sutil.ResourceNotFound(err) {
} return fmt.Errorf("failed reading event stream %s: %v", fesName, err)
} else { }
desiredStreams := c.generateFabricEventStream()
if !reflect.DeepEqual(effectiveStreams.Spec, desiredStreams.Spec) { c.logger.Infof("event streams do not exist, create it")
c.logger.Debug("updating event streams") c.createStreams(appId)
desiredStreams.ObjectMeta.ResourceVersion = effectiveStreams.ObjectMeta.ResourceVersion } else {
err = c.updateStreams(desiredStreams) desiredStreams := c.generateFabricEventStream(appId)
if err != nil { if !reflect.DeepEqual(effectiveStreams.Spec, desiredStreams.Spec) {
return fmt.Errorf("event streams update failed: %v", err) c.logger.Debug("updating event streams")
desiredStreams.ObjectMeta.ResourceVersion = effectiveStreams.ObjectMeta.ResourceVersion
err = c.updateStreams(desiredStreams)
if err != nil {
return fmt.Errorf("failed updating event stream %s: %v", fesName, err)
}
} }
} }
} }

View File

@ -37,7 +37,10 @@ func newFakeK8sStreamClient() (k8sutil.KubernetesClient, *fake.Clientset) {
var ( var (
clusterName string = "acid-test-cluster" clusterName string = "acid-test-cluster"
namespace string = "default" namespace string = "default"
appId string = "test-app"
dbName string = "foo"
fesUser string = constants.EventStreamSourceSlotPrefix + constants.UserRoleNameSuffix fesUser string = constants.EventStreamSourceSlotPrefix + constants.UserRoleNameSuffix
fesName string = clusterName + "-" + appId
pg = acidv1.Postgresql{ pg = acidv1.Postgresql{
TypeMeta: metav1.TypeMeta{ TypeMeta: metav1.TypeMeta{
@ -47,15 +50,15 @@ var (
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: clusterName, Name: clusterName,
Namespace: namespace, Namespace: namespace,
Labels: map[string]string{"application": "test"},
}, },
Spec: acidv1.PostgresSpec{ Spec: acidv1.PostgresSpec{
Databases: map[string]string{ Databases: map[string]string{
"foo": "foo_user", dbName: dbName + constants.UserRoleNameSuffix,
}, },
Streams: []acidv1.Stream{ Streams: []acidv1.Stream{
{ {
Database: "foo", ApplicationId: appId,
Database: "foo",
Tables: map[string]acidv1.StreamTable{ Tables: map[string]acidv1.StreamTable{
"data.bar": acidv1.StreamTable{ "data.bar": acidv1.StreamTable{
EventType: "stream_type_a", EventType: "stream_type_a",
@ -69,9 +72,6 @@ var (
BatchSize: uint32(100), BatchSize: uint32(100),
}, },
}, },
Users: map[string]acidv1.UserFlags{
"foo_user": []string{"replication"},
},
Volume: acidv1.Volume{ Volume: acidv1.Volume{
Size: "1Gi", Size: "1Gi",
}, },
@ -84,7 +84,7 @@ var (
APIVersion: "zalando.org/v1alpha1", APIVersion: "zalando.org/v1alpha1",
}, },
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: clusterName, Name: fesName,
Namespace: namespace, Namespace: namespace,
OwnerReferences: []metav1.OwnerReference{ OwnerReferences: []metav1.OwnerReference{
metav1.OwnerReference{ metav1.OwnerReference{
@ -96,7 +96,7 @@ var (
}, },
}, },
Spec: v1alpha1.FabricEventStreamSpec{ Spec: v1alpha1.FabricEventStreamSpec{
ApplicationId: "test", ApplicationId: appId,
EventStreams: []v1alpha1.EventStream{ EventStreams: []v1alpha1.EventStream{
{ {
EventStreamFlow: v1alpha1.EventStreamFlow{ EventStreamFlow: v1alpha1.EventStreamFlow{
@ -118,7 +118,7 @@ var (
UserKey: "username", UserKey: "username",
}, },
Url: fmt.Sprintf("jdbc:postgresql://%s.%s/foo?user=%s&ssl=true&sslmode=require", clusterName, namespace, fesUser), Url: fmt.Sprintf("jdbc:postgresql://%s.%s/foo?user=%s&ssl=true&sslmode=require", clusterName, namespace, fesUser),
SlotName: "fes_foo", SlotName: fmt.Sprintf("%s_%s_%s", constants.EventStreamSourceSlotPrefix, dbName, appId),
}, },
Schema: "data", Schema: "data",
EventStreamTable: v1alpha1.EventStreamTable{ EventStreamTable: v1alpha1.EventStreamTable{
@ -164,13 +164,13 @@ func TestGenerateFabricEventStream(t *testing.T) {
err = cluster.createOrUpdateStreams() err = cluster.createOrUpdateStreams()
assert.NoError(t, err) assert.NoError(t, err)
result := cluster.generateFabricEventStream() result := cluster.generateFabricEventStream(appId)
if !reflect.DeepEqual(result, fes) { if !reflect.DeepEqual(result, fes) {
t.Errorf("Malformed FabricEventStream, expected %#v, got %#v", fes, result) t.Errorf("Malformed FabricEventStream, expected %#v, got %#v", fes, result)
} }
streamCRD, err := cluster.KubeClient.FabricEventStreams(namespace).Get(context.TODO(), cluster.Name, metav1.GetOptions{}) streamCRD, err := cluster.KubeClient.FabricEventStreams(namespace).Get(context.TODO(), fesName, metav1.GetOptions{})
assert.NoError(t, err) assert.NoError(t, err)
if !reflect.DeepEqual(streamCRD, fes) { if !reflect.DeepEqual(streamCRD, fes) {
@ -206,7 +206,8 @@ func TestUpdateFabricEventStream(t *testing.T) {
var pgSpec acidv1.PostgresSpec var pgSpec acidv1.PostgresSpec
pgSpec.Streams = []acidv1.Stream{ pgSpec.Streams = []acidv1.Stream{
{ {
Database: "foo", ApplicationId: appId,
Database: dbName,
Tables: map[string]acidv1.StreamTable{ Tables: map[string]acidv1.StreamTable{
"data.bar": acidv1.StreamTable{ "data.bar": acidv1.StreamTable{
EventType: "stream_type_b", EventType: "stream_type_b",
@ -230,10 +231,10 @@ func TestUpdateFabricEventStream(t *testing.T) {
err = cluster.createOrUpdateStreams() err = cluster.createOrUpdateStreams()
assert.NoError(t, err) assert.NoError(t, err)
streamCRD, err := cluster.KubeClient.FabricEventStreams(namespace).Get(context.TODO(), cluster.Name, metav1.GetOptions{}) streamCRD, err := cluster.KubeClient.FabricEventStreams(namespace).Get(context.TODO(), fesName, metav1.GetOptions{})
assert.NoError(t, err) assert.NoError(t, err)
result := cluster.generateFabricEventStream() result := cluster.generateFabricEventStream(appId)
if !reflect.DeepEqual(result, streamCRD) { if !reflect.DeepEqual(result, streamCRD) {
t.Errorf("Malformed FabricEventStream, expected %#v, got %#v", streamCRD, result) t.Errorf("Malformed FabricEventStream, expected %#v, got %#v", streamCRD, result)
} }