stop retention user cleanup early again when DB connection attempt fails (#2999)

* stop retention user cleanup early again when DB connection attempt fails
* add unit test and new returned error from updateSecret
This commit is contained in:
Felix Kunde 2025-12-10 10:01:07 +01:00 committed by GitHub
parent 42bbead4c9
commit 04ad66f701
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 83 additions and 17 deletions

View File

@ -281,9 +281,23 @@ func findUsersFromRotation(rotatedUsers []string, db *sql.DB) (map[string]string
return extraUsers, nil
}
func (c *Cluster) cleanupRotatedUsers(rotatedUsers []string, db *sql.DB) error {
func (c *Cluster) cleanupRotatedUsers(rotatedUsers []string) error {
c.setProcessName("checking for rotated users to remove from the database due to configured retention")
extraUsers, err := findUsersFromRotation(rotatedUsers, db)
err := c.initDbConn()
if err != nil {
return fmt.Errorf("could not init db connection: %v", err)
}
defer func() {
if c.connectionIsClosed() {
return
}
if err := c.closeDbConn(); err != nil {
c.logger.Errorf("could not close database connection after removing users exceeding configured retention interval: %v", err)
}
}()
extraUsers, err := findUsersFromRotation(rotatedUsers, c.pgDb)
if err != nil {
return fmt.Errorf("error when querying for deprecated users from password rotation: %v", err)
}
@ -304,7 +318,7 @@ func (c *Cluster) cleanupRotatedUsers(rotatedUsers []string, db *sql.DB) error {
}
if retentionDate.After(userCreationDate) {
c.logger.Infof("dropping user %q due to configured days in password_rotation_user_retention", rotatedUser)
if err = users.DropPgUser(rotatedUser, db); err != nil {
if err = users.DropPgUser(rotatedUser, c.pgDb); err != nil {
c.logger.Errorf("could not drop role %q: %v", rotatedUser, err)
continue
}

View File

@ -1078,7 +1078,7 @@ func (c *Cluster) syncSecrets() error {
c.Secrets[updatedSecret.UID] = updatedSecret
continue
}
errors = append(errors, fmt.Sprintf("syncing secret %s failed: %v", util.NameFromMeta(updatedSecret.ObjectMeta), err))
errors = append(errors, fmt.Sprintf("syncing secret %s failed: %v", util.NameFromMeta(generatedSecret.ObjectMeta), err))
pgUserDegraded = true
} else {
errors = append(errors, fmt.Sprintf("could not create secret for user %s: in namespace %s: %v", secretUsername, generatedSecret.Namespace, err))
@ -1089,16 +1089,9 @@ func (c *Cluster) syncSecrets() error {
// remove rotation users that exceed the retention interval
if len(retentionUsers) > 0 {
err := c.initDbConn()
if err != nil {
errors = append(errors, fmt.Sprintf("could not init db connection: %v", err))
}
if err = c.cleanupRotatedUsers(retentionUsers, c.pgDb); err != nil {
if err := c.cleanupRotatedUsers(retentionUsers); err != nil {
errors = append(errors, fmt.Sprintf("error removing users exceeding configured retention interval: %v", err))
}
if err := c.closeDbConn(); err != nil {
errors = append(errors, fmt.Sprintf("could not close database connection after removing users exceeding configured retention interval: %v", err))
}
}
if len(errors) > 0 {
@ -1187,13 +1180,18 @@ func (c *Cluster) updateSecret(
}
} else {
// username might not match if password rotation has been disabled again
if secretUsername != string(secret.Data["username"]) {
usernameFromSecret := string(secret.Data["username"])
if secretUsername != usernameFromSecret {
// handle edge case when manifest user conflicts with a user from prepared databases
if strings.Replace(usernameFromSecret, "-", "_", -1) == strings.Replace(secretUsername, "-", "_", -1) {
return nil, fmt.Errorf("could not update secret because of user name mismatch: expected: %s, got: %s", secretUsername, usernameFromSecret)
}
*retentionUsers = append(*retentionUsers, secretUsername)
secret.Data["username"] = []byte(secretUsername)
secret.Data["password"] = []byte(util.RandomPassword(constants.PasswordLength))
secret.Data["nextRotation"] = []byte{}
updateSecret = true
updateSecretMsg = fmt.Sprintf("secret %s does not contain the role %s - updating username and resetting password", secretName, secretUsername)
updateSecretMsg = fmt.Sprintf("secret does not contain the role %s - updating username and resetting password", secretUsername)
}
}
@ -1223,18 +1221,18 @@ func (c *Cluster) updateSecret(
if updateSecret {
c.logger.Infof("%s", updateSecretMsg)
if secret, err = c.KubeClient.Secrets(secret.Namespace).Update(context.TODO(), secret, metav1.UpdateOptions{}); err != nil {
return secret, fmt.Errorf("could not update secret %s: %v", secretName, err)
return nil, fmt.Errorf("could not update secret: %v", err)
}
}
if changed, _ := c.compareAnnotations(secret.Annotations, generatedSecret.Annotations, nil); changed {
patchData, err := metaAnnotationsPatch(generatedSecret.Annotations)
if err != nil {
return secret, fmt.Errorf("could not form patch for secret %q annotations: %v", secret.Name, err)
return nil, fmt.Errorf("could not form patch for secret annotations: %v", err)
}
secret, err = c.KubeClient.Secrets(secret.Namespace).Patch(context.TODO(), secret.Name, types.MergePatchType, []byte(patchData), metav1.PatchOptions{})
if err != nil {
return secret, fmt.Errorf("could not patch annotations for secret %q: %v", secret.Name, err)
return nil, fmt.Errorf("could not patch annotations for secret: %v", err)
}
}

View File

@ -964,3 +964,57 @@ func TestUpdateSecret(t *testing.T) {
t.Errorf("%s: updated secret does not contain expected username: expected %s, got %s", testName, appUser, currentUsername)
}
}
func TestUpdateSecretNameConflict(t *testing.T) {
client, _ := newFakeK8sSyncSecretsClient()
clusterName := "acid-test-cluster"
namespace := "default"
secretTemplate := config.StringTemplate("{username}.{cluster}.credentials")
// define manifest user that has the same name as a prepared database owner user except for dashes vs underscores
// because of this the operator cannot create both secrets because underscores are not allowed in k8s secret names
pg := acidv1.Postgresql{
ObjectMeta: metav1.ObjectMeta{
Name: clusterName,
Namespace: namespace,
},
Spec: acidv1.PostgresSpec{
PreparedDatabases: map[string]acidv1.PreparedDatabase{"prepared": {DefaultUsers: true}},
Users: map[string]acidv1.UserFlags{"prepared-owner-user": {}},
Volume: acidv1.Volume{
Size: "1Gi",
},
},
}
var cluster = New(
Config{
OpConfig: config.Config{
Auth: config.Auth{
SuperUsername: "postgres",
ReplicationUsername: "standby",
SecretNameTemplate: secretTemplate,
},
Resources: config.Resources{
ClusterLabels: map[string]string{"application": "spilo"},
ClusterNameLabel: "cluster-name",
},
},
}, client, pg, logger, eventRecorder)
cluster.Name = clusterName
cluster.Namespace = namespace
cluster.pgUsers = map[string]spec.PgUser{}
// init all users
cluster.initUsers()
// create secrets and fail because of user name mismatch
// prepared-owner-user from manifest vs prepared_owner_user from prepared database
err := cluster.syncSecrets()
assert.Error(t, err)
// the order of secrets to sync is not deterministic, check only first part of the error message
expectedError := fmt.Sprintf("syncing secret %s failed: could not update secret because of user name mismatch", "default/prepared-owner-user.acid-test-cluster.credentials")
assert.Contains(t, err.Error(), expectedError)
}