feat(logical-backup): add configurable job history limits and TTL

Adds three new configuration options for logical backup cronjobs:
- logical_backup_successful_jobs_history_limit (default: 3)
- logical_backup_failed_jobs_history_limit (default: 3)
- logical_backup_ttl_seconds_after_finished (default: 86400)

These options control how many successful and failed backup jobs are
retained by Kubernetes and how long finished jobs are kept before they
are automatically deleted. This prevents old backup jobs and pods from
accumulating in namespaces with many PostgreSQL clusters.

Also updates the CronJob comparison logic to detect changes in
these new fields and trigger reconciliation when needed.

Closes zalando/postgres-operator#1092
This commit is contained in:
Jairo Llopis 2026-05-06 10:45:40 +01:00
parent e1713705f4
commit fbfae0e3df
No known key found for this signature in database
GPG Key ID: B24A1D10508180D8
9 changed files with 102 additions and 5 deletions

View File

@ -561,6 +561,18 @@ spec:
default: "30 00 * * *"
logical_backup_cronjob_environment_secret:
type: string
logical_backup_failed_jobs_history_limit:
type: integer
minimum: 0
default: 3
logical_backup_successful_jobs_history_limit:
type: integer
minimum: 0
default: 3
logical_backup_ttl_seconds_after_finished:
type: integer
minimum: 0
default: 86400
debug:
type: object
properties:

View File

@ -399,6 +399,12 @@ configLogicalBackup:
logical_backup_schedule: "30 00 * * *"
# secret to be used as reference for env variables in cronjob
logical_backup_cronjob_environment_secret: ""
# number of successful backup jobs to keep in cronjob history
logical_backup_successful_jobs_history_limit: 3
# number of failed backup jobs to keep in cronjob history
logical_backup_failed_jobs_history_limit: 3
# lifetime in seconds of a finished backup job before it is automatically deleted
logical_backup_ttl_seconds_after_finished: 86400
# automate creation of human users with teams API service
configTeamsApi:

View File

@ -244,6 +244,9 @@ type OperatorLogicalBackupConfiguration struct {
MemoryRequest string `json:"logical_backup_memory_request,omitempty"`
CPULimit string `json:"logical_backup_cpu_limit,omitempty"`
MemoryLimit string `json:"logical_backup_memory_limit,omitempty"`
SuccessfulJobsHistoryLimit *int32 `json:"logical_backup_successful_jobs_history_limit,omitempty"`
FailedJobsHistoryLimit *int32 `json:"logical_backup_failed_jobs_history_limit,omitempty"`
TTLSecondsAfterFinished *int32 `json:"logical_backup_ttl_seconds_after_finished,omitempty"`
}
// PatroniConfiguration defines configuration for Patroni

View File

@ -896,6 +896,21 @@ func (c *Cluster) compareLogicalBackupJob(cur, new *batchv1.CronJob) *compareLog
reasons = append(reasons, fmt.Sprintf("logical backup container specs do not match: %v", strings.Join(contReasons, `', '`)))
}
if !reflect.DeepEqual(cur.Spec.SuccessfulJobsHistoryLimit, new.Spec.SuccessfulJobsHistoryLimit) {
match = false
reasons = append(reasons, fmt.Sprintf("new job's successfulJobsHistoryLimit %v does not match the current one %v", new.Spec.SuccessfulJobsHistoryLimit, cur.Spec.SuccessfulJobsHistoryLimit))
}
if !reflect.DeepEqual(cur.Spec.FailedJobsHistoryLimit, new.Spec.FailedJobsHistoryLimit) {
match = false
reasons = append(reasons, fmt.Sprintf("new job's failedJobsHistoryLimit %v does not match the current one %v", new.Spec.FailedJobsHistoryLimit, cur.Spec.FailedJobsHistoryLimit))
}
if !reflect.DeepEqual(cur.Spec.JobTemplate.Spec.TTLSecondsAfterFinished, new.Spec.JobTemplate.Spec.TTLSecondsAfterFinished) {
match = false
reasons = append(reasons, fmt.Sprintf("new job's TTLSecondsAfterFinished %v does not match the current one %v", new.Spec.JobTemplate.Spec.TTLSecondsAfterFinished, cur.Spec.JobTemplate.Spec.TTLSecondsAfterFinished))
}
return &compareLogicalBackupJobResult{match: match, reasons: reasons, deletedPodAnnotations: deletedPodAnnotations}
}

View File

@ -1524,12 +1524,21 @@ func TestCompareServices(t *testing.T) {
}
}
// Default history limits and TTL used by the CronJob test fixtures.
// They are package-level variables rather than constants because
// newCronJob takes their addresses to fill the pointer-typed spec fields.
var (
	defaultSuccessfulJobsHistoryLimit int32 = 3
	defaultFailedJobsHistoryLimit     int32 = 3
	defaultTTLSecondsAfterFinished    int32 = 86400
)
func newCronJob(image, schedule string, vars []v1.EnvVar, mounts []v1.VolumeMount) *batchv1.CronJob {
cron := &batchv1.CronJob{
Spec: batchv1.CronJobSpec{
Schedule: schedule,
Schedule: schedule,
SuccessfulJobsHistoryLimit: &defaultSuccessfulJobsHistoryLimit,
FailedJobsHistoryLimit: &defaultFailedJobsHistoryLimit,
JobTemplate: batchv1.JobTemplateSpec{
Spec: batchv1.JobSpec{
TTLSecondsAfterFinished: &defaultTTLSecondsAfterFinished,
Template: v1.PodTemplateSpec{
Spec: v1.PodSpec{
Containers: []v1.Container{

View File

@ -2379,7 +2379,13 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
// configure a batch job
jobSpec := batchv1.JobSpec{
Template: *podTemplate,
Template: *podTemplate,
TTLSecondsAfterFinished: c.OpConfig.LogicalBackup.LogicalBackupTTLSecondsAfterFinished,
}
if jobSpec.TTLSecondsAfterFinished == nil {
defaultTTL := int32(86400)
jobSpec.TTLSecondsAfterFinished = &defaultTTL
}
// configure a cron job
@ -2393,6 +2399,18 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
schedule = c.OpConfig.LogicalBackupSchedule
}
successfulJobsHistoryLimit := c.OpConfig.LogicalBackup.LogicalBackupSuccessfulJobsHistoryLimit
if successfulJobsHistoryLimit == nil {
defaultLimit := int32(3)
successfulJobsHistoryLimit = &defaultLimit
}
failedJobsHistoryLimit := c.OpConfig.LogicalBackup.LogicalBackupFailedJobsHistoryLimit
if failedJobsHistoryLimit == nil {
defaultLimit := int32(3)
failedJobsHistoryLimit = &defaultLimit
}
cronJob := &batchv1.CronJob{
ObjectMeta: metav1.ObjectMeta{
Name: c.getLogicalBackupJobName(),
@ -2402,9 +2420,11 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
OwnerReferences: c.ownerReferences(),
},
Spec: batchv1.CronJobSpec{
Schedule: schedule,
JobTemplate: jobTemplateSpec,
ConcurrencyPolicy: batchv1.ForbidConcurrent,
Schedule: schedule,
JobTemplate: jobTemplateSpec,
ConcurrencyPolicy: batchv1.ForbidConcurrent,
SuccessfulJobsHistoryLimit: successfulJobsHistoryLimit,
FailedJobsHistoryLimit: failedJobsHistoryLimit,
},
}

View File

@ -4040,6 +4040,32 @@ func TestGenerateLogicalBackupJob(t *testing.T) {
if !reflect.DeepEqual(tt.expectedResources, clusterResources) {
t.Errorf("%s - %s: expected resources %#v, got %#v", t.Name(), tt.subTest, tt.expectedResources, clusterResources)
}
expectedSuccessfulJobsHistoryLimit := int32(3)
if cluster.OpConfig.LogicalBackup.LogicalBackupSuccessfulJobsHistoryLimit != nil {
expectedSuccessfulJobsHistoryLimit = *cluster.OpConfig.LogicalBackup.LogicalBackupSuccessfulJobsHistoryLimit
}
if *cronJob.Spec.SuccessfulJobsHistoryLimit != expectedSuccessfulJobsHistoryLimit {
t.Errorf("%s - %s: expected successfulJobsHistoryLimit %d, got %d", t.Name(), tt.subTest, expectedSuccessfulJobsHistoryLimit, *cronJob.Spec.SuccessfulJobsHistoryLimit)
}
expectedFailedJobsHistoryLimit := int32(3)
if cluster.OpConfig.LogicalBackup.LogicalBackupFailedJobsHistoryLimit != nil {
expectedFailedJobsHistoryLimit = *cluster.OpConfig.LogicalBackup.LogicalBackupFailedJobsHistoryLimit
}
if *cronJob.Spec.FailedJobsHistoryLimit != expectedFailedJobsHistoryLimit {
t.Errorf("%s - %s: expected failedJobsHistoryLimit %d, got %d", t.Name(), tt.subTest, expectedFailedJobsHistoryLimit, *cronJob.Spec.FailedJobsHistoryLimit)
}
expectedTTL := int32(86400)
if cluster.OpConfig.LogicalBackup.LogicalBackupTTLSecondsAfterFinished != nil {
expectedTTL = *cluster.OpConfig.LogicalBackup.LogicalBackupTTLSecondsAfterFinished
}
if cronJob.Spec.JobTemplate.Spec.TTLSecondsAfterFinished == nil {
t.Errorf("%s - %s: expected TTLSecondsAfterFinished to be set", t.Name(), tt.subTest)
} else if *cronJob.Spec.JobTemplate.Spec.TTLSecondsAfterFinished != expectedTTL {
t.Errorf("%s - %s: expected TTLSecondsAfterFinished %d, got %d", t.Name(), tt.subTest, expectedTTL, *cronJob.Spec.JobTemplate.Spec.TTLSecondsAfterFinished)
}
}
}

View File

@ -212,6 +212,9 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.LogicalBackupMemoryRequest = fromCRD.LogicalBackup.MemoryRequest
result.LogicalBackupCPULimit = fromCRD.LogicalBackup.CPULimit
result.LogicalBackupMemoryLimit = fromCRD.LogicalBackup.MemoryLimit
result.LogicalBackupSuccessfulJobsHistoryLimit = util.CoalesceInt32(fromCRD.LogicalBackup.SuccessfulJobsHistoryLimit, k8sutil.Int32ToPointer(3))
result.LogicalBackupFailedJobsHistoryLimit = util.CoalesceInt32(fromCRD.LogicalBackup.FailedJobsHistoryLimit, k8sutil.Int32ToPointer(3))
result.LogicalBackupTTLSecondsAfterFinished = fromCRD.LogicalBackup.TTLSecondsAfterFinished
// debug config
result.DebugLogging = fromCRD.OperatorDebug.DebugLogging

View File

@ -148,6 +148,9 @@ type LogicalBackup struct {
LogicalBackupMemoryRequest string `name:"logical_backup_memory_request"`
LogicalBackupCPULimit string `name:"logical_backup_cpu_limit"`
LogicalBackupMemoryLimit string `name:"logical_backup_memory_limit"`
LogicalBackupSuccessfulJobsHistoryLimit *int32 `name:"logical_backup_successful_jobs_history_limit" default:"3"`
LogicalBackupFailedJobsHistoryLimit *int32 `name:"logical_backup_failed_jobs_history_limit" default:"3"`
LogicalBackupTTLSecondsAfterFinished *int32 `name:"logical_backup_ttl_seconds_after_finished" default:"86400"`
}
// Operator options for connection pooler