feat(logical-backup): add configurable job history limits and TTL
Adds three new configuration options for logical backup cronjobs:

- logical_backup_successful_jobs_history_limit (default: 3)
- logical_backup_failed_jobs_history_limit (default: 3)
- logical_backup_ttl_seconds_after_finished (default: 86400)

These options control how many successful and failed backup jobs Kubernetes retains and when finished jobs are automatically deleted. This prevents old backup jobs and pods from accumulating in namespaces with many PostgreSQL clusters.

Also updates the CronJob comparison logic to detect changes in these new fields and trigger reconciliation when needed.

Closes zalando/postgres-operator#1092
This commit is contained in:
parent
e1713705f4
commit
fbfae0e3df
|
|
@ -561,6 +561,18 @@ spec:
|
|||
default: "30 00 * * *"
|
||||
logical_backup_cronjob_environment_secret:
|
||||
type: string
|
||||
logical_backup_failed_jobs_history_limit:
|
||||
type: integer
|
||||
minimum: 0
|
||||
default: 3
|
||||
logical_backup_successful_jobs_history_limit:
|
||||
type: integer
|
||||
minimum: 0
|
||||
default: 3
|
||||
logical_backup_ttl_seconds_after_finished:
|
||||
type: integer
|
||||
minimum: 0
|
||||
default: 86400
|
||||
debug:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -399,6 +399,12 @@ configLogicalBackup:
|
|||
logical_backup_schedule: "30 00 * * *"
|
||||
# secret to be used as reference for env variables in cronjob
|
||||
logical_backup_cronjob_environment_secret: ""
|
||||
# number of successful backup jobs to keep in cronjob history
|
||||
logical_backup_successful_jobs_history_limit: 3
|
||||
# number of failed backup jobs to keep in cronjob history
|
||||
logical_backup_failed_jobs_history_limit: 3
|
||||
# TTL in seconds after which finished backup jobs are automatically deleted
|
||||
logical_backup_ttl_seconds_after_finished: 86400
|
||||
|
||||
# automate creation of human users with teams API service
|
||||
configTeamsApi:
|
||||
|
|
|
|||
|
|
@ -244,6 +244,9 @@ type OperatorLogicalBackupConfiguration struct {
|
|||
MemoryRequest string `json:"logical_backup_memory_request,omitempty"`
|
||||
CPULimit string `json:"logical_backup_cpu_limit,omitempty"`
|
||||
MemoryLimit string `json:"logical_backup_memory_limit,omitempty"`
|
||||
SuccessfulJobsHistoryLimit *int32 `json:"logical_backup_successful_jobs_history_limit,omitempty"`
|
||||
FailedJobsHistoryLimit *int32 `json:"logical_backup_failed_jobs_history_limit,omitempty"`
|
||||
TTLSecondsAfterFinished *int32 `json:"logical_backup_ttl_seconds_after_finished,omitempty"`
|
||||
}
|
||||
|
||||
// PatroniConfiguration defines configuration for Patroni
|
||||
|
|
|
|||
|
|
@ -896,6 +896,21 @@ func (c *Cluster) compareLogicalBackupJob(cur, new *batchv1.CronJob) *compareLog
|
|||
reasons = append(reasons, fmt.Sprintf("logical backup container specs do not match: %v", strings.Join(contReasons, `', '`)))
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(cur.Spec.SuccessfulJobsHistoryLimit, new.Spec.SuccessfulJobsHistoryLimit) {
|
||||
match = false
|
||||
reasons = append(reasons, fmt.Sprintf("new job's successfulJobsHistoryLimit %v does not match the current one %v", new.Spec.SuccessfulJobsHistoryLimit, cur.Spec.SuccessfulJobsHistoryLimit))
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(cur.Spec.FailedJobsHistoryLimit, new.Spec.FailedJobsHistoryLimit) {
|
||||
match = false
|
||||
reasons = append(reasons, fmt.Sprintf("new job's failedJobsHistoryLimit %v does not match the current one %v", new.Spec.FailedJobsHistoryLimit, cur.Spec.FailedJobsHistoryLimit))
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(cur.Spec.JobTemplate.Spec.TTLSecondsAfterFinished, new.Spec.JobTemplate.Spec.TTLSecondsAfterFinished) {
|
||||
match = false
|
||||
reasons = append(reasons, fmt.Sprintf("new job's TTLSecondsAfterFinished %v does not match the current one %v", new.Spec.JobTemplate.Spec.TTLSecondsAfterFinished, cur.Spec.JobTemplate.Spec.TTLSecondsAfterFinished))
|
||||
}
|
||||
|
||||
return &compareLogicalBackupJobResult{match: match, reasons: reasons, deletedPodAnnotations: deletedPodAnnotations}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1524,12 +1524,21 @@ func TestCompareServices(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
var (
|
||||
defaultSuccessfulJobsHistoryLimit = int32(3)
|
||||
defaultFailedJobsHistoryLimit = int32(3)
|
||||
defaultTTLSecondsAfterFinished = int32(86400)
|
||||
)
|
||||
|
||||
func newCronJob(image, schedule string, vars []v1.EnvVar, mounts []v1.VolumeMount) *batchv1.CronJob {
|
||||
cron := &batchv1.CronJob{
|
||||
Spec: batchv1.CronJobSpec{
|
||||
Schedule: schedule,
|
||||
Schedule: schedule,
|
||||
SuccessfulJobsHistoryLimit: &defaultSuccessfulJobsHistoryLimit,
|
||||
FailedJobsHistoryLimit: &defaultFailedJobsHistoryLimit,
|
||||
JobTemplate: batchv1.JobTemplateSpec{
|
||||
Spec: batchv1.JobSpec{
|
||||
TTLSecondsAfterFinished: &defaultTTLSecondsAfterFinished,
|
||||
Template: v1.PodTemplateSpec{
|
||||
Spec: v1.PodSpec{
|
||||
Containers: []v1.Container{
|
||||
|
|
|
|||
|
|
@ -2379,7 +2379,13 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
|
|||
// configure a batch job
|
||||
|
||||
jobSpec := batchv1.JobSpec{
|
||||
Template: *podTemplate,
|
||||
Template: *podTemplate,
|
||||
TTLSecondsAfterFinished: c.OpConfig.LogicalBackup.LogicalBackupTTLSecondsAfterFinished,
|
||||
}
|
||||
|
||||
if jobSpec.TTLSecondsAfterFinished == nil {
|
||||
defaultTTL := int32(86400)
|
||||
jobSpec.TTLSecondsAfterFinished = &defaultTTL
|
||||
}
|
||||
|
||||
// configure a cron job
|
||||
|
|
@ -2393,6 +2399,18 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
|
|||
schedule = c.OpConfig.LogicalBackupSchedule
|
||||
}
|
||||
|
||||
successfulJobsHistoryLimit := c.OpConfig.LogicalBackup.LogicalBackupSuccessfulJobsHistoryLimit
|
||||
if successfulJobsHistoryLimit == nil {
|
||||
defaultLimit := int32(3)
|
||||
successfulJobsHistoryLimit = &defaultLimit
|
||||
}
|
||||
|
||||
failedJobsHistoryLimit := c.OpConfig.LogicalBackup.LogicalBackupFailedJobsHistoryLimit
|
||||
if failedJobsHistoryLimit == nil {
|
||||
defaultLimit := int32(3)
|
||||
failedJobsHistoryLimit = &defaultLimit
|
||||
}
|
||||
|
||||
cronJob := &batchv1.CronJob{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: c.getLogicalBackupJobName(),
|
||||
|
|
@ -2402,9 +2420,11 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
|
|||
OwnerReferences: c.ownerReferences(),
|
||||
},
|
||||
Spec: batchv1.CronJobSpec{
|
||||
Schedule: schedule,
|
||||
JobTemplate: jobTemplateSpec,
|
||||
ConcurrencyPolicy: batchv1.ForbidConcurrent,
|
||||
Schedule: schedule,
|
||||
JobTemplate: jobTemplateSpec,
|
||||
ConcurrencyPolicy: batchv1.ForbidConcurrent,
|
||||
SuccessfulJobsHistoryLimit: successfulJobsHistoryLimit,
|
||||
FailedJobsHistoryLimit: failedJobsHistoryLimit,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4040,6 +4040,32 @@ func TestGenerateLogicalBackupJob(t *testing.T) {
|
|||
if !reflect.DeepEqual(tt.expectedResources, clusterResources) {
|
||||
t.Errorf("%s - %s: expected resources %#v, got %#v", t.Name(), tt.subTest, tt.expectedResources, clusterResources)
|
||||
}
|
||||
|
||||
expectedSuccessfulJobsHistoryLimit := int32(3)
|
||||
if cluster.OpConfig.LogicalBackup.LogicalBackupSuccessfulJobsHistoryLimit != nil {
|
||||
expectedSuccessfulJobsHistoryLimit = *cluster.OpConfig.LogicalBackup.LogicalBackupSuccessfulJobsHistoryLimit
|
||||
}
|
||||
if *cronJob.Spec.SuccessfulJobsHistoryLimit != expectedSuccessfulJobsHistoryLimit {
|
||||
t.Errorf("%s - %s: expected successfulJobsHistoryLimit %d, got %d", t.Name(), tt.subTest, expectedSuccessfulJobsHistoryLimit, *cronJob.Spec.SuccessfulJobsHistoryLimit)
|
||||
}
|
||||
|
||||
expectedFailedJobsHistoryLimit := int32(3)
|
||||
if cluster.OpConfig.LogicalBackup.LogicalBackupFailedJobsHistoryLimit != nil {
|
||||
expectedFailedJobsHistoryLimit = *cluster.OpConfig.LogicalBackup.LogicalBackupFailedJobsHistoryLimit
|
||||
}
|
||||
if *cronJob.Spec.FailedJobsHistoryLimit != expectedFailedJobsHistoryLimit {
|
||||
t.Errorf("%s - %s: expected failedJobsHistoryLimit %d, got %d", t.Name(), tt.subTest, expectedFailedJobsHistoryLimit, *cronJob.Spec.FailedJobsHistoryLimit)
|
||||
}
|
||||
|
||||
expectedTTL := int32(86400)
|
||||
if cluster.OpConfig.LogicalBackup.LogicalBackupTTLSecondsAfterFinished != nil {
|
||||
expectedTTL = *cluster.OpConfig.LogicalBackup.LogicalBackupTTLSecondsAfterFinished
|
||||
}
|
||||
if cronJob.Spec.JobTemplate.Spec.TTLSecondsAfterFinished == nil {
|
||||
t.Errorf("%s - %s: expected TTLSecondsAfterFinished to be set", t.Name(), tt.subTest)
|
||||
} else if *cronJob.Spec.JobTemplate.Spec.TTLSecondsAfterFinished != expectedTTL {
|
||||
t.Errorf("%s - %s: expected TTLSecondsAfterFinished %d, got %d", t.Name(), tt.subTest, expectedTTL, *cronJob.Spec.JobTemplate.Spec.TTLSecondsAfterFinished)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -212,6 +212,9 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
|
|||
result.LogicalBackupMemoryRequest = fromCRD.LogicalBackup.MemoryRequest
|
||||
result.LogicalBackupCPULimit = fromCRD.LogicalBackup.CPULimit
|
||||
result.LogicalBackupMemoryLimit = fromCRD.LogicalBackup.MemoryLimit
|
||||
result.LogicalBackupSuccessfulJobsHistoryLimit = util.CoalesceInt32(fromCRD.LogicalBackup.SuccessfulJobsHistoryLimit, k8sutil.Int32ToPointer(3))
|
||||
result.LogicalBackupFailedJobsHistoryLimit = util.CoalesceInt32(fromCRD.LogicalBackup.FailedJobsHistoryLimit, k8sutil.Int32ToPointer(3))
|
||||
result.LogicalBackupTTLSecondsAfterFinished = fromCRD.LogicalBackup.TTLSecondsAfterFinished
|
||||
|
||||
// debug config
|
||||
result.DebugLogging = fromCRD.OperatorDebug.DebugLogging
|
||||
|
|
|
|||
|
|
@ -148,6 +148,9 @@ type LogicalBackup struct {
|
|||
LogicalBackupMemoryRequest string `name:"logical_backup_memory_request"`
|
||||
LogicalBackupCPULimit string `name:"logical_backup_cpu_limit"`
|
||||
LogicalBackupMemoryLimit string `name:"logical_backup_memory_limit"`
|
||||
LogicalBackupSuccessfulJobsHistoryLimit *int32 `name:"logical_backup_successful_jobs_history_limit" default:"3"`
|
||||
LogicalBackupFailedJobsHistoryLimit *int32 `name:"logical_backup_failed_jobs_history_limit" default:"3"`
|
||||
LogicalBackupTTLSecondsAfterFinished *int32 `name:"logical_backup_ttl_seconds_after_finished" default:"86400"`
|
||||
}
|
||||
|
||||
// Operator options for connection pooler
|
||||
|
|
|
|||
Loading…
Reference in New Issue