From ec7302c9dc4ac49cb020564405e34845dec90a93 Mon Sep 17 00:00:00 2001 From: yxxhero Date: Tue, 3 Mar 2026 20:45:16 +0800 Subject: [PATCH] fix: enable kubedog tracking with atomic/rollback-on-failure helm flags When using atomic: true with trackMode: kubedog, the --atomic flag was passed to Helm which caused Helm to wait for resources and rollback internally before helmfile could start kubedog tracking. This prevented users from seeing deployment logs and errors. Changes: - Add RollbackRelease method to helmexec interface and implementation - Add appendAtomicFlags function that skips --atomic when kubedog is enabled - Add handleKubedogFailure function to manually rollback on tracking failure - Update flagsForUpgrade to use new atomic flag handling - Add tests for appendAtomicFlags behavior Now when atomic: true and trackMode: kubedog are both enabled: 1. Helm upgrade runs without --atomic (no internal wait) 2. kubedog tracks resources immediately (users can see logs) 3. If kubedog tracking fails, helmfile manually executes rollback 4. Atomic semantics are preserved Fixes: #2448 Signed-off-by: yxxhero --- pkg/app/app_test.go | 3 ++ pkg/exectest/helm.go | 8 +++++ pkg/helmexec/exec.go | 9 ++++++ pkg/helmexec/helmexec.go | 1 + pkg/state/helmx.go | 21 ++++++++++++ pkg/state/helmx_test.go | 69 ++++++++++++++++++++++++++++++++++++++++ pkg/state/state.go | 33 +++++++++++++++++-- pkg/testutil/mocks.go | 4 +++ 8 files changed, 145 insertions(+), 3 deletions(-) diff --git a/pkg/app/app_test.go b/pkg/app/app_test.go index 21ec2d4e..54cd8eb7 100644 --- a/pkg/app/app_test.go +++ b/pkg/app/app_test.go @@ -2727,6 +2727,9 @@ func (helm *mockHelmExec) ReleaseStatus(context helmexec.HelmContext, release st func (helm *mockHelmExec) DeleteRelease(context helmexec.HelmContext, name string, flags ...string) error { return nil } +func (helm *mockHelmExec) RollbackRelease(context helmexec.HelmContext, name string, flags ...string) error { + return nil +} func (helm *mockHelmExec) List(context helmexec.HelmContext, filter string, flags ...string) (string, error) { return "", nil diff --git a/pkg/exectest/helm.go b/pkg/exectest/helm.go index 2531f6f1..c386ec7d 100644 --- a/pkg/exectest/helm.go +++ b/pkg/exectest/helm.go @@ -36,6 +36,7 @@ type Helm struct { RegistryLoginHost string // Captures the host passed to RegistryLogin Releases []Release Deleted []Release + Rolledback []Release Linted []Release Unittested []Release Templated []Release @@ -181,6 +182,13 @@ func (helm *Helm) DeleteRelease(context helmexec.HelmContext, name string, flags helm.Deleted = append(helm.Deleted, Release{Name: name, Flags: flags}) return nil } +func (helm *Helm) RollbackRelease(context helmexec.HelmContext, name string, flags ...string) error { + if strings.Contains(name, "error") { + return errors.New("error") + } + helm.Rolledback = append(helm.Rolledback, Release{Name: name, Flags: flags}) + return nil +} func (helm *Helm) List(context helmexec.HelmContext, filter string, flags ...string) (string, error) { key := ListKey{Filter: filter, Flags: strings.Join(flags, " ")} diff --git a/pkg/helmexec/exec.go b/pkg/helmexec/exec.go index 4b061266..14d63df5 100644 --- a/pkg/helmexec/exec.go +++ b/pkg/helmexec/exec.go @@ -837,6 +837,15 @@ func (helm *execer) DeleteRelease(context HelmContext, name string, flags ...str return err } +func (helm *execer) RollbackRelease(context HelmContext, name string, flags ...string) error { + helm.logger.Infof("Rolling back %v", name) + preArgs := make([]string, 0) + env := make(map[string]string) + out, err := helm.exec(append(append(preArgs, "rollback", name), flags...), env, nil) + helm.info(out) + return err +} + func (helm *execer) TestRelease(context HelmContext, name string, flags ...string) error { helm.logger.Infof("Testing %v", name) preArgs := make([]string, 0) diff --git a/pkg/helmexec/helmexec.go b/pkg/helmexec/helmexec.go index 2b1457c5..9c3caebc 100644 --- a/pkg/helmexec/helmexec.go +++ b/pkg/helmexec/helmexec.go @@ -31,6 +31,7 @@ type Interface interface { Unittest(name, chart string, flags ...string) error ReleaseStatus(context HelmContext, name string, flags ...string) error DeleteRelease(context HelmContext, name string, flags ...string) error + RollbackRelease(context HelmContext, name string, flags ...string) error TestRelease(context HelmContext, name string, flags ...string) error List(context HelmContext, filter string, flags ...string) (string, error) DecryptSecret(context HelmContext, name string, flags ...string) (string, error) diff --git a/pkg/state/helmx.go b/pkg/state/helmx.go index 988afc08..b49eb114 100644 --- a/pkg/state/helmx.go +++ b/pkg/state/helmx.go @@ -237,6 +237,27 @@ func (st *HelmState) appendWaitFlags(flags []string, helm helmexec.Interface, re return flags } +func (st *HelmState) shouldUseAtomic(release *ReleaseSpec, ops *SyncOpts) bool { + switch { + case release.Atomic != nil: + return *release.Atomic + default: + return st.HelmDefaults.Atomic + } +} + +func (st *HelmState) appendAtomicFlags(flags []string, release *ReleaseSpec, ops *SyncOpts) []string { + if st.shouldUseKubedog(release, ops) { + return flags + } + + if st.shouldUseAtomic(release, ops) { + flags = append(flags, "--atomic") + } + + return flags +} + // append post-renderer flags to helm flags func (st *HelmState) appendCascadeFlags(flags []string, helm helmexec.Interface, release *ReleaseSpec, cascade string) []string { // see https://github.com/helm/helm/releases/tag/v3.12.1 diff --git a/pkg/state/helmx_test.go b/pkg/state/helmx_test.go index d4cab5ff..d1163f40 100644 --- a/pkg/state/helmx_test.go +++ b/pkg/state/helmx_test.go @@ -572,3 +572,72 @@ func TestFormatLabels(t *testing.T) { }) } } + +func TestAppendAtomicFlags(t *testing.T) { + tests := []struct { + name string + release *ReleaseSpec + syncOpts *SyncOpts + helmSpec HelmSpec + expected []string + }{ + { + name: "atomic from release", + release: &ReleaseSpec{Atomic: &[]bool{true}[0]}, + syncOpts: nil, + helmSpec: HelmSpec{}, + expected: []string{"--atomic"}, + }, + { + name: "atomic from helm defaults", + release: &ReleaseSpec{}, + syncOpts: nil, + helmSpec: HelmSpec{Atomic: true}, + expected: []string{"--atomic"}, + }, + { + name: "atomic disabled in release", + release: &ReleaseSpec{Atomic: &[]bool{false}[0]}, + syncOpts: nil, + helmSpec: HelmSpec{Atomic: true}, + expected: []string{}, + }, + { + name: "no atomic", + release: &ReleaseSpec{}, + syncOpts: nil, + helmSpec: HelmSpec{}, + expected: []string{}, + }, + { + name: "atomic skipped when kubedog enabled via release", + release: &ReleaseSpec{Atomic: &[]bool{true}[0], TrackMode: "kubedog"}, + syncOpts: nil, + helmSpec: HelmSpec{}, + expected: []string{}, + }, + { + name: "atomic skipped when kubedog enabled via syncOpts", + release: &ReleaseSpec{Atomic: &[]bool{true}[0]}, + syncOpts: &SyncOpts{TrackMode: "kubedog"}, + helmSpec: HelmSpec{}, + expected: []string{}, + }, + { + name: "atomic skipped when kubedog enabled via helm defaults", + release: &ReleaseSpec{Atomic: &[]bool{true}[0]}, + syncOpts: nil, + helmSpec: HelmSpec{TrackMode: "kubedog"}, + expected: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + st := &HelmState{} + st.HelmDefaults = tt.helmSpec + got := st.appendAtomicFlags([]string{}, tt.release, tt.syncOpts) + require.Equalf(t, tt.expected, got, "appendAtomicFlags() = %v, want %v", got, tt.expected) + }) + } +} diff --git a/pkg/state/state.go b/pkg/state/state.go index cec6396d..03e56ed5 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -1141,6 +1141,7 @@ func (st *HelmState) SyncReleases(affectedReleases *AffectedReleases, helm helme if relErr == nil && st.shouldUseKubedog(release, opts) { if trackErr := st.trackWithKubedog(gocontext.Background(), release, helm, opts); trackErr != nil { st.logger.Warnf("kubedog tracking failed for release %s: %v", release.Name, trackErr) + st.handleKubedogFailure(context, helm, release, trackErr, m, affectedReleases) } } } else { @@ -1163,6 +1164,7 @@ func (st *HelmState) SyncReleases(affectedReleases *AffectedReleases, helm helme if st.shouldUseKubedog(release, opts) { if trackErr := st.trackWithKubedog(gocontext.Background(), release, helm, opts); trackErr != nil { st.logger.Warnf("kubedog tracking failed for release %s: %v", release.Name, trackErr) + st.handleKubedogFailure(context, helm, release, trackErr, m, affectedReleases) } } } @@ -1281,6 +1283,33 @@ func (st *HelmState) performSyncOrReinstallOfRelease(affectedReleases *AffectedR return nil } +func (st *HelmState) handleKubedogFailure(context helmexec.HelmContext, helm helmexec.Interface, release *ReleaseSpec, trackErr error, m *sync.Mutex, affectedReleases *AffectedReleases) { + if !st.shouldUseAtomic(release, nil) { + return + } + + st.logger.Infof("Rolling back release %s due to kubedog tracking failure (atomic mode enabled)", release.Name) + + rollbackFlags := st.appendConnectionFlags([]string{}, release) + if err := helm.RollbackRelease(context, release.Name, rollbackFlags...); err != nil { + st.logger.Errorf("Failed to rollback release %s after kubedog tracking failure: %v", release.Name, err) + m.Lock() + affectedReleases.Failed = append(affectedReleases.Failed, release) + m.Unlock() + } else { + st.logger.Infof("Successfully rolled back release %s", release.Name) + m.Lock() + for i, r := range affectedReleases.Upgraded { + if r.Name == release.Name && r.Namespace == release.Namespace { + affectedReleases.Upgraded = append(affectedReleases.Upgraded[:i], affectedReleases.Upgraded[i+1:]...) + break + } + } + affectedReleases.Failed = append(affectedReleases.Failed, release) + m.Unlock() + } +} + func (st *HelmState) listReleases(context helmexec.HelmContext, helm helmexec.Interface, release *ReleaseSpec) (string, error) { flags := st.kubeConnectionFlags(release) if release.Namespace != "" { @@ -3508,9 +3537,7 @@ func (st *HelmState) flagsForUpgrade(helm helmexec.Interface, release *ReleaseSp flags = append(flags, "--recreate-pods") } - if release.Atomic != nil && *release.Atomic || release.Atomic == nil && st.HelmDefaults.Atomic { - flags = append(flags, "--atomic") - } + flags = st.appendAtomicFlags(flags, release, opt) if release.CleanupOnFail != nil && *release.CleanupOnFail || release.CleanupOnFail == nil && st.HelmDefaults.CleanupOnFail { flags = append(flags, "--cleanup-on-fail") diff --git a/pkg/testutil/mocks.go b/pkg/testutil/mocks.go index 453a931e..5b1f46d8 100644 --- a/pkg/testutil/mocks.go +++ b/pkg/testutil/mocks.go @@ -120,6 +120,10 @@ func (helm *noCallHelmExec) DeleteRelease(context helmexec.HelmContext, name str helm.doPanic() return nil } +func (helm *noCallHelmExec) RollbackRelease(context helmexec.HelmContext, name string, flags ...string) error { + helm.doPanic() + return nil +} func (helm *noCallHelmExec) List(context helmexec.HelmContext, filter string, flags ...string) (string, error) { helm.doPanic()