From 8e38fb02dd1ee4d63c016572668de3a389b47abb Mon Sep 17 00:00:00 2001 From: yxxhero Date: Tue, 3 Mar 2026 20:45:16 +0800 Subject: [PATCH] fix: enable kubedog tracking with atomic/rollback-on-failure helm flags When using atomic: true with trackMode: kubedog, the --atomic flag was passed to Helm which caused Helm to wait for resources and rollback internally before helmfile could start kubedog tracking. This prevented users from seeing deployment logs and errors. Changes: - Add RollbackRelease method to helmexec interface and implementation - Add appendAtomicFlags function that skips --atomic when kubedog is enabled - Add handleKubedogFailure function to manually rollback on tracking failure - Update flagsForUpgrade to use new atomic flag handling - Add tests for appendAtomicFlags behavior Now when atomic: true and trackMode: kubedog are both enabled: 1. Helm upgrade runs without --atomic (no internal wait) 2. kubedog tracks resources immediately (users can see logs) 3. If kubedog tracking fails, helmfile manually executes rollback 4. Atomic semantics are preserved Fixes: #2448 Signed-off-by: yxxhero --- pkg/app/app_test.go | 3 ++ pkg/exectest/helm.go | 8 +++++ pkg/helmexec/exec.go | 9 ++++++ pkg/helmexec/helmexec.go | 1 + pkg/state/helmx.go | 21 ++++++++++++ pkg/state/helmx_test.go | 69 ++++++++++++++++++++++++++++++++++++++++ pkg/state/state.go | 33 +++++++++++++++++-- pkg/testutil/mocks.go | 4 +++ 8 files changed, 145 insertions(+), 3 deletions(-) diff --git a/pkg/app/app_test.go b/pkg/app/app_test.go index ffc500fb..77bff0b9 100644 --- a/pkg/app/app_test.go +++ b/pkg/app/app_test.go @@ -2718,6 +2718,9 @@ func (helm *mockHelmExec) ReleaseStatus(context helmexec.HelmContext, release st func (helm *mockHelmExec) DeleteRelease(context helmexec.HelmContext, name string, flags ...string) error { return nil } +func (helm *mockHelmExec) RollbackRelease(context helmexec.HelmContext, name string, flags ...string) error { + return nil +} func (helm *mockHelmExec) List(context helmexec.HelmContext, filter string, flags ...string) (string, error) { return "", nil diff --git a/pkg/exectest/helm.go b/pkg/exectest/helm.go index 2531f6f1..c386ec7d 100644 --- a/pkg/exectest/helm.go +++ b/pkg/exectest/helm.go @@ -36,6 +36,7 @@ type Helm struct { RegistryLoginHost string // Captures the host passed to RegistryLogin Releases []Release Deleted []Release + Rolledback []Release Linted []Release Unittested []Release Templated []Release @@ -181,6 +182,13 @@ func (helm *Helm) DeleteRelease(context helmexec.HelmContext, name string, flags helm.Deleted = append(helm.Deleted, Release{Name: name, Flags: flags}) return nil } +func (helm *Helm) RollbackRelease(context helmexec.HelmContext, name string, flags ...string) error { + if strings.Contains(name, "error") { + return errors.New("error") + } + helm.Rolledback = append(helm.Rolledback, Release{Name: name, Flags: flags}) + return nil +} func (helm *Helm) List(context helmexec.HelmContext, filter string, flags ...string) (string, error) { key := ListKey{Filter: filter, Flags: strings.Join(flags, " ")} diff --git a/pkg/helmexec/exec.go b/pkg/helmexec/exec.go index 4b061266..14d63df5 100644 --- a/pkg/helmexec/exec.go +++ b/pkg/helmexec/exec.go @@ -837,6 +837,15 @@ func (helm *execer) DeleteRelease(context HelmContext, name string, flags ...str return err } +func (helm *execer) RollbackRelease(context HelmContext, name string, flags ...string) error { + helm.logger.Infof("Rolling back %v", name) + preArgs := make([]string, 0) + env := make(map[string]string) + out, err := helm.exec(append(append(preArgs, "rollback", name), flags...), env, nil) + helm.info(out) + return err +} + func (helm *execer) TestRelease(context HelmContext, name string, flags ...string) error { helm.logger.Infof("Testing %v", name) preArgs := make([]string, 0) diff --git a/pkg/helmexec/helmexec.go b/pkg/helmexec/helmexec.go index 2b1457c5..9c3caebc 100644 --- a/pkg/helmexec/helmexec.go +++ b/pkg/helmexec/helmexec.go @@ -31,6 +31,7 @@ type Interface interface { Unittest(name, chart string, flags ...string) error ReleaseStatus(context HelmContext, name string, flags ...string) error DeleteRelease(context HelmContext, name string, flags ...string) error + RollbackRelease(context HelmContext, name string, flags ...string) error TestRelease(context HelmContext, name string, flags ...string) error List(context HelmContext, filter string, flags ...string) (string, error) DecryptSecret(context HelmContext, name string, flags ...string) (string, error) diff --git a/pkg/state/helmx.go b/pkg/state/helmx.go index 62692e00..6ac81f76 100644 --- a/pkg/state/helmx.go +++ b/pkg/state/helmx.go @@ -214,6 +214,27 @@ func (st *HelmState) appendWaitFlags(flags []string, release *ReleaseSpec, ops * return flags } +func (st *HelmState) shouldUseAtomic(release *ReleaseSpec, ops *SyncOpts) bool { + switch { + case release.Atomic != nil: + return *release.Atomic + default: + return st.HelmDefaults.Atomic + } +} + +func (st *HelmState) appendAtomicFlags(flags []string, release *ReleaseSpec, ops *SyncOpts) []string { + if st.shouldUseKubedog(release, ops) { + return flags + } + + if st.shouldUseAtomic(release, ops) { + flags = append(flags, "--atomic") + } + + return flags +} + // append post-renderer flags to helm flags func (st *HelmState) appendCascadeFlags(flags []string, helm helmexec.Interface, release *ReleaseSpec, cascade string) []string { // see https://github.com/helm/helm/releases/tag/v3.12.1 diff --git a/pkg/state/helmx_test.go b/pkg/state/helmx_test.go index 78c0c3d1..d15000d7 100644 --- a/pkg/state/helmx_test.go +++ b/pkg/state/helmx_test.go @@ -483,3 +483,72 @@ func TestFormatLabels(t *testing.T) { }) } } + +func TestAppendAtomicFlags(t *testing.T) { + tests := []struct { + name string + release *ReleaseSpec + syncOpts *SyncOpts + helmSpec HelmSpec + expected []string + }{ + { + name: "atomic from release", + release: &ReleaseSpec{Atomic: &[]bool{true}[0]}, + syncOpts: nil, + helmSpec: HelmSpec{}, + expected: []string{"--atomic"}, + }, + { + name: "atomic from helm defaults", + release: &ReleaseSpec{}, + syncOpts: nil, + helmSpec: HelmSpec{Atomic: true}, + expected: []string{"--atomic"}, + }, + { + name: "atomic disabled in release", + release: &ReleaseSpec{Atomic: &[]bool{false}[0]}, + syncOpts: nil, + helmSpec: HelmSpec{Atomic: true}, + expected: []string{}, + }, + { + name: "no atomic", + release: &ReleaseSpec{}, + syncOpts: nil, + helmSpec: HelmSpec{}, + expected: []string{}, + }, + { + name: "atomic skipped when kubedog enabled via release", + release: &ReleaseSpec{Atomic: &[]bool{true}[0], TrackMode: "kubedog"}, + syncOpts: nil, + helmSpec: HelmSpec{}, + expected: []string{}, + }, + { + name: "atomic skipped when kubedog enabled via syncOpts", + release: &ReleaseSpec{Atomic: &[]bool{true}[0]}, + syncOpts: &SyncOpts{TrackMode: "kubedog"}, + helmSpec: HelmSpec{}, + expected: []string{}, + }, + { + name: "atomic skipped when kubedog enabled via helm defaults", + release: &ReleaseSpec{Atomic: &[]bool{true}[0]}, + syncOpts: nil, + helmSpec: HelmSpec{TrackMode: "kubedog"}, + expected: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + st := &HelmState{} + st.HelmDefaults = tt.helmSpec + got := st.appendAtomicFlags([]string{}, tt.release, tt.syncOpts) + require.Equalf(t, tt.expected, got, "appendAtomicFlags() = %v, want %v", got, tt.expected) + }) + } +} diff --git a/pkg/state/state.go b/pkg/state/state.go index ffadaf9a..1b06b335 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -1132,6 +1132,7 @@ func (st *HelmState) SyncReleases(affectedReleases *AffectedReleases, helm helme if relErr == nil && st.shouldUseKubedog(release, opts) { if trackErr := st.trackWithKubedog(gocontext.Background(), release, helm, opts); trackErr != nil { st.logger.Warnf("kubedog tracking failed for release %s: %v", release.Name, trackErr) + st.handleKubedogFailure(context, helm, release, trackErr, m, affectedReleases) } } } else { @@ -1154,6 +1155,7 @@ func (st *HelmState) SyncReleases(affectedReleases *AffectedReleases, helm helme if st.shouldUseKubedog(release, opts) { if trackErr := st.trackWithKubedog(gocontext.Background(), release, helm, opts); trackErr != nil { st.logger.Warnf("kubedog tracking failed for release %s: %v", release.Name, trackErr) + st.handleKubedogFailure(context, helm, release, trackErr, m, affectedReleases) } } } @@ -1272,6 +1274,33 @@ func (st *HelmState) performSyncOrReinstallOfRelease(affectedReleases *AffectedR return nil } +func (st *HelmState) handleKubedogFailure(context helmexec.HelmContext, helm helmexec.Interface, release *ReleaseSpec, trackErr error, m *sync.Mutex, affectedReleases *AffectedReleases) { + if !st.shouldUseAtomic(release, nil) { + return + } + + st.logger.Infof("Rolling back release %s due to kubedog tracking failure (atomic mode enabled)", release.Name) + + rollbackFlags := st.appendConnectionFlags([]string{}, release) + if err := helm.RollbackRelease(context, release.Name, rollbackFlags...); err != nil { + st.logger.Errorf("Failed to rollback release %s after kubedog tracking failure: %v", release.Name, err) + m.Lock() + affectedReleases.Failed = append(affectedReleases.Failed, release) + m.Unlock() + } else { + st.logger.Infof("Successfully rolled back release %s", release.Name) + m.Lock() + for i, r := range affectedReleases.Upgraded { + if r.Name == release.Name && r.Namespace == release.Namespace { + affectedReleases.Upgraded = append(affectedReleases.Upgraded[:i], affectedReleases.Upgraded[i+1:]...) + break + } + } + affectedReleases.Failed = append(affectedReleases.Failed, release) + m.Unlock() + } +} + func (st *HelmState) listReleases(context helmexec.HelmContext, helm helmexec.Interface, release *ReleaseSpec) (string, error) { flags := st.kubeConnectionFlags(release) if release.Namespace != "" { @@ -3453,9 +3482,7 @@ func (st *HelmState) flagsForUpgrade(helm helmexec.Interface, release *ReleaseSp flags = append(flags, "--recreate-pods") } - if release.Atomic != nil && *release.Atomic || release.Atomic == nil && st.HelmDefaults.Atomic { - flags = append(flags, "--atomic") - } + flags = st.appendAtomicFlags(flags, release, opt) if release.CleanupOnFail != nil && *release.CleanupOnFail || release.CleanupOnFail == nil && st.HelmDefaults.CleanupOnFail { flags = append(flags, "--cleanup-on-fail") diff --git a/pkg/testutil/mocks.go b/pkg/testutil/mocks.go index 453a931e..5b1f46d8 100644 --- a/pkg/testutil/mocks.go +++ b/pkg/testutil/mocks.go @@ -120,6 +120,10 @@ func (helm *noCallHelmExec) DeleteRelease(context helmexec.HelmContext, name str helm.doPanic() return nil } +func (helm *noCallHelmExec) RollbackRelease(context helmexec.HelmContext, name string, flags ...string) error { + helm.doPanic() + return nil +} func (helm *noCallHelmExec) List(context helmexec.HelmContext, filter string, flags ...string) (string, error) { helm.doPanic()