fix: add trackFailOnError option to control kubedog exit code (#2576)

* fix: add trackFailOnError option to control kubedog exit code behavior

  When kubedog release tracking fails (e.g. pod ImagePullBackOff), helmfile exits with code 0 instead of a non-zero exit code.

  Add a trackFailOnError configuration option (default: false) that, when set to true, propagates kubedog tracking failures to the exit code. The option is available as:
  - Per-release YAML: trackFailOnError: true
  - CLI flag: --track-fail-on-error (sync and apply commands)

  Extract trackReleaseIfEnabled helper to consolidate kubedog tracking logic from two duplicated call sites into a single maintainable method.

  Fixes #2507

  Signed-off-by: yxxhero <aiopsclub@163.com>

* fix: add //go:build ignore to server.go to fix go test CI failure

  The test/integration/test-cases/issue-2103/input/server.go is a package main helper binary used by the issue-2103 integration test. When go test -coverprofile runs on this package, it fails with "go: no such tool covdata" in the CI environment.

  Adding //go:build ignore excludes the file from go list ./... (and therefore from PKGS in the Makefile), while still allowing the integration test to build it explicitly via file path:

      go build -o server ./path/to/server.go

  Agent-Logs-Url: https://github.com/helmfile/helmfile/sessions/8a7000af-72b7-48f8-8a82-24813b5df341
  Co-authored-by: yxxhero <11087727+yxxhero@users.noreply.github.com>

* fix: update TestGenerateID expected hashes after adding TrackFailOnError field

  Adding TrackFailOnError *bool to ReleaseSpec changed the spew serialization of the struct, which changed the FNV-32a hash values produced by generateValuesID. Update temp_test.go with the new expected hash strings.

  Agent-Logs-Url: https://github.com/helmfile/helmfile/sessions/caa86cd9-73d1-4894-b745-fd70c0811fd6
  Co-authored-by: yxxhero <11087727+yxxhero@users.noreply.github.com>

---------

Signed-off-by: yxxhero <aiopsclub@163.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
2026-05-04 14:20:03 +08:00 · 2026-05-04 14:20:03 +08:00 · 420cc3ba9c
parent 7cc5fe0358
commit 420cc3ba9c
11 changed files with 72 additions and 14 deletions
--- a/cmd/apply.go
+++ b/cmd/apply.go
@ -72,6 +72,7 @@ func NewApplyCmd(globalCfg *config.GlobalImpl) *cobra.Command {
 	f.StringVar(&applyOptions.TrackMode, "track-mode", "", "Track mode for releases: 'helm' (default), 'helm-legacy' (Helm v4 only), or 'kubedog'")
 	f.IntVar(&applyOptions.TrackTimeout, "track-timeout", 0, `Timeout in seconds for kubedog tracking (0 to use default 300s timeout)`)
 	f.BoolVar(&applyOptions.TrackLogs, "track-logs", false, "Enable log streaming with kubedog tracking")
+	f.BoolVar(&applyOptions.TrackFailOnError, "track-fail-on-error", false, "Fail with non-zero exit code when kubedog tracking fails")
 	f.StringVar(&applyOptions.Description, "description", "", `Set description for all releases. If set, overridesdescriptions in helmfile.yaml. Will be passed to "helm upgrade --description"`)

 	return cmd
--- a/cmd/sync.go
+++ b/cmd/sync.go
@ -57,6 +57,7 @@ func NewSyncCmd(globalCfg *config.GlobalImpl) *cobra.Command {
 	f.StringVar(&syncOptions.TrackMode, "track-mode", "", "Track mode for releases: 'helm' (default), 'helm-legacy' (Helm v4 only), or 'kubedog'")
 	f.IntVar(&syncOptions.TrackTimeout, "track-timeout", 0, `Timeout in seconds for kubedog tracking (0 to use default 300s timeout)`)
 	f.BoolVar(&syncOptions.TrackLogs, "track-logs", false, "Enable log streaming with kubedog tracking")
+	f.BoolVar(&syncOptions.TrackFailOnError, "track-fail-on-error", false, "Fail with non-zero exit code when kubedog tracking fails")
 	f.StringVar(&syncOptions.Description, "description", "", `Set description for all releases. If set, overrides descriptions in helmfile.yaml. Will be passed to "helm upgrade --description"`)

 	return cmd
--- a/pkg/app/app.go
+++ b/pkg/app/app.go
@ -1891,6 +1891,7 @@ Do you really want to apply?
 					TrackMode:            c.TrackMode(),
 					TrackTimeout:         c.TrackTimeout(),
 					TrackLogs:            c.TrackLogs(),
+					TrackFailOnError:     c.TrackFailOnError(),
 					Description:          c.Description(),
 				}
 				return subst.SyncReleases(&affectedReleases, helm, c.Values(), c.Concurrency(), syncOpts)
@ -2361,6 +2362,7 @@ Do you really want to sync?
 					TrackMode:            c.TrackMode(),
 					TrackTimeout:         c.TrackTimeout(),
 					TrackLogs:            c.TrackLogs(),
+					TrackFailOnError:     c.TrackFailOnError(),
 					Description:          c.Description(),
 				}
 				return subst.SyncReleases(&affectedReleases, helm, c.Values(), c.Concurrency(), syncOpts)
--- a/pkg/app/app_test.go
+++ b/pkg/app/app_test.go
@ -2534,6 +2534,7 @@ type applyConfig struct {
 	trackMode                string
 	trackTimeout             int
 	trackLogs                bool
+	trackFailOnError         bool

 	// template-only options
 	includeCRDs, skipTests       bool
@ -2760,6 +2761,10 @@ func (a applyConfig) TrackLogs() bool {
 	return a.trackLogs
 }

+func (a applyConfig) TrackFailOnError() bool {
+	return a.trackFailOnError
+}
+
 func (a applyConfig) Description() string {
 	return ""
 }
--- a/pkg/app/config.go
+++ b/pkg/app/config.go
@ -91,6 +91,7 @@ type ApplyConfigProvider interface {
 	TrackMode() string
 	TrackTimeout() int
 	TrackLogs() bool
+	TrackFailOnError() bool

 	Description() string

@ -130,6 +131,7 @@ type SyncConfigProvider interface {
 	TrackMode() string
 	TrackTimeout() int
 	TrackLogs() bool
+	TrackFailOnError() bool

 	Description() string

--- a/pkg/config/apply.go
+++ b/pkg/config/apply.go
@ -88,6 +88,8 @@ type ApplyOptions struct {
 	TrackTimeout int
 	// TrackLogs enables log streaming with kubedog
 	TrackLogs bool
+	// TrackFailOnError controls whether kubedog tracking failures cause a non-zero exit code
+	TrackFailOnError bool
 	// Description is the description that will be passed to helm upgrade --description
 	Description string
 }
@ -316,6 +318,11 @@ func (a *ApplyImpl) TrackLogs() bool {
 	return a.ApplyOptions.TrackLogs
 }

+// TrackFailOnError returns whether kubedog tracking failures should cause a non-zero exit code.
+func (a *ApplyImpl) TrackFailOnError() bool {
+	return a.ApplyOptions.TrackFailOnError
+}
+
 // Description returns the description.
 func (a *ApplyImpl) Description() string {
 	return a.ApplyOptions.Description
--- a/pkg/config/sync.go
+++ b/pkg/config/sync.go
@ -59,6 +59,8 @@ type SyncOptions struct {
 	TrackTimeout int
 	// TrackLogs enables log streaming with kubedog
 	TrackLogs bool
+	// TrackFailOnError controls whether kubedog tracking failures cause a non-zero exit code
+	TrackFailOnError bool
 	// Description is the description that will be passed to helm upgrade --description
 	Description string
 }
@ -216,6 +218,11 @@ func (t *SyncImpl) TrackLogs() bool {
 	return t.SyncOptions.TrackLogs
 }

+// TrackFailOnError returns whether kubedog tracking failures should cause a non-zero exit code.
+func (t *SyncImpl) TrackFailOnError() bool {
+	return t.SyncOptions.TrackFailOnError
+}
+
 // Description returns the description.
 func (t *SyncImpl) Description() string {
 	return t.SyncOptions.Description
--- a/pkg/state/helmx.go
+++ b/pkg/state/helmx.go
@ -197,6 +197,32 @@ func (st *HelmState) shouldUseKubedog(release *ReleaseSpec, ops *SyncOpts) bool
 	return st.getTrackMode(release, ops) == string(kubedog.TrackModeKubedog)
 }

+func (st *HelmState) shouldFailOnTrackError(release *ReleaseSpec, ops *SyncOpts) bool {
+	if release.TrackFailOnError != nil {
+		return *release.TrackFailOnError
+	}
+	if ops != nil {
+		return ops.TrackFailOnError
+	}
+	return false
+}
+
+// trackReleaseIfEnabled performs kubedog tracking for a release if trackMode is "kubedog".
+// It returns a ReleaseError if tracking fails and shouldFailOnTrackError is true.
+// The caller is responsible for mutating affectedReleases when needed.
+func (st *HelmState) trackReleaseIfEnabled(ctx context.Context, release *ReleaseSpec, helm helmexec.Interface, opts *SyncOpts) *ReleaseError {
+	if !st.shouldUseKubedog(release, opts) {
+		return nil
+	}
+	if trackErr := st.trackWithKubedog(ctx, release, helm, opts); trackErr != nil {
+		st.logger.Warnf("kubedog tracking failed for release %s: %v", release.Name, trackErr)
+		if st.shouldFailOnTrackError(release, opts) {
+			return newReleaseFailedError(release, trackErr)
+		}
+	}
+	return nil
+}
+
 func (st *HelmState) getTrackMode(release *ReleaseSpec, ops *SyncOpts) string {
 	trackMode := release.TrackMode
 	if trackMode == "" && ops != nil && ops.TrackMode != "" {
--- a/pkg/state/state.go
+++ b/pkg/state/state.go
@ -478,6 +478,8 @@ type ReleaseSpec struct {
 	KubedogQPS *float32 `yaml:"kubedogQPS,omitempty"`
 	// KubedogBurst specifies the burst for kubedog kubernetes client
 	KubedogBurst *int `yaml:"kubedogBurst,omitempty"`
+	// TrackFailOnError controls whether kubedog tracking failures cause a non-zero exit code
+	TrackFailOnError *bool `yaml:"trackFailOnError,omitempty"`
 }

 // TrackResourceSpec specifies a resource to track
@ -912,6 +914,7 @@ type SyncOpts struct {
 	TrackMode            string
 	TrackTimeout         int
 	TrackLogs            bool
+	TrackFailOnError     bool
 	Description          string
 }

@ -1138,10 +1141,8 @@ func (st *HelmState) SyncReleases(affectedReleases *AffectedReleases, helm helme
 					}
 				} else if release.UpdateStrategy == UpdateStrategyReinstallIfForbidden {
 					relErr = st.performSyncOrReinstallOfRelease(affectedReleases, helm, context, release, chart, m, flags...)
-					if relErr == nil && st.shouldUseKubedog(release, opts) {
-						if trackErr := st.trackWithKubedog(gocontext.Background(), release, helm, opts); trackErr != nil {
-							st.logger.Warnf("kubedog tracking failed for release %s: %v", release.Name, trackErr)
-						}
+					if relErr == nil {
+						relErr = st.trackReleaseIfEnabled(gocontext.Background(), release, helm, opts)
 					}
 				} else {
 					if err := helm.SyncRelease(context, release.Name, chart, release.Namespace, flags...); err != nil {
@ -1160,10 +1161,11 @@ func (st *HelmState) SyncReleases(affectedReleases *AffectedReleases, helm helme
 							release.installedVersion = installedVersion
 						}

-						if st.shouldUseKubedog(release, opts) {
-							if trackErr := st.trackWithKubedog(gocontext.Background(), release, helm, opts); trackErr != nil {
-								st.logger.Warnf("kubedog tracking failed for release %s: %v", release.Name, trackErr)
-							}
+						if trackErr := st.trackReleaseIfEnabled(gocontext.Background(), release, helm, opts); trackErr != nil {
+							m.Lock()
+							affectedReleases.Failed = append(affectedReleases.Failed, release)
+							m.Unlock()
+							relErr = trackErr
 						}
 					}
 				}
--- a/pkg/state/temp_test.go
+++ b/pkg/state/temp_test.go
@ -38,39 +38,39 @@ func TestGenerateID(t *testing.T) {
 	run(testcase{
 		subject: "baseline",
 		release: ReleaseSpec{Name: "foo", Chart: "incubator/raw"},
-		want:    "foo-values-6ccb848dcd",
+		want:    "foo-values-7f6f8d74dd",
 	})

 	run(testcase{
 		subject: "different bytes content",
 		release: ReleaseSpec{Name: "foo", Chart: "incubator/raw"},
 		data:    []byte(`{"k":"v"}`),
-		want:    "foo-values-5bcbbc4c85",
+		want:    "foo-values-5fc74c864c",
 	})

 	run(testcase{
 		subject: "different map content",
 		release: ReleaseSpec{Name: "foo", Chart: "incubator/raw"},
 		data:    map[string]any{"k": "v"},
-		want:    "foo-values-7c6468f955",
+		want:    "foo-values-77df88dd65",
 	})

 	run(testcase{
 		subject: "different chart",
 		release: ReleaseSpec{Name: "foo", Chart: "stable/envoy"},
-		want:    "foo-values-8645f5847f",
+		want:    "foo-values-77c96457f7",
 	})

 	run(testcase{
 		subject: "different name",
 		release: ReleaseSpec{Name: "bar", Chart: "incubator/raw"},
-		want:    "bar-values-54bd8c865",
+		want:    "bar-values-6695f7ff4c",
 	})

 	run(testcase{
 		subject: "specific ns",
 		release: ReleaseSpec{Name: "foo", Chart: "incubator/raw", Namespace: "myns"},
-		want:    "myns-foo-values-b4849b445",
+		want:    "myns-foo-values-9b9484d4c",
 	})

 	for id, n := range ids {
--- a/test/integration/test-cases/issue-2103/input/server.go
+++ b/test/integration/test-cases/issue-2103/input/server.go
@ -1,5 +1,10 @@
 // server.go is a small HTTP server used by the issue-2103 integration test.
 // It serves different YAML content based on the "ref" query parameter.
+// It is excluded from normal `go test ./...` runs; the integration test builds it
+// explicitly via its file path.
+
+//go:build ignore
+
 package main

 import (