diff --git a/docs/index.md b/docs/index.md index 84948bd6..8e60f201 100644 --- a/docs/index.md +++ b/docs/index.md @@ -665,6 +665,33 @@ The `helmfile init` sub-command checks the dependencies required for helmfile op The `helmfile cache` sub-command is designed for cache management. Go-getter-backed remote file system are cached by `helmfile`. There is no TTL implemented, if you need to update the cached files or directories, you need to clean individually or run a full cleanup with `helmfile cache cleanup` +#### OCI Chart Cache + +OCI charts are cached in the shared cache directory (`~/.cache/helmfile` by default, or `$HELMFILE_CACHE_HOME`). This cache is shared across all helmfile processes. + +**Cache Behavior:** + +- When a chart exists in the shared cache and is valid, it is reused without re-downloading +- The `--skip-refresh` flag can be used to skip checking for updates to cached charts stored in process-specific temporary directories (it does not affect charts already present in the shared cache) +- When running multiple helmfile processes in parallel (e.g., as an ArgoCD plugin), charts in the shared cache are not refreshed/deleted to prevent race conditions + +**Forcing a Cache Refresh:** + +To force a refresh of cached OCI charts, run: +```bash +helmfile cache cleanup +``` + +This will clear the shared cache, allowing the next helmfile command to re-download charts. + +#### cache info + +Display information about the cache directory. + +#### cache cleanup + +Remove all cached files from the cache directory. + ### sync The `helmfile sync` sub-command sync your cluster state as described in your `helmfile`. The default helmfile is `helmfile.yaml`, but any YAML file can be passed by specifying a `--file path/to/your/yaml/file` flag. @@ -1867,6 +1894,14 @@ export MY_OCI_REGISTRY_USERNAME=spongebob export MY_OCI_REGISTRY_PASSWORD=squarepants ``` +### OCI Chart Caching + +OCI charts are automatically cached in the shared cache directory (`~/.cache/helmfile` by default, or the directory specified by `HELMFILE_CACHE_HOME`). This improves performance by avoiding redundant downloads. + +**Multi-Process Safety:** When running multiple helmfile processes in parallel (e.g., as an ArgoCD plugin), charts in the shared cache are not deleted or refreshed to prevent race conditions where one process might delete a chart that another is using. To force a cache refresh, run `helmfile cache cleanup` first. + +See the [cache](#cache) section for more details on cache management. + ## Attribution We use: diff --git a/pkg/state/oci_chart_lock_test.go b/pkg/state/oci_chart_lock_test.go index 8f42471b..fec614f7 100644 --- a/pkg/state/oci_chart_lock_test.go +++ b/pkg/state/oci_chart_lock_test.go @@ -11,6 +11,10 @@ import ( "github.com/gofrs/flock" "github.com/stretchr/testify/require" + "go.uber.org/zap" + + "github.com/helmfile/helmfile/pkg/filesystem" + "github.com/helmfile/helmfile/pkg/remote" ) // TestOCIChartFileLock tests that the file locking mechanism works correctly @@ -485,3 +489,150 @@ func TestOCIChartDoubleCheckLocking(t *testing.T) { require.NoError(t, err, "chart should exist in cache") }) } + +// TestIsSharedCachePath tests the isSharedCachePath function to ensure it correctly +// identifies paths within the shared cache directory. +func TestIsSharedCachePath(t *testing.T) { + t.Run("path in shared cache is detected", func(t *testing.T) { + // Create a HelmState with test logger + logger := createTestLogger(t) + st := &HelmState{ + logger: logger, + fs: filesystem.DefaultFileSystem(), + } + + // Get the shared cache directory + sharedCacheDir := remote.CacheDir() + + // Test path inside shared cache + chartPath := filepath.Join(sharedCacheDir, "envoyproxy", "gateway-helm", "1.6.2", "gateway-helm") + require.True(t, st.isSharedCachePath(chartPath), "path in shared cache should return true") + }) + + t.Run("path outside shared cache is not detected", func(t *testing.T) { + logger := createTestLogger(t) + st := &HelmState{ + logger: logger, + fs: filesystem.DefaultFileSystem(), + } + + // Test path outside shared cache (temp directory) + tempDir, err := os.MkdirTemp("", "helmfile-test-*") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + + chartPath := filepath.Join(tempDir, "mychart") + require.False(t, st.isSharedCachePath(chartPath), "path outside shared cache should return false") + }) + + t.Run("relative path handling", func(t *testing.T) { + logger := createTestLogger(t) + st := &HelmState{ + logger: logger, + fs: filesystem.DefaultFileSystem(), + } + + // Test relative path + require.False(t, st.isSharedCachePath("./relative/path"), "relative path should return false") + }) + + t.Run("shared cache path as prefix only", func(t *testing.T) { + logger := createTestLogger(t) + st := &HelmState{ + logger: logger, + fs: filesystem.DefaultFileSystem(), + } + + // Test path that has shared cache dir as a prefix but not as the actual parent + // e.g., /home/user/.cache/helmfile-other/path should not match + sharedCacheDir := remote.CacheDir() + nonSubpath := sharedCacheDir + "-other/chart" + require.False(t, st.isSharedCachePath(nonSubpath), "path with shared cache as prefix but not subdirectory should return false") + }) + + t.Run("symlink to shared cache directory", func(t *testing.T) { + logger := createTestLogger(t) + st := &HelmState{ + logger: logger, + fs: filesystem.DefaultFileSystem(), + } + + sharedCacheDir := remote.CacheDir() + chartRelPath := filepath.Join("envoyproxy", "gateway-helm", "1.6.2", "gateway-helm") + targetChartPath := filepath.Join(sharedCacheDir, chartRelPath) + + if err := os.MkdirAll(targetChartPath, 0755); err != nil { + t.Skipf("skipping symlink test; unable to create target directory: %v", err) + } + defer os.RemoveAll(filepath.Join(sharedCacheDir, "envoyproxy")) + + tempDir, err := os.MkdirTemp("", "helmfile-symlink-test-*") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + + symlinkPath := filepath.Join(tempDir, "cache-link") + + if err := os.Symlink(sharedCacheDir, symlinkPath); err != nil { + t.Skipf("skipping symlink test; unable to create symlink: %v", err) + } + + chartPath := filepath.Join(symlinkPath, chartRelPath) + require.True(t, st.isSharedCachePath(chartPath), "path within symlinked shared cache directory should return true") + }) + + t.Run("exact match with shared cache directory", func(t *testing.T) { + logger := createTestLogger(t) + st := &HelmState{ + logger: logger, + fs: filesystem.DefaultFileSystem(), + } + + sharedCacheDir := remote.CacheDir() + require.True(t, st.isSharedCachePath(sharedCacheDir), "shared cache directory itself should return true") + }) +} + +// TestAcquireChartLockSharedCacheSkipRefresh verifies that acquireChartLock +// returns chartActionUseCached instead of chartActionRefresh when the chart +// exists in the shared cache, even when refresh is requested. This tests the +// core fix for the race condition issue #2387. +func TestAcquireChartLockSharedCacheSkipRefresh(t *testing.T) { + logger := createTestLogger(t) + + sharedCacheDir := remote.CacheDir() + chartPath := filepath.Join(sharedCacheDir, "testrepo", "testchart", "1.0.0", "testchart") + + err := os.MkdirAll(chartPath, 0755) + require.NoError(t, err) + defer os.RemoveAll(filepath.Join(sharedCacheDir, "testrepo")) + + chartFile := filepath.Join(chartPath, "Chart.yaml") + err = os.WriteFile(chartFile, []byte("name: testchart\nversion: 1.0.0\n"), 0644) + require.NoError(t, err) + + st := &HelmState{ + logger: logger, + fs: filesystem.DefaultFileSystem(), + } + + opts := ChartPrepareOptions{ + SkipRefresh: false, + } + + result, err := st.acquireChartLock(chartPath, opts, nil) + require.NoError(t, err) + require.NotNil(t, result) + defer result.Release(logger) + + require.Equal(t, chartActionUseCached, result.action, "shared cache chart should use cached, not refresh") + + _, err = os.Stat(chartFile) + require.NoError(t, err, "chart in shared cache should not be deleted") +} + +func createTestLogger(t *testing.T) *zap.SugaredLogger { + t.Helper() + logger, err := zap.NewDevelopment() + require.NoError(t, err) + return logger.Sugar() +} diff --git a/pkg/state/state.go b/pkg/state/state.go index 19d74e2d..e42252ee 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -4539,6 +4539,33 @@ func (st *HelmState) addToChartCache(key ChartCacheKey, path string) { downloadedCharts[key] = path } +// isSharedCachePath returns true if the chartPath is within the shared cache directory. +// Charts in the shared cache should not be deleted during refresh to prevent race conditions +// when multiple processes are using the same cached chart. +func (st *HelmState) isSharedCachePath(chartPath string) bool { + sharedCacheDir := remote.CacheDir() + absChartPath, err := filepath.Abs(chartPath) + if err != nil { + st.logger.Debugf("failed to get absolute path for chartPath %q: %v", chartPath, err) + return false + } + absSharedCache, err := filepath.Abs(sharedCacheDir) + if err != nil { + st.logger.Debugf("failed to get absolute path for shared cache dir %q: %v", sharedCacheDir, err) + return false + } + resolvedChartPath, err := filepath.EvalSymlinks(absChartPath) + if err != nil { + resolvedChartPath = absChartPath + } + resolvedSharedCache, err := filepath.EvalSymlinks(absSharedCache) + if err != nil { + resolvedSharedCache = absSharedCache + } + return resolvedChartPath == resolvedSharedCache || + strings.HasPrefix(resolvedChartPath, resolvedSharedCache+string(filepath.Separator)) +} + // chartDownloadAction represents what action should be taken after acquiring locks type chartDownloadAction int @@ -4590,7 +4617,9 @@ func (r *chartLockResult) Release(logger *zap.SugaredLogger) { // NOTE: The callback is executed while holding an exclusive lock, so it should be fast and non-blocking. // // IMPORTANT: Locks are released immediately after download completes. -// The tempDir cleanup is deferred until after helm operations, so charts won't be deleted mid-use. +// For process-specific temp directories, cleanup is deferred until after helm operations. +// For the shared cache directory, refresh is skipped entirely to prevent race conditions +// between processes (use `helmfile cache cleanup` to force refresh). func (st *HelmState) acquireChartLock(chartPath string, opts ChartPrepareOptions, skipRefreshCheck func() bool) (*chartLockResult, error) { result := &chartLockResult{ chartPath: chartPath, @@ -4651,6 +4680,14 @@ func (st *HelmState) acquireChartLock(chartPath string, opts ChartPrepareOptions st.logger.Debugf("Skipping refresh for chart at %s (another worker already cached)", result.cachedPath) return result, nil } + // Skip refresh for shared cache paths to prevent race conditions when multiple + // processes are using the same cached chart. Use `helmfile cache cleanup` to force refresh. + if st.isSharedCachePath(chartPath) { + st.logger.Infof("Skipping refresh for chart at %s (shared cache, another process may be using it; run 'helmfile cache cleanup' to force refresh)", chartPath) + result.action = chartActionUseCached + result.cachedPath = filepath.Dir(fullChartPath) + return result, nil + } // Proceed with refresh: delete and re-download st.logger.Debugf("Refreshing chart at %s (exclusive lock)", chartPath) if err := os.RemoveAll(chartPath); err != nil { @@ -4660,12 +4697,18 @@ func (st *HelmState) acquireChartLock(chartPath string, opts ChartPrepareOptions result.action = chartActionRefresh } else { // Directory exists but invalid (no Chart.yaml) - corrupted cache - st.logger.Debugf("Chart directory at %s is corrupted: %v, will re-download", chartPath, err) - if err := os.RemoveAll(chartPath); err != nil { + // Skip deletion for shared cache paths to prevent race conditions + if st.isSharedCachePath(chartPath) { result.Release(st.logger) - return nil, fmt.Errorf("failed to remove corrupted chart directory: %w", err) + return nil, fmt.Errorf("chart directory at %s is corrupted (%w); cannot safely remove from shared cache while other processes may be running. Please run 'helmfile cache cleanup' to clear the cache", chartPath, err) + } else { + st.logger.Debugf("Chart directory at %s is corrupted: %v, will re-download", chartPath, err) + if err := os.RemoveAll(chartPath); err != nil { + result.Release(st.logger) + return nil, fmt.Errorf("failed to remove corrupted chart directory: %w", err) + } + result.action = chartActionDownload } - result.action = chartActionDownload } } else { result.action = chartActionDownload