From d1e87d074bf672533d4f618695b5d1284ab19ecd Mon Sep 17 00:00:00 2001 From: Aditya Menon Date: Thu, 27 Nov 2025 00:19:14 +0100 Subject: [PATCH] fix: prevent deadlock when multiple releases share the same chart When multiple releases use the same OCI chart (e.g., same chart different values), workers in PrepareCharts would deadlock: 1. Worker 1 acquires lock for chart/path, downloads, adds to cache 2. Worker 2 finds chart in cache, tries to acquire lock on same path 3. Worker 2 blocks waiting for Worker 1's lock 4. Collector waits for Worker 2's result 5. Worker 1's lock held until PrepareCharts finishes -> deadlock The fix: when using the in-memory chart cache (which means another worker in the same process already downloaded the chart), don't acquire another lock. This is safe because: - The in-memory cache is only used within a single helmfile process - The tempDir cleanup is deferred until after helm callback completes - Cross-process coordination is still handled by file locks during downloads This fixes the "signal: killed" test failures in CI for: - oci_chart_pull_direct - oci_chart_pull_once - oci_chart_pull_once2 Signed-off-by: Aditya Menon --- pkg/state/state.go | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/pkg/state/state.go b/pkg/state/state.go index 9240d7a0..728441cd 100644 --- a/pkg/state/state.go +++ b/pkg/state/state.go @@ -1466,19 +1466,13 @@ func (st *HelmState) processLocalChart(normalizedChart, dir string, release *Rel // The lock ensures the chart won't be deleted while helm is using it. func (st *HelmState) forcedDownloadChart(chartName, dir string, release *ReleaseSpec, helm helmexec.Interface, opts ChartPrepareOptions) (string, *chartLockResult, error) { // Check global chart cache first for non-OCI charts + // If found, another worker in this process already downloaded and holds the lock. + // We don't need to acquire another lock - the tempDir won't be deleted until + // after helm operations complete (cleanup is deferred in withPreparedCharts). cacheKey := st.getChartCacheKey(release) if cachedPath, exists := st.checkChartCache(cacheKey); exists && st.fs.DirectoryExistsAt(cachedPath) { st.logger.Debugf("Chart %s:%s already downloaded, using cached version at %s", chartName, release.Version, cachedPath) - // Still need to acquire a shared lock to prevent deletion while using - chartPath, err := generateChartPath(chartName, dir, release, opts.OutputDirTemplate) - if err != nil { - return "", nil, err - } - lockResult, lockErr := st.acquireChartLock(chartPath, opts) - if lockErr != nil { - return "", nil, lockErr - } - return cachedPath, lockResult, nil + return cachedPath, nil, nil } chartPath, err := generateChartPath(chartName, dir, release, opts.OutputDirTemplate) @@ -4609,17 +4603,14 @@ func (st *HelmState) getOCIChart(release *ReleaseSpec, tempDir string, helm helm return nil, nil, nil } - // Check global chart cache first (in-memory cache, no lock needed) + // Check global chart cache first (in-memory cache) + // If found, another worker in this process already downloaded and holds the lock. + // We don't need to acquire another lock - the tempDir won't be deleted until + // after helm operations complete (cleanup is deferred in withPreparedCharts). cacheKey := st.getChartCacheKey(release) if cachedPath, exists := st.checkChartCache(cacheKey); exists && st.fs.DirectoryExistsAt(cachedPath) { st.logger.Debugf("OCI chart %s:%s already downloaded, using cached version at %s", chartName, chartVersion, cachedPath) - // Still need to acquire a shared lock to prevent deletion while using - chartPath, _ := st.getOCIChartPath(tempDir, release, chartName, chartVersion, opts.OutputDirTemplate) - lockResult, lockErr := st.acquireChartLock(chartPath, opts) - if lockErr != nil { - return nil, nil, lockErr - } - return &cachedPath, lockResult, nil + return &cachedPath, nil, nil } if opts.OutputDirTemplate == "" {