// helmfile/pkg/state/oci_chart_lock_test.go

package state

import (
	"context"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/gofrs/flock"
	"github.com/stretchr/testify/require"
)

// TestOCIChartFileLock tests that the file locking mechanism works correctly
// to prevent race conditions when multiple processes/goroutines try to access
// the same chart cache path concurrently.
func TestOCIChartFileLock(t *testing.T) {
	t.Run("concurrent lock acquisition is serialized", func(t *testing.T) {
		// Create a temporary directory for the test
		tempDir, err := os.MkdirTemp("", "helmfile-lock-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test-chart.lock")

		// Track the order of lock acquisition
		var lockOrder []int
		var mu sync.Mutex
		var wg sync.WaitGroup

		// Number of concurrent goroutines trying to acquire the lock
		numGoroutines := 5

		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func(id int) {
				defer wg.Done()

				fileLock := flock.New(lockFilePath)
				err := fileLock.Lock()
				require.NoError(t, err)

				// Record the order this goroutine acquired the lock
				mu.Lock()
				lockOrder = append(lockOrder, id)
				mu.Unlock()

				// Simulate some work while holding the lock
				time.Sleep(10 * time.Millisecond)

				err = fileLock.Unlock()
				require.NoError(t, err)
			}(i)
		}

		wg.Wait()

		// Verify all goroutines acquired the lock exactly once
		require.Len(t, lockOrder, numGoroutines, "all goroutines should have acquired the lock")
	})

	t.Run("lock prevents concurrent writes", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-lock-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test-chart.lock")
		dataFilePath := filepath.Join(tempDir, "data.txt")

		var wg sync.WaitGroup
		var writeCount atomic.Int32

		// Multiple goroutines try to write to the same file
		numGoroutines := 10
		for i := 0; i < numGoroutines; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()

				fileLock := flock.New(lockFilePath)
				err := fileLock.Lock()
				require.NoError(t, err)
				defer fileLock.Unlock()

				// Check if file exists (like double-check locking pattern)
				if _, err := os.Stat(dataFilePath); os.IsNotExist(err) {
					// Only first goroutine to acquire lock should write
					err = os.WriteFile(dataFilePath, []byte("written"), 0644)
					require.NoError(t, err)
					writeCount.Add(1)
				}
			}()
		}

		wg.Wait()

		// Only one goroutine should have written the file
		require.Equal(t, int32(1), writeCount.Load(), "only one goroutine should write when using double-check locking")

		// Verify file exists with correct content
		content, err := os.ReadFile(dataFilePath)
		require.NoError(t, err)
		require.Equal(t, "written", string(content))
	})

	t.Run("lock file is created in correct directory", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-lock-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		// Simulate nested chart path structure
		chartPath := filepath.Join(tempDir, "registry", "charts", "myapp", "1.0.0")
		lockFilePath := chartPath + ".lock"

		// Create parent directories (like in getOCIChart)
		lockFileDir := filepath.Dir(lockFilePath)
		err = os.MkdirAll(lockFileDir, 0755)
		require.NoError(t, err)

		// Acquire and release lock
		fileLock := flock.New(lockFilePath)
		err = fileLock.Lock()
		require.NoError(t, err)

		// Verify lock file was created
		_, err = os.Stat(lockFilePath)
		require.NoError(t, err, "lock file should be created")

		err = fileLock.Unlock()
		require.NoError(t, err)
	})

	t.Run("TryLockContext respects timeout", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-lock-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test-chart.lock")

		// First goroutine acquires lock and holds it
		fileLock1 := flock.New(lockFilePath)
		err = fileLock1.Lock()
		require.NoError(t, err)

		// Second goroutine tries to acquire with short timeout
		done := make(chan bool)
		go func() {
			fileLock2 := flock.New(lockFilePath)
			ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
			defer cancel()

			locked, err := fileLock2.TryLockContext(ctx, 10*time.Millisecond)
			// TryLockContext returns error when context times out
			// Either locked is false, or we get a context deadline exceeded error
			if err != nil {
				require.ErrorIs(t, err, context.DeadlineExceeded, "should timeout with deadline exceeded")
			} else {
				require.False(t, locked, "should not acquire lock within timeout")
			}
			done <- true
		}()

		select {
		case <-done:
			// Success - timeout worked
		case <-time.After(2 * time.Second):
			t.Fatal("timeout test took too long")
		}

		// Release the lock
		err = fileLock1.Unlock()
		require.NoError(t, err)
	})
}

// TestOCIChartSharedExclusiveLocks tests the reader-writer lock pattern
// where shared locks allow concurrent reads and exclusive locks are used for writes.
func TestOCIChartSharedExclusiveLocks(t *testing.T) {
	t.Run("multiple shared locks can be acquired simultaneously", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-shared-lock-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test-chart.lock")

		// Number of concurrent readers
		numReaders := 5
		var wg sync.WaitGroup
		var activeReaders atomic.Int32
		var maxConcurrentReaders atomic.Int32

		for i := 0; i < numReaders; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()

				fileLock := flock.New(lockFilePath)
				// Acquire shared (read) lock
				err := fileLock.RLock()
				require.NoError(t, err)

				// Track how many readers are active simultaneously
				current := activeReaders.Add(1)

				// Update max concurrent readers
				for {
					max := maxConcurrentReaders.Load()
					if current <= max {
						break
					}
					if maxConcurrentReaders.CompareAndSwap(max, current) {
						break
					}
				}

				// Simulate reading while holding shared lock
				time.Sleep(50 * time.Millisecond)

				activeReaders.Add(-1)
				err = fileLock.Unlock()
				require.NoError(t, err)
			}()
		}

		wg.Wait()

		// Multiple readers should have been active at the same time
		require.Greater(t, maxConcurrentReaders.Load(), int32(1),
			"multiple shared locks should be held concurrently")
	})

	t.Run("exclusive lock blocks shared locks", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-excl-block-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test-chart.lock")

		// First, acquire exclusive lock
		writerLock := flock.New(lockFilePath)
		err = writerLock.Lock()
		require.NoError(t, err)

		// Try to acquire shared lock with timeout - should fail
		readerResult := make(chan bool)
		go func() {
			readerLock := flock.New(lockFilePath)
			ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
			defer cancel()

			locked, err := readerLock.TryRLockContext(ctx, 10*time.Millisecond)
			if err != nil {
				readerResult <- false
				return
			}
			if locked {
				readerLock.Unlock()
			}
			readerResult <- locked
		}()

		select {
		case acquired := <-readerResult:
			require.False(t, acquired, "shared lock should not be acquired while exclusive lock is held")
		case <-time.After(2 * time.Second):
			t.Fatal("test timed out")
		}

		// Release exclusive lock
		err = writerLock.Unlock()
		require.NoError(t, err)
	})

	t.Run("shared locks block exclusive lock", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-shared-block-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test-chart.lock")

		// First, acquire shared lock
		readerLock := flock.New(lockFilePath)
		err = readerLock.RLock()
		require.NoError(t, err)

		// Try to acquire exclusive lock with timeout - should fail
		writerResult := make(chan bool)
		go func() {
			writerLock := flock.New(lockFilePath)
			ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
			defer cancel()

			locked, err := writerLock.TryLockContext(ctx, 10*time.Millisecond)
			if err != nil {
				writerResult <- false
				return
			}
			if locked {
				writerLock.Unlock()
			}
			writerResult <- locked
		}()

		select {
		case acquired := <-writerResult:
			require.False(t, acquired, "exclusive lock should not be acquired while shared lock is held")
		case <-time.After(2 * time.Second):
			t.Fatal("test timed out")
		}

		// Release shared lock
		err = readerLock.Unlock()
		require.NoError(t, err)
	})

	t.Run("exclusive lock acquired after shared lock released", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-lock-release-test-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test-chart.lock")

		// Acquire shared lock
		readerLock := flock.New(lockFilePath)
		err = readerLock.RLock()
		require.NoError(t, err)

		// Start writer waiting for lock
		writerDone := make(chan bool)
		go func() {
			writerLock := flock.New(lockFilePath)
			// Use longer timeout
			ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
			defer cancel()

			locked, err := writerLock.TryLockContext(ctx, 10*time.Millisecond)
			if err == nil && locked {
				writerLock.Unlock()
				writerDone <- true
				return
			}
			writerDone <- false
		}()

		// Release shared lock after a short delay
		time.Sleep(50 * time.Millisecond)
		err = readerLock.Unlock()
		require.NoError(t, err)

		// Writer should now succeed
		select {
		case success := <-writerDone:
			require.True(t, success, "writer should acquire lock after reader releases")
		case <-time.After(3 * time.Second):
			t.Fatal("test timed out waiting for writer")
		}
	})
}

// TestChartLockResultRelease tests the Release method of chartLockResult
// handles both shared and exclusive locks correctly.
func TestChartLockResultRelease(t *testing.T) {
	t.Run("release exclusive lock", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-release-excl-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test.lock")
		fileLock := flock.New(lockFilePath)
		err = fileLock.Lock()
		require.NoError(t, err)

		rwMutex := &sync.RWMutex{}
		rwMutex.Lock()

		result := &chartLockResult{
			fileLock:       fileLock,
			inProcessMutex: rwMutex,
			isExclusive:    true,
			chartPath:      lockFilePath,
		}

		// Release should not panic
		result.Release(nil)

		// Verify lock is released - should be able to acquire again
		fileLock2 := flock.New(lockFilePath)
		locked, err := fileLock2.TryLock()
		require.NoError(t, err)
		require.True(t, locked, "should be able to acquire lock after release")
		fileLock2.Unlock()
	})

	t.Run("release shared lock", func(t *testing.T) {
		tempDir, err := os.MkdirTemp("", "helmfile-release-shared-*")
		require.NoError(t, err)
		defer os.RemoveAll(tempDir)

		lockFilePath := filepath.Join(tempDir, "test.lock")
		fileLock := flock.New(lockFilePath)
		err = fileLock.RLock()
		require.NoError(t, err)

		rwMutex := &sync.RWMutex{}
		rwMutex.RLock()

		result := &chartLockResult{
			fileLock:       fileLock,
			inProcessMutex: rwMutex,
			isExclusive:    false,
			chartPath:      lockFilePath,
		}

		// Release should not panic
		result.Release(nil)

		// Verify lock is released - should be able to acquire exclusive lock
		fileLock2 := flock.New(lockFilePath)
		locked, err := fileLock2.TryLock()
		require.NoError(t, err)
		require.True(t, locked, "should be able to acquire exclusive lock after shared release")
		fileLock2.Unlock()
	})

	t.Run("release nil result is safe", func(t *testing.T) {
		var result *chartLockResult
		// Should not panic
		result.Release(nil)
	})
}

// TestOCIChartDoubleCheckLocking verifies the double-check locking pattern
// works correctly to avoid unnecessary work when the cache is populated
// by another process while waiting for the lock.
//
// The worker goroutines report failures with t.Errorf instead of require.*,
// because require calls t.FailNow, which must only be invoked from the
// goroutine running the test function.
func TestOCIChartDoubleCheckLocking(t *testing.T) {
	t.Run("second waiter uses cache populated by first", func(t *testing.T) {
		chartPath := filepath.Join(t.TempDir(), "myrepo", "mychart", "1.0.0")
		lockFilePath := chartPath + ".lock"

		// The lock file lives next to the chart directory, so its parent
		// directory has to exist before locking.
		require.NoError(t, os.MkdirAll(filepath.Dir(lockFilePath), 0755))

		var pullCount atomic.Int32
		var wg sync.WaitGroup

		// Simulate two processes trying to download the same chart.
		for i := 0; i < 2; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()

				fileLock := flock.New(lockFilePath)
				if err := fileLock.Lock(); err != nil {
					t.Errorf("acquiring lock: %v", err)
					return
				}
				defer func() {
					if err := fileLock.Unlock(); err != nil {
						t.Errorf("releasing lock: %v", err)
					}
				}()

				// Double-check: after acquiring the lock, see whether the
				// chart directory already exists.
				_, statErr := os.Stat(chartPath)
				if statErr == nil {
					// Directory exists: skip the pull and use the cached chart.
					return
				}
				if !os.IsNotExist(statErr) {
					t.Errorf("checking chart cache: %v", statErr)
					return
				}

				// Simulate the chart pull, including some download time so
				// the other goroutine is waiting on the lock meanwhile.
				time.Sleep(50 * time.Millisecond)
				if err := os.MkdirAll(chartPath, 0755); err != nil {
					t.Errorf("creating chart directory: %v", err)
					return
				}
				if err := os.WriteFile(filepath.Join(chartPath, "Chart.yaml"), []byte("name: mychart"), 0644); err != nil {
					t.Errorf("writing Chart.yaml: %v", err)
					return
				}
				pullCount.Add(1)
			}()
		}

		wg.Wait()

		// Only one process should have actually pulled the chart.
		require.Equal(t, int32(1), pullCount.Load(), "only one process should pull the chart")

		// The chart should exist in the cache.
		_, err := os.Stat(filepath.Join(chartPath, "Chart.yaml"))
		require.NoError(t, err, "chart should exist in cache")
	})
}