fix: include query params in HTTP getter cache key (#2399)

* fix: include query params in HTTP getter cache key (#2103)

When helmfile caches remote HTTP files fetched via the "normal" getter
(plain https:// URLs without a git:: prefix), the cache key did not
include query parameters. This caused URLs that differ only in query
params (e.g. ?ref=commit1 vs ?ref=commit2) to share the same cache
directory, silently returning the wrong file version.

The root cause was in Fetch() where the "normal" getter branch
overwrote the cache key with only scheme + host, discarding query
params that were correctly computed earlier.

Fix: extract the query-params suffix into a reusable variable and
apply it in both the default and "normal" getter cache key paths.
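
For illustration, here is a minimal self-contained sketch of the key shapes involved
(hypothetical cacheKey helper; the real Fetch() also folds in the URL path and redacts
sensitive params):

package main

import (
    "fmt"
    "net/url"
    "strings"
)

// cacheKey mimics the "normal" getter derivation: scheme+host flattened by the
// replacer, optionally suffixed with the encoded query params. This is a sketch,
// not the actual helmfile code.
func cacheKey(raw string, includeQuery bool) string {
    u, _ := url.Parse(raw)
    replacer := strings.NewReplacer(":", "", "//", "_", "/", "_", ".", "_")
    key := replacer.Replace(fmt.Sprintf("%s://%s", u.Scheme, u.Host))
    if includeQuery && u.RawQuery != "" {
        key += "." + strings.ReplaceAll(u.Query().Encode(), "&", "_")
    }
    return key
}

func main() {
    a := "https://gitlab.example.com/files/values.yaml/raw?ref=commit1"
    b := "https://gitlab.example.com/files/values.yaml/raw?ref=commit2"
    fmt.Println(cacheKey(a, false) == cacheKey(b, false)) // true: without query params the keys collide
    fmt.Println(cacheKey(a, true) == cacheKey(b, true))   // false: query params keep the keys distinct
}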

Signed-off-by: Aditya Menon <amenon@canarytechnologies.com>

* Update pkg/remote/remote.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Aditya Menon <amenon@canarytechnologies.com>

* Update pkg/remote/remote_test.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Aditya Menon <amenon@canarytechnologies.com>

---------

Signed-off-by: Aditya Menon <amenon@canarytechnologies.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Aditya Menon 2026-02-14 07:01:53 +05:30 committed by GitHub
parent 5c43fa6465
commit c6b962dbbf
6 changed files with 303 additions and 13 deletions

View File: pkg/remote/remote.go

@@ -2,6 +2,8 @@ package remote
import (
"context"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io"
@@ -235,16 +237,36 @@ func (r *Remote) Fetch(path string, cacheDirOpt ...string) (string, error) {
should_cache := query.Get("cache") != "false"
delete(query, "cache")
// Precompute a redacted copy of the query for cache keys and debug logs.
// Sensitive query parameters are hashed so that different credentials produce
// distinct cache keys without writing the raw secret to disk or log output.
// Note: go-getter may still log the full URL internally.
var paramsSuffix string
var redactedQuery neturl.Values
if len(query) > 0 {
redactedQuery = maps.Clone(query)
for key := range redactedQuery {
lk := strings.ToLower(key)
if strings.Contains(lk, "token") || strings.Contains(lk, "password") ||
strings.Contains(lk, "secret") || strings.Contains(lk, "key") ||
strings.Contains(lk, "signature") {
orig := redactedQuery[key]
hashed := make([]string, len(orig))
for i, v := range orig {
h := sha256.Sum256([]byte(v))
hashed[i] = hex.EncodeToString(h[:])[:8]
}
redactedQuery[key] = hashed
}
}
paramsSuffix = strings.ReplaceAll(redactedQuery.Encode(), "&", "_")
}
var cacheKey string
replacer := strings.NewReplacer(":", "", "//", "_", "/", "_", ".", "_")
dirKey := replacer.Replace(srcDir)
- if len(query) > 0 {
- q := maps.Clone(query)
- if q.Has("sshkey") {
- q.Set("sshkey", "redacted")
- }
- paramsKey := strings.ReplaceAll(q.Encode(), "&", "_")
- cacheKey = fmt.Sprintf("%s.%s", dirKey, paramsKey)
+ if paramsSuffix != "" {
+ cacheKey = fmt.Sprintf("%s.%s", dirKey, paramsSuffix)
} else {
cacheKey = dirKey
}
@@ -258,7 +280,13 @@ func (r *Remote) Fetch(path string, cacheDirOpt ...string) (string, error) {
cacheDirPath := filepath.Join(r.Home, getterDst)
if u.Getter == "normal" {
srcDir = fmt.Sprintf("%s://%s", u.Scheme, u.Host)
- cacheKey = replacer.Replace(srcDir)
+ normalDirKey := replacer.Replace(srcDir)
+ if paramsSuffix != "" {
+ cacheKey = fmt.Sprintf("%s.%s", normalDirKey, paramsSuffix)
+ } else {
+ cacheKey = normalDirKey
+ }
getterDst = filepath.Join(cacheBaseDir, cacheKey)
cacheDirPath = filepath.Join(r.Home, cacheKey, u.Dir)
}
@@ -268,7 +296,7 @@ func (r *Remote) Fetch(path string, cacheDirOpt ...string) (string, error) {
{
if r.fs.FileExistsAt(cacheDirPath) {
return "", fmt.Errorf("%s is not directory. please remove it so that variant could use it for dependency caching", getterDst)
return "", fmt.Errorf("%s is not a directory. Please remove it so that helmfile can use it for dependency caching", cacheDirPath)
}
if u.Getter == "normal" {
@@ -294,7 +322,11 @@ func (r *Remote) Fetch(path string, cacheDirOpt ...string) (string, error) {
getterSrc = strings.Join([]string{getterSrc, query.Encode()}, "?")
}
- r.Logger.Debugf("remote> downloading %s to %s", getterSrc, getterDst)
+ logSrc := path
+ if redactedQuery != nil {
+ logSrc = strings.Join([]string{strings.SplitN(path, "?", 2)[0], redactedQuery.Encode()}, "?")
+ }
+ r.Logger.Debugf("remote> downloading %s to %s", logSrc, cacheDirPath)
switch {
case u.Getter == "normal" && u.Scheme == "s3":

View File: pkg/remote/remote_test.go

@@ -157,8 +157,11 @@ func TestRemote_SShGitHub_WithSshKey(t *testing.T) {
cleanfs := map[string]string{
CacheDir(): "",
}
// Note: "0b44c081" is the first 8 characters of the SHA256 hash of the test SSH key.
// It is intentionally hardcoded here as part of the expected cache key format and replaces
// the previous "redacted" placeholder to reflect the actual hashing behavior.
cachefs := map[string]string{
- filepath.Join(CacheDir(), "ssh_github_com_helmfile_helmfiles_git.ref=0.40.0_sshkey=redacted/releases/kiam.yaml"): "foo: bar",
+ filepath.Join(CacheDir(), "ssh_github_com_helmfile_helmfiles_git.ref=0.40.0_sshkey=0b44c081/releases/kiam.yaml"): "foo: bar",
}
testcases := []struct {
@@ -205,7 +208,7 @@ func TestRemote_SShGitHub_WithSshKey(t *testing.T) {
t.Fatalf("unexpected error: %v", err)
}
- expectedFile := filepath.Join(CacheDir(), "ssh_github_com_helmfile_helmfiles_git.ref=0.40.0_sshkey=redacted/releases/kiam.yaml")
+ expectedFile := filepath.Join(CacheDir(), "ssh_github_com_helmfile_helmfiles_git.ref=0.40.0_sshkey=0b44c081/releases/kiam.yaml")
if file != expectedFile {
t.Errorf("unexpected file located: %s vs expected: %s", file, expectedFile)
}
@@ -497,6 +500,15 @@ func TestParse(t *testing.T) {
file: "values.yaml",
query: "",
},
{
name: "https scheme normal with query params",
input: "https://gitlab.example.com/api/v4/projects/test/repository/files/values.yaml/raw?ref=abc123",
getter: "normal",
scheme: "https",
dir: "api/v4/projects/test/repository/files/values.yaml",
file: "raw",
query: "ref=abc123",
},
}
for _, tt := range testcases {
@@ -685,3 +697,110 @@ func TestAWSSDKLogLevelInit(t *testing.T) {
})
}
}
func TestRemote_HttpUrlWithQueryParams(t *testing.T) {
cacheDir := CacheDir()
cleanfs := map[string]string{
cacheDir: "",
}
cachefs := map[string]string{
filepath.Join(cacheDir, "https_gitlab_example_com.ref=abc123/api/v4/projects/test/repository/files/values.yaml", "raw"): "cached: content",
}
testcases := []struct {
name string
files map[string]string
expectCacheHit bool
}{
{name: "cache miss", files: cleanfs, expectCacheHit: false},
{name: "cache hit", files: cachefs, expectCacheHit: true},
}
for _, tt := range testcases {
t.Run(tt.name, func(t *testing.T) {
testfs := testhelper.NewTestFs(tt.files)
hit := true
get := func(wd, src, dst string) error {
hit = false
return nil
}
getter := &testGetter{get: get}
remote := &Remote{
Logger: helmexec.NewLogger(io.Discard, "debug"),
Home: cacheDir,
Getter: getter,
S3Getter: getter,
HttpGetter: getter,
fs: testfs.ToFileSystem(),
}
url := "https://gitlab.example.com/api/v4/projects/test/repository/files/values.yaml/raw?ref=abc123"
file, err := remote.Fetch(url)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
expectedFile := filepath.Join(cacheDir, "https_gitlab_example_com.ref=abc123/api/v4/projects/test/repository/files/values.yaml", "raw")
if file != expectedFile {
t.Errorf("unexpected file located: %s vs expected: %s", file, expectedFile)
}
if tt.expectCacheHit && !hit {
t.Errorf("unexpected cache miss")
}
if !tt.expectCacheHit && hit {
t.Errorf("unexpected cache hit")
}
})
}
}
func TestRemote_HttpUrlQueryParamsAvoidCacheCollision(t *testing.T) {
cacheDir := CacheDir()
cleanfs := map[string]string{
cacheDir: "",
}
testfs := testhelper.NewTestFs(cleanfs)
get := func(wd, src, dst string) error {
return nil
}
getter := &testGetter{get: get}
remote := &Remote{
Logger: helmexec.NewLogger(io.Discard, "debug"),
Home: cacheDir,
Getter: getter,
S3Getter: getter,
HttpGetter: getter,
fs: testfs.ToFileSystem(),
}
url1 := "https://gitlab.example.com/api/v4/projects/test/repository/files/values.yaml/raw?ref=29b5609"
url2 := "https://gitlab.example.com/api/v4/projects/test/repository/files/values.yaml/raw?ref=d80839c"
file1, err := remote.Fetch(url1)
if err != nil {
t.Fatalf("unexpected error fetching url1: %v", err)
}
file2, err := remote.Fetch(url2)
if err != nil {
t.Fatalf("unexpected error fetching url2: %v", err)
}
if file1 == file2 {
t.Errorf("expected different cache paths for different query params, but both resolved to: %s", file1)
}
// Verify both contain the ref in the path
if !strings.Contains(file1, "ref=29b5609") {
t.Errorf("expected file1 path to contain ref=29b5609, got: %s", file1)
}
if !strings.Contains(file2, "ref=d80839c") {
t.Errorf("expected file2 path to contain ref=d80839c, got: %s", file2)
}
}

View File

@@ -36,7 +36,12 @@ export SOPS_PGP_FP="B2D6D7BBEC03B2E66571C8C00AD18E16CFDEF700"
# FUNCTIONS ----------------------------------------------------------------------------------------------------------
function wait_deploy_ready() {
- ${kubectl} rollout status deployment ${1}
+ ${kubectl} rollout status deployment ${1} --timeout=300s || {
+ info "Deployment ${1} rollout timed out, checking pod status:"
+ ${kubectl} get pods -o wide --namespace=${test_ns} 2>/dev/null || true
+ ${kubectl} describe deployment ${1} --namespace=${test_ns} 2>/dev/null | tail -20 || true
+ fail "Deployment ${1} failed to become ready within 300s"
+ }
while [ "$(${kubectl} get deploy ${1} -o=jsonpath='{.status.readyReplicas}')" == "0" ]; do
info "Waiting for deployment ${1} to be ready"
sleep 1
@@ -123,6 +128,7 @@ ${kubectl} create namespace ${test_ns} || fail "Could not create namespace ${tes
. ${dir}/test-cases/issue-2297-local-chart-transformers.sh
. ${dir}/test-cases/issue-2309-kube-context-template.sh
. ${dir}/test-cases/issue-2355.sh
. ${dir}/test-cases/issue-2103.sh
# ALL DONE -----------------------------------------------------------------------------------------------------------

View File: test-cases/issue-2103.sh (new file)

@@ -0,0 +1,86 @@
#!/usr/bin/env bash
# Test for issue #2103: HTTP remote cache key should include query parameters.
# Without the fix, two URLs differing only in ?ref= share the same cache and
# the second fetch silently returns stale content from the first.
issue_2103_input_dir="${cases_dir}/issue-2103/input"
issue_2103_tmp_dir=$(mktemp -d)
cleanup_issue_2103() {
kill "${server_pid}" 2>/dev/null || true
rm -rf "${issue_2103_tmp_dir}"
unset HELMFILE_CACHE_HOME HTTP_SERVER_URL
}
trap cleanup_issue_2103 EXIT
test_start "issue-2103: HTTP cache key includes query params"
# --- Start a small HTTP server that returns different YAML per ?ref= ----------
info "Building test HTTP server"
go build -o "${issue_2103_tmp_dir}/server" "${issue_2103_input_dir}/server.go" \
|| fail "Could not build test HTTP server"
"${issue_2103_tmp_dir}/server" > "${issue_2103_tmp_dir}/server_addr.txt" &
server_pid=$!
# Poll until the server writes its address (up to 10 seconds: 20 x 0.5s)
for i in $(seq 1 20); do
if ! kill -0 "${server_pid}" 2>/dev/null; then
fail "Test HTTP server failed to start"
fi
if [ -s "${issue_2103_tmp_dir}/server_addr.txt" ]; then
break
fi
sleep 0.5
done
if [ ! -s "${issue_2103_tmp_dir}/server_addr.txt" ]; then
fail "Test HTTP server did not write its address in time"
fi
server_url=$(cat "${issue_2103_tmp_dir}/server_addr.txt")
info "Test HTTP server running at ${server_url}"
# --- Fetch remote values through helmfile and verify cache has both refs ------
export HTTP_SERVER_URL="${server_url}"
export TEST_NS="${test_ns}"
export HELMFILE_CACHE_HOME="${issue_2103_tmp_dir}/cache"
info "Running helmfile template with two releases using different ?ref= values"
${helmfile} -f "${issue_2103_input_dir}/helmfile.yaml.gotmpl" template \
> "${issue_2103_tmp_dir}/template_output.txt" 2>&1 || {
helmfile_exit_code=$?
info "helmfile template output:"
cat "${issue_2103_tmp_dir}/template_output.txt"
fail "helmfile template failed with exit code ${helmfile_exit_code}"
}
# Verify that two distinct cache directories were created for the two refs.
# With the fix, the cache key includes the query params so each ref gets its own dir.
# Without the fix, both refs would share one cache dir and the second would be stale.
info "Checking cached files for distinct ref values"
cached_commit1=$(find "${issue_2103_tmp_dir}/cache" -type f -name "raw" -exec grep -l "version: commit1" {} \; || true)
cached_commit2=$(find "${issue_2103_tmp_dir}/cache" -type f -name "raw" -exec grep -l "version: commit2" {} \; || true)
if [ -n "${cached_commit1}" ] && [ -n "${cached_commit2}" ]; then
info "Found separate cached files:"
info " commit1: ${cached_commit1}"
info " commit2: ${cached_commit2}"
if [ "${cached_commit1}" = "${cached_commit2}" ]; then
fail "Issue #2103 regression: both refs cached to the same file"
fi
info "Cache keys are distinct — fix is working"
else
info "Cache directory contents:"
find "${issue_2103_tmp_dir}/cache" -type f 2>/dev/null
info "Template output:"
cat "${issue_2103_tmp_dir}/template_output.txt"
fail "Issue #2103 regression: query-param-specific values were not cached separately"
fi
# Clean up the test server and temp dir now that the checks have passed, then clear the trap.
cleanup_issue_2103
trap - EXIT
test_pass "issue-2103: HTTP cache key includes query params"

View File: test-cases/issue-2103/input/helmfile.yaml.gotmpl (new file)

@@ -0,0 +1,11 @@
releases:
- name: test-release-commit1
namespace: {{ env "TEST_NS" | default "default" }}
chart: ../../../charts/raw
values:
- {{ env "HTTP_SERVER_URL" }}/api/v4/projects/test/files/values.yaml/raw?ref=commit1
- name: test-release-commit2
namespace: {{ env "TEST_NS" | default "default" }}
chart: ../../../charts/raw
values:
- {{ env "HTTP_SERVER_URL" }}/api/v4/projects/test/files/values.yaml/raw?ref=commit2

View File: test-cases/issue-2103/input/server.go (new file)

@@ -0,0 +1,36 @@
// server.go is a small HTTP server used by the issue-2103 integration test.
// It serves different YAML content based on the "ref" query parameter.
package main
import (
"fmt"
"net"
"net/http"
"os"
)
func main() {
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
ref := r.URL.Query().Get("ref")
if ref == "" {
ref = "unknown"
}
w.Header().Set("Content-Type", "text/plain")
_, _ = fmt.Fprintf(w, "version: %s\n", ref)
})
listener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to listen: %v\n", err)
os.Exit(1)
}
// Print the address so the test script can capture it.
fmt.Printf("http://%s\n", listener.Addr().String())
if err := http.Serve(listener, mux); err != nil {
fmt.Fprintf(os.Stderr, "Server error: %v\n", err)
os.Exit(1)
}
}