// pkg/executor/build.go

// deduplicatePaths returns paths with exact duplicates removed and with every
// path that lies beneath another listed path dropped, so that only the
// shortest covering paths remain.
// For example {"usr/lib", "usr/lib/ssl"} will return only {"usr/lib"}.
//
// A path counts as nested when one of its proper prefixes ending at a "/"
// boundary is itself present in paths. Matching is therefore per component:
// "usr/lib" covers "usr/lib/ssl" but not "usr/libexec". Paths are compared
// exactly as given; no cleaning or normalisation is applied.
//
// Surviving paths keep the order of their first occurrence in paths, so the
// result is deterministic. The returned slice is never nil.
func deduplicatePaths(paths []string) []string {
	// emitted maps every input path to whether it has already been appended
	// to the result. Key presence alone marks the path as listed.
	emitted := make(map[string]bool, len(paths))
	for _, p := range paths {
		emitted[p] = false
	}

	// covered reports whether some proper ancestor of p is listed.
	covered := func(p string) bool {
		for i := 0; i < len(p); i++ {
			if p[i] != '/' {
				continue
			}
			if _, ok := emitted[p[:i]]; ok {
				return true
			}
		}
		return false
	}

	deduped := make([]string, 0, len(paths))
	for _, p := range paths {
		if emitted[p] || covered(p) {
			continue
		}
		emitted[p] = true
		deduped = append(deduped, p)
	}
	return deduped
}

// pkg/executor/build_test.go

// TestDeduplicatePaths checks that deduplicatePaths removes exact duplicates
// and nested paths while keeping the survivors in first-occurrence order.
func TestDeduplicatePaths(t *testing.T) {
	tests := []struct {
		name  string
		input []string
		want  []string
	}{
		{
			name:  "no duplicates",
			input: []string{"file1.txt", "file2.txt", "usr/lib"},
			want:  []string{"file1.txt", "file2.txt", "usr/lib"},
		},
		{
			name:  "duplicates",
			input: []string{"file1.txt", "file2.txt", "file2.txt", "usr/lib"},
			want:  []string{"file1.txt", "file2.txt", "usr/lib"},
		},
		{
			name:  "duplicates with paths",
			input: []string{"file1.txt", "file2.txt", "file2.txt", "usr/lib", "usr/lib/ssl"},
			want:  []string{"file1.txt", "file2.txt", "usr/lib"},
		},
		{
			name:  "nested path listed before its parent",
			input: []string{"usr/lib/ssl", "file1.txt", "usr/lib"},
			want:  []string{"file1.txt", "usr/lib"},
		},
		{
			name:  "sibling sharing a name prefix is kept",
			input: []string{"usr/lib", "usr/libexec"},
			want:  []string{"usr/lib", "usr/libexec"},
		},
		{
			name:  "empty input",
			input: nil,
			want:  []string{},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// No sorting here: the output order is part of the contract.
			got := deduplicatePaths(tt.input)
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("TestDeduplicatePaths() = %v, want %v", got, tt.want)
			}
		})
	}
}