Merge pull request #1069 from cvgw/u/cgwippern/implement-filepath-resolver

Resolve filepaths before scanning for changes
This commit is contained in:
Tejal Desai 2020-02-25 00:19:38 -08:00 committed by GitHub
commit a1af057f99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 611 additions and 60 deletions

View File

@ -0,0 +1,10 @@
# Base image: Red Hat UBI 7, pinned to tag 7.7-214.
FROM registry.access.redhat.com/ubi7/ubi:7.7-214
# Install GCC, GCC-C++ and make libraries for build environment
# Then clean caches
# All three yum steps (update, install, clean) are chained with && inside a
# single RUN instruction, and every yum invocation disables the
# subscription-manager plugin. The installed packages are pinned to exact
# versions.
RUN yum --disableplugin=subscription-manager update -y \
&& yum --disableplugin=subscription-manager install -y \
gcc-4.8.5-39.el7 \
gcc-c++-4.8.5-39.el7 \
make-3.82-24.el7 \
&& yum --disableplugin=subscription-manager clean all

164
pkg/filesystem/resolve.go Normal file
View File

@ -0,0 +1,164 @@
/*
Copyright 2020 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filesystem
import (
"os"
"path/filepath"
"github.com/GoogleContainerTools/kaniko/pkg/util"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// ResolvePaths takes a slice of file paths and a slice of whitelist entries. It resolves each
// file path according to a set of rules and then returns a slice of resolved paths or an error.
// File paths are resolved according to the following rules:
// * If path is whitelisted, skip it.
// * If path is a symlink, resolve its ancestor link and add it to the output set.
// * If path is a symlink, resolve its target. If the target exists and is not whitelisted,
//   add it to the output set. A dead link (missing target) contributes only the link itself.
// * Add all ancestors of each path to the output set.
//
// The returned slice contains no duplicates and is in no particular order (it is built from a
// map by filesWithParentDirs); callers that need a stable order must sort it. On error the
// returned slice is nil.
func ResolvePaths(paths []string, wl []util.WhitelistEntry) (pathsToAdd []string, err error) {
	logrus.Info("Resolving paths")

	// seen tracks which paths are already in pathsToAdd so each is appended once.
	seen := make(map[string]bool)

	for _, f := range paths {
		// If the given path is part of the whitelist ignore it
		if util.IsInProvidedWhitelist(f, wl) {
			logrus.Debugf("path %s is whitelisted, ignoring it", f)
			continue
		}

		link, e := resolveSymlinkAncestor(f)
		if e != nil {
			return nil, e
		}

		if f != link {
			logrus.Tracef("updated link %s to %s", f, link)
		}

		if !seen[link] {
			seen[link] = true
			pathsToAdd = append(pathsToAdd, link)
		}

		// If the path is a symlink we need to also consider the target of that
		// link
		evaled, e := filepath.EvalSymlinks(f)
		if e != nil {
			if !os.IsNotExist(e) {
				logrus.Errorf("couldn't eval %s with link %s", f, link)
				return nil, e
			}

			// Dead symlink: there is no target to add. Skip the rest of the
			// iteration so that neither an empty path nor the not-exist error
			// leaks into the result.
			logrus.Debugf("symlink path %s, target does not exist", f)
			continue
		}

		// If the given path is a symlink and the target is part of the whitelist
		// ignore the target
		if util.IsInProvidedWhitelist(evaled, wl) {
			logrus.Debugf("path %s is whitelisted, ignoring it", evaled)
			continue
		}

		if !seen[evaled] {
			seen[evaled] = true
			pathsToAdd = append(pathsToAdd, evaled)
		}
	}

	// Also add parent directories to keep the permission of them correctly.
	return filesWithParentDirs(pathsToAdd), nil
}
// filesWithParentDirs returns the cleaned form of every provided path together with each
// ancestor directory reported by util.ParentDirectories, without duplicates and in no
// particular order. The result is never nil, even for empty input.
// E.g. /foo/bar/baz/boom.txt => [/, /foo, /foo/bar, /foo/bar/baz, /foo/bar/baz/boom.txt]
func filesWithParentDirs(files []string) []string {
	unique := make(map[string]struct{})

	for _, f := range files {
		cleaned := filepath.Clean(f)
		unique[cleaned] = struct{}{}

		for _, parent := range util.ParentDirectories(cleaned) {
			unique[filepath.Clean(parent)] = struct{}{}
		}
	}

	result := make([]string, 0, len(unique))
	for p := range unique {
		result = append(result, p)
	}
	return result
}
// resolveSymlinkAncestor returns the ancestor link of the provided path, or the path itself
// when no component of it is a symlink. The ancestor link is the highest path component whose
// filenode type is Symlink.
// E.g. /baz/boom/bar.txt resolves to /usr/bin/bar.txt, but /baz/boom/bar.txt itself is not a
// link; instead /baz/boom is a link to /usr/bin. In this case resolveSymlinkAncestor returns
// /baz/boom.
//
// The path must be absolute. An error is returned if any visited component cannot be
// lstat'ed or if evaluating its symlinks fails.
func resolveSymlinkAncestor(path string) (string, error) {
	if !filepath.IsAbs(path) {
		return "", errors.New("dest path must be abs")
	}

	// Walk upwards from path. current is the prefix still to be examined and
	// child is the component most recently stripped from it.
	child := ""
	current := path

	for current != "/" {
		info, err := os.Lstat(current)
		if err != nil {
			return "", errors.Wrap(err, "failed to lstat")
		}

		if !util.IsSymlink(info) {
			// Even if the filenode pointed to by current is not itself a link,
			// one of its ancestors could be. If EvalSymlinks returns something
			// different from its input, some node in the path is a link and we
			// must keep climbing; otherwise the prefix is link-free and we stop.
			resolved, err := filepath.EvalSymlinks(current)
			if err != nil {
				return "", err
			}
			if resolved == current {
				break
			}
		}

		child = filepath.Base(current)
		current = filepath.Dir(current)
	}

	// Re-attach the last stripped component: it is either the topmost link or,
	// when nothing was stripped, empty (leaving the original path).
	return filepath.Clean(filepath.Join(current, child)), nil
}

View File

@ -0,0 +1,378 @@
/*
Copyright 2020 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filesystem
import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
"sort"
"testing"
"github.com/GoogleContainerTools/kaniko/pkg/util"
)
// Test_ResolvePaths exercises ResolvePaths against symlinks created in a
// temporary directory, with and without whitelist entries, and against an
// empty input.
func Test_ResolvePaths(t *testing.T) {
// validateResults reports a test error if err is non-nil or if the two path
// slices differ. Both slices are sorted in place before being compared.
validateResults := func(
t *testing.T,
actualFiles,
expectedFiles []string,
err error,
) {
if err != nil {
t.Errorf("expected err to be nil but was %s", err)
}
// Sort so that comparison is against consistent order
sort.Strings(actualFiles)
sort.Strings(expectedFiles)
// reflect.DeepEqual distinguishes a nil slice from an empty one, so the
// result of ResolvePaths must be non-nil even when it is empty.
if !reflect.DeepEqual(actualFiles, expectedFiles) {
t.Errorf("expected files to equal %s but was %s",
expectedFiles, actualFiles,
)
}
}
t.Run("list of files", func(t *testing.T) {
dir, err := ioutil.TempDir("", "snapshot-test")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(dir)
// Paths relative to the "link" and "target" trees created under dir.
files := []string{
"/foo/bar.txt",
"/baz/boom.txt",
}
t.Run("all are symlinks", func(t *testing.T) {
// For each entry create an empty file under <dir>/target and a symlink
// to it at the same relative path under <dir>/link. The nested sub-tests
// below share this on-disk state.
for _, f := range files {
fLink := filepath.Join(dir, "link", f)
fTarget := filepath.Join(dir, "target", f)
if err := os.MkdirAll(filepath.Dir(fTarget), 0777); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(fTarget, []byte{}, 0777); err != nil {
t.Fatal(err)
}
if err := os.MkdirAll(filepath.Dir(fLink), 0777); err != nil {
t.Fatal(err)
}
if err := os.Symlink(fTarget, fLink); err != nil {
t.Fatal(err)
}
}
t.Run("none are whitelisted", func(t *testing.T) {
wl := []util.WhitelistEntry{}
inputFiles := []string{}
expectedFiles := []string{}
// Only the links are passed in; both each link and its target are
// expected back.
for _, f := range files {
link := filepath.Join(dir, "link", f)
expectedFiles = append(expectedFiles, link)
inputFiles = append(inputFiles, link)
target := filepath.Join(dir, "target", f)
expectedFiles = append(expectedFiles, target)
}
// The expectation also includes every parent directory of those paths.
expectedFiles = filesWithParentDirs(expectedFiles)
// NOTE: this declares a new files that shadows the outer slice of
// relative paths for the rest of this sub-test.
files, err := ResolvePaths(inputFiles, wl)
validateResults(t, files, expectedFiles, err)
})
t.Run("some are whitelisted", func(t *testing.T) {
// Whitelist one directory on the link side and one on the target side.
wl := []util.WhitelistEntry{
{
Path: filepath.Join(dir, "link", "baz"),
},
{
Path: filepath.Join(dir, "target", "foo"),
},
}
expectedFiles := []string{}
inputFiles := []string{}
// The expected set is computed with the same util.IsInProvidedWhitelist
// check, mirroring the skip rules in ResolvePaths: a whitelisted link
// drops both the link and its target, a whitelisted target drops only
// the target.
for _, f := range files {
link := filepath.Join(dir, "link", f)
inputFiles = append(inputFiles, link)
if util.IsInProvidedWhitelist(link, wl) {
t.Logf("skipping %s", link)
continue
}
expectedFiles = append(expectedFiles, link)
target := filepath.Join(dir, "target", f)
if util.IsInProvidedWhitelist(target, wl) {
t.Logf("skipping %s", target)
continue
}
expectedFiles = append(expectedFiles, target)
}
// Additionally create a directory symlink <dir>/link/zoom pointing at
// <dir>/target/zaam, which contains meow.txt, and pass the file in via
// the link directory.
link := filepath.Join(dir, "link", "zoom/")
target := filepath.Join(dir, "target", "zaam/")
if err := os.MkdirAll(target, 0777); err != nil {
t.Fatal(err)
}
if err := ioutil.WriteFile(filepath.Join(target, "meow.txt"), []byte{}, 0777); err != nil {
t.Fatal(err)
}
if err := os.Symlink(target, link); err != nil {
t.Fatal(err)
}
file := filepath.Join(link, "meow.txt")
inputFiles = append(inputFiles, file)
// For a file reached through a linked directory, the link directory
// itself and the real file under the target directory are expected.
expectedFiles = append(expectedFiles, link)
targetFile := filepath.Join(target, "meow.txt")
expectedFiles = append(expectedFiles, targetFile)
expectedFiles = filesWithParentDirs(expectedFiles)
// NOTE: as above, this files shadows the outer slice of relative paths.
files, err := ResolvePaths(inputFiles, wl)
validateResults(t, files, expectedFiles, err)
})
})
})
t.Run("empty set of files", func(t *testing.T) {
// With no input, an empty (non-nil) slice and no error are expected.
inputFiles := []string{}
expectedFiles := []string{}
wl := []util.WhitelistEntry{}
files, err := ResolvePaths(inputFiles, wl)
validateResults(t, files, expectedFiles, err)
})
}
// Test_resolveSymlinkAncestor checks resolveSymlinkAncestor against a small
// directory tree that is rebuilt in a fresh temporary directory for every
// sub-test.
func Test_resolveSymlinkAncestor(t *testing.T) {
	// must aborts the running (sub)test when a fixture step fails.
	must := func(t *testing.T, err error) {
		if err != nil {
			t.Fatal(err)
		}
	}

	// newFixture creates <root>/bar/baz/bam.txt and returns the root directory
	// together with the path of that file.
	newFixture := func(t *testing.T) (string, string) {
		root, err := ioutil.TempDir("", "")
		must(t, err)

		file := filepath.Join(root, "bar", "baz", "bam.txt")
		must(t, os.MkdirAll(filepath.Dir(file), 0777))
		must(t, ioutil.WriteFile(file, []byte("meow"), 0777))
		return root, file
	}

	// linkToFile creates <root>/foo/buzz/zoom.txt as a symlink to file and
	// returns the path of the link.
	linkToFile := func(t *testing.T, root, file string) string {
		link := filepath.Join(root, "foo", "buzz", "zoom.txt")
		must(t, os.MkdirAll(filepath.Dir(link), 0777))
		must(t, os.Symlink(file, link))
		return link
	}

	// linkToParent creates <root>/foo/gaz as a symlink to the directory that
	// contains file and returns the path of the link.
	linkToParent := func(t *testing.T, root, file string) string {
		must(t, os.MkdirAll(filepath.Join(root, "foo"), 0777))
		link := filepath.Join(root, "foo", "gaz")
		must(t, os.Symlink(filepath.Dir(file), link))
		return link
	}

	// expectResolved asserts that input resolves to want without an error.
	expectResolved := func(t *testing.T, input, want string) {
		got, err := resolveSymlinkAncestor(input)
		if err != nil {
			t.Errorf("expected err to be nil but was %s", err)
		}
		if got != want {
			t.Errorf("expected result to be %s not %s", want, got)
		}
	}

	t.Run("path is a symlink", func(t *testing.T) {
		root, file := newFixture(t)
		defer os.RemoveAll(root)

		link := linkToFile(t, root, file)
		expectResolved(t, link, link)
	})

	t.Run("path is a dead symlink", func(t *testing.T) {
		root, file := newFixture(t)
		defer os.RemoveAll(root)

		link := linkToFile(t, root, file)
		// Remove the target so the link dangles.
		must(t, os.Remove(file))
		expectResolved(t, link, link)
	})

	t.Run("path is not a symlink", func(t *testing.T) {
		root, file := newFixture(t)
		defer os.RemoveAll(root)

		expectResolved(t, file, file)
	})

	t.Run("parent of path is a symlink", func(t *testing.T) {
		root, file := newFixture(t)
		defer os.RemoveAll(root)

		linkDir := linkToParent(t, root, file)
		viaLink := filepath.Join(linkDir, filepath.Base(file))
		expectResolved(t, viaLink, linkDir)
	})

	t.Run("parent of path is a dead symlink", func(t *testing.T) {
		root, file := newFixture(t)
		defer os.RemoveAll(root)

		linkDir := linkToParent(t, root, file)
		// Remove the linked-to directory so the parent link dangles.
		must(t, os.RemoveAll(filepath.Dir(file)))

		viaLink := filepath.Join(linkDir, filepath.Base(file))
		if _, err := resolveSymlinkAncestor(viaLink); err == nil {
			t.Error("expected err to not be nil")
		}
	})

	t.Run("great grandparent of path is a symlink", func(t *testing.T) {
		root, file := newFixture(t)
		defer os.RemoveAll(root)

		// <root>/foo links to <root>/bar, two levels above the file.
		fileDir := filepath.Dir(file)
		linkDir := filepath.Join(root, "foo")
		must(t, os.Symlink(filepath.Dir(fileDir), linkDir))

		viaLink := filepath.Join(linkDir, filepath.Base(fileDir), filepath.Base(file))
		expectResolved(t, viaLink, linkDir)
	})
}

View File

@ -24,6 +24,7 @@ import (
"sort"
"syscall"
"github.com/GoogleContainerTools/kaniko/pkg/filesystem"
"github.com/GoogleContainerTools/kaniko/pkg/timing"
"github.com/karrick/godirwalk"
@ -41,11 +42,12 @@ var snapshotPathPrefix = constants.KanikoDir
type Snapshotter struct {
l *LayeredMap
directory string
whitelist []util.WhitelistEntry
}
// NewSnapshotter creates a new snapshotter rooted at d
func NewSnapshotter(l *LayeredMap, d string) *Snapshotter {
return &Snapshotter{l: l, directory: d}
return &Snapshotter{l: l, directory: d, whitelist: util.Whitelist()}
}
// Init initializes a new snapshotter
@ -73,12 +75,15 @@ func (s *Snapshotter) TakeSnapshot(files []string) (string, error) {
logrus.Info("No files changed in this command, skipping snapshotting.")
return "", nil
}
filesToAdd, err := filesystem.ResolvePaths(files, s.whitelist)
if err != nil {
return "", nil
}
logrus.Info("Taking snapshot of files...")
logrus.Debugf("Taking snapshot of files %v", files)
// Also add parent directories to keep the permission of them correctly.
filesToAdd := filesWithParentDirs(files)
sort.Strings(filesToAdd)
// Add files to the layered map
@ -130,18 +135,23 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
s.l.Snapshot()
timer := timing.Start("Walking filesystem")
// Save the fs state in a map to iterate over later.
memFs := map[string]*godirwalk.Dirent{}
foundPaths := make([]string, 0)
godirwalk.Walk(s.directory, &godirwalk.Options{
Callback: func(path string, ent *godirwalk.Dirent) error {
if util.IsInWhitelist(path) {
if util.IsDestDir(path) {
logrus.Tracef("Skipping paths under %s, as it is a whitelisted directory", path)
return filepath.SkipDir
}
return nil
}
memFs[path] = ent
foundPaths = append(foundPaths, path)
return nil
},
Unsorted: true,
@ -149,19 +159,31 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
)
timing.DefaultRun.Stop(timer)
resolvedFiles, err := filesystem.ResolvePaths(foundPaths, s.whitelist)
if err != nil {
return nil, nil, err
}
resolvedMemFs := make(map[string]bool)
for _, f := range resolvedFiles {
resolvedMemFs[f] = true
}
// First handle whiteouts
// Get a list of all the files that existed before this layer
existingPaths := s.l.getFlattenedPathsForWhiteOut()
// Find the delta by removing everything left in this layer.
for p := range memFs {
for p := range resolvedMemFs {
delete(existingPaths, p)
}
// The paths left here are the ones that have been deleted in this layer.
filesToWhiteOut := []string{}
for path := range existingPaths {
// Only add the whiteout if the directory for the file still exists.
dir := filepath.Dir(path)
if _, ok := memFs[dir]; ok {
if _, ok := resolvedMemFs[dir]; ok {
if s.l.MaybeAddWhiteout(path) {
logrus.Debugf("Adding whiteout for %s", path)
filesToWhiteOut = append(filesToWhiteOut, path)
@ -170,7 +192,7 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
}
filesToAdd := []string{}
for path := range memFs {
for path := range resolvedMemFs {
if util.CheckWhitelist(path) {
logrus.Tracef("Not adding %s to layer, as it's whitelisted", path)
continue
@ -181,19 +203,11 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
return nil, nil, fmt.Errorf("could not check if file has changed %s %s", path, err)
}
if fileChanged {
// Get target file for symlinks so the symlink is not a dead link.
files, err := filesWithLinks(path)
if err != nil {
return nil, nil, err
}
logrus.Tracef("Adding files %s to layer, because it was changed.", files)
filesToAdd = append(filesToAdd, files...)
logrus.Tracef("Adding file %s to layer, because it was changed.", path)
filesToAdd = append(filesToAdd, path)
}
}
// Also add parent directories to keep their permissions correctly.
filesToAdd = filesWithParentDirs(filesToAdd)
sort.Strings(filesToAdd)
// Add files to the layered map
for _, file := range filesToAdd {
@ -221,27 +235,6 @@ func writeToTar(t util.Tar, files, whiteouts []string) error {
return nil
}
func filesWithParentDirs(files []string) []string {
filesSet := map[string]bool{}
for _, file := range files {
file = filepath.Clean(file)
filesSet[file] = true
for _, dir := range util.ParentDirectories(file) {
dir = filepath.Clean(dir)
filesSet[dir] = true
}
}
newFiles := []string{}
for file := range filesSet {
newFiles = append(newFiles, file)
}
return newFiles
}
// filesWithLinks returns the symlink and the target path if its exists.
func filesWithLinks(path string) ([]string, error) {
link, err := util.GetSymLink(path)

View File

@ -63,19 +63,16 @@ func TestSnapshotFSFileChange(t *testing.T) {
fooPath: "newbaz1",
batPath: "baz",
}
for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(fooPath) {
snapshotFiles[dir] = ""
}
for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
snapshotFiles[dir] = ""
}
numFiles := 0
actualFiles := []string{}
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
numFiles++
actualFiles = append(actualFiles, hdr.Name)
if _, isFile := snapshotFiles[hdr.Name]; !isFile {
t.Fatalf("File %s unexpectedly in tar", hdr.Name)
}
@ -84,8 +81,8 @@ func TestSnapshotFSFileChange(t *testing.T) {
t.Fatalf("Contents of %s incorrect, expected: %s, actual: %s", hdr.Name, snapshotFiles[hdr.Name], string(contents))
}
}
if numFiles != len(snapshotFiles) {
t.Fatalf("Incorrect number of files were added, expected: 2, actual: %v", numFiles)
if len(actualFiles) != len(snapshotFiles) {
t.Fatalf("Incorrect number of files were added, expected: %d, actual: %d", len(snapshotFiles), len(actualFiles))
}
}
@ -155,17 +152,15 @@ func TestSnapshotFSChangePermissions(t *testing.T) {
snapshotFiles := map[string]string{
batPathWithoutLeadingSlash: "baz2",
}
for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
snapshotFiles[dir] = ""
}
numFiles := 0
foundFiles := []string{}
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
t.Logf("Info %s in tar", hdr.Name)
numFiles++
foundFiles = append(foundFiles, hdr.Name)
if _, isFile := snapshotFiles[hdr.Name]; !isFile {
t.Fatalf("File %s unexpectedly in tar", hdr.Name)
}
@ -174,8 +169,11 @@ func TestSnapshotFSChangePermissions(t *testing.T) {
t.Fatalf("Contents of %s incorrect, expected: %s, actual: %s", hdr.Name, snapshotFiles[hdr.Name], string(contents))
}
}
if numFiles != len(snapshotFiles) {
t.Fatalf("Incorrect number of files were added, expected: 1, got: %v", numFiles)
if len(foundFiles) != len(snapshotFiles) {
t.Logf("expected\n%v\nto equal\n%v", foundFiles, snapshotFiles)
t.Fatalf("Incorrect number of files were added, expected: %d, got: %d",
len(snapshotFiles),
len(foundFiles))
}
}

View File

@ -83,6 +83,10 @@ type FSConfig struct {
type FSOpt func(*FSConfig)
// Whitelist returns the package's current whitelist entries. The slice is
// returned as-is rather than copied, so callers should treat it as read-only.
func Whitelist() []WhitelistEntry {
return whitelist
}
func IncludeWhiteout() FSOpt {
return func(opts *FSConfig) {
opts.includeWhiteout = true
@ -356,8 +360,12 @@ func ExtractFile(dest string, hdr *tar.Header, tr io.Reader) error {
}
func IsInWhitelist(path string) bool {
for _, wl := range whitelist {
if !wl.PrefixMatchOnly && path == wl.Path {
return IsInProvidedWhitelist(path, whitelist)
}
func IsInProvidedWhitelist(path string, wl []WhitelistEntry) bool {
for _, entry := range wl {
if !entry.PrefixMatchOnly && path == entry.Path {
return true
}
}