Resolve filepaths before scanning for changes

This commit is contained in:
cvgw 2020-02-20 08:29:21 -08:00
parent f5f1c3ab0e
commit a675ad998a
6 changed files with 429 additions and 57 deletions

View File

@ -0,0 +1,10 @@
FROM registry.access.redhat.com/ubi7/ubi:7.7-214
# Build environment: update the base image packages, then install
# version-pinned GCC, GCC-C++ and make packages.
# Every yum invocation runs with the subscription-manager plugin disabled.
# The yum caches are cleaned in the same RUN instruction as the install.
RUN yum --disableplugin=subscription-manager update -y \
&& yum --disableplugin=subscription-manager install -y \
gcc-4.8.5-39.el7 \
gcc-c++-4.8.5-39.el7 \
make-3.82-24.el7 \
&& yum --disableplugin=subscription-manager clean all

158
pkg/filesystem/resolve.go Normal file
View File

@ -0,0 +1,158 @@
/*
Copyright 2020 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filesystem
import (
"os"
"path/filepath"
"github.com/GoogleContainerTools/kaniko/pkg/util"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// ResolvePaths takes a slice of file paths and a slice of whitelist entries. It resolves each
// file path according to a set of rules and then returns a slice of resolved paths or an error.
// File paths are resolved according to the following rules:
// * If path is whitelisted, skip it.
// * If path is a symlink, resolve its ancestor link and add it to the output set.
// * If path is a symlink, resolve its target. If the target exists and is not whitelisted,
//   add it to the output set. A link whose target does not exist contributes only the link.
// * Add all ancestors of each path to the output set.
//
// The order of the returned paths is unspecified. On error the returned slice is nil.
func ResolvePaths(paths []string, wl []util.WhitelistEntry) (pathsToAdd []string, err error) {
	logrus.Info("Resolving paths")

	// seen de-duplicates entries; add appends a path only the first time it is encountered.
	seen := make(map[string]bool)
	add := func(p string) {
		if seen[p] {
			return
		}
		seen[p] = true
		pathsToAdd = append(pathsToAdd, p)
	}

	for _, f := range paths {
		// If the given path is part of the whitelist ignore it
		if util.IsInProvidedWhitelist(f, wl) {
			logrus.Debugf("path %s is whitelisted, ignoring it", f)
			continue
		}

		link, e := resolveSymlinkAncestor(f)
		if e != nil {
			return nil, e
		}
		if f != link {
			logrus.Tracef("updated link %s to %s", f, link)
		}
		add(link)

		// If the path is a symlink we need to also consider the target of that
		// link. The error is kept in a loop-local variable so that a tolerated
		// "target does not exist" result can never leak out through the named
		// return value.
		evaled, e := filepath.EvalSymlinks(f)
		if e != nil {
			if !os.IsNotExist(e) {
				logrus.Errorf("couldn't eval %s with link %s", f, link)
				return nil, e
			}
			// Dangling link: there is no target to add, and evaled is empty,
			// so move on rather than recording an empty path.
			logrus.Debugf("symlink path %s, target does not exist", f)
			continue
		}

		// If the given path is a symlink and the target is part of the whitelist
		// ignore the target
		if util.IsInProvidedWhitelist(evaled, wl) {
			logrus.Debugf("path %s is whitelisted, ignoring it", evaled)
			continue
		}
		add(evaled)
	}

	// Also add the parent directories so that their permissions are preserved.
	return filesWithParentDirs(pathsToAdd), nil
}
// filesWithParentDirs expands files into a de-duplicated list that holds each cleaned path
// together with every ancestor reported by util.ParentDirectories for it.
// I.E. /foo/bar/baz/boom.txt => [/, /foo, /foo/bar, /foo/bar/baz, /foo/bar/baz/boom.txt]
// The result is collected from a map, so its order is not defined. It is never nil, even
// for empty input.
func filesWithParentDirs(files []string) []string {
	seen := make(map[string]bool)
	for _, f := range files {
		cleaned := filepath.Clean(f)
		seen[cleaned] = true
		for _, parent := range util.ParentDirectories(cleaned) {
			seen[filepath.Clean(parent)] = true
		}
	}

	expanded := make([]string, 0, len(seen))
	for p := range seen {
		expanded = append(expanded, p)
	}
	return expanded
}
// resolveSymlinkAncestor returns the ancestor link of the provided path, or the cleaned
// path itself if neither it nor any of its parents is a link. The ancestor link is the
// filenode closest to the root whose type is a Symlink.
// E.G. /baz/boom/bar.txt resolves to /usr/bin/bar.txt but /baz/boom/bar.txt itself is not a
// link. Instead /baz/boom is actually a link to /usr/bin. In this case
// resolveSymlinkAncestor would return /baz/boom.
//
// path must be absolute. An error is returned if a visited component cannot be lstat'ed
// or its symlinks cannot be evaluated.
func resolveSymlinkAncestor(path string) (string, error) {
	if !filepath.IsAbs(path) {
		return "", errors.New("dest path must be abs")
	}

	// Clean up front: the walk below compares the path textually against the output of
	// filepath.EvalSymlinks, which is always clean. Without this an input such as "/a/b/"
	// would be mistaken for a path with a link ancestor and come back as "/a/b/b".
	current := filepath.Clean(path)
	last := ""

	// Walk towards the root until reaching a component that is neither a symlink itself
	// nor located beneath one. last remembers the component that was stripped most recently.
	for current != "/" {
		fi, err := os.Lstat(current)
		if err != nil {
			return "", errors.Wrap(err, "failed to lstat")
		}

		if fi.Mode()&os.ModeSymlink == 0 {
			target, err := filepath.EvalSymlinks(current)
			if err != nil {
				return "", err
			}
			if target == current {
				// No symlinks at or above current, so the ancestor link (if any)
				// is current/last.
				break
			}
		}

		last = filepath.Base(current)
		current = filepath.Dir(current)
	}

	return filepath.Clean(filepath.Join(current, last)), nil
}

View File

@ -0,0 +1,185 @@
/*
Copyright 2020 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package filesystem
import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
"sort"
"testing"
"github.com/GoogleContainerTools/kaniko/pkg/util"
)
// Test_ResolvePaths exercises ResolvePaths against symlinked files created inside a temporary
// directory, with and without whitelist entries, and against an empty input.
func Test_ResolvePaths(t *testing.T) {
	// checkPaths reports a test error when err is non-nil or when got and want differ
	// after both have been sorted.
	checkPaths := func(t *testing.T, got, want []string, err error) {
		if err != nil {
			t.Errorf("expected err to be nil but was %s", err)
		}

		// Sort so that comparison is against consistent order
		sort.Strings(got)
		sort.Strings(want)

		if reflect.DeepEqual(got, want) {
			return
		}
		t.Errorf("expected files to equal %s but was %s", want, got)
	}

	t.Run("list of files", func(t *testing.T) {
		dir, err := ioutil.TempDir("", "snapshot-test")
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll(dir)

		linkRoot := filepath.Join(dir, "link")
		targetRoot := filepath.Join(dir, "target")
		names := []string{
			"/foo/bar.txt",
			"/baz/boom.txt",
		}

		t.Run("all are symlinks", func(t *testing.T) {
			// Create every target file, then a symlink to it under the link root.
			for _, name := range names {
				linkPath := filepath.Join(linkRoot, name)
				targetPath := filepath.Join(targetRoot, name)

				if err := os.MkdirAll(filepath.Dir(targetPath), 0777); err != nil {
					t.Fatal(err)
				}
				if err := ioutil.WriteFile(targetPath, []byte{}, 0777); err != nil {
					t.Fatal(err)
				}
				if err := os.MkdirAll(filepath.Dir(linkPath), 0777); err != nil {
					t.Fatal(err)
				}
				if err := os.Symlink(targetPath, linkPath); err != nil {
					t.Fatal(err)
				}
			}

			t.Run("none are whitelisted", func(t *testing.T) {
				wl := []util.WhitelistEntry{}

				inputs := []string{}
				want := []string{}
				for _, name := range names {
					linkPath := filepath.Join(linkRoot, name)
					inputs = append(inputs, linkPath)
					// Both the link and its target are expected in the output.
					want = append(want, linkPath, filepath.Join(targetRoot, name))
				}
				want = filesWithParentDirs(want)

				resolved, err := ResolvePaths(inputs, wl)
				checkPaths(t, resolved, want, err)
			})

			t.Run("some are whitelisted", func(t *testing.T) {
				wl := []util.WhitelistEntry{
					{Path: filepath.Join(linkRoot, "baz")},
					{Path: filepath.Join(targetRoot, "foo")},
				}

				inputs := []string{}
				want := []string{}
				for _, name := range names {
					linkPath := filepath.Join(linkRoot, name)
					inputs = append(inputs, linkPath)

					// A whitelisted link hides both the link and its target.
					if util.IsInProvidedWhitelist(linkPath, wl) {
						t.Logf("skipping %s", linkPath)
						continue
					}
					want = append(want, linkPath)

					// A whitelisted target is dropped while its link is kept.
					if targetPath := filepath.Join(targetRoot, name); util.IsInProvidedWhitelist(targetPath, wl) {
						t.Logf("skipping %s", targetPath)
					} else {
						want = append(want, targetPath)
					}
				}

				// Add a symlinked directory and request a file through the link.
				dirLink := filepath.Join(linkRoot, "zoom/")
				dirTarget := filepath.Join(targetRoot, "zaam/")
				if err := os.MkdirAll(dirTarget, 0777); err != nil {
					t.Fatal(err)
				}
				if err := ioutil.WriteFile(filepath.Join(dirTarget, "meow.txt"), []byte{}, 0777); err != nil {
					t.Fatal(err)
				}
				if err := os.Symlink(dirTarget, dirLink); err != nil {
					t.Fatal(err)
				}

				inputs = append(inputs, filepath.Join(dirLink, "meow.txt"))
				// Expect the directory link itself plus the real file behind it.
				want = append(want, dirLink, filepath.Join(dirTarget, "meow.txt"))
				want = filesWithParentDirs(want)

				resolved, err := ResolvePaths(inputs, wl)
				checkPaths(t, resolved, want, err)
			})
		})
	})

	t.Run("empty set of files", func(t *testing.T) {
		wl := []util.WhitelistEntry{}
		inputs := []string{}
		want := []string{}

		resolved, err := ResolvePaths(inputs, wl)
		checkPaths(t, resolved, want, err)
	})
}

View File

@ -22,8 +22,10 @@ import (
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"github.com/GoogleContainerTools/kaniko/pkg/filesystem"
"github.com/GoogleContainerTools/kaniko/pkg/timing"
"github.com/karrick/godirwalk"
@ -41,11 +43,12 @@ var snapshotPathPrefix = constants.KanikoDir
type Snapshotter struct {
// l is the layered map consulted for existing paths and whiteouts.
l *LayeredMap
// directory is the path the snapshotter is rooted at.
directory string
// whitelist holds the whitelist entries handed to filesystem.ResolvePaths.
whitelist []util.WhitelistEntry
}
// NewSnapshotter creates a new snapshotter rooted at d
func NewSnapshotter(l *LayeredMap, d string) *Snapshotter {
return &Snapshotter{l: l, directory: d}
return &Snapshotter{l: l, directory: d, whitelist: util.Whitelist()}
}
// Init initializes a new snapshotter
@ -73,12 +76,15 @@ func (s *Snapshotter) TakeSnapshot(files []string) (string, error) {
logrus.Info("No files changed in this command, skipping snapshotting.")
return "", nil
}
// Propagate resolution failures: returning a nil error here would make a failed
// path resolution look like a successful, empty snapshot to the caller.
filesToAdd, err := filesystem.ResolvePaths(files, s.whitelist)
if err != nil {
	return "", err
}
logrus.Info("Taking snapshot of files...")
logrus.Debugf("Taking snapshot of files %v", files)
// Also add parent directories to keep the permission of them correctly.
filesToAdd := filesWithParentDirs(files)
sort.Strings(filesToAdd)
// Add files to the layered map
@ -149,19 +155,42 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
)
timing.DefaultRun.Stop(timer)
filesToResolve := make([]string, 0, len(memFs))
for file := range memFs {
if strings.HasPrefix(file, "/tmp/dir") {
logrus.Infof("found %s", file)
}
filesToResolve = append(filesToResolve, file)
}
resolvedFiles, err := filesystem.ResolvePaths(filesToResolve, s.whitelist)
if err != nil {
return nil, nil, err
}
resolvedMemFs := make(map[string]bool)
for _, f := range resolvedFiles {
if strings.HasPrefix(f, "/tmp/dir") {
logrus.Infof("found again %s", f)
}
resolvedMemFs[f] = true
}
// First handle whiteouts
// Get a list of all the files that existed before this layer
existingPaths := s.l.getFlattenedPathsForWhiteOut()
// Find the delta by removing everything left in this layer.
for p := range memFs {
for p := range resolvedMemFs {
delete(existingPaths, p)
}
// The paths left here are the ones that have been deleted in this layer.
filesToWhiteOut := []string{}
for path := range existingPaths {
// Only add the whiteout if the directory for the file still exists.
dir := filepath.Dir(path)
if _, ok := memFs[dir]; ok {
if _, ok := resolvedMemFs[dir]; ok {
if s.l.MaybeAddWhiteout(path) {
logrus.Debugf("Adding whiteout for %s", path)
filesToWhiteOut = append(filesToWhiteOut, path)
@ -170,7 +199,7 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
}
filesToAdd := []string{}
for path := range memFs {
for path := range resolvedMemFs {
if util.CheckWhitelist(path) {
logrus.Tracef("Not adding %s to layer, as it's whitelisted", path)
continue
@ -181,19 +210,11 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) {
return nil, nil, fmt.Errorf("could not check if file has changed %s %s", path, err)
}
if fileChanged {
// Get target file for symlinks so the symlink is not a dead link.
files, err := filesWithLinks(path)
if err != nil {
return nil, nil, err
}
logrus.Tracef("Adding files %s to layer, because it was changed.", files)
filesToAdd = append(filesToAdd, files...)
logrus.Tracef("Adding file %s to layer, because it was changed.", path)
filesToAdd = append(filesToAdd, path)
}
}
// Also add parent directories to keep their permissions correctly.
filesToAdd = filesWithParentDirs(filesToAdd)
sort.Strings(filesToAdd)
// Add files to the layered map
for _, file := range filesToAdd {
@ -221,27 +242,6 @@ func writeToTar(t util.Tar, files, whiteouts []string) error {
return nil
}
func filesWithParentDirs(files []string) []string {
filesSet := map[string]bool{}
for _, file := range files {
file = filepath.Clean(file)
filesSet[file] = true
for _, dir := range util.ParentDirectories(file) {
dir = filepath.Clean(dir)
filesSet[dir] = true
}
}
newFiles := []string{}
for file := range filesSet {
newFiles = append(newFiles, file)
}
return newFiles
}
// filesWithLinks returns the symlink and the target path if its exists.
func filesWithLinks(path string) ([]string, error) {
link, err := util.GetSymLink(path)

View File

@ -63,19 +63,24 @@ func TestSnapshotFSFileChange(t *testing.T) {
fooPath: "newbaz1",
batPath: "baz",
}
for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(fooPath) {
snapshotFiles[dir] = ""
}
for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
snapshotFiles[dir] = ""
}
numFiles := 0
// Their parents didn't change so they shouldn't be included
//for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(fooPath) {
// snapshotFiles[dir] = ""
//}
//for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
// snapshotFiles[dir] = ""
//}
actualFiles := []string{}
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
numFiles++
actualFiles = append(actualFiles, hdr.Name)
if _, isFile := snapshotFiles[hdr.Name]; !isFile {
t.Fatalf("File %s unexpectedly in tar", hdr.Name)
}
@ -84,8 +89,8 @@ func TestSnapshotFSFileChange(t *testing.T) {
t.Fatalf("Contents of %s incorrect, expected: %s, actual: %s", hdr.Name, snapshotFiles[hdr.Name], string(contents))
}
}
if numFiles != len(snapshotFiles) {
t.Fatalf("Incorrect number of files were added, expected: 2, actual: %v", numFiles)
if len(actualFiles) != len(snapshotFiles) {
t.Fatalf("Incorrect number of files were added, expected: %d, actual: %d", len(snapshotFiles), len(actualFiles))
}
}
@ -155,17 +160,20 @@ func TestSnapshotFSChangePermissions(t *testing.T) {
snapshotFiles := map[string]string{
batPathWithoutLeadingSlash: "baz2",
}
for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
snapshotFiles[dir] = ""
}
numFiles := 0
// The parents haven't changed so they shouldn't be in the tar
//for _, dir := range util.ParentDirectoriesWithoutLeadingSlash(batPath) {
// snapshotFiles[dir] = ""
//}
foundFiles := []string{}
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
t.Logf("Info %s in tar", hdr.Name)
numFiles++
foundFiles = append(foundFiles, hdr.Name)
if _, isFile := snapshotFiles[hdr.Name]; !isFile {
t.Fatalf("File %s unexpectedly in tar", hdr.Name)
}
@ -174,8 +182,11 @@ func TestSnapshotFSChangePermissions(t *testing.T) {
t.Fatalf("Contents of %s incorrect, expected: %s, actual: %s", hdr.Name, snapshotFiles[hdr.Name], string(contents))
}
}
if numFiles != len(snapshotFiles) {
t.Fatalf("Incorrect number of files were added, expected: 1, got: %v", numFiles)
if len(foundFiles) != len(snapshotFiles) {
t.Logf("expected\n%v\nto equal\n%v", foundFiles, snapshotFiles)
t.Fatalf("Incorrect number of files were added, expected: %d, got: %d",
len(snapshotFiles),
len(foundFiles))
}
}

View File

@ -83,6 +83,10 @@ type FSConfig struct {
type FSOpt func(*FSConfig)
// Whitelist returns the package-level whitelist entries. The slice is returned
// as-is, not copied.
func Whitelist() []WhitelistEntry {
return whitelist
}
func IncludeWhiteout() FSOpt {
return func(opts *FSConfig) {
opts.includeWhiteout = true
@ -358,8 +362,12 @@ func ExtractFile(dest string, hdr *tar.Header, tr io.Reader) error {
}
func IsInWhitelist(path string) bool {
for _, wl := range whitelist {
if !wl.PrefixMatchOnly && path == wl.Path {
return IsInProvidedWhitelist(path, whitelist)
}
func IsInProvidedWhitelist(path string, wl []WhitelistEntry) bool {
for _, entry := range wl {
if !entry.PrefixMatchOnly && path == entry.Path {
return true
}
}