# Copyright 2020 Google, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Integration-test image: a sequence of RUN steps that create, copy and
# delete files, followed by an ARG-driven write and a COPY into $HOME.
FROM debian:9.11
# Create a new file.
RUN echo "hey" > /etc/foo
# Create /etc/baz, copy it to /etc/bar, then remove the original.
# Each step is a separate RUN instruction.
RUN echo "baz" > /etc/baz
RUN cp /etc/baz /etc/bar
RUN rm /etc/baz

# Test with ARG: the path written to is taken from the "file" build argument.
ARG file
RUN echo "run" > $file

# Write a file under $HOME from a RUN step, then COPY a build-context file
# into $HOME as well.
RUN echo "test home" > $HOME/file
COPY context/foo $HOME/foo
"Dockerfile_test_target": {"--target=second"}, diff --git a/pkg/commands/run_marker.go b/pkg/commands/run_marker.go index c468e209b..a27b520a1 100644 --- a/pkg/commands/run_marker.go +++ b/pkg/commands/run_marker.go @@ -60,7 +60,7 @@ func (r *RunMarkerCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfi } return fi.ModTime().After(markerInfo.ModTime()), nil } - r.Files = util.WalkFS("/", isNewer) + r.Files, _ = util.WalkFS("/", map[string]struct{}{}, isNewer) logrus.Debugf("files changed %s", r.Files) return nil } diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index aaa0c815a..0d6ddc9f5 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -47,6 +47,7 @@ const ( // Various snapshot modes: SnapshotModeTime = "time" SnapshotModeFull = "full" + SnapshotModeRedo = "redo" // NoBaseImage is the scratch image NoBaseImage = "scratch" diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 56dc80d31..799b40ee9 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -799,14 +799,17 @@ func saveStageAsTarball(path string, image v1.Image) error { } func getHasher(snapshotMode string) (func(string) (string, error), error) { - if snapshotMode == constants.SnapshotModeTime { + switch snapshotMode { + case constants.SnapshotModeTime: logrus.Info("Only file modification time will be considered when snapshotting") return util.MtimeHasher(), nil - } - if snapshotMode == constants.SnapshotModeFull { + case constants.SnapshotModeFull: return util.Hasher(), nil + case constants.SnapshotModeRedo: + return util.RedoHasher(), nil + default: + return nil, fmt.Errorf("%s is not a valid snapshot mode", snapshotMode) } - return nil, fmt.Errorf("%s is not a valid snapshot mode", snapshotMode) } func resolveOnBuild(stage *config.KanikoStage, config *v1.Config, stageNameToIdx map[string]string) error { diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index 78377f959..976ad19d1 100644 --- 
a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -30,9 +30,10 @@ import ( ) type LayeredMap struct { - layers []map[string]string - whiteouts []map[string]struct{} - hasher func(string) (string, error) + layers []map[string]string + whiteouts []map[string]struct{} + layerHashCache map[string]string + hasher func(string) (string, error) // cacheHasher doesn't include mtime in it's hash so that filesystem cache keys are stable cacheHasher func(string) (string, error) } @@ -43,6 +44,7 @@ func NewLayeredMap(h func(string) (string, error), c func(string) (string, error cacheHasher: c, } l.layers = []map[string]string{} + l.layerHashCache = map[string]string{} return &l } @@ -103,7 +105,14 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) bool { // Add will add the specified file s to the layered map. func (l *LayeredMap) Add(s string) error { // Use hash function and add to layers - newV, err := l.hasher(s) + newV, err := func(s string) (string, error) { + if v, ok := l.layerHashCache[s]; ok { + // clear it cache for next layer. 
+ delete(l.layerHashCache, s) + return v, nil + } + return l.hasher(s) + }(s) if err != nil { return fmt.Errorf("error creating hash for %s: %v", s, err) } @@ -126,6 +135,7 @@ func (l *LayeredMap) CheckFileChange(s string) (bool, error) { } return false, err } + l.layerHashCache[s] = newV oldV, ok := l.Get(s) if ok && newV == oldV { return false, nil diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index 882fa866f..e8523d4e3 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -34,7 +34,6 @@ import ( // For testing var snapshotPathPrefix = config.KanikoDir -var allPass = func(s string) (bool, error) { return true, nil } // Snapshotter holds the root directory from which to take snapshots, and a list of snapshots taken type Snapshotter struct { @@ -94,16 +93,14 @@ func (s *Snapshotter) TakeSnapshot(files []string, shdCheckDelete bool) (string, // Get whiteout paths filesToWhiteout := []string{} if shdCheckDelete { - existingPaths := s.l.getFlattenedPathsForWhiteOut() - foundFiles := util.WalkFS(s.directory, allPass) - for _, file := range foundFiles { - delete(existingPaths, file) - } + _, deletedFiles := util.WalkFS(s.directory, s.l.getFlattenedPathsForWhiteOut(), func(s string) (bool, error) { + return true, nil + }) // The paths left here are the ones that have been deleted in this layer. - for path := range existingPaths { + for path := range deletedFiles { // Only add the whiteout if the directory for the file still exists. 
dir := filepath.Dir(path) - if _, ok := existingPaths[dir]; !ok { + if _, ok := deletedFiles[dir]; !ok { if s.l.MaybeAddWhiteout(path) { logrus.Debugf("Adding whiteout for %s", path) filesToWhiteout = append(filesToWhiteout, path) @@ -152,48 +149,28 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { s.l.Snapshot() - foundPaths := util.WalkFS(s.directory, allPass) + changedPaths, deletedPaths := util.WalkFS(s.directory, s.l.getFlattenedPathsForWhiteOut(), s.l.CheckFileChange) timer := timing.Start("Resolving Paths") - // First handle whiteouts - // Get a list of all the files that existed before this layer - existingPaths := s.l.getFlattenedPathsForWhiteOut() filesToAdd := []string{} - resolvedMemFs := make(map[string]bool) - - for _, path := range foundPaths { - delete(existingPaths, path) - resolvedFiles, err := filesystem.ResolvePaths([]string{path}, s.ignorelist) - if err != nil { - return nil, nil, err - } - for _, path := range resolvedFiles { - // Continue if this path is already processed - if _, ok := resolvedMemFs[path]; ok { - continue - } - if util.CheckIgnoreList(path) { - logrus.Tracef("Not adding %s to layer, as it's whitelisted", path) - continue - } - // Only add changed files. - fileChanged, err := s.l.CheckFileChange(path) - if err != nil { - return nil, nil, fmt.Errorf("could not check if file has changed %s %s", path, err) - } - if fileChanged { - logrus.Tracef("Adding file %s to layer, because it was changed.", path) - filesToAdd = append(filesToAdd, path) - } + resolvedFiles, err := filesystem.ResolvePaths(changedPaths, s.ignorelist) + if err != nil { + return nil, nil, err + } + for _, path := range resolvedFiles { + if util.CheckIgnoreList(path) { + logrus.Tracef("Not adding %s to layer, as it's whitelisted", path) + continue } + filesToAdd = append(filesToAdd, path) } // The paths left here are the ones that have been deleted in this layer. 
filesToWhiteOut := []string{} - for path := range existingPaths { + for path := range deletedPaths { // Only add the whiteout if the directory for the file still exists. dir := filepath.Dir(path) - if _, ok := existingPaths[dir]; !ok { + if _, ok := deletedPaths[dir]; !ok { if s.l.MaybeAddWhiteout(path) { logrus.Debugf("Adding whiteout for %s", path) filesToWhiteOut = append(filesToWhiteOut, path) diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go index 314ddec9f..7150ae0c4 100644 --- a/pkg/snapshot/snapshot_test.go +++ b/pkg/snapshot/snapshot_test.go @@ -66,6 +66,7 @@ func TestSnapshotFSFileChange(t *testing.T) { } for _, path := range util.ParentDirectoriesWithoutLeadingSlash(batPath) { if path == "/" { + snapshotFiles["/"] = "" continue } snapshotFiles[path+"/"] = "" @@ -164,6 +165,7 @@ func TestSnapshotFSChangePermissions(t *testing.T) { } for _, path := range util.ParentDirectoriesWithoutLeadingSlash(batPathWithoutLeadingSlash) { if path == "/" { + snapshotFiles["/"] = "" continue } snapshotFiles[path+"/"] = "" diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index 75f1d3870..9a34f2974 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -878,7 +878,7 @@ func UpdateInitialIgnoreList(ignoreVarRun bool) { }) } -func WalkFS(dir string, f func(string) (bool, error)) []string { +func WalkFS(dir string, existingPaths map[string]struct{}, f func(string) (bool, error)) ([]string, map[string]struct{}) { foundPaths := make([]string, 0) timer := timing.Start("Walking filesystem") godirwalk.Walk(dir, &godirwalk.Options{ @@ -891,6 +891,7 @@ func WalkFS(dir string, f func(string) (bool, error)) []string { return nil } + delete(existingPaths, path) if t, err := f(path); err != nil { return err } else if t { @@ -902,5 +903,5 @@ func WalkFS(dir string, f func(string) (bool, error)) []string { }, ) timing.DefaultRun.Stop(timer) - return foundPaths + return foundPaths, existingPaths } diff --git a/pkg/util/util.go b/pkg/util/util.go index 
// RedoHasher returns a hash function that fingerprints a path from its
// metadata only: file mode, modification time, size, owner uid and owner gid.
// File contents are never read.
//
// Note that the mtime can lag, so it's possible that a file will have changed
// but the mtime may look the same.
//
// The returned function yields the hex-encoded MD5 digest of those fields. It
// returns an error when the path cannot be lstat'ed, or an *os.PathError
// wrapping syscall.ENOTSUP when the file info does not carry a
// *syscall.Stat_t (so ownership cannot be read).
func RedoHasher() func(string) (string, error) {
	return func(p string) (string, error) {
		// Lstat, not Stat: a symlink is described itself, not its target.
		fi, err := os.Lstat(p)
		if err != nil {
			return "", err
		}

		// Assert once and check the result, so an unexpected Sys() value is
		// reported as an error instead of panicking.
		st, ok := fi.Sys().(*syscall.Stat_t)
		if !ok {
			return "", &os.PathError{Op: "lstat", Path: p, Err: syscall.ENOTSUP}
		}

		// The field order and encodings below define the digest; they are
		// kept exactly as before so hashes remain comparable.
		h := md5.New()
		h.Write([]byte(fi.Mode().String()))
		h.Write([]byte(fi.ModTime().String()))
		h.Write([]byte(strconv.FormatInt(fi.Size(), 16)))
		h.Write([]byte(strconv.FormatUint(uint64(st.Uid), 36)))
		h.Write([]byte(","))
		h.Write([]byte(strconv.FormatUint(uint64(st.Gid), 36)))

		return hex.EncodeToString(h.Sum(nil)), nil
	}
}