From 7f64037a8ce9f140181b53d3d66ee6ef01b370ec Mon Sep 17 00:00:00 2001 From: Christie Wilson Date: Fri, 24 Aug 2018 15:50:58 -0700 Subject: [PATCH] Separate snapshotting of parent dirs from files To make the logic a bit more clear, when snapshotting files, the parent dirs are now snapshotted in a different loop from the files we are actually trying to snapshot. Unfortunately this loop is nearly duplicated but I did managed to group some fo the related logic together: - A function to check if the file should be snapshotted (e.g. isn't whitelisted, etc.) - Created a `Tar` type to handle some of the logic around tar-ing, e.g. tracking hardlinks and stat-ing files before adding them One side effect of this is that now when snapshoting the file system, files will be stat-ed twice. --- integration/integration_test.go | 3 +- pkg/snapshot/snapshot.go | 106 +++++++++++++++++--------------- pkg/util/tar_util.go | 45 +++++++++++--- pkg/util/tar_util_test.go | 12 +--- 4 files changed, 97 insertions(+), 69 deletions(-) diff --git a/integration/integration_test.go b/integration/integration_test.go index c16ff5f64..372922a43 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -136,8 +136,7 @@ func TestMain(m *testing.M) { fmt.Println("Building kaniko image") cmd := exec.Command("docker", "build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") - _, err = RunCommandWithoutTest(cmd) - if err != nil { + if _, err = RunCommandWithoutTest(cmd); err != nil { fmt.Printf("Building kaniko failed: %s", err) os.Exit(1) } diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index 7385a8d2d..4f01441dc 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -17,7 +17,6 @@ limitations under the License. package snapshot import ( - "archive/tar" "bytes" "fmt" "io" @@ -35,7 +34,6 @@ import ( type Snapshotter struct { l *LayeredMap directory string - hardlinks map[uint64]string } // NewSnapshotter creates a new snapshotter rooted at d @@ -55,7 +53,6 @@ func (s *Snapshotter) Init() error { // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { buf := bytes.NewBuffer([]byte{}) - var filesAdded bool filesAdded, err := s.snapshotFiles(buf, files) if err != nil { return nil, err @@ -71,7 +68,6 @@ func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) { buf := bytes.NewBuffer([]byte{}) - var filesAdded bool filesAdded, err := s.snapShotFS(buf) if err != nil { return nil, err @@ -83,10 +79,24 @@ func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) { return contents, err } +func shouldSnapshot(file string, snapshottedFiles map[string]bool) (bool, error) { + if val, ok := snapshottedFiles[file]; ok && val { + return false, nil + } + whitelisted, err := util.CheckWhitelist(file) + if err != nil { + return false, fmt.Errorf("Error checking for %s in whitelist: %s", file, err) + } + if whitelisted && !isBuildFile(file) { + logrus.Infof("Not adding %s to layer, as it's whitelisted", file) + return false, nil + } + return true, nil +} + // snapshotFiles creates a snapshot (tar) and adds the specified files. // It will not add files which are whitelisted. func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { - s.hardlinks = map[uint64]string{} s.l.Snapshot() if len(files) == 0 { logrus.Info("No files changed in this command, skipping snapshotting.") @@ -94,59 +104,60 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { } logrus.Infof("Taking snapshot of files %v...", files) snapshottedFiles := make(map[string]bool) - n := len(files) - for _, file := range files { - parentDirs := util.ParentDirectories(file) - // If we do end up snapshotting the dirs, we need to snapshot them before - // we snapshot their contents - files = append(parentDirs, files...) - } filesAdded := false - w := tar.NewWriter(f) - defer w.Close() - // Now create the tar. - for i, file := range files { + t := util.NewTar(f) + defer t.Close() + + // First add to the tar any parent directories that haven't been added + parentDirs := []string{} + for _, file := range files { + parents := util.ParentDirectories(file) + parentDirs = append(parentDirs, parents...) + } + for _, file := range parentDirs { file = filepath.Clean(file) - if val, ok := snapshottedFiles[file]; ok && val { - continue - } - whitelisted, err := util.CheckWhitelist(file) + shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles) if err != nil { - return false, err + return false, fmt.Errorf("Error checking if parent dir %s can be snapshotted: %s", file, err) } - if whitelisted && !isBuildFile(file) { - logrus.Infof("Not adding %s to layer, as it's whitelisted", file) + if !shouldSnapshot { continue } snapshottedFiles[file] = true - info, err := os.Lstat(file) + + fileAdded, err := s.l.MaybeAdd(file) if err != nil { - return false, err + return false, fmt.Errorf("Unable to add parent dir %s to layered map: %s", file, err) } - var fileAdded bool - lastParentFileIndex := len(files) - n - isParentDir := i < lastParentFileIndex - - // If this is parent dir of the file we're snapshotting, only snapshot it - // if it changed - if isParentDir { - fileAdded, err = s.l.MaybeAdd(file) - } else { - // If this is one of the files we are snapshotting, definitely snapshot it - err = s.l.Add(file) - fileAdded = true + if fileAdded { + err = t.AddFileToTar(file) + if err != nil { + return false, fmt.Errorf("Error adding parent dir %s to tar: %s", file, err) + } + filesAdded = true } + } + // Next add the files themselves to the tar + for _, file := range files { + file = filepath.Clean(file) + shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles) if err != nil { + return false, fmt.Errorf("Error checking if file %s can be snapshotted: %s", file, err) + } + if !shouldSnapshot { + continue + } + snapshottedFiles[file] = true + + if err = s.l.Add(file); err != nil { return false, fmt.Errorf("Unable to add file %s to layered map: %s", file, err) } - if fileAdded { - filesAdded = true - if err := util.AddToTar(file, info, s.hardlinks, w); err != nil { - return false, err - } + if err = t.AddFileToTar(file); err != nil { + return false, fmt.Errorf("Error adding file %s to tar: %s", file, err) } + filesAdded = true } return filesAdded, nil } @@ -171,12 +182,11 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { // to be flushed or the disk does its own caching/buffering. syscall.Sync() - s.hardlinks = map[uint64]string{} s.l.Snapshot() existingPaths := s.l.GetFlattenedPathsForWhiteOut() filesAdded := false - w := tar.NewWriter(f) - defer w.Close() + t := util.NewTar(f) + defer t.Close() // Save the fs state in a map to iterate over later. memFs := map[string]os.FileInfo{} @@ -200,7 +210,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { if addWhiteout { logrus.Infof("Adding whiteout for %s", path) filesAdded = true - if err := util.Whiteout(path, w); err != nil { + if err := t.Whiteout(path); err != nil { return false, err } } @@ -208,7 +218,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { } // Now create the tar. - for path, info := range memFs { + for path := range memFs { whitelisted, err := util.CheckWhitelist(path) if err != nil { return false, err @@ -226,7 +236,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { if maybeAdd { logrus.Debugf("Adding %s to layer, because it was changed.", path) filesAdded = true - if err := util.AddToTar(path, info, s.hardlinks, w); err != nil { + if err := t.AddFileToTar(path); err != nil { return false, err } } diff --git a/pkg/util/tar_util.go b/pkg/util/tar_util.go index a66eea667..f95574deb 100644 --- a/pkg/util/tar_util.go +++ b/pkg/util/tar_util.go @@ -20,6 +20,7 @@ import ( "archive/tar" "compress/bzip2" "compress/gzip" + "fmt" "io" "io/ioutil" "os" @@ -31,8 +32,32 @@ import ( "github.com/sirupsen/logrus" ) -// AddToTar adds the file i to tar w at path p -func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Writer) error { +// Tar knows how to write files to a tar file. +type Tar struct { + hardlinks map[uint64]string + w *tar.Writer +} + +// NewTar will create an instance of Tar that can write files to the writer at f. +func NewTar(f io.Writer) Tar { + w := tar.NewWriter(f) + return Tar{ + w: w, + hardlinks: map[uint64]string{}, + } +} + +// Close will close any open streams used by Tar. +func (t *Tar) Close() { + t.w.Close() +} + +// AddFileToTar adds the file at path p to the tar +func (t *Tar) AddFileToTar(p string) error { + i, err := os.Lstat(p) + if err != nil { + return fmt.Errorf("Failed to get file info for %s: %s", p, err) + } linkDst := "" if i.Mode()&os.ModeSymlink != 0 { var err error @@ -51,13 +76,13 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write } hdr.Name = p - hardlink, linkDst := checkHardlink(p, hardlinks, i) + hardlink, linkDst := t.checkHardlink(p, i) if hardlink { hdr.Linkname = linkDst hdr.Typeflag = tar.TypeLink hdr.Size = 0 } - if err := w.WriteHeader(hdr); err != nil { + if err := t.w.WriteHeader(hdr); err != nil { return err } if !(i.Mode().IsRegular()) || hardlink { @@ -68,13 +93,13 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write return err } defer r.Close() - if _, err := io.Copy(w, r); err != nil { + if _, err := io.Copy(t.w, r); err != nil { return err } return nil } -func Whiteout(p string, w *tar.Writer) error { +func (t *Tar) Whiteout(p string) error { dir := filepath.Dir(p) name := ".wh." + filepath.Base(p) @@ -82,7 +107,7 @@ func Whiteout(p string, w *tar.Writer) error { Name: filepath.Join(dir, name), Size: 0, } - if err := w.WriteHeader(th); err != nil { + if err := t.w.WriteHeader(th); err != nil { return err } @@ -90,7 +115,7 @@ func Whiteout(p string, w *tar.Writer) error { } // Returns true if path is hardlink, and the link destination -func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, string) { +func (t *Tar) checkHardlink(p string, i os.FileInfo) (bool, string) { hardlink := false linkDst := "" if sys := i.Sys(); sys != nil { @@ -98,12 +123,12 @@ func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, nlinks := stat.Nlink if nlinks > 1 { inode := stat.Ino - if original, exists := hardlinks[inode]; exists && original != p { + if original, exists := t.hardlinks[inode]; exists && original != p { hardlink = true logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original) linkDst = original } else { - hardlinks[inode] = p + t.hardlinks[inode] = p } } } diff --git a/pkg/util/tar_util_test.go b/pkg/util/tar_util_test.go index 1762ea6f2..e0c1becac 100644 --- a/pkg/util/tar_util_test.go +++ b/pkg/util/tar_util_test.go @@ -17,7 +17,6 @@ limitations under the License. package util import ( - "archive/tar" "compress/gzip" "io" "io/ioutil" @@ -92,16 +91,11 @@ func setUpFilesAndTars(testDir string) error { } func createTar(testdir string, writer io.Writer) error { - - w := tar.NewWriter(writer) - defer w.Close() + t := NewTar(writer) + defer t.Close() for _, regFile := range regularFiles { filePath := filepath.Join(testdir, regFile) - fi, err := os.Stat(filePath) - if err != nil { - return err - } - if err := AddToTar(filePath, fi, map[uint64]string{}, w); err != nil { + if err := t.AddFileToTar(filePath); err != nil { return err } }