diff --git a/Gopkg.lock b/Gopkg.lock index d9e377b47..a33dedd67 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -973,6 +973,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "e2edf813355250ce0afb670aa8ed7096b44d93edaa813598a2717417bfd30d5a" + inputs-digest = "cb0a41d18a5ad50aed7a41a7de5d4cf9d6dd43d473fef064cd891dbe3677c3e1" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index 9b98d5dea..bf5263598 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -11,10 +11,6 @@ required = [ name = "github.com/prometheus/client_golang" revision = "a40133b69fbd73ee655606a9bf5f8b9b9bf758dd" -[[override]] - name = "github.com/containers/storage" - revision = "1e5ce40cdb84ab66e26186435b1273e04b879fef" - [[override]] name = "github.com/opencontainers/runc" revision = "4fc53a81fb7c994640722ac585fa9ca548971871" @@ -31,10 +27,6 @@ required = [ name = "google.golang.org/grpc" revision = "8124abf74e7633d82a5b96585b0da487d0e6eed0" -[[constraint]] - name = "github.com/genuinetools/amicontained" - version = "0.4.0" - [[constraint]] name = "github.com/aws/aws-sdk-go" version = "1.13.56" @@ -46,11 +38,3 @@ required = [ [[override]] name = "k8s.io/apimachinery" version = "kubernetes-1.11.0" - -[[override]] - name = "github.com/tonistiigi/fsutil" - branch = "master" - -[[constraint]] - name = "github.com/docker/docker" - revision = "71cd53e4a197b303c6ba086bd584ffd67a884281" diff --git a/README.md b/README.md index 6b20567a9..e8e91be46 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ We do **not** recommend running the kaniko executor binary in another image, as - [Security](#security) - [Comparison with Other Tools](#comparison-with-other-tools) - [Community](#community) +- [Limitations](#limitations) _If you are interested in contributing to kaniko, see [DEVELOPMENT.md](DEVELOPMENT.md) and [CONTRIBUTING.md](CONTRIBUTING.md)._ @@ -256,7 +257,8 @@ To configure credentials, you will need to do the following: #### --snapshotMode You can set the `--snapshotMode=` flag to set how kaniko will snapshot the filesystem. -If `--snapshotMode=time` is set, only file mtime will be considered when snapshotting. +If `--snapshotMode=time` is set, only file mtime will be considered when snapshotting (see +[limitations related to mtime](#mtime-and-snapshotting)). #### --build-arg @@ -356,3 +358,23 @@ provides. [kaniko-users](https://groups.google.com/forum/#!forum/kaniko-users) Google group To Contribute to kaniko, see [DEVELOPMENT.md](DEVELOPMENT.md) and [CONTRIBUTING.md](CONTRIBUTING.md). + +## Limitations + +### mtime and snapshotting + +When taking a snapshot, kaniko's hashing algorithms include (or in the case of +[`--snapshotMode=time`](#--snapshotmode), only use) a file's +[`mtime`](https://en.wikipedia.org/wiki/Inode#POSIX_inode_description) to determine +if the file has changed. Unfortunately there is a delay between when changes to a +file are made and when the `mtime` is updated. This means: + +* With the time-only snapshot mode (`--snapshotMode=time`), kaniko may miss changes + introduced by `RUN` commands entirely. +* With the default snapshot mode (`--snapshotMode=full`), whether or not kaniko will + add a layer in the case where a `RUN` command modifies a file **but the contents do + not** change is theoretically non-deterministic. This _does not affect the contents_ + which will still be correct, but it does affect the number of layers. + +_Note that these issues are currently theoretical only. If you see this issue occur, please +[open an issue](https://github.com/GoogleContainerTools/kaniko/issues)._ \ No newline at end of file diff --git a/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times b/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times new file mode 100644 index 000000000..6acea2a4b --- /dev/null +++ b/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times @@ -0,0 +1,49 @@ +FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo diff --git a/integration/integration_test.go b/integration/integration_test.go index f9e35d05f..372922a43 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -135,11 +135,9 @@ func TestMain(m *testing.M) { defer DeleteFromBucket(fileInBucket) fmt.Println("Building kaniko image") - buildKaniko := exec.Command("docker", "build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") - err = buildKaniko.Run() - if err != nil { - fmt.Print(err) - fmt.Print("Building kaniko failed.") + cmd := exec.Command("docker", "build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") + if _, err = RunCommandWithoutTest(cmd); err != nil { + fmt.Printf("Building kaniko failed: %s", err) os.Exit(1) } diff --git a/pkg/commands/add.go b/pkg/commands/add.go index cf6ff7772..e0e317967 100644 --- a/pkg/commands/add.go +++ b/pkg/commands/add.go @@ -79,16 +79,12 @@ func (a *AddCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile.Bui a.snapshotFiles = append(a.snapshotFiles, urlDest) } else if util.IsFileLocalTarArchive(fullPath) { logrus.Infof("Unpacking local tar archive %s to %s", src, dest) - if err := util.UnpackLocalTarArchive(fullPath, dest); err != nil { - return err - } - // Add the unpacked files to the snapshotter - filesAdded, err := util.Files(dest) + extractedFiles, err := util.UnpackLocalTarArchive(fullPath, dest) if err != nil { return err } - logrus.Debugf("Added %v from local tar archive %s", filesAdded, src) - a.snapshotFiles = append(a.snapshotFiles, filesAdded...) + logrus.Debugf("Added %v from local tar archive %s", extractedFiles, src) + a.snapshotFiles = append(a.snapshotFiles, extractedFiles...) } else { unresolvedSrcs = append(unresolvedSrcs, src) } diff --git a/pkg/commands/copy.go b/pkg/commands/copy.go index 5bd66282d..020a61bb4 100644 --- a/pkg/commands/copy.go +++ b/pkg/commands/copy.go @@ -79,10 +79,7 @@ func (c *CopyCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile.Bu // we need to add '/' to the end to indicate the destination is a directory dest = filepath.Join(cwd, dest) + "/" } - if err := util.CopyDir(fullPath, dest); err != nil { - return err - } - copiedFiles, err := util.Files(dest) + copiedFiles, err := util.CopyDir(fullPath, dest) if err != nil { return err } diff --git a/pkg/commands/volume.go b/pkg/commands/volume.go index 5e798e128..0fdad0c0e 100644 --- a/pkg/commands/volume.go +++ b/pkg/commands/volume.go @@ -17,6 +17,7 @@ limitations under the License. package commands import ( + "fmt" "os" "strings" @@ -53,13 +54,14 @@ func (v *VolumeCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile. return err } - logrus.Infof("Creating directory %s", volume) - if err := os.MkdirAll(volume, 0755); err != nil { - return err + // Only create and snapshot the dir if it didn't exist already + if _, err := os.Stat(volume); os.IsNotExist(err) { + logrus.Infof("Creating directory %s", volume) + v.snapshotFiles = []string{volume} + if err := os.MkdirAll(volume, 0755); err != nil { + return fmt.Errorf("Could not create directory for volume %s: %s", volume, err) + } } - - //Check if directory already exists? - v.snapshotFiles = append(v.snapshotFiles, volume) } config.Volumes = existingVolumes diff --git a/pkg/commands/workdir.go b/pkg/commands/workdir.go index 00487eaee..67186f73f 100644 --- a/pkg/commands/workdir.go +++ b/pkg/commands/workdir.go @@ -47,8 +47,14 @@ func (w *WorkdirCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile config.WorkingDir = filepath.Join(config.WorkingDir, resolvedWorkingDir) } logrus.Infof("Changed working directory to %s", config.WorkingDir) - w.snapshotFiles = []string{config.WorkingDir} - return os.MkdirAll(config.WorkingDir, 0755) + + // Only create and snapshot the dir if it didn't exist already + if _, err := os.Stat(config.WorkingDir); os.IsNotExist(err) { + logrus.Infof("Creating directory %s", config.WorkingDir) + w.snapshotFiles = []string{config.WorkingDir} + return os.MkdirAll(config.WorkingDir, 0755) + } + return nil } // FilesToSnapshot returns the workingdir, which should have been created if it didn't already exist diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 21544da6d..60e29b2a2 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -85,23 +85,41 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { if err := dockerCommand.ExecuteCommand(&imageConfig.Config, buildArgs); err != nil { return nil, err } - // Don't snapshot if it's not the final stage and not the final command - // Also don't snapshot if it's the final stage, not the final command, and single snapshot is set - if (!stage.FinalStage && !finalCmd) || (stage.FinalStage && !finalCmd && opts.SingleSnapshot) { - continue - } - // Now, we get the files to snapshot from this command and take the snapshot snapshotFiles := dockerCommand.FilesToSnapshot() - if finalCmd { - snapshotFiles = nil + var contents []byte + + // If this is an intermediate stage, we only snapshot for the last command and we + // want to snapshot the entire filesystem since we aren't tracking what was changed + // by previous commands. + if !stage.FinalStage { + if finalCmd { + contents, err = snapshotter.TakeSnapshotFS() + } + } else { + // If we are in single snapshot mode, we only take a snapshot once, after all + // commands have completed. + if opts.SingleSnapshot { + if finalCmd { + contents, err = snapshotter.TakeSnapshotFS() + } + } else { + // Otherwise, in the final stage we take a snapshot at each command. If we know + // the files that were changed, we'll snapshot those explicitly, otherwise we'll + // check if anything in the filesystem changed. + if snapshotFiles != nil { + contents, err = snapshotter.TakeSnapshot(snapshotFiles) + } else { + contents, err = snapshotter.TakeSnapshotFS() + } + } } - contents, err := snapshotter.TakeSnapshot(snapshotFiles) if err != nil { - return nil, err + return nil, fmt.Errorf("Error taking snapshot of files for command %s: %s", dockerCommand, err) } + util.MoveVolumeWhitelistToWhitelist() if contents == nil { - logrus.Info("No files were changed, appending empty layer to config.") + logrus.Info("No files were changed, appending empty layer to config. No layer added to image.") continue } // Append the layer to the image diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index 6fd0ca4ac..0d382d766 100644 --- a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -17,6 +17,7 @@ limitations under the License. package snapshot import ( + "fmt" "path/filepath" "strings" ) @@ -82,6 +83,20 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) (bool, error) { return true, nil } +// Add will add the specified file s to the layered map. +func (l *LayeredMap) Add(s string) error { + newV, err := l.hasher(s) + if err != nil { + return fmt.Errorf("Error creating hash for %s: %s", s, err) + } + l.layers[len(l.layers)-1][s] = newV + return nil +} + +// MaybeAdd will add the specified file s to the layered map if +// the layered map's hashing function determines it has changed. If +// it has not changed, it will not be added. Returns true if the file +// was added. func (l *LayeredMap) MaybeAdd(s string) (bool, error) { oldV, ok := l.Get(s) newV, err := l.hasher(s) diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index f0a88cbe1..4f01441dc 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -17,12 +17,13 @@ limitations under the License. package snapshot import ( - "archive/tar" "bytes" + "fmt" "io" "io/ioutil" "os" "path/filepath" + "syscall" "github.com/GoogleContainerTools/kaniko/pkg/constants" "github.com/GoogleContainerTools/kaniko/pkg/util" @@ -33,7 +34,6 @@ import ( type Snapshotter struct { l *LayeredMap directory string - hardlinks map[uint64]string } // NewSnapshotter creates a new snapshotter rooted at d @@ -49,17 +49,11 @@ func (s *Snapshotter) Init() error { return nil } -// TakeSnapshot takes a snapshot of the filesystem, avoiding directories in the whitelist, and creates +// TakeSnapshot takes a snapshot of the specified files, avoiding directories in the whitelist, and creates // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { buf := bytes.NewBuffer([]byte{}) - var filesAdded bool - var err error - if files == nil { - filesAdded, err = s.snapShotFS(buf) - } else { - filesAdded, err = s.snapshotFiles(buf, files) - } + filesAdded, err := s.snapshotFiles(buf, files) if err != nil { return nil, err } @@ -70,10 +64,39 @@ func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { return contents, err } -// snapshotFiles takes a snapshot of specific files -// Used for ADD/COPY commands, when we know which files have changed +// TakeSnapshotFS takes a snapshot of the filesystem, avoiding directories in the whitelist, and creates +// a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed +func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) { + buf := bytes.NewBuffer([]byte{}) + filesAdded, err := s.snapShotFS(buf) + if err != nil { + return nil, err + } + contents := buf.Bytes() + if !filesAdded { + return nil, nil + } + return contents, err +} + +func shouldSnapshot(file string, snapshottedFiles map[string]bool) (bool, error) { + if val, ok := snapshottedFiles[file]; ok && val { + return false, nil + } + whitelisted, err := util.CheckWhitelist(file) + if err != nil { + return false, fmt.Errorf("Error checking for %s in whitelist: %s", file, err) + } + if whitelisted && !isBuildFile(file) { + logrus.Infof("Not adding %s to layer, as it's whitelisted", file) + return false, nil + } + return true, nil +} + +// snapshotFiles creates a snapshot (tar) and adds the specified files. +// It will not add files which are whitelisted. func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { - s.hardlinks = map[uint64]string{} s.l.Snapshot() if len(files) == 0 { logrus.Info("No files changed in this command, skipping snapshotting.") @@ -81,45 +104,61 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { } logrus.Infof("Taking snapshot of files %v...", files) snapshottedFiles := make(map[string]bool) - for _, file := range files { - parentDirs := util.ParentDirectories(file) - files = append(parentDirs, files...) - } filesAdded := false - w := tar.NewWriter(f) - defer w.Close() - // Now create the tar. + t := util.NewTar(f) + defer t.Close() + + // First add to the tar any parent directories that haven't been added + parentDirs := []string{} for _, file := range files { + parents := util.ParentDirectories(file) + parentDirs = append(parentDirs, parents...) + } + for _, file := range parentDirs { file = filepath.Clean(file) - if val, ok := snapshottedFiles[file]; ok && val { - continue - } - whitelisted, err := util.CheckWhitelist(file) + shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles) if err != nil { - return false, err + return false, fmt.Errorf("Error checking if parent dir %s can be snapshotted: %s", file, err) } - if whitelisted && !isBuildFile(file) { - logrus.Infof("Not adding %s to layer, as it's whitelisted", file) + if !shouldSnapshot { continue } snapshottedFiles[file] = true - info, err := os.Lstat(file) + + fileAdded, err := s.l.MaybeAdd(file) if err != nil { - return false, err + return false, fmt.Errorf("Unable to add parent dir %s to layered map: %s", file, err) } - // Only add to the tar if we add it to the layeredmap. - addFile, err := s.l.MaybeAdd(file) - if err != nil { - return false, err - } - if addFile { - filesAdded = true - if err := util.AddToTar(file, info, s.hardlinks, w); err != nil { - return false, err + + if fileAdded { + err = t.AddFileToTar(file) + if err != nil { + return false, fmt.Errorf("Error adding parent dir %s to tar: %s", file, err) } + filesAdded = true } } + // Next add the files themselves to the tar + for _, file := range files { + file = filepath.Clean(file) + shouldSnapshot, err := shouldSnapshot(file, snapshottedFiles) + if err != nil { + return false, fmt.Errorf("Error checking if file %s can be snapshotted: %s", file, err) + } + if !shouldSnapshot { + continue + } + snapshottedFiles[file] = true + + if err = s.l.Add(file); err != nil { + return false, fmt.Errorf("Unable to add file %s to layered map: %s", file, err) + } + if err = t.AddFileToTar(file); err != nil { + return false, fmt.Errorf("Error adding file %s to tar: %s", file, err) + } + filesAdded = true + } return filesAdded, nil } @@ -132,14 +171,22 @@ func isBuildFile(file string) bool { return false } +// shapShotFS creates a snapshot (tar) of all files in the system which are not +// whitelisted and which have changed. func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { logrus.Info("Taking snapshot of full filesystem...") - s.hardlinks = map[uint64]string{} + + // Some of the operations that follow (e.g. hashing) depend on the file system being synced, + // for example the hashing function that determines if files are equal uses the mtime of the files, + // which can lag if sync is not called. Unfortunately there can still be lag if too much data needs + // to be flushed or the disk does its own caching/buffering. + syscall.Sync() + s.l.Snapshot() existingPaths := s.l.GetFlattenedPathsForWhiteOut() filesAdded := false - w := tar.NewWriter(f) - defer w.Close() + t := util.NewTar(f) + defer t.Close() // Save the fs state in a map to iterate over later. memFs := map[string]os.FileInfo{} @@ -163,7 +210,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { if addWhiteout { logrus.Infof("Adding whiteout for %s", path) filesAdded = true - if err := util.Whiteout(path, w); err != nil { + if err := t.Whiteout(path); err != nil { return false, err } } @@ -171,7 +218,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { } // Now create the tar. - for path, info := range memFs { + for path := range memFs { whitelisted, err := util.CheckWhitelist(path) if err != nil { return false, err @@ -189,7 +236,7 @@ func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { if maybeAdd { logrus.Debugf("Adding %s to layer, because it was changed.", path) filesAdded = true - if err := util.AddToTar(path, info, s.hardlinks, w); err != nil { + if err := t.AddFileToTar(path); err != nil { return false, err } } diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go index 934e862d9..72b6a750a 100644 --- a/pkg/snapshot/snapshot_test.go +++ b/pkg/snapshot/snapshot_test.go @@ -29,7 +29,7 @@ import ( "github.com/pkg/errors" ) -func TestSnapshotFileChange(t *testing.T) { +func TestSnapshotFSFileChange(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) @@ -45,7 +45,7 @@ func TestSnapshotFileChange(t *testing.T) { t.Fatalf("Error setting up fs: %s", err) } // Take another snapshot - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } @@ -81,7 +81,7 @@ func TestSnapshotFileChange(t *testing.T) { } } -func TestSnapshotChangePermissions(t *testing.T) { +func TestSnapshotFSChangePermissions(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) if err != nil { @@ -93,7 +93,7 @@ func TestSnapshotChangePermissions(t *testing.T) { t.Fatalf("Error changing permissions on %s: %v", batPath, err) } // Take another snapshot - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } @@ -141,7 +141,6 @@ func TestSnapshotFiles(t *testing.T) { } filesToSnapshot := []string{ filepath.Join(testDir, "foo"), - filepath.Join(testDir, "kaniko/file"), } contents, err := snapshotter.TakeSnapshot(filesToSnapshot) if err != nil { @@ -166,14 +165,14 @@ func TestSnapshotFiles(t *testing.T) { testutil.CheckErrorAndDeepEqual(t, false, nil, expectedFiles, actualFiles) } -func TestEmptySnapshot(t *testing.T) { +func TestEmptySnapshotFS(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) if err != nil { t.Fatal(err) } // Take snapshot with no changes - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index 1a4f958bb..67a972002 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -155,7 +155,9 @@ func ChildDirInWhitelist(path, directory string) bool { return false } -func unTar(r io.Reader, dest string) error { +// unTar returns a list of files that have been extracted from the tar archive at r to the path at dest +func unTar(r io.Reader, dest string) ([]string, error) { + var extractedFiles []string tr := tar.NewReader(r) for { hdr, err := tr.Next() @@ -163,13 +165,14 @@ func unTar(r io.Reader, dest string) error { break } if err != nil { - return err + return nil, err } if err := extractFile(dest, hdr, tr); err != nil { - return err + return nil, err } + extractedFiles = append(extractedFiles, dest) } - return nil + return extractedFiles, nil } func extractFile(dest string, hdr *tar.Header, tr io.Reader) error { @@ -349,24 +352,6 @@ func RelativeFiles(fp string, root string) ([]string, error) { return files, err } -// Files returns a list of all files rooted at root -func Files(root string) ([]string, error) { - var files []string - logrus.Debugf("Getting files and contents at root %s", root) - err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { - whitelisted, err := CheckWhitelist(path) - if err != nil { - return err - } - if whitelisted { - return nil - } - files = append(files, path) - return err - }) - return files, err -} - // ParentDirectories returns a list of paths to all parent directories // Ex. /some/temp/dir -> [/, /some, /some/temp, /some/temp/dir] func ParentDirectories(path string) []string { @@ -459,16 +444,18 @@ func DownloadFileToDest(rawurl, dest string) error { } // CopyDir copies the file or directory at src to dest -func CopyDir(src, dest string) error { +// It returns a list of files it copied over +func CopyDir(src, dest string) ([]string, error) { files, err := RelativeFiles("", src) if err != nil { - return err + return nil, err } + var copiedFiles []string for _, file := range files { fullPath := filepath.Join(src, file) fi, err := os.Lstat(fullPath) if err != nil { - return err + return nil, err } destPath := filepath.Join(dest, file) if fi.IsDir() { @@ -478,24 +465,25 @@ func CopyDir(src, dest string) error { gid := int(fi.Sys().(*syscall.Stat_t).Gid) if err := os.MkdirAll(destPath, fi.Mode()); err != nil { - return err + return nil, err } if err := os.Chown(destPath, uid, gid); err != nil { - return err + return nil, err } } else if fi.Mode()&os.ModeSymlink != 0 { // If file is a symlink, we want to create the same relative symlink if err := CopySymlink(fullPath, destPath); err != nil { - return err + return nil, err } } else { // ... Else, we want to copy over a file if err := CopyFile(fullPath, destPath); err != nil { - return err + return nil, err } } + copiedFiles = append(copiedFiles, destPath) } - return nil + return copiedFiles, nil } // CopySymlink copies the symlink at src to dest diff --git a/pkg/util/tar_util.go b/pkg/util/tar_util.go index a66eea667..898844784 100644 --- a/pkg/util/tar_util.go +++ b/pkg/util/tar_util.go @@ -20,6 +20,7 @@ import ( "archive/tar" "compress/bzip2" "compress/gzip" + "fmt" "io" "io/ioutil" "os" @@ -31,8 +32,32 @@ import ( "github.com/sirupsen/logrus" ) -// AddToTar adds the file i to tar w at path p -func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Writer) error { +// Tar knows how to write files to a tar file. +type Tar struct { + hardlinks map[uint64]string + w *tar.Writer +} + +// NewTar will create an instance of Tar that can write files to the writer at f. +func NewTar(f io.Writer) Tar { + w := tar.NewWriter(f) + return Tar{ + w: w, + hardlinks: map[uint64]string{}, + } +} + +// Close will close any open streams used by Tar. +func (t *Tar) Close() { + t.w.Close() +} + +// AddFileToTar adds the file at path p to the tar +func (t *Tar) AddFileToTar(p string) error { + i, err := os.Lstat(p) + if err != nil { + return fmt.Errorf("Failed to get file info for %s: %s", p, err) + } linkDst := "" if i.Mode()&os.ModeSymlink != 0 { var err error @@ -51,13 +76,13 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write } hdr.Name = p - hardlink, linkDst := checkHardlink(p, hardlinks, i) + hardlink, linkDst := t.checkHardlink(p, i) if hardlink { hdr.Linkname = linkDst hdr.Typeflag = tar.TypeLink hdr.Size = 0 } - if err := w.WriteHeader(hdr); err != nil { + if err := t.w.WriteHeader(hdr); err != nil { return err } if !(i.Mode().IsRegular()) || hardlink { @@ -68,13 +93,13 @@ func AddToTar(p string, i os.FileInfo, hardlinks map[uint64]string, w *tar.Write return err } defer r.Close() - if _, err := io.Copy(w, r); err != nil { + if _, err := io.Copy(t.w, r); err != nil { return err } return nil } -func Whiteout(p string, w *tar.Writer) error { +func (t *Tar) Whiteout(p string) error { dir := filepath.Dir(p) name := ".wh." + filepath.Base(p) @@ -82,7 +107,7 @@ func Whiteout(p string, w *tar.Writer) error { Name: filepath.Join(dir, name), Size: 0, } - if err := w.WriteHeader(th); err != nil { + if err := t.w.WriteHeader(th); err != nil { return err } @@ -90,7 +115,7 @@ func Whiteout(p string, w *tar.Writer) error { } // Returns true if path is hardlink, and the link destination -func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, string) { +func (t *Tar) checkHardlink(p string, i os.FileInfo) (bool, string) { hardlink := false linkDst := "" if sys := i.Sys(); sys != nil { @@ -98,12 +123,12 @@ func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, nlinks := stat.Nlink if nlinks > 1 { inode := stat.Ino - if original, exists := hardlinks[inode]; exists && original != p { + if original, exists := t.hardlinks[inode]; exists && original != p { hardlink = true logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original) linkDst = original } else { - hardlinks[inode] = p + t.hardlinks[inode] = p } } } @@ -112,17 +137,17 @@ func checkHardlink(p string, hardlinks map[uint64]string, i os.FileInfo) (bool, } // UnpackLocalTarArchive unpacks the tar archive at path to the directory dest -// Returns true if the path was actually unpacked -func UnpackLocalTarArchive(path, dest string) error { +// Returns the files extracted from the tar archive +func UnpackLocalTarArchive(path, dest string) ([]string, error) { // First, we need to check if the path is a local tar archive if compressed, compressionLevel := fileIsCompressedTar(path); compressed { file, err := os.Open(path) if err != nil { - return err + return nil, err } defer file.Close() if compressionLevel == archive.Gzip { - return UnpackCompressedTar(path, dest) + return nil, UnpackCompressedTar(path, dest) } else if compressionLevel == archive.Bzip2 { bzr := bzip2.NewReader(file) return unTar(bzr, dest) @@ -131,12 +156,12 @@ func UnpackLocalTarArchive(path, dest string) error { if fileIsUncompressedTar(path) { file, err := os.Open(path) if err != nil { - return err + return nil, err } defer file.Close() return unTar(file, dest) } - return errors.New("path does not lead to local tar archive") + return nil, errors.New("path does not lead to local tar archive") } //IsFileLocalTarArchive returns true if the file is a local tar archive @@ -198,5 +223,6 @@ func UnpackCompressedTar(path, dir string) error { return err } defer gzr.Close() - return unTar(gzr, dir) + _, err = unTar(gzr, dir) + return err } diff --git a/pkg/util/tar_util_test.go b/pkg/util/tar_util_test.go index 1762ea6f2..e0c1becac 100644 --- a/pkg/util/tar_util_test.go +++ b/pkg/util/tar_util_test.go @@ -17,7 +17,6 @@ limitations under the License. package util import ( - "archive/tar" "compress/gzip" "io" "io/ioutil" @@ -92,16 +91,11 @@ func setUpFilesAndTars(testDir string) error { } func createTar(testdir string, writer io.Writer) error { - - w := tar.NewWriter(writer) - defer w.Close() + t := NewTar(writer) + defer t.Close() for _, regFile := range regularFiles { filePath := filepath.Join(testdir, regFile) - fi, err := os.Stat(filePath) - if err != nil { - return err - } - if err := AddToTar(filePath, fi, map[uint64]string{}, w); err != nil { + if err := t.AddFileToTar(filePath); err != nil { return err } } diff --git a/pkg/util/util.go b/pkg/util/util.go index c95935ec9..73d3f70e0 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -19,12 +19,13 @@ package util import ( "crypto/md5" "encoding/hex" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" "io" "os" "strconv" "syscall" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // SetLogLevel sets the logrus logging level @@ -68,7 +69,8 @@ func Hasher() func(string) (string, error) { return hasher } -// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed +// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed. +// Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same. func MtimeHasher() func(string) (string, error) { hasher := func(p string) (string, error) { h := md5.New() diff --git a/testutil/util.go b/testutil/util.go index 64819c3ef..5101ba415 100644 --- a/testutil/util.go +++ b/testutil/util.go @@ -40,6 +40,7 @@ func SetupFiles(path string, files map[string]string) error { } func CheckErrorAndDeepEqual(t *testing.T, shouldErr bool, err error, expected, actual interface{}) { + t.Helper() if err := checkErr(shouldErr, err); err != nil { t.Error(err) return