diff --git a/README.md b/README.md index 6b20567a9..8a4051fdb 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ We do **not** recommend running the kaniko executor binary in another image, as - [Security](#security) - [Comparison with Other Tools](#comparison-with-other-tools) - [Community](#community) +- [Limitations](#limitations) _If you are interested in contributing to kaniko, see [DEVELOPMENT.md](DEVELOPMENT.md) and [CONTRIBUTING.md](CONTRIBUTING.md)._ @@ -256,7 +257,8 @@ To configure credentials, you will need to do the following: #### --snapshotMode You can set the `--snapshotMode=` flag to set how kaniko will snapshot the filesystem. -If `--snapshotMode=time` is set, only file mtime will be considered when snapshotting. +If `--snapshotMode=time` is set, only file mtime will be considered when snapshotting (see +[limitations related to mtime](#mtime-and-snapshotting)). #### --build-arg @@ -356,3 +358,19 @@ provides. [kaniko-users](https://groups.google.com/forum/#!forum/kaniko-users) Google group To Contribute to kaniko, see [DEVELOPMENT.md](DEVELOPMENT.md) and [CONTRIBUTING.md](CONTRIBUTING.md). + +## Limitations + +### mtime and snapshotting + +When taking a snapshot, kaniko's hashing algorithms include (or in the case of +[`--snapshotMode=time`](#--snapshotmode), only use) a file's +[`mtime`](https://en.wikipedia.org/wiki/Inode#POSIX_inode_description) to determine +if the file has changed. Unfortunately there is a delay between when changes to a +file are made and when the `mtime` is updated. This means: + +* With the default snapshot mode (`--snapshotMode=full`), whether or not kaniko will + add a layer in the case where a `RUN` command modifies a file but the contents do + not change is non-deterministic. +* With the time-only snapshot mode (`--snapshotMode=time`), kaniko may miss changes + introduced by `RUN` commands entirely. 
\ No newline at end of file diff --git a/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times b/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times new file mode 100644 index 000000000..6acea2a4b --- /dev/null +++ b/integration/dockerfiles/Dockerfile_test_copy_same_file_many_times @@ -0,0 +1,49 @@ +FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo +COPY context/foo /foo diff --git a/integration/integration_test.go b/integration/integration_test.go index f9e35d05f..c16ff5f64 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -135,11 +135,10 @@ func TestMain(m *testing.M) { defer DeleteFromBucket(fileInBucket) fmt.Println("Building kaniko image") - buildKaniko := exec.Command("docker", "build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") - err = buildKaniko.Run() + cmd := exec.Command("docker", 
"build", "-t", ExecutorImage, "-f", "../deploy/Dockerfile", "..") + _, err = RunCommandWithoutTest(cmd) if err != nil { - fmt.Print(err) - fmt.Print("Building kaniko failed.") + fmt.Printf("Building kaniko failed: %s", err) os.Exit(1) } diff --git a/pkg/commands/volume.go b/pkg/commands/volume.go index 5e798e128..0fdad0c0e 100644 --- a/pkg/commands/volume.go +++ b/pkg/commands/volume.go @@ -17,6 +17,7 @@ limitations under the License. package commands import ( + "fmt" "os" "strings" @@ -53,13 +54,14 @@ func (v *VolumeCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile. return err } - logrus.Infof("Creating directory %s", volume) - if err := os.MkdirAll(volume, 0755); err != nil { - return err + // Only create and snapshot the dir if it didn't exist already + if _, err := os.Stat(volume); os.IsNotExist(err) { + logrus.Infof("Creating directory %s", volume) + v.snapshotFiles = []string{volume} + if err := os.MkdirAll(volume, 0755); err != nil { + return fmt.Errorf("Could not create directory for volume %s: %s", volume, err) + } } - - //Check if directory already exists? 
- v.snapshotFiles = append(v.snapshotFiles, volume) } config.Volumes = existingVolumes diff --git a/pkg/commands/workdir.go b/pkg/commands/workdir.go index 00487eaee..67186f73f 100644 --- a/pkg/commands/workdir.go +++ b/pkg/commands/workdir.go @@ -47,8 +47,14 @@ func (w *WorkdirCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile config.WorkingDir = filepath.Join(config.WorkingDir, resolvedWorkingDir) } logrus.Infof("Changed working directory to %s", config.WorkingDir) - w.snapshotFiles = []string{config.WorkingDir} - return os.MkdirAll(config.WorkingDir, 0755) + + // Only create and snapshot the dir if it didn't exist already + if _, err := os.Stat(config.WorkingDir); os.IsNotExist(err) { + logrus.Infof("Creating directory %s", config.WorkingDir) + w.snapshotFiles = []string{config.WorkingDir} + return os.MkdirAll(config.WorkingDir, 0755) + } + return nil } // FilesToSnapshot returns the workingdir, which should have been created if it didn't already exist diff --git a/pkg/executor/build.go b/pkg/executor/build.go index a3f958e62..cb04737bc 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -87,23 +87,41 @@ func DoBuild(opts *options.KanikoOptions) (v1.Image, error) { if err := dockerCommand.ExecuteCommand(&imageConfig.Config, buildArgs); err != nil { return nil, err } - // Don't snapshot if it's not the final stage and not the final command - // Also don't snapshot if it's the final stage, not the final command, and single snapshot is set - if (!finalStage && !finalCmd) || (finalStage && !finalCmd && opts.SingleSnapshot) { - continue - } - // Now, we get the files to snapshot from this command and take the snapshot snapshotFiles := dockerCommand.FilesToSnapshot() - if finalCmd { - snapshotFiles = nil + var contents []byte + + // If this is an intermediate stage, we only snapshot for the last command and we + // want to snapshot the entire filesystem since we aren't tracking what was changed + // by previous commands. 
+ if !finalStage { + if finalCmd { + contents, err = snapshotter.TakeSnapshotFS() + } + } else { + // If we are in single snapshot mode, we only take a snapshot once, after all + // commands have completed. + if opts.SingleSnapshot { + if finalCmd { + contents, err = snapshotter.TakeSnapshotFS() + } + } else { + // Otherwise, in the final stage we take a snapshot at each command. If we know + // the files that were changed, we'll snapshot those explicitly, otherwise we'll + // check if anything in the filesystem changed. + if snapshotFiles != nil { + contents, err = snapshotter.TakeSnapshot(snapshotFiles) + } else { + contents, err = snapshotter.TakeSnapshotFS() + } + } } - contents, err := snapshotter.TakeSnapshot(snapshotFiles) if err != nil { - return nil, err + return nil, fmt.Errorf("Error taking snapshot of files for command %s: %s", dockerCommand, err) } + util.MoveVolumeWhitelistToWhitelist() if contents == nil { - logrus.Info("No files were changed, appending empty layer to config.") + logrus.Info("No files were changed, appending empty layer to config. No layer added to image.") continue } // Append the layer to the image diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index 6fd0ca4ac..0d382d766 100644 --- a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -17,6 +17,7 @@ limitations under the License. package snapshot import ( + "fmt" "path/filepath" "strings" ) @@ -82,6 +83,20 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) (bool, error) { return true, nil } +// Add will add the specified file s to the layered map. +func (l *LayeredMap) Add(s string) error { + newV, err := l.hasher(s) + if err != nil { + return fmt.Errorf("Error creating hash for %s: %s", s, err) + } + l.layers[len(l.layers)-1][s] = newV + return nil +} + +// MaybeAdd will add the specified file s to the layered map if +// the layered map's hashing function determines it has changed. If +// it has not changed, it will not be added. 
Returns true if the file +// was added. func (l *LayeredMap) MaybeAdd(s string) (bool, error) { oldV, ok := l.Get(s) newV, err := l.hasher(s) diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index f0a88cbe1..7385a8d2d 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -19,10 +19,12 @@ package snapshot import ( "archive/tar" "bytes" + "fmt" "io" "io/ioutil" "os" "path/filepath" + "syscall" "github.com/GoogleContainerTools/kaniko/pkg/constants" "github.com/GoogleContainerTools/kaniko/pkg/util" @@ -49,17 +51,12 @@ func (s *Snapshotter) Init() error { return nil } -// TakeSnapshot takes a snapshot of the filesystem, avoiding directories in the whitelist, and creates +// TakeSnapshot takes a snapshot of the specified files, avoiding directories in the whitelist, and creates // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { buf := bytes.NewBuffer([]byte{}) var filesAdded bool - var err error - if files == nil { - filesAdded, err = s.snapShotFS(buf) - } else { - filesAdded, err = s.snapshotFiles(buf, files) - } + filesAdded, err := s.snapshotFiles(buf, files) if err != nil { return nil, err } @@ -70,8 +67,24 @@ func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { return contents, err } -// snapshotFiles takes a snapshot of specific files -// Used for ADD/COPY commands, when we know which files have changed +// TakeSnapshotFS takes a snapshot of the filesystem, avoiding directories in the whitelist, and creates +// a tarball of the changed files. 
Returns the contents of the tarball, or nil if no files were changed. +func (s *Snapshotter) TakeSnapshotFS() ([]byte, error) { + buf := bytes.NewBuffer([]byte{}) + var filesAdded bool + filesAdded, err := s.snapShotFS(buf) + if err != nil { + return nil, err + } + contents := buf.Bytes() + if !filesAdded { + return nil, nil + } + return contents, err +} + +// snapshotFiles creates a snapshot (tar) and adds the specified files. +// It will not add files which are whitelisted. func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { s.hardlinks = map[uint64]string{} s.l.Snapshot() @@ -81,8 +94,11 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { } logrus.Infof("Taking snapshot of files %v...", files) snapshottedFiles := make(map[string]bool) + n := len(files) for _, file := range files { parentDirs := util.ParentDirectories(file) + // If we do end up snapshotting the dirs, we need to snapshot them before + // we snapshot their contents files = append(parentDirs, files...) } filesAdded := false @@ -90,7 +106,7 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { defer w.Close() // Now create the tar. - for _, file := range files { + for i, file := range files { file = filepath.Clean(file) if val, ok := snapshottedFiles[file]; ok && val { continue @@ -108,12 +124,24 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) { if err != nil { return false, err } - // Only add to the tar if we add it to the layeredmap. 
- addFile, err := s.l.MaybeAdd(file) - if err != nil { - return false, err + + var fileAdded bool + lastParentFileIndex := len(files) - n + isParentDir := i < lastParentFileIndex + + // If this is parent dir of the file we're snapshotting, only snapshot it + // if it changed + if isParentDir { + fileAdded, err = s.l.MaybeAdd(file) + } else { + // If this is one of the files we are snapshotting, definitely snapshot it + err = s.l.Add(file) + fileAdded = true } - if addFile { + if err != nil { + return false, fmt.Errorf("Unable to add file %s to layered map: %s", file, err) + } + if fileAdded { filesAdded = true if err := util.AddToTar(file, info, s.hardlinks, w); err != nil { return false, err @@ -132,8 +160,17 @@ func isBuildFile(file string) bool { return false } +// snapShotFS creates a snapshot (tar) of all files in the system which are not +// whitelisted and which have changed. func (s *Snapshotter) snapShotFS(f io.Writer) (bool, error) { logrus.Info("Taking snapshot of full filesystem...") + + // Some of the operations that follow (e.g. hashing) depend on the file system being synced, + // for example the hashing function that determines if files are equal uses the mtime of the files, + // which can lag if sync is not called. Unfortunately there can still be lag if too much data needs + // to be flushed or the disk does its own caching/buffering. 
+ syscall.Sync() + s.hardlinks = map[uint64]string{} s.l.Snapshot() existingPaths := s.l.GetFlattenedPathsForWhiteOut() diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go index 934e862d9..c0dc1afdb 100644 --- a/pkg/snapshot/snapshot_test.go +++ b/pkg/snapshot/snapshot_test.go @@ -29,7 +29,7 @@ import ( "github.com/pkg/errors" ) -func TestSnapshotFileChange(t *testing.T) { +func TestSnapshotFSFileChange(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) @@ -45,7 +45,7 @@ func TestSnapshotFileChange(t *testing.T) { t.Fatalf("Error setting up fs: %s", err) } // Take another snapshot - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } @@ -81,7 +81,7 @@ func TestSnapshotFileChange(t *testing.T) { } } -func TestSnapshotChangePermissions(t *testing.T) { +func TestSnapshotFSChangePermissions(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) if err != nil { @@ -93,7 +93,7 @@ func TestSnapshotChangePermissions(t *testing.T) { t.Fatalf("Error changing permissions on %s: %v", batPath, err) } // Take another snapshot - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } @@ -166,14 +166,14 @@ func TestSnapshotFiles(t *testing.T) { testutil.CheckErrorAndDeepEqual(t, false, nil, expectedFiles, actualFiles) } -func TestEmptySnapshot(t *testing.T) { +func TestEmptySnapshotFS(t *testing.T) { testDir, snapshotter, err := setUpTestDir() defer os.RemoveAll(testDir) if err != nil { t.Fatal(err) } // Take snapshot with no changes - contents, err := snapshotter.TakeSnapshot(nil) + contents, err := snapshotter.TakeSnapshotFS() if err != nil { t.Fatalf("Error taking snapshot of fs: %s", err) } diff --git a/pkg/util/util.go b/pkg/util/util.go index c95935ec9..73d3f70e0 100644 
--- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -19,12 +19,13 @@ package util import ( "crypto/md5" "encoding/hex" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" "io" "os" "strconv" "syscall" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // SetLogLevel sets the logrus logging level @@ -68,7 +69,8 @@ func Hasher() func(string) (string, error) { return hasher } -// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed +// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed. +// Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same. func MtimeHasher() func(string) (string, error) { hasher := func(p string) (string, error) { h := md5.New()