From 1ffae47fdd91d38006c1d28e14e66869742cbdba Mon Sep 17 00:00:00 2001 From: dlorenc Date: Wed, 23 Jan 2019 13:46:12 -0600 Subject: [PATCH] Change cache key calculation to be more reproducible. (#525) Before we were using the full image digest, but that contains a timestamp. Now we only use the layers themselves and the image config (env vars, etc.). Also fix a bug in unpacking the layers themselves. mtimes can change during unpacking, so set them all once at the end. --- integration/dockerfiles/Dockerfile_test_cache | 14 +++++++-- pkg/executor/build.go | 7 ++++- pkg/util/fs_util.go | 20 ++++++++++--- pkg/util/tar_util.go | 1 + pkg/util/util.go | 29 +++++++++++++++++++ 5 files changed, 63 insertions(+), 8 deletions(-) diff --git a/integration/dockerfiles/Dockerfile_test_cache b/integration/dockerfiles/Dockerfile_test_cache index e3ebe304d..71644d560 100644 --- a/integration/dockerfiles/Dockerfile_test_cache +++ b/integration/dockerfiles/Dockerfile_test_cache @@ -16,7 +16,15 @@ # If the image is built twice, /date should be the same in both images # if the cache is implemented correctly -FROM gcr.io/google-appengine/debian9@sha256:1d6a9a6d106bd795098f60f4abb7083626354fa6735e81743c7f8cfca11259f0 +FROM alpine as base_stage + +RUN mkdir foo && echo base_stage > foo/base + +FROM base_stage as cached_stage + +RUN echo cached_stage > foo/cache + +FROM cached_stage as bug_stage + +RUN echo bug_stage > foo/bug RUN date > /date -COPY context/foo /foo -RUN echo hey diff --git a/pkg/executor/build.go b/pkg/executor/build.go index fe1bfb73f..c6c077685 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -146,6 +146,7 @@ func (s *stageBuilder) optimize(compositeKey CompositeCache, cfg v1.Config, cmds img, err := layerCache.RetrieveLayer(ck) if err != nil { logrus.Infof("No cached layer found for cmd %s", command.String()) + logrus.Debugf("Key missing was: %s", compositeKey.Key()) break } @@ -167,7 +168,11 @@ func (s *stageBuilder) optimize(compositeKey CompositeCache, cfg v1.Config, cmds func (s *stageBuilder) build() error { // Set the initial cache key to be the base image digest, the build args and the SrcContext. - compositeKey := NewCompositeCache(s.baseImageDigest) + dgst, err := util.ReproducibleDigest(s.image) + if err != nil { + return err + } + compositeKey := NewCompositeCache(dgst) compositeKey.AddKey(s.opts.BuildArgs...) cmds := []commands.DockerCommand{} diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index 8cc912fb4..0699314a1 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -75,7 +75,10 @@ func GetFSFromImage(root string, img v1.Image) ([]string, error) { if err != nil { return nil, err } - extractedFiles := []string{} + + // Store a map of files to their mtime. We need to set mtimes in a second pass because creating files + // can change the mtime of a directory. + extractedFiles := map[string]time.Time{} for i, l := range layers { logrus.Debugf("Extracting layer %d", i) @@ -106,10 +109,17 @@ func GetFSFromImage(root string, img v1.Image) ([]string, error) { if err := extractFile(root, hdr, tr); err != nil { return nil, err } - extractedFiles = append(extractedFiles, filepath.Join(root, filepath.Clean(hdr.Name))) + extractedFiles[filepath.Join(root, filepath.Clean(hdr.Name))] = hdr.ModTime } } - return extractedFiles, nil + + fileNames := []string{} + for f, t := range extractedFiles { + fileNames = append(fileNames, f) + os.Chtimes(f, time.Time{}, t) + } + + return fileNames, nil } // DeleteFilesystem deletes the extracted image file system @@ -272,6 +282,7 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error { return err } } + return nil } @@ -382,7 +393,8 @@ func RelativeFiles(fp string, root string) ([]string, error) { } // ParentDirectories returns a list of paths to all parent directories -// Ex. /some/temp/dir -> [/, /some, /some/temp, /some/temp/dir] +// Ex. /some/temp/dir -> [/some, /some/temp, /some/temp/dir] +// This purposefully excludes the /. func ParentDirectories(path string) []string { path = filepath.Clean(path) dirs := strings.Split(path, "/") diff --git a/pkg/util/tar_util.go b/pkg/util/tar_util.go index bc1cc67a0..b72785707 100644 --- a/pkg/util/tar_util.go +++ b/pkg/util/tar_util.go @@ -54,6 +54,7 @@ func (t *Tar) Close() { // AddFileToTar adds the file at path p to the tar func (t *Tar) AddFileToTar(p string) error { + logrus.Debugf("Adding file %s to tar", p) i, err := os.Lstat(p) if err != nil { return fmt.Errorf("Failed to get file info for %s: %s", p, err) diff --git a/pkg/util/util.go b/pkg/util/util.go index 873cbae20..e7344c88f 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -20,11 +20,14 @@ import ( "crypto/md5" "crypto/sha256" "encoding/hex" + "encoding/json" "io" "os" "strconv" "syscall" + "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/partial" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -127,3 +130,29 @@ func SHA256(r io.Reader) (string, error) { } return hex.EncodeToString(hasher.Sum(make([]byte, 0, hasher.Size()))), nil } + +type ReproducibleManifest struct { + Layers []v1.Descriptor + Config v1.Config +} + +func ReproducibleDigest(img partial.WithManifestAndConfigFile) (string, error) { + mfst, err := img.Manifest() + if err != nil { + return "", err + } + cfg, err := img.ConfigFile() + if err != nil { + return "", err + } + rm := ReproducibleManifest{ + Layers: mfst.Layers, + Config: cfg.Config, + } + + b, err := json.Marshal(rm) + if err != nil { + return "", err + } + return string(b), nil +}