Change cache key calculation to be more reproducible. (#525)

Before we were using the full image digest, but that contains a timestamp. Now
we only use the layers themselves and the image config (env vars, etc.).

Also fix a bug in unpacking the layers themselves. mtimes can change during unpacking,
so set them all once at the end.
This commit is contained in:
dlorenc 2019-01-23 13:46:12 -06:00 committed by GitHub
parent a493035287
commit 1ffae47fdd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 63 additions and 8 deletions

View File

@ -16,7 +16,15 @@
# If the image is built twice, /date should be the same in both images
# if the cache is implemented correctly
FROM gcr.io/google-appengine/debian9@sha256:1d6a9a6d106bd795098f60f4abb7083626354fa6735e81743c7f8cfca11259f0
FROM alpine as base_stage
RUN mkdir foo && echo base_stage > foo/base
FROM base_stage as cached_stage
RUN echo cached_stage > foo/cache
FROM cached_stage as bug_stage
RUN echo bug_stage > foo/bug
RUN date > /date
COPY context/foo /foo
RUN echo hey

View File

@ -146,6 +146,7 @@ func (s *stageBuilder) optimize(compositeKey CompositeCache, cfg v1.Config, cmds
img, err := layerCache.RetrieveLayer(ck)
if err != nil {
logrus.Infof("No cached layer found for cmd %s", command.String())
logrus.Debugf("Key missing was: %s", compositeKey.Key())
break
}
@ -167,7 +168,11 @@ func (s *stageBuilder) optimize(compositeKey CompositeCache, cfg v1.Config, cmds
func (s *stageBuilder) build() error {
// Set the initial cache key to be the base image digest, the build args and the SrcContext.
compositeKey := NewCompositeCache(s.baseImageDigest)
dgst, err := util.ReproducibleDigest(s.image)
if err != nil {
return err
}
compositeKey := NewCompositeCache(dgst)
compositeKey.AddKey(s.opts.BuildArgs...)
cmds := []commands.DockerCommand{}

View File

@ -75,7 +75,10 @@ func GetFSFromImage(root string, img v1.Image) ([]string, error) {
if err != nil {
return nil, err
}
extractedFiles := []string{}
// Store a map of files to their mtime. We need to set mtimes in a second pass because creating files
// can change the mtime of a directory.
extractedFiles := map[string]time.Time{}
for i, l := range layers {
logrus.Debugf("Extracting layer %d", i)
@ -106,10 +109,17 @@ func GetFSFromImage(root string, img v1.Image) ([]string, error) {
if err := extractFile(root, hdr, tr); err != nil {
return nil, err
}
extractedFiles = append(extractedFiles, filepath.Join(root, filepath.Clean(hdr.Name)))
extractedFiles[filepath.Join(root, filepath.Clean(hdr.Name))] = hdr.ModTime
}
}
return extractedFiles, nil
fileNames := []string{}
for f, t := range extractedFiles {
fileNames = append(fileNames, f)
os.Chtimes(f, time.Time{}, t)
}
return fileNames, nil
}
// DeleteFilesystem deletes the extracted image file system
@ -272,6 +282,7 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error {
return err
}
}
return nil
}
@ -382,7 +393,8 @@ func RelativeFiles(fp string, root string) ([]string, error) {
}
// ParentDirectories returns a list of paths to all parent directories
// Ex. /some/temp/dir -> [/, /some, /some/temp, /some/temp/dir]
// Ex. /some/temp/dir -> [/some, /some/temp, /some/temp/dir]
// This purposefully excludes the /.
func ParentDirectories(path string) []string {
path = filepath.Clean(path)
dirs := strings.Split(path, "/")

View File

@ -54,6 +54,7 @@ func (t *Tar) Close() {
// AddFileToTar adds the file at path p to the tar
func (t *Tar) AddFileToTar(p string) error {
logrus.Debugf("Adding file %s to tar", p)
i, err := os.Lstat(p)
if err != nil {
return fmt.Errorf("Failed to get file info for %s: %s", p, err)

View File

@ -20,11 +20,14 @@ import (
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"io"
"os"
"strconv"
"syscall"
"github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/partial"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -127,3 +130,29 @@ func SHA256(r io.Reader) (string, error) {
}
return hex.EncodeToString(hasher.Sum(make([]byte, 0, hasher.Size()))), nil
}
type ReproducibleManifest struct {
Layers []v1.Descriptor
Config v1.Config
}
func ReproducibleDigest(img partial.WithManifestAndConfigFile) (string, error) {
mfst, err := img.Manifest()
if err != nil {
return "", err
}
cfg, err := img.ConfigFile()
if err != nil {
return "", err
}
rm := ReproducibleManifest{
Layers: mfst.Layers,
Config: cfg.Config,
}
b, err := json.Marshal(rm)
if err != nil {
return "", err
}
return string(b), nil
}