Merge pull request #337 from priyawadhwa/hasher

Add Key() to LayeredMap and Snapshotter
This commit is contained in:
Tejal Desai 2018-09-11 09:29:50 -07:00 committed by GitHub
commit 06defa6552
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 157 additions and 6 deletions

View File

@ -60,7 +60,7 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) {
if err := util.GetFSFromImage(constants.RootDir, sourceImage); err != nil { if err := util.GetFSFromImage(constants.RootDir, sourceImage); err != nil {
return nil, err return nil, err
} }
l := snapshot.NewLayeredMap(hasher) l := snapshot.NewLayeredMap(hasher, util.CacheHasher())
snapshotter := snapshot.NewSnapshotter(l, constants.RootDir) snapshotter := snapshot.NewSnapshotter(l, constants.RootDir)
// Take initial snapshot // Take initial snapshot
if err := snapshotter.Init(); err != nil { if err := snapshotter.Init(); err != nil {

View File

@ -17,20 +17,28 @@ limitations under the License.
package snapshot package snapshot
import ( import (
"bytes"
"encoding/json"
"fmt" "fmt"
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/GoogleContainerTools/kaniko/pkg/util"
) )
type LayeredMap struct { type LayeredMap struct {
layers []map[string]string layers []map[string]string
whiteouts []map[string]string whiteouts []map[string]string
added []map[string]string
hasher func(string) (string, error) hasher func(string) (string, error)
// cacheHasher doesn't include mtime in its hash so that filesystem cache keys are stable
cacheHasher func(string) (string, error)
} }
func NewLayeredMap(h func(string) (string, error)) *LayeredMap { func NewLayeredMap(h func(string) (string, error), c func(string) (string, error)) *LayeredMap {
l := LayeredMap{ l := LayeredMap{
hasher: h, hasher: h,
cacheHasher: c,
} }
l.layers = []map[string]string{} l.layers = []map[string]string{}
return &l return &l
@ -39,8 +47,18 @@ func NewLayeredMap(h func(string) (string, error)) *LayeredMap {
func (l *LayeredMap) Snapshot() { func (l *LayeredMap) Snapshot() {
l.whiteouts = append(l.whiteouts, map[string]string{}) l.whiteouts = append(l.whiteouts, map[string]string{})
l.layers = append(l.layers, map[string]string{}) l.layers = append(l.layers, map[string]string{})
l.added = append(l.added, map[string]string{})
} }
// Key returns a SHA256 digest of the JSON encoding of l.added, the
// per-snapshot maps from file path to cache hash that Add fills in.
// encoding/json writes map keys in sorted order, so two LayeredMaps holding
// the same entries produce the same key regardless of insertion order.
//
// An error is returned if the added maps cannot be encoded or if hashing the
// encoded bytes fails.
func (l *LayeredMap) Key() (string, error) {
	var buf bytes.Buffer
	// Check the encode error: hashing a partially written buffer would yield
	// a key that does not describe the added files.
	if err := json.NewEncoder(&buf).Encode(l.added); err != nil {
		return "", fmt.Errorf("encoding added files for key: %v", err)
	}
	return util.SHA256(&buf)
}
// GetFlattenedPathsForWhiteOut returns all paths in the current FS
func (l *LayeredMap) GetFlattenedPathsForWhiteOut() map[string]struct{} { func (l *LayeredMap) GetFlattenedPathsForWhiteOut() map[string]struct{} {
paths := map[string]struct{}{} paths := map[string]struct{}{}
for _, l := range l.layers { for _, l := range l.layers {
@ -85,11 +103,18 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) (bool, error) {
// Add will add the specified file s to the layered map. // Add will add the specified file s to the layered map.
func (l *LayeredMap) Add(s string) error { func (l *LayeredMap) Add(s string) error {
// Use hash function and add to layers
newV, err := l.hasher(s) newV, err := l.hasher(s)
if err != nil { if err != nil {
return fmt.Errorf("Error creating hash for %s: %s", s, err) return fmt.Errorf("Error creating hash for %s: %v", s, err)
} }
l.layers[len(l.layers)-1][s] = newV l.layers[len(l.layers)-1][s] = newV
// Use cache hash function and add to added
cacheV, err := l.cacheHasher(s)
if err != nil {
return fmt.Errorf("Error creating cache hash for %s: %v", s, err)
}
l.added[len(l.added)-1][s] = cacheV
return nil return nil
} }

View File

@ -0,0 +1,79 @@
/*
Copyright 2018 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package snapshot
import (
"testing"
)
// Test_CacheKey checks that LayeredMap.Key depends only on the contents of
// the added maps: the same entries written in a different order must give the
// same key, and a differing value must give a different key.
func Test_CacheKey(t *testing.T) {
	tests := []struct {
		name  string
		map1  map[string]string
		map2  map[string]string
		equal bool
	}{
		{
			name: "maps are the same",
			map1: map[string]string{
				"a": "apple",
				"b": "bat",
				"c": "cat",
				"d": "dog",
				"e": "egg",
			},
			map2: map[string]string{
				"c": "cat",
				"d": "dog",
				"b": "bat",
				"a": "apple",
				"e": "egg",
			},
			equal: true,
		},
		{
			name: "maps are different",
			map1: map[string]string{
				"a": "apple",
				"b": "bat",
				"c": "cat",
			},
			map2: map[string]string{
				"c": "",
				"b": "bat",
				"a": "apple",
			},
			equal: false,
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			lm1 := LayeredMap{added: []map[string]string{test.map1}}
			lm2 := LayeredMap{added: []map[string]string{test.map2}}
			k1, err := lm1.Key()
			if err != nil {
				t.Fatalf("error getting key for map 1: %v", err)
			}
			k2, err := lm2.Key()
			if err != nil {
				t.Fatalf("error getting key for map 2: %v", err)
			}
			// Report the equality outcome rather than calling one key
			// "expected": neither key is a reference value, and in the
			// inequality case the keys are supposed to differ.
			if got := k1 == k2; got != test.equal {
				t.Fatalf("keys equal = %v, want %v\nkey for map 1: %s\nkey for map 2: %s", got, test.equal, k1, k2)
			}
		})
	}
}

View File

@ -49,6 +49,11 @@ func (s *Snapshotter) Init() error {
return nil return nil
} }
// Key delegates to the Key method of s.l and returns its result and error
// unchanged.
// NOTE(review): s.l is presumably the *LayeredMap handed to NewSnapshotter,
// in which case the key is a hash of the files added so far — confirm.
func (s *Snapshotter) Key() (string, error) {
return s.l.Key()
}
// TakeSnapshot takes a snapshot of the specified files, avoiding directories in the whitelist, and creates // TakeSnapshot takes a snapshot of the specified files, avoiding directories in the whitelist, and creates
// a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed // a tarball of the changed files. Return contents of the tarball, and whether or not any files were changed
func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) { func (s *Snapshotter) TakeSnapshot(files []string) ([]byte, error) {
@ -102,7 +107,8 @@ func (s *Snapshotter) snapshotFiles(f io.Writer, files []string) (bool, error) {
logrus.Info("No files changed in this command, skipping snapshotting.") logrus.Info("No files changed in this command, skipping snapshotting.")
return false, nil return false, nil
} }
logrus.Infof("Taking snapshot of files %v...", files) logrus.Info("Taking snapshot of files...")
logrus.Debugf("Taking snapshot of files %v", files)
snapshottedFiles := make(map[string]bool) snapshottedFiles := make(map[string]bool)
filesAdded := false filesAdded := false

View File

@ -198,7 +198,7 @@ func setUpTestDir() (string, *Snapshotter, error) {
} }
// Take the initial snapshot // Take the initial snapshot
l := NewLayeredMap(util.Hasher()) l := NewLayeredMap(util.Hasher(), util.CacheHasher())
snapshotter := NewSnapshotter(l, testDir) snapshotter := NewSnapshotter(l, testDir)
if err := snapshotter.Init(); err != nil { if err := snapshotter.Init(); err != nil {
return testDir, nil, errors.Wrap(err, "initializing snapshotter") return testDir, nil, errors.Wrap(err, "initializing snapshotter")

View File

@ -18,6 +18,7 @@ package util
import ( import (
"crypto/md5" "crypto/md5"
"crypto/sha256"
"encoding/hex" "encoding/hex"
"io" "io"
"os" "os"
@ -72,6 +73,36 @@ func Hasher() func(string) (string, error) {
return hasher return hasher
} }
// CacheHasher returns a hash function for cache keys. For the path p it
// hashes, in order, the file mode string, the owning uid and gid (base 36,
// separated by a comma) and, for regular files only, the file contents. The
// result is the hex-encoded MD5 digest. The file's mtime is never read, so
// touching a file without changing it leaves the hash unchanged.
//
// The returned function fails if p cannot be lstat'ed or opened, if reading
// its contents fails, or if the platform's stat data is not a
// *syscall.Stat_t.
func CacheHasher() func(string) (string, error) {
	return func(p string) (string, error) {
		fi, err := os.Lstat(p)
		if err != nil {
			return "", err
		}
		// Assert once and check the result so that unexpected stat data is
		// reported as an error instead of a panic.
		st, ok := fi.Sys().(*syscall.Stat_t)
		if !ok {
			return "", &os.PathError{Op: "lstat", Path: p, Err: os.ErrInvalid}
		}

		h := md5.New()
		h.Write([]byte(fi.Mode().String()))
		h.Write([]byte(strconv.FormatUint(uint64(st.Uid), 36)))
		h.Write([]byte(","))
		h.Write([]byte(strconv.FormatUint(uint64(st.Gid), 36)))

		// Only regular files contribute their contents; directories,
		// symlinks and other special files are hashed by metadata alone.
		if fi.Mode().IsRegular() {
			f, err := os.Open(p)
			if err != nil {
				return "", err
			}
			defer f.Close()
			if _, err := io.Copy(h, f); err != nil {
				return "", err
			}
		}
		return hex.EncodeToString(h.Sum(nil)), nil
	}
}
// MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed. // MtimeHasher returns a hash function, which only looks at mtime to determine if a file has changed.
// Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same. // Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same.
func MtimeHasher() func(string) (string, error) { func MtimeHasher() func(string) (string, error) {
@ -86,3 +117,13 @@ func MtimeHasher() func(string) (string, error) {
} }
return hasher return hasher
} }
// SHA256 drains r and returns the hex-encoded SHA-256 digest of the bytes it
// yielded. If copying from r fails, the error is returned with an empty
// string.
func SHA256(r io.Reader) (string, error) {
	digest := sha256.New()
	if _, err := io.Copy(digest, r); err != nil {
		return "", err
	}
	sum := digest.Sum(nil)
	return hex.EncodeToString(sum), nil
}