From ede69f85077844f41bf2175e0dfd6854209cfa2f Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 01:24:46 -0700 Subject: [PATCH 1/6] check file changed in loop --- pkg/snapshot/snapshot.go | 41 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index f253dfbd2..47a798428 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -142,14 +142,16 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { if util.IsInIgnoreList(path) { if util.IsDestDir(path) { logrus.Tracef("Skipping paths under %s, as it is a ignored directory", path) - return filepath.SkipDir } return nil } - - foundPaths = append(foundPaths, path) + if ok, err := s.l.CheckFileChange(path); err != nil { + return err + } else if ok { + foundPaths = append(foundPaths, path) + } return nil }, @@ -163,32 +165,15 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { existingPaths := s.l.getFlattenedPathsForWhiteOut() filesToAdd := []string{} - resolvedMemFs := make(map[string]bool) - - for _, path := range foundPaths { + resolvedFiles, err := filesystem.ResolvePaths(foundPaths, s.ignorelist) + if err != nil { + return nil, nil, err + } + for _, path := range resolvedFiles { delete(existingPaths, path) - resolvedFiles, err := filesystem.ResolvePaths([]string{path}, s.ignorelist) - if err != nil { - return nil, nil, err - } - for _, path := range resolvedFiles { - // Continue if this path is already processed - if _, ok := resolvedMemFs[path]; ok { - continue - } - if util.CheckIgnoreList(path) { - logrus.Tracef("Not adding %s to layer, as it's whitelisted", path) - continue - } - // Only add changed files. - fileChanged, err := s.l.CheckFileChange(path) - if err != nil { - return nil, nil, fmt.Errorf("could not check if file has changed %s %s", path, err) - } - if fileChanged { - logrus.Tracef("Adding file %s to layer, because it was changed.", path) - filesToAdd = append(filesToAdd, path) - } + if util.CheckIgnoreList(path) { + logrus.Tracef("Not adding %s to layer, as it's whitelisted", path) + filesToAdd = append(filesToAdd, path) } } From cbf4452fe91334fa881b9811b66b034da9f53020 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 10:03:37 -0700 Subject: [PATCH 2/6] fix snapshot --- pkg/snapshot/snapshot.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index 47a798428..2a84c50da 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -152,7 +152,6 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { } else if ok { foundPaths = append(foundPaths, path) } - return nil }, Unsorted: true, @@ -173,8 +172,9 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { delete(existingPaths, path) if util.CheckIgnoreList(path) { logrus.Tracef("Not adding %s to layer, as it's whitelisted", path) - filesToAdd = append(filesToAdd, path) + continue } + filesToAdd = append(filesToAdd, path) } // The paths left here are the ones that have been deleted in this layer. From b9d25dd5db80db57e9d9e7c1136b805c52f585c3 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 10:50:24 -0700 Subject: [PATCH 3/6] better whiteout logic --- pkg/snapshot/snapshot.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/snapshot/snapshot.go b/pkg/snapshot/snapshot.go index 2a84c50da..783d83083 100644 --- a/pkg/snapshot/snapshot.go +++ b/pkg/snapshot/snapshot.go @@ -135,7 +135,10 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { timer := timing.Start("Walking filesystem") - foundPaths := make([]string, 0) + changedPaths := make([]string, 0) + + // Get a list of all the files that existed before this layer + existingPaths := s.l.getFlattenedPathsForWhiteOut() godirwalk.Walk(s.directory, &godirwalk.Options{ Callback: func(path string, ent *godirwalk.Dirent) error { @@ -150,8 +153,9 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { if ok, err := s.l.CheckFileChange(path); err != nil { return err } else if ok { - foundPaths = append(foundPaths, path) + changedPaths = append(changedPaths, path) } + delete(existingPaths, path) return nil }, Unsorted: true, @@ -159,17 +163,13 @@ func (s *Snapshotter) scanFullFilesystem() ([]string, []string, error) { ) timing.DefaultRun.Stop(timer) timer = timing.Start("Resolving Paths") - // First handle whiteouts - // Get a list of all the files that existed before this layer - existingPaths := s.l.getFlattenedPathsForWhiteOut() filesToAdd := []string{} - resolvedFiles, err := filesystem.ResolvePaths(foundPaths, s.ignorelist) + resolvedFiles, err := filesystem.ResolvePaths(changedPaths, s.ignorelist) if err != nil { return nil, nil, err } for _, path := range resolvedFiles { - delete(existingPaths, path) if util.CheckIgnoreList(path) { logrus.Tracef("Not adding %s to layer, as it's whitelisted", path) continue From c576771497694588d36be907d50a5218d4e38c34 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 11:37:35 -0700 Subject: [PATCH 4/6] fix tests --- pkg/snapshot/snapshot_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/snapshot/snapshot_test.go b/pkg/snapshot/snapshot_test.go index 1b03bbd4f..abe211038 100644 --- a/pkg/snapshot/snapshot_test.go +++ b/pkg/snapshot/snapshot_test.go @@ -66,6 +66,7 @@ func TestSnapshotFSFileChange(t *testing.T) { } for _, path := range util.ParentDirectoriesWithoutLeadingSlash(batPath) { if path == "/" { + snapshotFiles["/"] = "" continue } snapshotFiles[path+"/"] = "" @@ -164,6 +165,7 @@ func TestSnapshotFSChangePermissions(t *testing.T) { } for _, path := range util.ParentDirectoriesWithoutLeadingSlash(batPathWithoutLeadingSlash) { if path == "/" { + snapshotFiles["/"] = "" continue } snapshotFiles[path+"/"] = "" From 988114d022ffd80a67c51c7ab3f65616807f4904 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 15:27:39 -0700 Subject: [PATCH 5/6] re-use cache when checking and adding --- pkg/snapshot/layered_map.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index 0097b57a5..c6dbbf6c8 100644 --- a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -30,9 +30,10 @@ import ( ) type LayeredMap struct { - layers []map[string]string - whiteouts []map[string]string - hasher func(string) (string, error) + layers []map[string]string + whiteouts []map[string]string + layerHashCache map[string]string + hasher func(string) (string, error) // cacheHasher doesn't include mtime in it's hash so that filesystem cache keys are stable cacheHasher func(string) (string, error) } @@ -103,7 +104,14 @@ func (l *LayeredMap) MaybeAddWhiteout(s string) bool { // Add will add the specified file s to the layered map. func (l *LayeredMap) Add(s string) error { // Use hash function and add to layers - newV, err := l.hasher(s) + newV, err := func(s string) (string, error) { + if v, ok := l.layerHashCache[s]; ok { + // clear it cache for next layer. + delete(l.layerHashCache, s) + return v, nil + } + return l.hasher(s) + }(s) if err != nil { return fmt.Errorf("error creating hash for %s: %v", s, err) } @@ -126,6 +134,7 @@ func (l *LayeredMap) CheckFileChange(s string) (bool, error) { } return false, err } + l.layerHashCache[s] = newV oldV, ok := l.Get(s) if ok && newV == oldV { return false, nil From 8d3bbd20f4fa3c6d2e0a2f286ec9a11b9cc0d10d Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 15:49:39 -0700 Subject: [PATCH 6/6] initialize --- pkg/snapshot/layered_map.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/snapshot/layered_map.go b/pkg/snapshot/layered_map.go index c6dbbf6c8..49bff0101 100644 --- a/pkg/snapshot/layered_map.go +++ b/pkg/snapshot/layered_map.go @@ -44,6 +44,7 @@ func NewLayeredMap(h func(string) (string, error), c func(string) (string, error cacheHasher: c, } l.layers = []map[string]string{} + l.layerHashCache = map[string]string{} return &l }