Even faster snapshotting with godirwalk. (#504)
This switches from filepath.Walk to godirwalk.Walk for even faster snapshotting. A quick test shows a 40% improvement on the dockerfile_mv_add build.
This commit is contained in:
parent
c3afcc0c7d
commit
a044e2b6e4
|
|
@ -561,6 +561,14 @@
|
|||
revision = "ab8a2e0c74be9d3be70b3184d9acc634935ded82"
|
||||
version = "1.1.4"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:75b5172f534d05a5abff749f1cf002351f834a2a5592812210aca5e139f9ddad"
|
||||
name = "github.com/karrick/godirwalk"
|
||||
packages = ["."]
|
||||
pruneopts = "NUT"
|
||||
revision = "cceff240ca8af695e41738831646717e80d2f846"
|
||||
version = "v1.7.7"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:d0164259ed17929689df11205194d80288e8ae25351778f7a3421a24774c36f8"
|
||||
name = "github.com/mattn/go-shellwords"
|
||||
|
|
@ -1174,7 +1182,9 @@
|
|||
"github.com/aws/aws-sdk-go/service/s3",
|
||||
"github.com/aws/aws-sdk-go/service/s3/s3manager",
|
||||
"github.com/docker/docker/builder/dockerfile",
|
||||
"github.com/docker/docker/builder/dockerignore",
|
||||
"github.com/docker/docker/pkg/archive",
|
||||
"github.com/docker/docker/pkg/fileutils",
|
||||
"github.com/docker/docker/pkg/signal",
|
||||
"github.com/genuinetools/amicontained/container",
|
||||
"github.com/google/go-cmp/cmp",
|
||||
|
|
@ -1189,14 +1199,17 @@
|
|||
"github.com/google/go-containerregistry/pkg/v1/remote",
|
||||
"github.com/google/go-containerregistry/pkg/v1/tarball",
|
||||
"github.com/google/go-github/github",
|
||||
"github.com/karrick/godirwalk",
|
||||
"github.com/moby/buildkit/frontend/dockerfile/instructions",
|
||||
"github.com/moby/buildkit/frontend/dockerfile/parser",
|
||||
"github.com/moby/buildkit/frontend/dockerfile/shell",
|
||||
"github.com/pkg/errors",
|
||||
"github.com/sirupsen/logrus",
|
||||
"github.com/spf13/cobra",
|
||||
"github.com/spf13/pflag",
|
||||
"golang.org/x/net/context",
|
||||
"golang.org/x/oauth2",
|
||||
"golang.org/x/sync/errgroup",
|
||||
"k8s.io/client-go/discovery",
|
||||
]
|
||||
solver-name = "gps-cdcl"
|
||||
|
|
|
|||
|
|
@ -19,10 +19,11 @@ package snapshot
|
|||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/karrick/godirwalk"
|
||||
|
||||
"github.com/GoogleContainerTools/kaniko/pkg/constants"
|
||||
|
||||
"github.com/GoogleContainerTools/kaniko/pkg/util"
|
||||
|
|
@ -141,22 +142,22 @@ func (s *Snapshotter) TakeSnapshotFS() (string, error) {
|
|||
defer t.Close()
|
||||
|
||||
// Save the fs state in a map to iterate over later.
|
||||
memFs := map[string]os.FileInfo{}
|
||||
filepath.Walk(s.directory, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if util.IsInWhitelist(path) {
|
||||
if util.IsDestDir(path) {
|
||||
logrus.Infof("Skipping paths under %s, as it is a whitelisted directory", path)
|
||||
return filepath.SkipDir
|
||||
memFs := map[string]*godirwalk.Dirent{}
|
||||
godirwalk.Walk(s.directory, &godirwalk.Options{
|
||||
Callback: func(path string, ent *godirwalk.Dirent) error {
|
||||
if util.IsInWhitelist(path) {
|
||||
if util.IsDestDir(path) {
|
||||
logrus.Infof("Skipping paths under %s, as it is a whitelisted directory", path)
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
memFs[path] = ent
|
||||
return nil
|
||||
}
|
||||
|
||||
memFs[path] = info
|
||||
return nil
|
||||
})
|
||||
},
|
||||
Unsorted: true,
|
||||
},
|
||||
)
|
||||
|
||||
// First handle whiteouts
|
||||
for p := range memFs {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
BSD 2-Clause License
|
||||
|
||||
Copyright (c) 2017, Karrick McDermott
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// Dirent stores the name and file system mode type of discovered file system
|
||||
// entries.
|
||||
type Dirent struct {
|
||||
name string
|
||||
modeType os.FileMode
|
||||
}
|
||||
|
||||
// NewDirent returns a newly initialized Dirent structure, or an error. This
|
||||
// function does not follow symbolic links.
|
||||
//
|
||||
// This function is rarely used, as Dirent structures are provided by other
|
||||
// functions in this library that read and walk directories.
|
||||
func NewDirent(osPathname string) (*Dirent, error) {
|
||||
fi, err := os.Lstat(osPathname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot lstat")
|
||||
}
|
||||
return &Dirent{
|
||||
name: filepath.Base(osPathname),
|
||||
modeType: fi.Mode() & os.ModeType,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Name returns the basename of the file system entry.
|
||||
func (de Dirent) Name() string { return de.name }
|
||||
|
||||
// ModeType returns the mode bits that specify the file system node type. We
|
||||
// could make our own enum-like data type for encoding the file type, but Go's
|
||||
// runtime already gives us architecture independent file modes, as discussed in
|
||||
// `os/types.go`:
|
||||
//
|
||||
// Go's runtime FileMode type has same definition on all systems, so that
|
||||
// information about files can be moved from one system to another portably.
|
||||
func (de Dirent) ModeType() os.FileMode { return de.modeType }
|
||||
|
||||
// IsDir returns true if and only if the Dirent represents a file system
|
||||
// directory. Note that on some operating systems, more than one file mode bit
|
||||
// may be set for a node. For instance, on Windows, a symbolic link that points
|
||||
// to a directory will have both the directory and the symbolic link bits set.
|
||||
func (de Dirent) IsDir() bool { return de.modeType&os.ModeDir != 0 }
|
||||
|
||||
// IsRegular returns true if and only if the Dirent represents a regular
|
||||
// file. That is, it ensures that no mode type bits are set.
|
||||
func (de Dirent) IsRegular() bool { return de.modeType&os.ModeType == 0 }
|
||||
|
||||
// IsSymlink returns true if and only if the Dirent represents a file system
|
||||
// symbolic link. Note that on some operating systems, more than one file mode
|
||||
// bit may be set for a node. For instance, on Windows, a symbolic link that
|
||||
// points to a directory will have both the directory and the symbolic link bits
|
||||
// set.
|
||||
func (de Dirent) IsSymlink() bool { return de.modeType&os.ModeSymlink != 0 }
|
||||
|
||||
// Dirents represents a slice of Dirent pointers, which are sortable by
|
||||
// name. This type satisfies the `sort.Interface` interface.
|
||||
type Dirents []*Dirent
|
||||
|
||||
// Len returns the count of Dirent structures in the slice.
|
||||
func (l Dirents) Len() int { return len(l) }
|
||||
|
||||
// Less returns true if and only if the Name of the element specified by the
|
||||
// first index is lexicographically less than that of the second index.
|
||||
func (l Dirents) Less(i, j int) bool { return l[i].name < l[j].name }
|
||||
|
||||
// Swap exchanges the two Dirent entries specified by the two provided indexes.
|
||||
func (l Dirents) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
Package godirwalk provides functions to read and traverse directory trees.
|
||||
|
||||
In short, why do I use this library?
|
||||
|
||||
* It's faster than `filepath.Walk`.
|
||||
|
||||
* It's more correct on Windows than `filepath.Walk`.
|
||||
|
||||
* It's easier to use than `filepath.Walk`.
|
||||
|
||||
* It's more flexible than `filepath.Walk`.
|
||||
|
||||
USAGE
|
||||
|
||||
This library will normalize the provided top level directory name based on the
|
||||
os-specific path separator by calling `filepath.Clean` on its first
|
||||
argument. However it always provides the pathname created by using the correct
|
||||
os-specific path separator when invoking the provided callback function.
|
||||
|
||||
dirname := "some/directory/root"
|
||||
err := godirwalk.Walk(dirname, &godirwalk.Options{
|
||||
Callback: func(osPathname string, de *godirwalk.Dirent) error {
|
||||
fmt.Printf("%s %s\n", de.ModeType(), osPathname)
|
||||
return nil
|
||||
},
|
||||
})
|
||||
|
||||
This library not only provides functions for traversing a file system directory
|
||||
tree, but also for obtaining a list of immediate descendants of a particular
|
||||
directory, typically much more quickly than using `os.ReadDir` or
|
||||
`(*os.File).Readdirnames`.
|
||||
*/
|
||||
package godirwalk
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
package godirwalk
|
||||
|
||||
// ReadDirents returns a sortable slice of pointers to Dirent structures, each
|
||||
// representing the file system name and mode type for one of the immediate
|
||||
// descendant of the specified directory. If the specified directory is a
|
||||
// symbolic link, it will be resolved.
|
||||
//
|
||||
// If an optional scratch buffer is provided that is at least one page of
|
||||
// memory, it will be used when reading directory entries from the file system.
|
||||
//
|
||||
// children, err := godirwalk.ReadDirents(osDirname, nil)
|
||||
// if err != nil {
|
||||
// return nil, errors.Wrap(err, "cannot get list of directory children")
|
||||
// }
|
||||
// sort.Sort(children)
|
||||
// for _, child := range children {
|
||||
// fmt.Printf("%s %s\n", child.ModeType, child.Name)
|
||||
// }
|
||||
func ReadDirents(osDirname string, scratchBuffer []byte) (Dirents, error) {
|
||||
return readdirents(osDirname, scratchBuffer)
|
||||
}
|
||||
|
||||
// ReadDirnames returns a slice of strings, representing the immediate
|
||||
// descendants of the specified directory. If the specified directory is a
|
||||
// symbolic link, it will be resolved.
|
||||
//
|
||||
// If an optional scratch buffer is provided that is at least one page of
|
||||
// memory, it will be used when reading directory entries from the file system.
|
||||
//
|
||||
// Note that this function, depending on operating system, may or may not invoke
|
||||
// the ReadDirents function, in order to prepare the list of immediate
|
||||
// descendants. Therefore, if your program needs both the names and the file
|
||||
// system mode types of descendants, it will always be faster to invoke
|
||||
// ReadDirents directly, rather than calling this function, then looping over
|
||||
// the results and calling os.Stat for each child.
|
||||
//
|
||||
// children, err := godirwalk.ReadDirnames(osDirname, nil)
|
||||
// if err != nil {
|
||||
// return nil, errors.Wrap(err, "cannot get list of directory children")
|
||||
// }
|
||||
// sort.Strings(children)
|
||||
// for _, child := range children {
|
||||
// fmt.Printf("%s\n", child)
|
||||
// }
|
||||
func ReadDirnames(osDirname string, scratchBuffer []byte) ([]string, error) {
|
||||
return readdirnames(osDirname, scratchBuffer)
|
||||
}
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
// +build darwin freebsd linux netbsd openbsd
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func readdirents(osDirname string, scratchBuffer []byte) (Dirents, error) {
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Open")
|
||||
}
|
||||
|
||||
var entries Dirents
|
||||
|
||||
fd := int(dh.Fd())
|
||||
|
||||
if len(scratchBuffer) < MinimumScratchBufferSize {
|
||||
scratchBuffer = make([]byte, DefaultScratchBufferSize)
|
||||
}
|
||||
|
||||
var de *syscall.Dirent
|
||||
|
||||
for {
|
||||
n, err := syscall.ReadDirent(fd, scratchBuffer)
|
||||
if err != nil {
|
||||
_ = dh.Close() // ignore potential error returned by Close
|
||||
return nil, errors.Wrap(err, "cannot ReadDirent")
|
||||
}
|
||||
if n <= 0 {
|
||||
break // end of directory reached
|
||||
}
|
||||
// Loop over the bytes returned by reading the directory entries.
|
||||
buf := scratchBuffer[:n]
|
||||
for len(buf) > 0 {
|
||||
de = (*syscall.Dirent)(unsafe.Pointer(&buf[0])) // point entry to first syscall.Dirent in buffer
|
||||
buf = buf[de.Reclen:] // advance buffer
|
||||
|
||||
if inoFromDirent(de) == 0 {
|
||||
continue // this item has been deleted, but not yet removed from directory
|
||||
}
|
||||
|
||||
nameSlice := nameFromDirent(de)
|
||||
namlen := len(nameSlice)
|
||||
if (namlen == 0) || (namlen == 1 && nameSlice[0] == '.') || (namlen == 2 && nameSlice[0] == '.' && nameSlice[1] == '.') {
|
||||
continue // skip unimportant entries
|
||||
}
|
||||
osChildname := string(nameSlice)
|
||||
|
||||
// Convert syscall constant, which is in purview of OS, to a
|
||||
// constant defined by Go, assumed by this project to be stable.
|
||||
var mode os.FileMode
|
||||
switch de.Type {
|
||||
case syscall.DT_REG:
|
||||
// regular file
|
||||
case syscall.DT_DIR:
|
||||
mode = os.ModeDir
|
||||
case syscall.DT_LNK:
|
||||
mode = os.ModeSymlink
|
||||
case syscall.DT_CHR:
|
||||
mode = os.ModeDevice | os.ModeCharDevice
|
||||
case syscall.DT_BLK:
|
||||
mode = os.ModeDevice
|
||||
case syscall.DT_FIFO:
|
||||
mode = os.ModeNamedPipe
|
||||
case syscall.DT_SOCK:
|
||||
mode = os.ModeSocket
|
||||
default:
|
||||
// If syscall returned unknown type (e.g., DT_UNKNOWN, DT_WHT),
|
||||
// then resolve actual mode by getting stat.
|
||||
fi, err := os.Lstat(filepath.Join(osDirname, osChildname))
|
||||
if err != nil {
|
||||
_ = dh.Close() // ignore potential error returned by Close
|
||||
return nil, errors.Wrap(err, "cannot Stat")
|
||||
}
|
||||
// We only care about the bits that identify the type of a file
|
||||
// system node, and can ignore append, exclusive, temporary,
|
||||
// setuid, setgid, permission bits, and sticky bits, which are
|
||||
// coincident to the bits that declare type of the file system
|
||||
// node.
|
||||
mode = fi.Mode() & os.ModeType
|
||||
}
|
||||
|
||||
entries = append(entries, &Dirent{name: osChildname, modeType: mode})
|
||||
}
|
||||
}
|
||||
if err = dh.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
func readdirnames(osDirname string, scratchBuffer []byte) ([]string, error) {
|
||||
des, err := readdirents(osDirname, scratchBuffer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
names := make([]string, len(des))
|
||||
for i, v := range des {
|
||||
names[i] = v.name
|
||||
}
|
||||
return names, nil
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// The functions in this file are mere wrappers of what is already provided by
|
||||
// standard library, in order to provide the same API as this library provides.
|
||||
//
|
||||
// The scratch buffer argument is ignored by this architecture.
|
||||
//
|
||||
// Please send PR or link to article if you know of a more performant way of
|
||||
// enumerating directory contents and mode types on Windows.
|
||||
|
||||
func readdirents(osDirname string, _ []byte) (Dirents, error) {
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Open")
|
||||
}
|
||||
|
||||
fileinfos, err := dh.Readdir(0)
|
||||
if er := dh.Close(); err == nil {
|
||||
err = er
|
||||
}
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Readdir")
|
||||
}
|
||||
|
||||
entries := make(Dirents, len(fileinfos))
|
||||
for i, info := range fileinfos {
|
||||
entries[i] = &Dirent{name: info.Name(), modeType: info.Mode() & os.ModeType}
|
||||
}
|
||||
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
func readdirnames(osDirname string, _ []byte) ([]string, error) {
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Open")
|
||||
}
|
||||
|
||||
entries, err := dh.Readdirnames(0)
|
||||
if er := dh.Close(); err == nil {
|
||||
err = er
|
||||
}
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Readdirnames")
|
||||
}
|
||||
|
||||
return entries, nil
|
||||
}
|
||||
|
|
@ -0,0 +1,367 @@
|
|||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// DefaultScratchBufferSize is the number of bytes allocated for the scratch
// buffer by Walk, ReadDirents, or ReadDirnames when the caller supplies no
// buffer, or one smaller than MinimumScratchBufferSize bytes. The value may
// look large, but when enumerating large directories a bigger scratch buffer
// means fewer operating system calls.
const DefaultScratchBufferSize = 64 * 1024

// MinimumScratchBufferSize is the smallest scratch buffer that Walk,
// ReadDirents, and ReadDirnames will use when reading file entries from the
// operating system. It is set to the result of `os.Getpagesize()` during
// program startup.
var MinimumScratchBufferSize = os.Getpagesize()
|
||||
|
||||
// Options provide parameters for how the Walk function operates.
|
||||
type Options struct {
|
||||
// ErrorCallback specifies a function to be invoked in the case of an error
|
||||
// that could potentially be ignored while walking a file system
|
||||
// hierarchy. When set to nil or left as its zero-value, any error condition
|
||||
// causes Walk to immediately return the error describing what took
|
||||
// place. When non-nil, this user supplied function is invoked with the OS
|
||||
// pathname of the file system object that caused the error along with the
|
||||
// error that took place. The return value of the supplied ErrorCallback
|
||||
// function determines whether the error will cause Walk to halt immediately
|
||||
// as it would were no ErrorCallback value provided, or skip this file
|
||||
// system node yet continue on with the remaining nodes in the file system
|
||||
// hierarchy.
|
||||
//
|
||||
// ErrorCallback is invoked both for errors that are returned by the
|
||||
// runtime, and for errors returned by other user supplied callback
|
||||
// functions.
|
||||
ErrorCallback func(string, error) ErrorAction
|
||||
|
||||
// FollowSymbolicLinks specifies whether Walk will follow symbolic links
|
||||
// that refer to directories. When set to false or left as its zero-value,
|
||||
// Walk will still invoke the callback function with symbolic link nodes,
|
||||
// but if the symbolic link refers to a directory, it will not recurse on
|
||||
// that directory. When set to true, Walk will recurse on symbolic links
|
||||
// that refer to a directory.
|
||||
FollowSymbolicLinks bool
|
||||
|
||||
// Unsorted controls whether or not Walk will sort the immediate descendants
|
||||
// of a directory by their relative names prior to visiting each of those
|
||||
// entries.
|
||||
//
|
||||
// When set to false or left at its zero-value, Walk will get the list of
|
||||
// immediate descendants of a particular directory, sort that list by
|
||||
// lexical order of their names, and then visit each node in the list in
|
||||
// sorted order. This will cause Walk to always traverse the same directory
|
||||
// tree in the same order, however may be inefficient for directories with
|
||||
// many immediate descendants.
|
||||
//
|
||||
// When set to true, Walk skips sorting the list of immediate descendants
|
||||
// for a directory, and simply visits each node in the order the operating
|
||||
// system enumerated them. This will be more fast, but with the side effect
|
||||
// that the traversal order may be different from one invocation to the
|
||||
// next.
|
||||
Unsorted bool
|
||||
|
||||
// Callback is a required function that Walk will invoke for every file
|
||||
// system node it encounters.
|
||||
Callback WalkFunc
|
||||
|
||||
// PostChildrenCallback is an option function that Walk will invoke for
|
||||
// every file system directory it encounters after its children have been
|
||||
// processed.
|
||||
PostChildrenCallback WalkFunc
|
||||
|
||||
// ScratchBuffer is an optional byte slice to use as a scratch buffer for
|
||||
// Walk to use when reading directory entries, to reduce amount of garbage
|
||||
// generation. Not all architectures take advantage of the scratch
|
||||
// buffer. If omitted or the provided buffer has fewer bytes than
|
||||
// MinimumScratchBufferSize, then a buffer with DefaultScratchBufferSize
|
||||
// bytes will be created and used once per Walk invocation.
|
||||
ScratchBuffer []byte
|
||||
}
|
||||
|
||||
// ErrorAction enumerates what the Walk function may do after an error occurs
// while walking the file system. The ErrorCallback field of the Options
// structure describes how the value is used.
type ErrorAction int

const (
	// Halt tells Walk to stop the walk when a runtime error takes place. It
	// is also what Walk does by default when no ErrorCallback is provided.
	Halt ErrorAction = 0

	// SkipNode tells Walk to ignore the runtime error for the current file
	// system node, skip processing of the node that caused it, and continue
	// walking the file system hierarchy with the remaining nodes.
	SkipNode ErrorAction = 1
)
|
||||
|
||||
// WalkFunc is the type of the function called for each file system node visited
|
||||
// by Walk. The pathname argument will contain the argument to Walk as a prefix;
|
||||
// that is, if Walk is called with "dir", which is a directory containing the
|
||||
// file "a", the provided WalkFunc will be invoked with the argument "dir/a",
|
||||
// using the correct os.PathSeparator for the Go Operating System architecture,
|
||||
// GOOS. The directory entry argument is a pointer to a Dirent for the node,
|
||||
// providing access to both the basename and the mode type of the file system
|
||||
// node.
|
||||
//
|
||||
// If an error is returned by the Callback or PostChildrenCallback functions,
|
||||
// and no ErrorCallback function is provided, processing stops. If an
|
||||
// ErrorCallback function is provided, then it is invoked with the OS pathname
|
||||
// of the node that caused the error along with the error. The return
|
||||
// value of the ErrorCallback function determines whether to halt processing, or
|
||||
// skip this node and continue processing remaining file system nodes.
|
||||
//
|
||||
// The exception is when the function returns the special value
|
||||
// filepath.SkipDir. If the function returns filepath.SkipDir when invoked on a
|
||||
// directory, Walk skips the directory's contents entirely. If the function
|
||||
// returns filepath.SkipDir when invoked on a non-directory file system node,
|
||||
// Walk skips the remaining files in the containing directory. Note that any
|
||||
// supplied ErrorCallback function is not invoked with filepath.SkipDir when the
|
||||
// Callback or PostChildrenCallback functions return that special value.
|
||||
type WalkFunc func(osPathname string, directoryEntry *Dirent) error
|
||||
|
||||
// Walk walks the file tree rooted at the specified directory, calling the
|
||||
// specified callback function for each file system node in the tree, including
|
||||
// root, symbolic links, and other node types. The nodes are walked in lexical
|
||||
// order, which makes the output deterministic but means that for very large
|
||||
// directories this function can be inefficient.
|
||||
//
|
||||
// This function is often much faster than filepath.Walk because it does not
|
||||
// invoke os.Stat for every node it encounters, but rather obtains the file
|
||||
// system node type when it reads the parent directory.
|
||||
//
|
||||
// If a runtime error occurs, either from the operating system or from the
|
||||
// upstream Callback or PostChildrenCallback functions, processing typically
|
||||
// halts. However, when an ErrorCallback function is provided in the provided
|
||||
// Options structure, that function is invoked with the error along with the OS
|
||||
// pathname of the file system node that caused the error. The ErrorCallback
|
||||
// function's return value determines the action that Walk will then take.
|
||||
//
|
||||
// func main() {
|
||||
// dirname := "."
|
||||
// if len(os.Args) > 1 {
|
||||
// dirname = os.Args[1]
|
||||
// }
|
||||
// err := godirwalk.Walk(dirname, &godirwalk.Options{
|
||||
// Callback: func(osPathname string, de *godirwalk.Dirent) error {
|
||||
// fmt.Printf("%s %s\n", de.ModeType(), osPathname)
|
||||
// return nil
|
||||
// },
|
||||
// ErrorCallback: func(osPathname string, err error) godirwalk.ErrorAction {
|
||||
// // Your program may want to log the error somehow.
|
||||
// fmt.Fprintf(os.Stderr, "ERROR: %s\n", err)
|
||||
//
|
||||
// // For the purposes of this example, a simple SkipNode will suffice,
|
||||
// // although in reality perhaps additional logic might be called for.
|
||||
// return godirwalk.SkipNode
|
||||
// },
|
||||
// })
|
||||
// if err != nil {
|
||||
// fmt.Fprintf(os.Stderr, "%s\n", err)
|
||||
// os.Exit(1)
|
||||
// }
|
||||
// }
|
||||
func Walk(pathname string, options *Options) error {
|
||||
pathname = filepath.Clean(pathname)
|
||||
|
||||
var fi os.FileInfo
|
||||
var err error
|
||||
|
||||
if options.FollowSymbolicLinks {
|
||||
fi, err = os.Stat(pathname)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot Stat")
|
||||
}
|
||||
} else {
|
||||
fi, err = os.Lstat(pathname)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot Lstat")
|
||||
}
|
||||
}
|
||||
|
||||
mode := fi.Mode()
|
||||
if mode&os.ModeDir == 0 {
|
||||
return errors.Errorf("cannot Walk non-directory: %s", pathname)
|
||||
}
|
||||
|
||||
dirent := &Dirent{
|
||||
name: filepath.Base(pathname),
|
||||
modeType: mode & os.ModeType,
|
||||
}
|
||||
|
||||
// If ErrorCallback is nil, set to a default value that halts the walk
|
||||
// process on all operating system errors. This is done to allow error
|
||||
// handling to be more succinct in the walk code.
|
||||
if options.ErrorCallback == nil {
|
||||
options.ErrorCallback = defaultErrorCallback
|
||||
}
|
||||
|
||||
if len(options.ScratchBuffer) < MinimumScratchBufferSize {
|
||||
options.ScratchBuffer = make([]byte, DefaultScratchBufferSize)
|
||||
}
|
||||
|
||||
err = walk(pathname, dirent, options)
|
||||
if err == filepath.SkipDir {
|
||||
return nil // silence SkipDir for top level
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// defaultErrorCallback always returns Halt because if the upstream code did not
|
||||
// provide an ErrorCallback function, walking the file system hierarchy ought to
|
||||
// halt upon any operating system error.
|
||||
func defaultErrorCallback(_ string, _ error) ErrorAction { return Halt }
|
||||
|
||||
// walk recursively traverses the file system node specified by pathname and the
|
||||
// Dirent.
|
||||
func walk(osPathname string, dirent *Dirent, options *Options) error {
|
||||
err := options.Callback(osPathname, dirent)
|
||||
if err != nil {
|
||||
if err == filepath.SkipDir {
|
||||
return err
|
||||
}
|
||||
err = errors.Wrap(err, "Callback") // wrap potential errors returned by callback
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// On some platforms, an entry can have more than one mode type bit set.
|
||||
// For instance, it could have both the symlink bit and the directory bit
|
||||
// set indicating it's a symlink to a directory.
|
||||
if dirent.IsSymlink() {
|
||||
if !options.FollowSymbolicLinks {
|
||||
return nil
|
||||
}
|
||||
// Only need to Stat entry if platform did not already have os.ModeDir
|
||||
// set, such as would be the case for unix like operating systems. (This
|
||||
// guard eliminates extra os.Stat check on Windows.)
|
||||
if !dirent.IsDir() {
|
||||
referent, err := os.Readlink(osPathname)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Readlink")
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var osp string
|
||||
if filepath.IsAbs(referent) {
|
||||
osp = referent
|
||||
} else {
|
||||
osp = filepath.Join(filepath.Dir(osPathname), referent)
|
||||
}
|
||||
|
||||
fi, err := os.Stat(osp)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Stat")
|
||||
if action := options.ErrorCallback(osp, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
dirent.modeType = fi.Mode() & os.ModeType
|
||||
}
|
||||
}
|
||||
|
||||
if !dirent.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If get here, then specified pathname refers to a directory.
|
||||
deChildren, err := ReadDirents(osPathname, options.ScratchBuffer)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot ReadDirents")
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if !options.Unsorted {
|
||||
sort.Sort(deChildren) // sort children entries unless upstream says to leave unsorted
|
||||
}
|
||||
|
||||
for _, deChild := range deChildren {
|
||||
osChildname := filepath.Join(osPathname, deChild.name)
|
||||
err = walk(osChildname, deChild, options)
|
||||
if err != nil {
|
||||
if err != filepath.SkipDir {
|
||||
return err
|
||||
}
|
||||
// If received skipdir on a directory, stop processing that
|
||||
// directory, but continue to its siblings. If received skipdir on a
|
||||
// non-directory, stop processing remaining siblings.
|
||||
if deChild.IsSymlink() {
|
||||
// Only need to Stat entry if platform did not already have
|
||||
// os.ModeDir set, such as would be the case for unix like
|
||||
// operating systems. (This guard eliminates extra os.Stat check
|
||||
// on Windows.)
|
||||
if !deChild.IsDir() {
|
||||
// Resolve symbolic link referent to determine whether node
|
||||
// is directory or not.
|
||||
referent, err := os.Readlink(osChildname)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Readlink")
|
||||
if action := options.ErrorCallback(osChildname, err); action == SkipNode {
|
||||
continue // with next child
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var osp string
|
||||
if filepath.IsAbs(referent) {
|
||||
osp = referent
|
||||
} else {
|
||||
osp = filepath.Join(osPathname, referent)
|
||||
}
|
||||
|
||||
fi, err := os.Stat(osp)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Stat")
|
||||
if action := options.ErrorCallback(osp, err); action == SkipNode {
|
||||
continue // with next child
|
||||
}
|
||||
return err
|
||||
}
|
||||
deChild.modeType = fi.Mode() & os.ModeType
|
||||
}
|
||||
}
|
||||
if !deChild.IsDir() {
|
||||
// If not directory, return immediately, thus skipping remainder
|
||||
// of siblings.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if options.PostChildrenCallback == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
err = options.PostChildrenCallback(osPathname, dirent)
|
||||
if err == nil || err == filepath.SkipDir {
|
||||
return err
|
||||
}
|
||||
|
||||
err = errors.Wrap(err, "PostChildrenCallback") // wrap potential errors returned by callback
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
// +build dragonfly freebsd openbsd netbsd
|
||||
|
||||
package godirwalk
|
||||
|
||||
import "syscall"
|
||||
|
||||
func inoFromDirent(de *syscall.Dirent) uint64 {
|
||||
return uint64(de.Fileno)
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
// +build darwin linux
|
||||
|
||||
package godirwalk
|
||||
|
||||
import "syscall"
|
||||
|
||||
// inoFromDirent reports the inode number recorded in the directory entry de.
//
// On this GOOS syscall.Dirent keeps that number in its Ino field, which is
// returned unchanged.
func inoFromDirent(de *syscall.Dirent) uint64 {
	inode := de.Ino
	return inode
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
// +build darwin dragonfly freebsd netbsd openbsd
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func nameFromDirent(de *syscall.Dirent) []byte {
|
||||
// Because this GOOS' syscall.Dirent provides a Namlen field that says how
|
||||
// long the name is, this function does not need to search for the NULL
|
||||
// byte.
|
||||
ml := int(de.Namlen)
|
||||
|
||||
// Convert syscall.Dirent.Name, which is array of int8, to []byte, by
|
||||
// overwriting Cap, Len, and Data slice header fields to values from
|
||||
// syscall.Dirent fields. Setting the Cap, Len, and Data field values for
|
||||
// the slice header modifies what the slice header points to, and in this
|
||||
// case, the name buffer.
|
||||
var name []byte
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&name))
|
||||
sh.Cap = ml
|
||||
sh.Len = ml
|
||||
sh.Data = uintptr(unsafe.Pointer(&de.Name[0]))
|
||||
|
||||
return name
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
// +build nacl linux solaris
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// nameFromDirent returns the name held in the directory entry de as a byte
// slice, excluding the terminating NULL byte when one is found.
//
// The result is a view onto de.Name, not a copy: its data pointer is set to
// the address of de.Name[0]. Callers that need the name to outlive the memory
// behind de must copy it.
func nameFromDirent(de *syscall.Dirent) []byte {
	// Because this GOOS' syscall.Dirent does not provide a field that specifies
	// the name length, this function must first calculate the max possible name
	// length, and then search for the NULL byte.
	//
	// A record length smaller than the offset of the Name field would make the
	// difference negative. Clamp it to zero so that a negative length is never
	// written into the slice header below.
	ml := int(de.Reclen) - int(unsafe.Offsetof(syscall.Dirent{}.Name))
	if ml < 0 {
		ml = 0
	}

	// Convert syscall.Dirent.Name, which is array of int8, to []byte, by
	// overwriting Cap, Len, and Data slice header fields to values from
	// syscall.Dirent fields. Setting the Cap, Len, and Data field values for
	// the slice header modifies what the slice header points to, and in this
	// case, the name buffer.
	var name []byte
	sh := (*reflect.SliceHeader)(unsafe.Pointer(&name))
	sh.Data = uintptr(unsafe.Pointer(&de.Name[0]))
	sh.Len = ml
	sh.Cap = ml

	if index := bytes.IndexByte(name, 0); index >= 0 {
		// Found NULL byte; set slice's cap and len accordingly.
		sh.Cap = index
		sh.Len = index
	}

	return name
}
|
||||
Loading…
Reference in New Issue