Update GowalkDir dependency to pick up fixes. Added default 90 min timeout for walking FS. Override it with environment variable. Add slowjam to kaniko pod (#1530)
This commit is contained in:
parent
6cffb679aa
commit
9f76932171
|
|
@ -762,6 +762,13 @@ use kaniko.
|
|||
|
||||
You may be able to achieve the same default seccomp profile that Docker uses in your Pod by setting [seccomp](https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp) profiles with annotations on a [PodSecurityPolicy](https://cloud.google.com/kubernetes-engine/docs/how-to/pod-security-policies) to create or update security policies on your cluster.
|
||||
|
||||
## Kaniko Builds - Profiling
|
||||
If your builds are taking a long time, we recently added support to analyze kaniko function
|
||||
calls using [Slow Jam](https://github.com/google/slowjam).
|
||||
To start profiling,
|
||||
1. Add an environment variable `STACKLOG_PATH` to your [pod definition](https://github.com/GoogleContainerTools/kaniko/blob/master/examples/pod-build-profile.yaml#L15).
|
||||
2. If you are using the kaniko `debug` image, you can copy the file in the `pre-stop` container lifecycle hook.
|
||||
|
||||
## Comparison with Other Tools
|
||||
|
||||
Similar tools include:
|
||||
|
|
|
|||
|
|
@ -20,9 +20,14 @@ import (
|
|||
"os"
|
||||
|
||||
"github.com/GoogleContainerTools/kaniko/cmd/executor/cmd"
|
||||
|
||||
"github.com/google/slowjam/pkg/stacklog"
|
||||
)
|
||||
|
||||
func main() {
|
||||
s := stacklog.MustStartFromEnv("STACKLOG_PATH")
|
||||
defer s.Stop()
|
||||
|
||||
if err := cmd.RootCmd.Execute(); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: kaniko
|
||||
spec:
|
||||
containers:
|
||||
- name: kaniko
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
lifecycle:
|
||||
preStop:
|
||||
exec:
|
||||
command: ["/bin/sh","-c","cat $STACKLOG_PATH"]
|
||||
env:
|
||||
- name: STACKLOG_PATH
|
||||
value: /workspace/kaniko.slog
|
||||
args: ["--dockerfile=/workspace/dockerfile",
|
||||
"--context=dir://workspace",
|
||||
"--destination=<user-name>/<repo>"] # replace with your dockerhub account
|
||||
volumeMounts:
|
||||
- name: kaniko-secret
|
||||
mountPath: /kaniko/.docker
|
||||
- name: dockerfile-storage
|
||||
mountPath: /workspace
|
||||
restartPolicy: Never
|
||||
volumes:
|
||||
- name: kaniko-secret
|
||||
secret:
|
||||
secretName: regcred
|
||||
items:
|
||||
- key: .dockerconfigjson
|
||||
path: config.json
|
||||
- name: dockerfile-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: dockerfile-claim
|
||||
3
go.mod
3
go.mod
|
|
@ -28,9 +28,10 @@ require (
|
|||
github.com/google/go-github v17.0.0+incompatible
|
||||
github.com/google/go-querystring v1.0.0 // indirect
|
||||
github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible // indirect
|
||||
github.com/google/slowjam v1.0.0
|
||||
github.com/hashicorp/go-memdb v0.0.0-20180223233045-1289e7fffe71 // indirect
|
||||
github.com/hashicorp/go-uuid v1.0.1 // indirect
|
||||
github.com/karrick/godirwalk v1.7.7
|
||||
github.com/karrick/godirwalk v1.16.1
|
||||
github.com/mattn/go-ieproxy v0.0.1 // indirect
|
||||
github.com/mattn/go-shellwords v1.0.10 // indirect
|
||||
github.com/minio/highwayhash v1.0.0
|
||||
|
|
|
|||
12
go.sum
12
go.sum
|
|
@ -338,6 +338,8 @@ github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hf
|
|||
github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||
github.com/google/shlex v0.0.0-20150127133951-6f45313302b9/go.mod h1:RpwtwJQFrIEPstU94h88MWPXP2ektJZ8cZ0YntAmXiE=
|
||||
github.com/google/slowjam v1.0.0 h1:dA9flW4oGTJcSy8FpEvdq8JKwPFVgqYwMmjhqlb2L+s=
|
||||
github.com/google/slowjam v1.0.0/go.mod h1:mNktULbvWfYVMKKmpt94Rp3jMtmhQZLS0iR+W84S0mM=
|
||||
github.com/google/uuid v1.0.0 h1:b4Gk+7WdP/d3HZH8EJsZpvV7EtDOgaZLtnaNGIu1adA=
|
||||
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
|
||||
|
|
@ -411,8 +413,8 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1
|
|||
github.com/jstemmer/go-junit-report v0.9.1 h1:6QPYqodiu3GuPL+7mfx+NwDdp2eTkp9IfEUpgAwUN0o=
|
||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
||||
github.com/karrick/godirwalk v1.7.7 h1:lLkPCA+C0u1pI4fLFseaupvh5/THlPJIqSPmnGGViKs=
|
||||
github.com/karrick/godirwalk v1.7.7/go.mod h1:2c9FRhkDxdIbgkOnCEvnSWs71Bhugbl46shStcFDJ34=
|
||||
github.com/karrick/godirwalk v1.16.1 h1:DynhcF+bztK8gooS0+NDJFrdNZjJ3gzVzC545UNA9iw=
|
||||
github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
|
||||
github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd h1:Coekwdh0v2wtGp9Gmz1Ze3eVRAWJMLokvN3QjdzCHLY=
|
||||
github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
|
||||
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
|
||||
|
|
@ -437,13 +439,17 @@ github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN
|
|||
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs=
|
||||
github.com/maruel/panicparse v1.5.0 h1:etK4QAf/Spw8eyowKbOHRkOfhblp/kahGUy96RvbMjI=
|
||||
github.com/maruel/panicparse v1.5.0/go.mod h1:aOutY/MUjdj80R0AEVI9qE2zHqig+67t2ffUDDiLzAM=
|
||||
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
|
||||
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
|
||||
github.com/mattn/go-ieproxy v0.0.0-20190610004146-91bb50d98149/go.mod h1:31jz6HNzdxOmlERGGEc4v/dMssOfmp2p5bT/okiKFFc=
|
||||
github.com/mattn/go-ieproxy v0.0.0-20190702010315-6dee0af9227d h1:oNAwILwmgWKFpuU+dXvI6dl9jG2mAWAZLX3r9s0PPiw=
|
||||
github.com/mattn/go-ieproxy v0.0.0-20190702010315-6dee0af9227d/go.mod h1:31jz6HNzdxOmlERGGEc4v/dMssOfmp2p5bT/okiKFFc=
|
||||
github.com/mattn/go-ieproxy v0.0.1 h1:qiyop7gCflfhwCzGyeT0gro3sF9AIg9HU98JORTkqfI=
|
||||
github.com/mattn/go-ieproxy v0.0.1/go.mod h1:pYabZ6IHcRpFh7vIaLfK7rdcWgFEb3SFJ6/gNWuh88E=
|
||||
github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
|
||||
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
|
||||
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
|
||||
github.com/mattn/go-shellwords v1.0.10 h1:Y7Xqm8piKOO3v10Thp7Z36h4FYFjt5xB//6XvOrs2Gw=
|
||||
github.com/mattn/go-shellwords v1.0.10/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
|
||||
|
|
@ -451,6 +457,7 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0j
|
|||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/maxbrunsfeld/counterfeiter/v6 v6.2.2 h1:g+4J5sZg6osfvEfkRZxJ1em0VT95/UOZgi/l7zi1/oE=
|
||||
github.com/maxbrunsfeld/counterfeiter/v6 v6.2.2/go.mod h1:eD9eIE7cdwcMi9rYluz88Jz2VyhSmden33/aXg4oVIY=
|
||||
github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
|
||||
github.com/minio/highwayhash v1.0.0 h1:iMSDhgUILCr0TNm8LWlSjF8N0ZIj2qbO8WHp6Q/J2BA=
|
||||
github.com/minio/highwayhash v1.0.0/go.mod h1:xQboMTeM9nY9v/LlAOxFctujiv5+Aq2hR5dxBpaMbdc=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
|
|
@ -787,6 +794,7 @@ golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7w
|
|||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ import (
|
|||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/GoogleContainerTools/kaniko/pkg/config"
|
||||
"github.com/GoogleContainerTools/kaniko/pkg/timing"
|
||||
"github.com/docker/docker/builder/dockerignore"
|
||||
"github.com/docker/docker/pkg/fileutils"
|
||||
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||
|
|
@ -38,14 +40,16 @@ import (
|
|||
otiai10Cpy "github.com/otiai10/copy"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/GoogleContainerTools/kaniko/pkg/config"
|
||||
"github.com/GoogleContainerTools/kaniko/pkg/timing"
|
||||
)
|
||||
|
||||
const DoNotChangeUID = -1
|
||||
const DoNotChangeGID = -1
|
||||
|
||||
const (
|
||||
snapshotTimeout = "SNAPSHOT_TIMEOUT_DURATION" // name of the environment variable WalkFS reads for its timeout
|
||||
defaultTimeout = "90m" // duration string WalkFS falls back to when that variable is empty
|
||||
)
|
||||
|
||||
type IgnoreListEntry struct {
|
||||
Path string
|
||||
PrefixMatchOnly bool
|
||||
|
|
@ -904,20 +908,55 @@ func UpdateInitialIgnoreList(ignoreVarRun bool) {
|
|||
})
|
||||
}
|
||||
|
||||
// walkFSResult bundles the two values WalkFS returns so they can be sent over a single channel.
type walkFSResult struct {
|
||||
filesAdded []string // handed back as WalkFS's first return value
|
||||
existingPaths map[string]struct{} // handed back as WalkFS's second return value
|
||||
}
|
||||
|
||||
// WalkFS given a directory and list of existing files,
|
||||
// returns a list of changed filed determined by changeFunc and a list
|
||||
// of deleted files.
|
||||
// It timesout after 90 mins. Can be configured via setting an environment variable
|
||||
// SNAPSHOT_TIMEOUT in the kaniko pod definition.
|
||||
func WalkFS(dir string, existingPaths map[string]struct{}, changeFunc func(string) (bool, error)) ([]string, map[string]struct{}) {
|
||||
timeOutStr := os.Getenv(snapshotTimeout)
|
||||
if timeOutStr == "" {
|
||||
logrus.Tracef("%s environment not set. Using default snapshot timeout %s", snapshotTimeout, defaultTimeout)
|
||||
timeOutStr = defaultTimeout
|
||||
}
|
||||
timeOut, err := time.ParseDuration(timeOutStr)
|
||||
if err != nil {
|
||||
logrus.Fatalf("could not parse duration %s", timeOutStr)
|
||||
}
|
||||
timer := timing.Start("Walking filesystem with timeout")
|
||||
ch := make(chan walkFSResult, 1)
|
||||
|
||||
go func() {
|
||||
ch <- gowalkDir(dir, existingPaths, changeFunc)
|
||||
}()
|
||||
|
||||
// Listen on our channel AND a timeout channel - which ever happens first.
|
||||
select {
|
||||
case res := <-ch:
|
||||
timing.DefaultRun.Stop(timer)
|
||||
return res.filesAdded, res.existingPaths
|
||||
case <-time.After(timeOut):
|
||||
timing.DefaultRun.Stop(timer)
|
||||
logrus.Fatalf("timed out snapshotting FS in %s", timeOutStr)
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
func gowalkDir(dir string, existingPaths map[string]struct{}, changeFunc func(string) (bool, error)) walkFSResult {
|
||||
foundPaths := make([]string, 0)
|
||||
timer := timing.Start("Walking filesystem")
|
||||
godirwalk.Walk(dir, &godirwalk.Options{
|
||||
Callback: func(path string, ent *godirwalk.Dirent) error {
|
||||
logrus.Tracef("Analyzing path %s", dir)
|
||||
if IsInIgnoreList(path) {
|
||||
if IsDestDir(path) {
|
||||
logrus.Tracef("Skipping paths under %s, as it is a ignored directory", path)
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
delete(existingPaths, path)
|
||||
|
|
@ -931,8 +970,7 @@ func WalkFS(dir string, existingPaths map[string]struct{}, changeFunc func(strin
|
|||
Unsorted: true,
|
||||
},
|
||||
)
|
||||
timing.DefaultRun.Stop(timer)
|
||||
return foundPaths, existingPaths
|
||||
return walkFSResult{foundPaths, existingPaths}
|
||||
}
|
||||
|
||||
// GetFSInfoMap given a directory gets a map of FileInfo for all files
|
||||
|
|
|
|||
|
|
@ -0,0 +1,201 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
/*
|
||||
Copyright 2020 Google LLC
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package stacklog logs the Go stack to disk in a loop for later analysis
|
||||
package stacklog
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
// defaultPoll is how often to poll stack status by default
|
||||
defaultPoll = 125 * time.Millisecond
|
||||
|
||||
// defaultQuiet can be set to disable stderr messages by default
|
||||
defaultQuiet = false
|
||||
)
|
||||
|
||||
// Config defines how to configure a stack logger.
|
||||
type Config struct {
|
||||
Path string // output file for the stack samples; when empty, Start creates a temporary "*.slog" file
|
||||
Poll time.Duration // interval between samples; when zero, Start substitutes defaultPoll
|
||||
Quiet bool // when true, the stacklog status and write-failure messages on stderr are suppressed
|
||||
}
|
||||
|
||||
// Start begins logging stacks to an output file.
|
||||
func Start(c Config) (*Stacklog, error) {
|
||||
if c.Poll == 0 {
|
||||
c.Poll = defaultPoll
|
||||
}
|
||||
|
||||
if c.Path == "" {
|
||||
tf, err := ioutil.TempFile("", "*.slog")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("default path: %w", err)
|
||||
}
|
||||
|
||||
c.Path = tf.Name()
|
||||
}
|
||||
|
||||
if !c.Quiet {
|
||||
fmt.Fprintf(os.Stderr, "stacklog: logging to %s, sampling every %s\n", c.Path, c.Poll)
|
||||
}
|
||||
|
||||
s := &Stacklog{
|
||||
ticker: time.NewTicker(c.Poll),
|
||||
path: c.Path,
|
||||
quiet: c.Quiet,
|
||||
}
|
||||
|
||||
f, err := os.Create(c.Path)
|
||||
if err != nil {
|
||||
return s, err
|
||||
}
|
||||
|
||||
s.f = f
|
||||
go s.loop()
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// MustStartFromEnv logs stacks to an output file based on the environment.
|
||||
func MustStartFromEnv(key string) *Stacklog {
|
||||
val := os.Getenv(key)
|
||||
if val == "" {
|
||||
return &Stacklog{}
|
||||
}
|
||||
|
||||
s, err := Start(Config{Path: val, Quiet: defaultQuiet, Poll: defaultPoll})
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("stacklog from environment %q: %v", key, err))
|
||||
}
|
||||
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
go func() {
|
||||
<-sigs
|
||||
s.Stop()
|
||||
}()
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Stacklog controls the stack logger.
|
||||
type Stacklog struct {
|
||||
ticker *time.Ticker // paces the sampling loop; stopped by Stop
|
||||
f *os.File // output file the samples are written to; nil when logging was never started
|
||||
quiet bool // when true, status and write-failure messages on stderr are suppressed
|
||||
path string // path of the output file, reported by Stop
|
||||
samples int // number of samples recorded so far, incremented by loop
|
||||
}
|
||||
|
||||
// loop periodically records the stack log to disk.
|
||||
func (s *Stacklog) loop() {
|
||||
for range s.ticker.C {
|
||||
if _, err := s.f.Write([]byte(fmt.Sprintf("%d\n", time.Now().UnixNano()))); err != nil {
|
||||
if !s.quiet {
|
||||
fmt.Fprintf(os.Stderr, "stacklog: write failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := s.f.Write(DumpStacks()); err != nil {
|
||||
if !s.quiet {
|
||||
fmt.Fprintf(os.Stderr, "stacklog: write failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := s.f.Write([]byte("-\n")); err != nil {
|
||||
if !s.quiet {
|
||||
fmt.Fprintf(os.Stderr, "stacklog: write failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
s.samples++
|
||||
}
|
||||
}
|
||||
|
||||
// DumpStacks returns the formatted stack traces of all goroutines.
//
// runtime.Stack truncates its output to the buffer it is given, so the trace
// is requested with successively doubled buffers (starting at 1 KiB) until it
// leaves spare room in the buffer, which shows that nothing was cut off.
func DumpStacks() []byte {
	for size := 1024; ; size *= 2 {
		trace := make([]byte, size)

		if written := runtime.Stack(trace, true); written < size {
			return trace[:written]
		}
	}
}
|
||||
|
||||
// Stop stops logging stacks to disk.
|
||||
func (s *Stacklog) Stop() {
|
||||
if s == nil || s.f == nil {
|
||||
return
|
||||
}
|
||||
|
||||
s.ticker.Stop()
|
||||
|
||||
if !s.quiet {
|
||||
fmt.Fprintf(os.Stderr, "stacklog: stopped. stored %d samples to %s\n", s.samples, s.path)
|
||||
}
|
||||
}
|
||||
|
|
@ -12,3 +12,8 @@
|
|||
|
||||
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
|
||||
.glide/
|
||||
|
||||
examples/remove-empty-directories/remove-empty-directories
|
||||
examples/sizes/sizes
|
||||
examples/walk-fast/walk-fast
|
||||
examples/walk-stdlib/walk-stdlib
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
`godirwalk` is a library for traversing a directory tree on a file
|
||||
system.
|
||||
|
||||
[](https://godoc.org/github.com/karrick/godirwalk) [](https://dev.azure.com/microsoft0235/microsoft/_build/latest?definitionId=1&branchName=master)
|
||||
|
||||
In short, why do I use this library?
|
||||
|
||||
1. It's faster than `filepath.Walk`.
|
||||
|
|
@ -24,6 +26,12 @@ provided callback function.
|
|||
dirname := "some/directory/root"
|
||||
err := godirwalk.Walk(dirname, &godirwalk.Options{
|
||||
Callback: func(osPathname string, de *godirwalk.Dirent) error {
|
||||
// Following string operation is not most performant way
|
||||
// of doing this, but common enough to warrant a simple
|
||||
// example here:
|
||||
if strings.Contains(osPathname, ".git") {
|
||||
return godirwalk.SkipThis
|
||||
}
|
||||
fmt.Printf("%s %s\n", de.ModeType(), osPathname)
|
||||
return nil
|
||||
},
|
||||
|
|
@ -36,9 +44,6 @@ directory tree, but also for obtaining a list of immediate descendants
|
|||
of a particular directory, typically much more quickly than using
|
||||
`os.ReadDir` or `os.ReadDirnames`.
|
||||
|
||||
Documentation is available via
|
||||
[](https://godoc.org/github.com/karrick/godirwalk).
|
||||
|
||||
## Description
|
||||
|
||||
Here's why I use `godirwalk` in preference to `filepath.Walk`,
|
||||
|
|
@ -48,24 +53,24 @@ Here's why I use `godirwalk` in preference to `filepath.Walk`,
|
|||
|
||||
When compared against `filepath.Walk` in benchmarks, it has been
|
||||
observed to run between five and ten times the speed on darwin, at
|
||||
speeds comparable to the that of the unix `find` utility; about twice
|
||||
the speed on linux; and about four times the speed on Windows.
|
||||
speeds comparable to that of the unix `find` utility; and about
|
||||
twice the speed on linux; and about four times the speed on Windows.
|
||||
|
||||
How does it obtain this performance boost? It does less work to give
|
||||
you nearly the same output. This library calls the same `syscall`
|
||||
functions to do the work, but it makes fewer calls, does not throw
|
||||
away information that it might need, and creates less memory churn
|
||||
along the way by reusing the same scratch buffer rather than
|
||||
reallocating a new buffer every time it reads data from the operating
|
||||
system.
|
||||
along the way by reusing the same scratch buffer for reading from a
|
||||
directory rather than reallocating a new buffer every time it reads
|
||||
file system entry data from the operating system.
|
||||
|
||||
While traversing a file system directory tree, `filepath.Walk` obtains
|
||||
the list of immediate descendants of a directory, and throws away the
|
||||
file system node type information provided by the operating system
|
||||
that comes with the node's name. Then, immediately prior to invoking
|
||||
the callback function, `filepath.Walk` invokes `os.Stat` for each
|
||||
node, and passes the returned `os.FileInfo` information to the
|
||||
callback.
|
||||
node type information for the file system entry that is provided by
|
||||
the operating system that comes with the node's name. Then,
|
||||
immediately prior to invoking the callback function, `filepath.Walk`
|
||||
invokes `os.Stat` for each node, and passes the returned `os.FileInfo`
|
||||
information to the callback.
|
||||
|
||||
While the `os.FileInfo` information provided by `os.Stat` is extremely
|
||||
helpful--and even includes the `os.FileMode` data--providing it
|
||||
|
|
@ -86,31 +91,37 @@ entire `os.FileInfo` data structure, the callback can easiy invoke
|
|||
##### macOS
|
||||
|
||||
```Bash
|
||||
go test -bench=.
|
||||
$ go test -bench=. -benchmem
|
||||
goos: darwin
|
||||
goarch: amd64
|
||||
pkg: github.com/karrick/godirwalk
|
||||
BenchmarkFilepathWalk-8 1 3001274570 ns/op
|
||||
BenchmarkGoDirWalk-8 3 465573172 ns/op
|
||||
BenchmarkFlameGraphFilepathWalk-8 1 6957916936 ns/op
|
||||
BenchmarkFlameGraphGoDirWalk-8 1 4210582571 ns/op
|
||||
BenchmarkReadDirnamesStandardLibrary-12 50000 26250 ns/op 10360 B/op 16 allocs/op
|
||||
BenchmarkReadDirnamesThisLibrary-12 50000 24372 ns/op 5064 B/op 20 allocs/op
|
||||
BenchmarkFilepathWalk-12 1 1099524875 ns/op 228415912 B/op 416952 allocs/op
|
||||
BenchmarkGodirwalk-12 2 526754589 ns/op 103110464 B/op 451442 allocs/op
|
||||
BenchmarkGodirwalkUnsorted-12 3 509219296 ns/op 100751400 B/op 378800 allocs/op
|
||||
BenchmarkFlameGraphFilepathWalk-12 1 7478618820 ns/op 2284138176 B/op 4169453 allocs/op
|
||||
BenchmarkFlameGraphGodirwalk-12 1 4977264058 ns/op 1031105328 B/op 4514423 allocs/op
|
||||
PASS
|
||||
ok github.com/karrick/godirwalk 16.822s
|
||||
ok github.com/karrick/godirwalk 21.219s
|
||||
```
|
||||
|
||||
##### Linux
|
||||
|
||||
```Bash
|
||||
go test -bench=.
|
||||
$ go test -bench=. -benchmem
|
||||
goos: linux
|
||||
goarch: amd64
|
||||
pkg: github.com/karrick/godirwalk
|
||||
BenchmarkFilepathWalk-12 1 1609189170 ns/op
|
||||
BenchmarkGoDirWalk-12 5 211336628 ns/op
|
||||
BenchmarkFlameGraphFilepathWalk-12 1 3968119932 ns/op
|
||||
BenchmarkFlameGraphGoDirWalk-12 1 2139598998 ns/op
|
||||
BenchmarkReadDirnamesStandardLibrary-12 100000 15458 ns/op 10360 B/op 16 allocs/op
|
||||
BenchmarkReadDirnamesThisLibrary-12 100000 14646 ns/op 5064 B/op 20 allocs/op
|
||||
BenchmarkFilepathWalk-12 2 631034745 ns/op 228210216 B/op 416939 allocs/op
|
||||
BenchmarkGodirwalk-12 3 358714883 ns/op 102988664 B/op 451437 allocs/op
|
||||
BenchmarkGodirwalkUnsorted-12 3 355363915 ns/op 100629234 B/op 378796 allocs/op
|
||||
BenchmarkFlameGraphFilepathWalk-12 1 6086913991 ns/op 2282104720 B/op 4169417 allocs/op
|
||||
BenchmarkFlameGraphGodirwalk-12 1 3456398824 ns/op 1029886400 B/op 4514373 allocs/op
|
||||
PASS
|
||||
ok github.com/karrick/godirwalk 9.007s
|
||||
ok github.com/karrick/godirwalk 19.179s
|
||||
```
|
||||
|
||||
### It's more correct on Windows than `filepath.Walk`
|
||||
|
|
@ -136,13 +147,20 @@ The takeaway is that behavior is different based on which platform
|
|||
until it is fixed in the standard library, it presents a compatibility
|
||||
problem.
|
||||
|
||||
This library correctly identifies symbolic links that point to
|
||||
directories and will only follow them when `FollowSymbolicLinks` is
|
||||
set to true. Behavior on Windows and other operating systems is
|
||||
identical.
|
||||
This library fixes the above problem such that it will never follow
|
||||
logical file system loops on either unix or Windows. Furthermore, it
|
||||
will only follow symbolic links when `FollowSymbolicLinks` is set to
|
||||
true. Behavior on Windows and other operating systems is identical.
|
||||
|
||||
### It's more easy to use than `filepath.Walk`
|
||||
|
||||
While this library strives to mimic the behavior of the incredibly
|
||||
well-written `filepath.Walk` standard library, there are places where
|
||||
it deviates a bit in order to provide a more easy or intuitive caller
|
||||
interface.
|
||||
|
||||
#### Callback interface does not send you an error to check
|
||||
|
||||
Since this library does not invoke `os.Stat` on every file system node
|
||||
it encounters, there is no possible error event for the callback
|
||||
function to filter on. The third argument in the `filepath.WalkFunc`
|
||||
|
|
@ -150,23 +168,105 @@ function signature to pass the error from `os.Stat` to the callback
|
|||
function is no longer necessary, and thus eliminated from signature of
|
||||
the callback function from this library.
|
||||
|
||||
Also, `filepath.Walk` invokes the callback function with a solidus
|
||||
delimited pathname regardless of the os-specific path separator. This
|
||||
library invokes the callback function with the os-specific pathname
|
||||
separator, obviating a call to `filepath.Clean` in the callback
|
||||
function for each node prior to actually using the provided pathname.
|
||||
Furthermore, this slight interface difference between
|
||||
`filepath.WalkFunc` and this library's `WalkFunc` eliminates the
|
||||
boilerplate code that callback handlers must write when they use
|
||||
`filepath.Walk`. Rather than every callback function needing to check
|
||||
the error value passed into it and branch accordingly, users of this
|
||||
library do not even have an error value to check immediately upon
|
||||
entry into the callback function. This is an improvement both in
|
||||
runtime performance and code clarity.
|
||||
|
||||
#### Callback function is invoked with OS specific file system path separator
|
||||
|
||||
On every OS platform `filepath.Walk` invokes the callback function
|
||||
with a solidus (`/`) delimited pathname. By contrast this library
|
||||
invokes the callback with the os-specific pathname separator,
|
||||
obviating a call to `filepath.Clean` in the callback function for each
|
||||
node prior to actually using the provided pathname.
|
||||
|
||||
In other words, even on Windows, `filepath.Walk` will invoke the
|
||||
callback with `some/path/to/foo.txt`, requiring well written clients
|
||||
to perform pathname normalization for every file prior to working with
|
||||
the specified file. In truth, many clients developed on unix and not
|
||||
tested on Windows neglect this subtlety, and will result in software
|
||||
bugs when running on Windows. This library would invoke the callback
|
||||
function with `some\path\to\foo.txt` for the same file when running on
|
||||
Windows, eliminating the need to normalize the pathname by the client,
|
||||
and lessening the likelihood that a client will work on unix but not on
|
||||
the specified file. This is a hidden boilerplate requirement to create
|
||||
truly os agnostic callback functions. In truth, many clients developed
|
||||
on unix and not tested on Windows neglect this subtlety, and will
|
||||
result in software bugs when someone tries to run that software on
|
||||
Windows.
|
||||
|
||||
This library invokes the callback function with `some\path\to\foo.txt`
|
||||
for the same file when running on Windows, eliminating the need to
|
||||
normalize the pathname by the client, and lessening the likelihood that a
|
||||
client will work on unix but not on Windows.
|
||||
|
||||
This enhancement eliminates the necessity for some more boilerplate code
|
||||
in callback functions while improving the runtime performance of this
|
||||
library.
|
||||
|
||||
#### `godirwalk.SkipThis` is more intuitive to use than `filepath.SkipDir`
|
||||
|
||||
One arguably confusing aspect of the `filepath.WalkFunc` interface
|
||||
that this library must emulate is how a caller tells the `Walk`
|
||||
function to skip file system entries. With both `filepath.Walk` and
|
||||
this library's `Walk`, when a callback function wants to skip a
|
||||
directory and not descend into its children, it returns
|
||||
`filepath.SkipDir`. If the callback function returns
|
||||
`filepath.SkipDir` for a non-directory, `filepath.Walk` and this
|
||||
library will stop processing any more entries in the current
|
||||
directory. This is not necessarily what most developers want or
|
||||
expect. If you want to simply skip a particular non-directory entry
|
||||
but continue processing entries in the directory, the callback
|
||||
function must return nil.
|
||||
|
||||
The implication of this interface design is that when you want to walk a
|
||||
file system hierarchy and skip an entry, you have to return a
|
||||
different value based on what type of file system entry that node
|
||||
is. To skip an entry, if the entry is a directory, you must return
|
||||
`filepath.SkipDir`, and if the entry is not a directory, you must return
|
||||
`nil`. This is an unfortunate hurdle I have observed many developers
|
||||
struggling with, simply because it is not an intuitive interface.
|
||||
|
||||
Here is an example callback function that adheres to
|
||||
`filepath.WalkFunc` interface to have it skip any file system entry
|
||||
whose full pathname includes a particular substring, `optSkip`. Note
|
||||
that this library still supports identical behavior of `filepath.Walk`
|
||||
when the callback function returns `filepath.SkipDir`.
|
||||
|
||||
```Go
|
||||
func callback1(osPathname string, de *godirwalk.Dirent) error {
|
||||
if optSkip != "" && strings.Contains(osPathname, optSkip) {
|
||||
if b, err := de.IsDirOrSymlinkToDir(); b == true && err == nil {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// Process file like normal...
|
||||
return nil
|
||||
}
|
||||
```
|
||||
|
||||
This library attempts to eliminate some of that logic boilerplate
|
||||
required in callback functions by providing a new token error value,
|
||||
`SkipThis`, which a callback function may return to skip the current
|
||||
file system entry regardless of what type of entry it is. If the
|
||||
current entry is a directory, its children will not be enumerated,
|
||||
exactly as if the callback had returned `filepath.SkipDir`. If the
|
||||
current entry is a non-directory, the next file system entry in the
|
||||
current directory will be enumerated, exactly as if the callback
|
||||
returned `nil`. The following example callback function has identical
|
||||
behavior as the previous, but has less boilerplate, and admittedly
|
||||
logic that I find more simple to follow.
|
||||
|
||||
```Go
|
||||
func callback2(osPathname string, de *godirwalk.Dirent) error {
|
||||
if optSkip != "" && strings.Contains(osPathname, optSkip) {
|
||||
return godirwalk.SkipThis
|
||||
}
|
||||
// Process file like normal...
|
||||
return nil
|
||||
}
|
||||
```
|
||||
|
||||
### It's more flexible than `filepath.Walk`
|
||||
|
||||
#### Configurable Handling of Symbolic Links
|
||||
|
|
@ -177,26 +277,38 @@ does. However, it does invoke the callback function with each node it
|
|||
finds, including symbolic links. If a particular use case exists to
|
||||
follow symbolic links when traversing a directory tree, this library
|
||||
can be invoked in a manner to do so, by setting the
|
||||
`FollowSymbolicLinks` parameter to true.
|
||||
`FollowSymbolicLinks` config parameter to `true`.
|
||||
|
||||
#### Configurable Sorting of Directory Children
|
||||
|
||||
The default behavior of this library is to always sort the immediate
|
||||
descendants of a directory prior to visiting each node, just like
|
||||
`filepath.Walk` does. This is usually the desired behavior. However,
|
||||
this does come at a performance penalty to sort the names when a
|
||||
directory node has many entries. If a particular use case exists that
|
||||
does not require sorting the directory's immediate descendants prior
|
||||
to visiting its nodes, this library will skip the sorting step when
|
||||
the `Unsorted` parameter is set to true.
|
||||
this does come at slight performance and memory penalties required to
|
||||
sort the names when a directory node has many entries. Additionally if
|
||||
caller specifies `Unsorted` enumeration in the configuration
|
||||
parameter, reading directories is lazily performed as the caller
|
||||
consumes entries. If a particular use case exists that does not
|
||||
require sorting the directory's immediate descendants prior to
|
||||
visiting its nodes, this library will skip the sorting step when the
|
||||
`Unsorted` parameter is set to `true`.
|
||||
|
||||
Here's an interesting read of the potential hazards of traversing a
|
||||
file system hierarchy in a non-deterministic order. If you know the
|
||||
problem you are solving is not affected by the order files are
|
||||
visited, then I encourage you to use `Unsorted`. Otherwise skip
|
||||
setting this option.
|
||||
|
||||
[Researchers find bug in Python script may have affected hundreds of studies](https://arstechnica.com/information-technology/2019/10/chemists-discover-cross-platform-python-scripts-not-so-cross-platform/)
|
||||
|
||||
#### Configurable Post Children Callback
|
||||
|
||||
This library provides upstream code with the ability to specify a
|
||||
callback to be invoked for each directory after its children are
|
||||
processed. This has been used to recursively delete empty directories
|
||||
after traversing the file system in a more efficient manner. See the
|
||||
`examples/clean-empties` directory for an example of this usage.
|
||||
callback function to be invoked for each directory after its children
|
||||
are processed. This has been used to recursively delete empty
|
||||
directories after traversing the file system in a more efficient
|
||||
manner. See the `examples/clean-empties` directory for an example of
|
||||
this usage.
|
||||
|
||||
#### Configurable Error Callback
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
# Go
|
||||
# Build your Go project.
|
||||
# Add steps that test, save build artifacts, deploy, and more:
|
||||
# https://docs.microsoft.com/azure/devops/pipelines/languages/go
|
||||
|
||||
trigger:
|
||||
- master
|
||||
|
||||
variables:
|
||||
GOVERSION: 1.13
|
||||
|
||||
jobs:
|
||||
- job: Linux
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
steps:
|
||||
- task: GoTool@0
|
||||
displayName: 'Use Go $(GOVERSION)'
|
||||
inputs:
|
||||
version: $(GOVERSION)
|
||||
- task: Go@0
|
||||
inputs:
|
||||
command: test
|
||||
arguments: -race -v ./...
|
||||
displayName: 'Execute Tests'
|
||||
|
||||
- job: Mac
|
||||
pool:
|
||||
vmImage: 'macos-latest'
|
||||
steps:
|
||||
- task: GoTool@0
|
||||
displayName: 'Use Go $(GOVERSION)'
|
||||
inputs:
|
||||
version: $(GOVERSION)
|
||||
- task: Go@0
|
||||
inputs:
|
||||
command: test
|
||||
arguments: -race -v ./...
|
||||
displayName: 'Execute Tests'
|
||||
|
||||
- job: Windows
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- task: GoTool@0
|
||||
displayName: 'Use Go $(GOVERSION)'
|
||||
inputs:
|
||||
version: $(GOVERSION)
|
||||
- task: Go@0
|
||||
inputs:
|
||||
command: test
|
||||
arguments: -race -v ./...
|
||||
displayName: 'Execute Tests'
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
|
||||
|
||||
# for version in v1.9.1 v1.10.0 v1.10.3 v1.10.12 v1.11.2 v1.11.3 v1.12.0 v1.13.1 v1.14.0 v1.14.1 ; do
|
||||
for version in v1.10.12 v1.14.1 v1.15.2 ; do
|
||||
echo "### $version" > $version.txt
|
||||
git checkout -- go.mod && git checkout $version && go test -run=NONE -bench=Benchmark2 >> $version.txt || exit 1
|
||||
done
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
// +build godirwalk_debug
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// debug formats and prints arguments to stderr for development builds
|
||||
func debug(f string, a ...interface{}) {
|
||||
// fmt.Fprintf(os.Stderr, f, a...)
|
||||
os.Stderr.Write([]byte("godirwalk: " + fmt.Sprintf(f, a...)))
|
||||
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
// +build !godirwalk_debug
|
||||
|
||||
package godirwalk
|
||||
|
||||
// debug is a no-op for release builds
|
||||
func debug(_ string, _ ...interface{}) {}
|
||||
|
|
@ -3,35 +3,72 @@ package godirwalk
|
|||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// Dirent stores the name and file system mode type of discovered file system
|
||||
// entries.
|
||||
type Dirent struct {
|
||||
name string
|
||||
modeType os.FileMode
|
||||
name string // base name of the file system entry.
|
||||
path string // path name of the file system entry.
|
||||
modeType os.FileMode // modeType is the type of file system entry.
|
||||
}
|
||||
|
||||
// NewDirent returns a newly initialized Dirent structure, or an error. This
|
||||
// function does not follow symbolic links.
|
||||
//
|
||||
// This function is rarely used, as Dirent structures are provided by other
|
||||
// functions in this library that read and walk directories.
|
||||
// functions in this library that read and walk directories, but is provided,
|
||||
// however, for the occasion when a program needs to create a Dirent.
|
||||
func NewDirent(osPathname string) (*Dirent, error) {
|
||||
fi, err := os.Lstat(osPathname)
|
||||
modeType, err := modeType(osPathname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot lstat")
|
||||
return nil, err
|
||||
}
|
||||
return &Dirent{
|
||||
name: filepath.Base(osPathname),
|
||||
modeType: fi.Mode() & os.ModeType,
|
||||
path: filepath.Dir(osPathname),
|
||||
modeType: modeType,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Name returns the basename of the file system entry.
|
||||
func (de Dirent) Name() string { return de.name }
|
||||
// IsDir returns true if and only if the Dirent represents a file system
|
||||
// directory. Note that on some operating systems, more than one file mode bit
|
||||
// may be set for a node. For instance, on Windows, a symbolic link that points
|
||||
// to a directory will have both the directory and the symbolic link bits set.
|
||||
func (de Dirent) IsDir() bool { return de.modeType&os.ModeDir != 0 }
|
||||
|
||||
// IsDirOrSymlinkToDir returns true if and only if the Dirent represents a file
|
||||
// system directory, or a symbolic link to a directory. Note that if the Dirent
|
||||
// is not a directory but is a symbolic link, this method will resolve by
|
||||
// sending a request to the operating system to follow the symbolic link.
|
||||
func (de Dirent) IsDirOrSymlinkToDir() (bool, error) {
|
||||
if de.IsDir() {
|
||||
return true, nil
|
||||
}
|
||||
if !de.IsSymlink() {
|
||||
return false, nil
|
||||
}
|
||||
// Does this symlink point to a directory?
|
||||
info, err := os.Stat(filepath.Join(de.path, de.name))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return info.IsDir(), nil
|
||||
}
|
||||
|
||||
// IsRegular returns true if and only if the Dirent represents a regular file.
|
||||
// That is, it ensures that no mode type bits are set.
|
||||
func (de Dirent) IsRegular() bool { return de.modeType&os.ModeType == 0 }
|
||||
|
||||
// IsSymlink returns true if and only if the Dirent represents a file system
|
||||
// symbolic link. Note that on some operating systems, more than one file mode
|
||||
// bit may be set for a node. For instance, on Windows, a symbolic link that
|
||||
// points to a directory will have both the directory and the symbolic link bits
|
||||
// set.
|
||||
func (de Dirent) IsSymlink() bool { return de.modeType&os.ModeSymlink != 0 }
|
||||
|
||||
// IsDevice returns true if and only if the Dirent represents a device file.
|
||||
func (de Dirent) IsDevice() bool { return de.modeType&os.ModeDevice != 0 }
|
||||
|
||||
// ModeType returns the mode bits that specify the file system node type. We
|
||||
// could make our own enum-like data type for encoding the file type, but Go's
|
||||
|
|
@ -42,32 +79,25 @@ func (de Dirent) Name() string { return de.name }
|
|||
// information about files can be moved from one system to another portably.
|
||||
func (de Dirent) ModeType() os.FileMode { return de.modeType }
|
||||
|
||||
// IsDir returns true if and only if the Dirent represents a file system
|
||||
// directory. Note that on some operating systems, more than one file mode bit
|
||||
// may be set for a node. For instance, on Windows, a symbolic link that points
|
||||
// to a directory will have both the directory and the symbolic link bits set.
|
||||
func (de Dirent) IsDir() bool { return de.modeType&os.ModeDir != 0 }
|
||||
// Name returns the base name of the file system entry.
|
||||
func (de Dirent) Name() string { return de.name }
|
||||
|
||||
// IsRegular returns true if and only if the Dirent represents a regular
|
||||
// file. That is, it ensures that no mode type bits are set.
|
||||
func (de Dirent) IsRegular() bool { return de.modeType&os.ModeType == 0 }
|
||||
// reset releases memory held by the entry's name and path, and resets mode type to 0.
|
||||
func (de *Dirent) reset() {
|
||||
de.name = ""
|
||||
de.path = ""
|
||||
de.modeType = 0
|
||||
}
|
||||
|
||||
// IsSymlink returns true if and only if the Dirent represents a file system
|
||||
// symbolic link. Note that on some operating systems, more than one file mode
|
||||
// bit may be set for a node. For instance, on Windows, a symbolic link that
|
||||
// points to a directory will have both the directory and the symbolic link bits
|
||||
// set.
|
||||
func (de Dirent) IsSymlink() bool { return de.modeType&os.ModeSymlink != 0 }
|
||||
|
||||
// Dirents represents a slice of Dirent pointers, which are sortable by
|
||||
// Dirents represents a slice of Dirent pointers, which are sortable by base
|
||||
// name. This type satisfies the `sort.Interface` interface.
|
||||
type Dirents []*Dirent
|
||||
|
||||
// Len returns the count of Dirent structures in the slice.
|
||||
func (l Dirents) Len() int { return len(l) }
|
||||
|
||||
// Less returns true if and only if the Name of the element specified by the
|
||||
// first index is lexicographically less than that of the second index.
|
||||
// Less returns true if and only if the base name of the element specified by
|
||||
// the first index is lexicographically less than that of the second index.
|
||||
func (l Dirents) Less(i, j int) bool { return l[i].name < l[j].name }
|
||||
|
||||
// Swap exchanges the two Dirent entries specified by the two provided indexes.
|
||||
|
|
|
|||
|
|
@ -30,5 +30,13 @@ This library not only provides functions for traversing a file system directory
|
|||
tree, but also for obtaining a list of immediate descendants of a particular
|
||||
directory, typically much more quickly than using `(*os.File).Readdir` or
|
||||
`(*os.File).Readdirnames`.
|
||||
|
||||
scratchBuffer := make([]byte, godirwalk.MinimumScratchBufferSize)
|
||||
|
||||
names, err := godirwalk.ReadDirnames("some/directory", scratchBuffer)
|
||||
// ...
|
||||
|
||||
entries, err := godirwalk.ReadDirents("another/directory", scratchBuffer)
|
||||
// ...
|
||||
*/
|
||||
package godirwalk
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
module github.com/karrick/godirwalk
|
||||
|
||||
require github.com/pkg/errors v0.8.0
|
||||
go 1.13
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
// +build darwin linux
|
||||
// +build aix darwin linux nacl solaris
|
||||
|
||||
package godirwalk
|
||||
|
||||
import "syscall"
|
||||
|
||||
func inoFromDirent(de *syscall.Dirent) uint64 {
|
||||
return de.Ino
|
||||
return uint64(de.Ino)
|
||||
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
)
|
||||
|
||||
// modeType returns the mode type of the file system entry identified by
|
||||
// osPathname by calling the os.Lstat function, to intentionally not follow symbolic
|
||||
// links.
|
||||
//
|
||||
// Even though os.Lstat provides all file mode bits, we want to ensure same
|
||||
// values returned to caller regardless of whether we obtained file mode bits
|
||||
// from syscall or stat call. Therefore mask out the additional file mode bits
|
||||
// that are provided by stat but not by the syscall, so users can rely on their
|
||||
// values.
|
||||
func modeType(osPathname string) (os.FileMode, error) {
|
||||
fi, err := os.Lstat(osPathname)
|
||||
if err == nil {
|
||||
return fi.Mode() & os.ModeType, nil
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
// +build darwin dragonfly freebsd linux netbsd openbsd
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// modeTypeFromDirent converts a syscall defined constant, which is in purview
|
||||
// of OS, to a constant defined by Go, assumed by this project to be stable.
|
||||
//
|
||||
// When the syscall constant is not recognized, this function falls back to a
|
||||
// Stat on the file system.
|
||||
func modeTypeFromDirent(de *syscall.Dirent, osDirname, osBasename string) (os.FileMode, error) {
|
||||
switch de.Type {
|
||||
case syscall.DT_REG:
|
||||
return 0, nil
|
||||
case syscall.DT_DIR:
|
||||
return os.ModeDir, nil
|
||||
case syscall.DT_LNK:
|
||||
return os.ModeSymlink, nil
|
||||
case syscall.DT_CHR:
|
||||
return os.ModeDevice | os.ModeCharDevice, nil
|
||||
case syscall.DT_BLK:
|
||||
return os.ModeDevice, nil
|
||||
case syscall.DT_FIFO:
|
||||
return os.ModeNamedPipe, nil
|
||||
case syscall.DT_SOCK:
|
||||
return os.ModeSocket, nil
|
||||
default:
|
||||
// If syscall returned unknown type (e.g., DT_UNKNOWN, DT_WHT), then
|
||||
// resolve actual mode by reading file information.
|
||||
return modeType(filepath.Join(osDirname, osBasename))
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
// +build aix js nacl solaris
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// modeTypeFromDirent converts a syscall defined constant, which is in purview
|
||||
// of OS, to a constant defined by Go, assumed by this project to be stable.
|
||||
//
|
||||
// Because some operating system syscall.Dirent structures do not include a Type
|
||||
// field, fall back on Stat of the file system.
|
||||
func modeTypeFromDirent(_ *syscall.Dirent, osDirname, osBasename string) (os.FileMode, error) {
|
||||
return modeType(filepath.Join(osDirname, osBasename))
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// +build darwin dragonfly freebsd netbsd openbsd
|
||||
// +build aix darwin dragonfly freebsd netbsd openbsd
|
||||
|
||||
package godirwalk
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
// +build nacl linux solaris
|
||||
// +build nacl linux js solaris
|
||||
|
||||
package godirwalk
|
||||
|
||||
|
|
@ -9,18 +9,18 @@ import (
|
|||
"unsafe"
|
||||
)
|
||||
|
||||
func nameFromDirent(de *syscall.Dirent) []byte {
|
||||
// nameOffset is a compile time constant
|
||||
const nameOffset = int(unsafe.Offsetof(syscall.Dirent{}.Name))
|
||||
|
||||
func nameFromDirent(de *syscall.Dirent) (name []byte) {
|
||||
// Because this GOOS' syscall.Dirent does not provide a field that specifies
|
||||
// the name length, this function must first calculate the max possible name
|
||||
// length, and then search for the NULL byte.
|
||||
ml := int(uint64(de.Reclen) - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)))
|
||||
ml := int(de.Reclen) - nameOffset
|
||||
|
||||
// Convert syscall.Dirent.Name, which is array of int8, to []byte, by
|
||||
// overwriting Cap, Len, and Data slice header fields to values from
|
||||
// syscall.Dirent fields. Setting the Cap, Len, and Data field values for
|
||||
// the slice header modifies what the slice header points to, and in this
|
||||
// case, the name buffer.
|
||||
var name []byte
|
||||
// overwriting Cap, Len, and Data slice header fields to the max possible
|
||||
// name length computed above, and finding the terminating NULL byte.
|
||||
sh := (*reflect.SliceHeader)(unsafe.Pointer(&name))
|
||||
sh.Cap = ml
|
||||
sh.Len = ml
|
||||
|
|
@ -30,7 +30,13 @@ func nameFromDirent(de *syscall.Dirent) []byte {
|
|||
// Found NULL byte; set slice's cap and len accordingly.
|
||||
sh.Cap = index
|
||||
sh.Len = index
|
||||
return
|
||||
}
|
||||
|
||||
return name
|
||||
// NOTE: This branch is not expected, but included for defensive
|
||||
// programming, and provides a hard stop on the name based on the structure
|
||||
// field array size.
|
||||
sh.Cap = len(de.Name)
|
||||
sh.Len = sh.Cap
|
||||
return
|
||||
}
|
||||
|
|
@ -6,7 +6,10 @@ package godirwalk
|
|||
// symbolic link, it will be resolved.
|
||||
//
|
||||
// If an optional scratch buffer is provided that is at least one page of
|
||||
// memory, it will be used when reading directory entries from the file system.
|
||||
// memory, it will be used when reading directory entries from the file
|
||||
// system. If you plan on calling this function in a loop, you will have
|
||||
// significantly better performance if you allocate a scratch buffer and use it
|
||||
// each time you call this function.
|
||||
//
|
||||
// children, err := godirwalk.ReadDirents(osDirname, nil)
|
||||
// if err != nil {
|
||||
|
|
@ -17,7 +20,7 @@ package godirwalk
|
|||
// fmt.Printf("%s %s\n", child.ModeType, child.Name)
|
||||
// }
|
||||
func ReadDirents(osDirname string, scratchBuffer []byte) (Dirents, error) {
|
||||
return readdirents(osDirname, scratchBuffer)
|
||||
return readDirents(osDirname, scratchBuffer)
|
||||
}
|
||||
|
||||
// ReadDirnames returns a slice of strings, representing the immediate
|
||||
|
|
@ -25,14 +28,17 @@ func ReadDirents(osDirname string, scratchBuffer []byte) (Dirents, error) {
|
|||
// symbolic link, it will be resolved.
|
||||
//
|
||||
// If an optional scratch buffer is provided that is at least one page of
|
||||
// memory, it will be used when reading directory entries from the file system.
|
||||
// memory, it will be used when reading directory entries from the file
|
||||
// system. If you plan on calling this function in a loop, you will have
|
||||
// significantly better performance if you allocate a scratch buffer and use it
|
||||
// each time you call this function.
|
||||
//
|
||||
// Note that this function, depending on operating system, may or may not invoke
|
||||
// the ReadDirents function, in order to prepare the list of immediate
|
||||
// descendants. Therefore, if your program needs both the names and the file
|
||||
// system mode types of descendants, it will always be faster to invoke
|
||||
// ReadDirents directly, rather than calling this function, then looping over
|
||||
// the results and calling os.Stat for each child.
|
||||
// the results and calling os.Stat or os.Lstat for each entry.
|
||||
//
|
||||
// children, err := godirwalk.ReadDirnames(osDirname, nil)
|
||||
// if err != nil {
|
||||
|
|
@ -43,5 +49,5 @@ func ReadDirents(osDirname string, scratchBuffer []byte) (Dirents, error) {
|
|||
// fmt.Printf("%s\n", child)
|
||||
// }
|
||||
func ReadDirnames(osDirname string, scratchBuffer []byte) ([]string, error) {
|
||||
return readdirnames(osDirname, scratchBuffer)
|
||||
return readDirnames(osDirname, scratchBuffer)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,109 +1,131 @@
|
|||
// +build darwin freebsd linux netbsd openbsd
|
||||
// +build !windows
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func readdirents(osDirname string, scratchBuffer []byte) (Dirents, error) {
|
||||
// MinimumScratchBufferSize specifies the minimum size of the scratch buffer
|
||||
// that ReadDirents, ReadDirnames, Scanner, and Walk will use when reading file
|
||||
// entries from the operating system. During program startup it is initialized
|
||||
// to the result from calling `os.Getpagesize()` for non Windows environments,
|
||||
// and 0 for Windows.
|
||||
var MinimumScratchBufferSize = os.Getpagesize()
|
||||
|
||||
func newScratchBuffer() []byte { return make([]byte, MinimumScratchBufferSize) }
|
||||
|
||||
func readDirents(osDirname string, scratchBuffer []byte) ([]*Dirent, error) {
|
||||
var entries []*Dirent
|
||||
var workBuffer []byte
|
||||
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Open")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var entries Dirents
|
||||
|
||||
fd := int(dh.Fd())
|
||||
|
||||
if len(scratchBuffer) < MinimumScratchBufferSize {
|
||||
scratchBuffer = make([]byte, DefaultScratchBufferSize)
|
||||
scratchBuffer = newScratchBuffer()
|
||||
}
|
||||
|
||||
var de *syscall.Dirent
|
||||
|
||||
var sde syscall.Dirent
|
||||
for {
|
||||
if len(workBuffer) == 0 {
|
||||
n, err := syscall.ReadDirent(fd, scratchBuffer)
|
||||
// n, err := unix.ReadDirent(fd, scratchBuffer)
|
||||
if err != nil {
|
||||
_ = dh.Close() // ignore potential error returned by Close
|
||||
return nil, errors.Wrap(err, "cannot ReadDirent")
|
||||
}
|
||||
if n <= 0 {
|
||||
break // end of directory reached
|
||||
}
|
||||
// Loop over the bytes returned by reading the directory entries.
|
||||
buf := scratchBuffer[:n]
|
||||
for len(buf) > 0 {
|
||||
de = (*syscall.Dirent)(unsafe.Pointer(&buf[0])) // point entry to first syscall.Dirent in buffer
|
||||
buf = buf[de.Reclen:] // advance buffer
|
||||
|
||||
if inoFromDirent(de) == 0 {
|
||||
continue // this item has been deleted, but not yet removed from directory
|
||||
}
|
||||
|
||||
nameSlice := nameFromDirent(de)
|
||||
namlen := len(nameSlice)
|
||||
if (namlen == 0) || (namlen == 1 && nameSlice[0] == '.') || (namlen == 2 && nameSlice[0] == '.' && nameSlice[1] == '.') {
|
||||
continue // skip unimportant entries
|
||||
}
|
||||
osChildname := string(nameSlice)
|
||||
|
||||
// Convert syscall constant, which is in purview of OS, to a
|
||||
// constant defined by Go, assumed by this project to be stable.
|
||||
var mode os.FileMode
|
||||
switch de.Type {
|
||||
case syscall.DT_REG:
|
||||
// regular file
|
||||
case syscall.DT_DIR:
|
||||
mode = os.ModeDir
|
||||
case syscall.DT_LNK:
|
||||
mode = os.ModeSymlink
|
||||
case syscall.DT_CHR:
|
||||
mode = os.ModeDevice | os.ModeCharDevice
|
||||
case syscall.DT_BLK:
|
||||
mode = os.ModeDevice
|
||||
case syscall.DT_FIFO:
|
||||
mode = os.ModeNamedPipe
|
||||
case syscall.DT_SOCK:
|
||||
mode = os.ModeSocket
|
||||
default:
|
||||
// If syscall returned unknown type (e.g., DT_UNKNOWN, DT_WHT),
|
||||
// then resolve actual mode by getting stat.
|
||||
fi, err := os.Lstat(filepath.Join(osDirname, osChildname))
|
||||
if err != nil {
|
||||
_ = dh.Close() // ignore potential error returned by Close
|
||||
return nil, errors.Wrap(err, "cannot Stat")
|
||||
}
|
||||
// We only care about the bits that identify the type of a file
|
||||
// system node, and can ignore append, exclusive, temporary,
|
||||
// setuid, setgid, permission bits, and sticky bits, which are
|
||||
// coincident to the bits that declare type of the file system
|
||||
// node.
|
||||
mode = fi.Mode() & os.ModeType
|
||||
}
|
||||
|
||||
entries = append(entries, &Dirent{name: osChildname, modeType: mode})
|
||||
if err == syscall.EINTR /* || err == unix.EINTR */ {
|
||||
continue
|
||||
}
|
||||
_ = dh.Close()
|
||||
return nil, err
|
||||
}
|
||||
if n <= 0 { // end of directory: normal exit
|
||||
if err = dh.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
workBuffer = scratchBuffer[:n] // trim work buffer to number of bytes read
|
||||
}
|
||||
|
||||
copy((*[unsafe.Sizeof(syscall.Dirent{})]byte)(unsafe.Pointer(&sde))[:], workBuffer)
|
||||
workBuffer = workBuffer[reclen(&sde):] // advance buffer for next iteration through loop
|
||||
|
||||
if inoFromDirent(&sde) == 0 {
|
||||
continue // inode set to 0 indicates an entry that was marked as deleted
|
||||
}
|
||||
|
||||
nameSlice := nameFromDirent(&sde)
|
||||
nameLength := len(nameSlice)
|
||||
|
||||
if nameLength == 0 || (nameSlice[0] == '.' && (nameLength == 1 || (nameLength == 2 && nameSlice[1] == '.'))) {
|
||||
continue
|
||||
}
|
||||
|
||||
childName := string(nameSlice)
|
||||
mt, err := modeTypeFromDirent(&sde, osDirname, childName)
|
||||
if err != nil {
|
||||
_ = dh.Close()
|
||||
return nil, err
|
||||
}
|
||||
entries = append(entries, &Dirent{name: childName, path: osDirname, modeType: mt})
|
||||
}
|
||||
}
|
||||
|
||||
func readdirnames(osDirname string, scratchBuffer []byte) ([]string, error) {
|
||||
des, err := readdirents(osDirname, scratchBuffer)
|
||||
func readDirnames(osDirname string, scratchBuffer []byte) ([]string, error) {
|
||||
var entries []string
|
||||
var workBuffer []byte
|
||||
var sde *syscall.Dirent
|
||||
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
names := make([]string, len(des))
|
||||
for i, v := range des {
|
||||
names[i] = v.name
|
||||
fd := int(dh.Fd())
|
||||
|
||||
if len(scratchBuffer) < MinimumScratchBufferSize {
|
||||
scratchBuffer = newScratchBuffer()
|
||||
}
|
||||
|
||||
for {
|
||||
if len(workBuffer) == 0 {
|
||||
n, err := syscall.ReadDirent(fd, scratchBuffer)
|
||||
// n, err := unix.ReadDirent(fd, scratchBuffer)
|
||||
if err != nil {
|
||||
if err == syscall.EINTR /* || err == unix.EINTR */ {
|
||||
continue
|
||||
}
|
||||
_ = dh.Close()
|
||||
return nil, err
|
||||
}
|
||||
if n <= 0 { // end of directory: normal exit
|
||||
if err = dh.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
workBuffer = scratchBuffer[:n] // trim work buffer to number of bytes read
|
||||
}
|
||||
|
||||
sde = (*syscall.Dirent)(unsafe.Pointer(&workBuffer[0])) // point entry to first syscall.Dirent in buffer
|
||||
// Handle first entry in the work buffer.
|
||||
workBuffer = workBuffer[reclen(sde):] // advance buffer for next iteration through loop
|
||||
|
||||
if inoFromDirent(sde) == 0 {
|
||||
continue // inode set to 0 indicates an entry that was marked as deleted
|
||||
}
|
||||
|
||||
nameSlice := nameFromDirent(sde)
|
||||
nameLength := len(nameSlice)
|
||||
|
||||
if nameLength == 0 || (nameSlice[0] == '.' && (nameLength == 1 || (nameLength == 2 && nameSlice[1] == '.'))) {
|
||||
continue
|
||||
}
|
||||
|
||||
entries = append(entries, string(nameSlice))
|
||||
}
|
||||
return names, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,54 +1,66 @@
|
|||
// +build windows
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
import "os"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
// MinimumScratchBufferSize specifies the minimum size of the scratch buffer
|
||||
// that ReadDirents, ReadDirnames, Scanner, and Walk will use when reading file
|
||||
// entries from the operating system. During program startup it is initialized
|
||||
// to the result from calling `os.Getpagesize()` for non Windows environments,
|
||||
// and 0 for Windows.
|
||||
var MinimumScratchBufferSize = 0
|
||||
|
||||
// The functions in this file are mere wrappers of what is already provided by
|
||||
// standard library, in order to provide the same API as this library provides.
|
||||
//
|
||||
// The scratch buffer argument is ignored by this architecture.
|
||||
//
|
||||
// Please send PR or link to article if you know of a more performant way of
|
||||
// enumerating directory contents and mode types on Windows.
|
||||
func newScratchBuffer() []byte { return nil }
|
||||
|
||||
func readdirents(osDirname string, _ []byte) (Dirents, error) {
|
||||
func readDirents(osDirname string, _ []byte) ([]*Dirent, error) {
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Open")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fileinfos, err := dh.Readdir(0)
|
||||
if er := dh.Close(); err == nil {
|
||||
err = er
|
||||
}
|
||||
fileinfos, err := dh.Readdir(-1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Readdir")
|
||||
_ = dh.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
entries := make(Dirents, len(fileinfos))
|
||||
for i, info := range fileinfos {
|
||||
entries[i] = &Dirent{name: info.Name(), modeType: info.Mode() & os.ModeType}
|
||||
entries := make([]*Dirent, len(fileinfos))
|
||||
|
||||
for i, fi := range fileinfos {
|
||||
entries[i] = &Dirent{
|
||||
name: fi.Name(),
|
||||
path: osDirname,
|
||||
modeType: fi.Mode() & os.ModeType,
|
||||
}
|
||||
}
|
||||
|
||||
if err = dh.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
func readdirnames(osDirname string, _ []byte) ([]string, error) {
|
||||
func readDirnames(osDirname string, _ []byte) ([]string, error) {
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Open")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
entries, err := dh.Readdirnames(0)
|
||||
if er := dh.Close(); err == nil {
|
||||
err = er
|
||||
}
|
||||
fileinfos, err := dh.Readdir(-1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "cannot Readdirnames")
|
||||
_ = dh.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
entries := make([]string, len(fileinfos))
|
||||
|
||||
for i, fi := range fileinfos {
|
||||
entries[i] = fi.Name()
|
||||
}
|
||||
|
||||
if err = dh.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return entries, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
// +build dragonfly
|
||||
|
||||
package godirwalk
|
||||
|
||||
import "syscall"
|
||||
|
||||
func reclen(de *syscall.Dirent) uint64 {
|
||||
return (16 + uint64(de.Namlen) + 1 + 7) &^ 7
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
// +build nacl linux js solaris aix darwin freebsd netbsd openbsd
|
||||
|
||||
package godirwalk
|
||||
|
||||
import "syscall"
|
||||
|
||||
func reclen(de *syscall.Dirent) uint64 {
|
||||
return uint64(de.Reclen)
|
||||
}
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
// +build !windows
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Scanner is an iterator to enumerate the contents of a directory.
|
||||
type Scanner struct {
|
||||
scratchBuffer []byte // read directory bytes from file system into this buffer
|
||||
workBuffer []byte // points into scratchBuffer, from which we chunk out directory entries
|
||||
osDirname string
|
||||
childName string
|
||||
err error // err is the error associated with scanning directory
|
||||
statErr error // statErr is any error return while attempting to stat an entry
|
||||
dh *os.File // used to close directory after done reading
|
||||
de *Dirent // most recently decoded directory entry
|
||||
sde syscall.Dirent
|
||||
fd int // file descriptor used to read entries from directory
|
||||
}
|
||||
|
||||
// NewScanner returns a new directory Scanner that lazily enumerates the
|
||||
// contents of a single directory.
|
||||
//
|
||||
// scanner, err := godirwalk.NewScanner(dirname)
|
||||
// if err != nil {
|
||||
// fatal("cannot scan directory: %s", err)
|
||||
// }
|
||||
//
|
||||
// for scanner.Scan() {
|
||||
// dirent, err := scanner.Dirent()
|
||||
// if err != nil {
|
||||
// warning("cannot get dirent: %s", err)
|
||||
// continue
|
||||
// }
|
||||
// name := dirent.Name()
|
||||
// if name == "break" {
|
||||
// break
|
||||
// }
|
||||
// if name == "continue" {
|
||||
// continue
|
||||
// }
|
||||
// fmt.Printf("%v %v\n", dirent.ModeType(), dirent.Name())
|
||||
// }
|
||||
// if err := scanner.Err(); err != nil {
|
||||
// fatal("cannot scan directory: %s", err)
|
||||
// }
|
||||
func NewScanner(osDirname string) (*Scanner, error) {
|
||||
return NewScannerWithScratchBuffer(osDirname, nil)
|
||||
}
|
||||
|
||||
// NewScannerWithScratchBuffer returns a new directory Scanner that lazily
|
||||
// enumerates the contents of a single directory. On platforms other than
|
||||
// Windows it uses the provided scratch buffer to read from the file system. On
|
||||
// Windows the scratch buffer is ignored.
|
||||
func NewScannerWithScratchBuffer(osDirname string, scratchBuffer []byte) (*Scanner, error) {
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(scratchBuffer) < MinimumScratchBufferSize {
|
||||
scratchBuffer = newScratchBuffer()
|
||||
}
|
||||
scanner := &Scanner{
|
||||
scratchBuffer: scratchBuffer,
|
||||
osDirname: osDirname,
|
||||
dh: dh,
|
||||
fd: int(dh.Fd()),
|
||||
}
|
||||
return scanner, nil
|
||||
}
|
||||
|
||||
// Dirent returns the current directory entry while scanning a directory.
|
||||
func (s *Scanner) Dirent() (*Dirent, error) {
|
||||
if s.de == nil {
|
||||
s.de = &Dirent{name: s.childName, path: s.osDirname}
|
||||
s.de.modeType, s.statErr = modeTypeFromDirent(&s.sde, s.osDirname, s.childName)
|
||||
}
|
||||
return s.de, s.statErr
|
||||
}
|
||||
|
||||
// done is called when directory scanner unable to continue, with either the
|
||||
// triggering error, or nil when there are simply no more entries to read from
|
||||
// the directory.
|
||||
func (s *Scanner) done(err error) {
|
||||
if s.dh == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if cerr := s.dh.Close(); err == nil {
|
||||
s.err = cerr
|
||||
}
|
||||
|
||||
s.osDirname, s.childName = "", ""
|
||||
s.scratchBuffer, s.workBuffer = nil, nil
|
||||
s.dh, s.de, s.statErr = nil, nil, nil
|
||||
s.sde = syscall.Dirent{}
|
||||
s.fd = 0
|
||||
}
|
||||
|
||||
// Err returns any error associated with scanning a directory. It is normal to
|
||||
// call Err after Scan returns false, even though they both ensure Scanner
|
||||
// resources are released. Do not call until done scanning a directory.
|
||||
func (s *Scanner) Err() error {
|
||||
s.done(nil)
|
||||
return s.err
|
||||
}
|
||||
|
||||
// Name returns the base name of the current directory entry while scanning a
|
||||
// directory.
|
||||
func (s *Scanner) Name() string { return s.childName }
|
||||
|
||||
// Scan potentially reads and then decodes the next directory entry from the
|
||||
// file system.
|
||||
//
|
||||
// When it returns false, this releases resources used by the Scanner then
|
||||
// returns any error associated with closing the file system directory resource.
|
||||
func (s *Scanner) Scan() bool {
|
||||
if s.dh == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
s.de = nil
|
||||
|
||||
for {
|
||||
// When the work buffer has nothing remaining to decode, we need to load
|
||||
// more data from disk.
|
||||
if len(s.workBuffer) == 0 {
|
||||
n, err := syscall.ReadDirent(s.fd, s.scratchBuffer)
|
||||
// n, err := unix.ReadDirent(s.fd, s.scratchBuffer)
|
||||
if err != nil {
|
||||
if err == syscall.EINTR /* || err == unix.EINTR */ {
|
||||
continue
|
||||
}
|
||||
s.done(err)
|
||||
return false
|
||||
}
|
||||
if n <= 0 { // end of directory: normal exit
|
||||
s.done(nil)
|
||||
return false
|
||||
}
|
||||
s.workBuffer = s.scratchBuffer[:n] // trim work buffer to number of bytes read
|
||||
}
|
||||
|
||||
// point entry to first syscall.Dirent in buffer
|
||||
copy((*[unsafe.Sizeof(syscall.Dirent{})]byte)(unsafe.Pointer(&s.sde))[:], s.workBuffer)
|
||||
s.workBuffer = s.workBuffer[reclen(&s.sde):] // advance buffer for next iteration through loop
|
||||
|
||||
if inoFromDirent(&s.sde) == 0 {
|
||||
continue // inode set to 0 indicates an entry that was marked as deleted
|
||||
}
|
||||
|
||||
nameSlice := nameFromDirent(&s.sde)
|
||||
nameLength := len(nameSlice)
|
||||
|
||||
if nameLength == 0 || (nameSlice[0] == '.' && (nameLength == 1 || (nameLength == 2 && nameSlice[1] == '.'))) {
|
||||
continue
|
||||
}
|
||||
|
||||
s.childName = string(nameSlice)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
// +build windows
|
||||
|
||||
package godirwalk
|
||||
|
||||
import (
	"fmt"
	"io"
	"os"
)
|
||||
|
||||
// Scanner is an iterator to enumerate the contents of a directory.
|
||||
type Scanner struct {
|
||||
osDirname string
|
||||
childName string
|
||||
dh *os.File // dh is handle to open directory
|
||||
de *Dirent
|
||||
err error // err is the error associated with scanning directory
|
||||
childMode os.FileMode
|
||||
}
|
||||
|
||||
// NewScanner returns a new directory Scanner that lazily enumerates the
|
||||
// contents of a single directory.
|
||||
//
|
||||
// scanner, err := godirwalk.NewScanner(dirname)
|
||||
// if err != nil {
|
||||
// fatal("cannot scan directory: %s", err)
|
||||
// }
|
||||
//
|
||||
// for scanner.Scan() {
|
||||
// dirent, err := scanner.Dirent()
|
||||
// if err != nil {
|
||||
// warning("cannot get dirent: %s", err)
|
||||
// continue
|
||||
// }
|
||||
// name := dirent.Name()
|
||||
// if name == "break" {
|
||||
// break
|
||||
// }
|
||||
// if name == "continue" {
|
||||
// continue
|
||||
// }
|
||||
// fmt.Printf("%v %v\n", dirent.ModeType(), dirent.Name())
|
||||
// }
|
||||
// if err := scanner.Err(); err != nil {
|
||||
// fatal("cannot scan directory: %s", err)
|
||||
// }
|
||||
func NewScanner(osDirname string) (*Scanner, error) {
|
||||
dh, err := os.Open(osDirname)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
scanner := &Scanner{
|
||||
osDirname: osDirname,
|
||||
dh: dh,
|
||||
}
|
||||
return scanner, nil
|
||||
}
|
||||
|
||||
// NewScannerWithScratchBuffer returns a new directory Scanner that lazily
|
||||
// enumerates the contents of a single directory. On platforms other than
|
||||
// Windows it uses the provided scratch buffer to read from the file system. On
|
||||
// Windows the scratch buffer parameter is ignored.
|
||||
func NewScannerWithScratchBuffer(osDirname string, scratchBuffer []byte) (*Scanner, error) {
|
||||
return NewScanner(osDirname)
|
||||
}
|
||||
|
||||
// Dirent returns the current directory entry while scanning a directory.
|
||||
func (s *Scanner) Dirent() (*Dirent, error) {
|
||||
if s.de == nil {
|
||||
s.de = &Dirent{
|
||||
name: s.childName,
|
||||
path: s.osDirname,
|
||||
modeType: s.childMode,
|
||||
}
|
||||
}
|
||||
return s.de, nil
|
||||
}
|
||||
|
||||
// done is called when directory scanner unable to continue, with either the
|
||||
// triggering error, or nil when there are simply no more entries to read from
|
||||
// the directory.
|
||||
func (s *Scanner) done(err error) {
|
||||
if s.dh == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if cerr := s.dh.Close(); err == nil {
|
||||
s.err = cerr
|
||||
}
|
||||
|
||||
s.childName, s.osDirname = "", ""
|
||||
s.de, s.dh = nil, nil
|
||||
}
|
||||
|
||||
// Err returns any error associated with scanning a directory. It is normal to
|
||||
// call Err after Scan returns false, even though they both ensure Scanner
|
||||
// resources are released. Do not call until done scanning a directory.
|
||||
func (s *Scanner) Err() error {
|
||||
s.done(nil)
|
||||
return s.err
|
||||
}
|
||||
|
||||
// Name returns the base name of the current directory entry while scanning a
|
||||
// directory.
|
||||
func (s *Scanner) Name() string { return s.childName }
|
||||
|
||||
// Scan potentially reads and then decodes the next directory entry from the
|
||||
// file system.
|
||||
//
|
||||
// When it returns false, this releases resources used by the Scanner then
|
||||
// returns any error associated with closing the file system directory resource.
|
||||
func (s *Scanner) Scan() bool {
|
||||
if s.dh == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
s.de = nil
|
||||
|
||||
fileinfos, err := s.dh.Readdir(1)
|
||||
if err != nil {
|
||||
s.done(err)
|
||||
return false
|
||||
}
|
||||
|
||||
if l := len(fileinfos); l != 1 {
|
||||
s.done(fmt.Errorf("expected a single entry rather than %d", l))
|
||||
return false
|
||||
}
|
||||
|
||||
fi := fileinfos[0]
|
||||
s.childMode = fi.Mode() & os.ModeType
|
||||
s.childName = fi.Name()
|
||||
return true
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package godirwalk
|
||||
|
||||
import "sort"
|
||||
|
||||
type scanner interface {
|
||||
Dirent() (*Dirent, error)
|
||||
Err() error
|
||||
Name() string
|
||||
Scan() bool
|
||||
}
|
||||
|
||||
// sortedScanner enumerates through a directory's contents after reading the
|
||||
// entire directory and sorting the entries by name. Used by walk to simplify
|
||||
// its implementation.
|
||||
type sortedScanner struct {
|
||||
dd []*Dirent
|
||||
de *Dirent
|
||||
}
|
||||
|
||||
func newSortedScanner(osPathname string, scratchBuffer []byte) (*sortedScanner, error) {
|
||||
deChildren, err := ReadDirents(osPathname, scratchBuffer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sort.Sort(deChildren)
|
||||
return &sortedScanner{dd: deChildren}, nil
|
||||
}
|
||||
|
||||
func (d *sortedScanner) Err() error {
|
||||
d.dd, d.de = nil, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Dirent returns the current directory entry. The returned error is always
// nil.
func (d *sortedScanner) Dirent() (*Dirent, error) {
	return d.de, nil
}
|
||||
|
||||
// Name returns the base name of the current directory entry.
func (d *sortedScanner) Name() string {
	return d.de.name
}
|
||||
|
||||
func (d *sortedScanner) Scan() bool {
|
||||
if len(d.dd) > 0 {
|
||||
d.de, d.dd = d.dd[0], d.dd[1:]
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
@ -1,31 +1,12 @@
|
|||
package godirwalk
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// DefaultScratchBufferSize specifies the size of the scratch buffer that will
|
||||
// be allocated by Walk, ReadDirents, or ReadDirnames when a scratch buffer is
|
||||
// not provided or the scratch buffer that is provided is smaller than
|
||||
// MinimumScratchBufferSize bytes. This may seem like a large value; however,
|
||||
// when a program intends to enumerate large directories, having a larger
|
||||
// scratch buffer results in fewer operating system calls.
|
||||
const DefaultScratchBufferSize = 64 * 1024
|
||||
|
||||
// MinimumScratchBufferSize specifies the minimum size of the scratch buffer
|
||||
// that Walk, ReadDirents, and ReadDirnames will use when reading file entries
|
||||
// from the operating system. It is initialized to the result from calling
|
||||
// `os.Getpagesize()` during program startup.
|
||||
var MinimumScratchBufferSize int
|
||||
|
||||
func init() {
|
||||
MinimumScratchBufferSize = os.Getpagesize()
|
||||
}
|
||||
|
||||
// Options provide parameters for how the Walk function operates.
|
||||
type Options struct {
|
||||
// ErrorCallback specifies a function to be invoked in the case of an error
|
||||
|
|
@ -84,9 +65,16 @@ type Options struct {
|
|||
// Walk to use when reading directory entries, to reduce amount of garbage
|
||||
// generation. Not all architectures take advantage of the scratch
|
||||
// buffer. If omitted or the provided buffer has fewer bytes than
|
||||
// MinimumScratchBufferSize, then a buffer with DefaultScratchBufferSize
|
||||
// MinimumScratchBufferSize, then a buffer with MinimumScratchBufferSize
|
||||
// bytes will be created and used once per Walk invocation.
|
||||
ScratchBuffer []byte
|
||||
|
||||
// AllowNonDirectory causes Walk to bypass the check that ensures it is
|
||||
// being called on a directory node, or when FollowSymbolicLinks is true, a
|
||||
// symbolic link that points to a directory. Leave this value false to have
|
||||
// Walk return an error when called on a non-directory. Set this true to
|
||||
// have Walk run even when called on a non-directory node.
|
||||
AllowNonDirectory bool
|
||||
}
|
||||
|
||||
// ErrorAction defines a set of actions the Walk function could take based on
|
||||
|
|
@ -108,6 +96,11 @@ const (
|
|||
SkipNode
|
||||
)
|
||||
|
||||
// SkipThis is used as a return value from WalkFuncs to indicate that the file
|
||||
// system entry named in the call is to be skipped. It is not returned as an
|
||||
// error by any function.
|
||||
var SkipThis = errors.New("skip this directory entry")
|
||||
|
||||
// WalkFunc is the type of the function called for each file system node visited
|
||||
// by Walk. The pathname argument will contain the argument to Walk as a prefix;
|
||||
// that is, if Walk is called with "dir", which is a directory containing the
|
||||
|
|
@ -131,13 +124,60 @@ const (
|
|||
// Walk skips the remaining files in the containing directory. Note that any
|
||||
// supplied ErrorCallback function is not invoked with filepath.SkipDir when the
|
||||
// Callback or PostChildrenCallback functions return that special value.
|
||||
//
|
||||
// One arguably confusing aspect of the filepath.WalkFunc API that this library
|
||||
// must emulate is how a caller tells Walk to skip file system entries or
|
||||
// directories. With both filepath.Walk and this Walk, when a callback function
|
||||
// wants to skip a directory and not descend into its children, it returns
|
||||
// filepath.SkipDir. If the callback function returns filepath.SkipDir for a
|
||||
// non-directory, filepath.Walk and this library will stop processing any more
|
||||
// entries in the current directory, which is what many people do not want. If
|
||||
// you want to simply skip a particular non-directory entry but continue
|
||||
// processing entries in the directory, a callback function must return nil. The
|
||||
// implication of this API is that when you want to walk a file system hierarchy and
|
||||
// skip an entry, when the entry is a directory, you must return one value,
|
||||
// namely filepath.SkipDir, but when the entry is a non-directory, you must
|
||||
// return a different value, namely nil. In other words, to get identical
|
||||
// behavior for two file system entry types you need to send different token
|
||||
// values.
|
||||
//
|
||||
// Here is an example callback function that adheres to filepath.Walk API to
|
||||
// have it skip any file system entry whose full pathname includes a particular
|
||||
// substring, optSkip:
|
||||
//
|
||||
// func callback1(osPathname string, de *godirwalk.Dirent) error {
|
||||
// if optSkip != "" && strings.Contains(osPathname, optSkip) {
|
||||
// if b, err := de.IsDirOrSymlinkToDir(); b == true && err == nil {
|
||||
// return filepath.SkipDir
|
||||
// }
|
||||
// return nil
|
||||
// }
|
||||
// // Process file like normal...
|
||||
// return nil
|
||||
// }
|
||||
//
|
||||
// This library attempts to eliminate some of that logic boilerplate by
|
||||
// providing a new token error value, SkipThis, which a callback function may
|
||||
// return to skip the current file system entry regardless of what type of entry
|
||||
// it is. If the current entry is a directory, its children will not be
|
||||
// enumerated, exactly as if the callback returned filepath.SkipDir. If the
|
||||
// current entry is a non-directory, the next file system entry in the current
|
||||
// directory will be enumerated, exactly as if the callback returned nil. The
|
||||
// following example callback function has identical behavior as the previous,
|
||||
// but has less boilerplate, and admittedly simpler logic.
|
||||
//
|
||||
// func callback2(osPathname string, de *godirwalk.Dirent) error {
|
||||
// if optSkip != "" && strings.Contains(osPathname, optSkip) {
|
||||
// return godirwalk.SkipThis
|
||||
// }
|
||||
// // Process file like normal...
|
||||
// return nil
|
||||
// }
|
||||
type WalkFunc func(osPathname string, directoryEntry *Dirent) error
|
||||
|
||||
// Walk walks the file tree rooted at the specified directory, calling the
|
||||
// specified callback function for each file system node in the tree, including
|
||||
// root, symbolic links, and other node types. The nodes are walked in lexical
|
||||
// order, which makes the output deterministic but means that for very large
|
||||
// directories this function can be inefficient.
|
||||
// root, symbolic links, and other node types.
|
||||
//
|
||||
// This function is often much faster than filepath.Walk because it does not
|
||||
// invoke os.Stat for every node it encounters, but rather obtains the file
|
||||
|
|
@ -175,6 +215,10 @@ type WalkFunc func(osPathname string, directoryEntry *Dirent) error
|
|||
// }
|
||||
// }
|
||||
func Walk(pathname string, options *Options) error {
|
||||
if options == nil || options.Callback == nil {
|
||||
return errors.New("cannot walk without non-nil options and Callback function")
|
||||
}
|
||||
|
||||
pathname = filepath.Clean(pathname)
|
||||
|
||||
var fi os.FileInfo
|
||||
|
|
@ -182,26 +226,28 @@ func Walk(pathname string, options *Options) error {
|
|||
|
||||
if options.FollowSymbolicLinks {
|
||||
fi, err = os.Stat(pathname)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot Stat")
|
||||
}
|
||||
} else {
|
||||
fi, err = os.Lstat(pathname)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot Lstat")
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mode := fi.Mode()
|
||||
if mode&os.ModeDir == 0 {
|
||||
return errors.Errorf("cannot Walk non-directory: %s", pathname)
|
||||
if !options.AllowNonDirectory && mode&os.ModeDir == 0 {
|
||||
return fmt.Errorf("cannot Walk non-directory: %s", pathname)
|
||||
}
|
||||
|
||||
dirent := &Dirent{
|
||||
name: filepath.Base(pathname),
|
||||
path: filepath.Dir(pathname),
|
||||
modeType: mode & os.ModeType,
|
||||
}
|
||||
|
||||
if len(options.ScratchBuffer) < MinimumScratchBufferSize {
|
||||
options.ScratchBuffer = newScratchBuffer()
|
||||
}
|
||||
|
||||
// If ErrorCallback is nil, set to a default value that halts the walk
|
||||
// process on all operating system errors. This is done to allow error
|
||||
// handling to be more succinct in the walk code.
|
||||
|
|
@ -209,15 +255,15 @@ func Walk(pathname string, options *Options) error {
|
|||
options.ErrorCallback = defaultErrorCallback
|
||||
}
|
||||
|
||||
if len(options.ScratchBuffer) < MinimumScratchBufferSize {
|
||||
options.ScratchBuffer = make([]byte, DefaultScratchBufferSize)
|
||||
}
|
||||
|
||||
err = walk(pathname, dirent, options)
|
||||
if err == filepath.SkipDir {
|
||||
return nil // silence SkipDir for top level
|
||||
}
|
||||
switch err {
|
||||
case nil, SkipThis, filepath.SkipDir:
|
||||
// silence SkipThis and filepath.SkipDir for top level
|
||||
debug("no error of significance: %v\n", err)
|
||||
return nil
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// defaultErrorCallback always returns Halt because if the upstream code did not
|
||||
|
|
@ -230,126 +276,93 @@ func defaultErrorCallback(_ string, _ error) ErrorAction { return Halt }
|
|||
func walk(osPathname string, dirent *Dirent, options *Options) error {
|
||||
err := options.Callback(osPathname, dirent)
|
||||
if err != nil {
|
||||
if err == filepath.SkipDir {
|
||||
if err == SkipThis || err == filepath.SkipDir {
|
||||
return err
|
||||
}
|
||||
err = errors.Wrap(err, "Callback") // wrap potential errors returned by callback
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// On some platforms, an entry can have more than one mode type bit set.
|
||||
// For instance, it could have both the symlink bit and the directory bit
|
||||
// set indicating it's a symlink to a directory.
|
||||
if dirent.IsSymlink() {
|
||||
if !options.FollowSymbolicLinks {
|
||||
return nil
|
||||
}
|
||||
// Only need to Stat entry if platform did not already have os.ModeDir
|
||||
// set, such as would be the case for unix like operating systems. (This
|
||||
// guard eliminates extra os.Stat check on Windows.)
|
||||
if !dirent.IsDir() {
|
||||
referent, err := os.Readlink(osPathname)
|
||||
// Does this symlink point to a directory?
|
||||
info, err := os.Stat(osPathname)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Readlink")
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
} else if !dirent.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
var osp string
|
||||
if filepath.IsAbs(referent) {
|
||||
osp = referent
|
||||
// If get here, then specified pathname refers to a directory or a
|
||||
// symbolic link to a directory.
|
||||
|
||||
var ds scanner
|
||||
|
||||
if options.Unsorted {
|
||||
// When upstream does not request a sorted iteration, it's more memory
|
||||
// efficient to read a single child at a time from the file system.
|
||||
ds, err = NewScanner(osPathname)
|
||||
} else {
|
||||
osp = filepath.Join(filepath.Dir(osPathname), referent)
|
||||
// When upstream wants a sorted iteration, we must read the entire
|
||||
// directory and sort through the child names, and then iterate on each
|
||||
// child.
|
||||
ds, err = newSortedScanner(osPathname, options.ScratchBuffer)
|
||||
}
|
||||
|
||||
fi, err := os.Stat(osp)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Stat")
|
||||
if action := options.ErrorCallback(osp, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
dirent.modeType = fi.Mode() & os.ModeType
|
||||
}
|
||||
}
|
||||
|
||||
if !dirent.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If get here, then specified pathname refers to a directory.
|
||||
deChildren, err := ReadDirents(osPathname, options.ScratchBuffer)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot ReadDirents")
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if !options.Unsorted {
|
||||
sort.Sort(deChildren) // sort children entries unless upstream says to leave unsorted
|
||||
}
|
||||
|
||||
for _, deChild := range deChildren {
|
||||
for ds.Scan() {
|
||||
deChild, err := ds.Dirent()
|
||||
osChildname := filepath.Join(osPathname, deChild.name)
|
||||
err = walk(osChildname, deChild, options)
|
||||
if err != nil {
|
||||
if action := options.ErrorCallback(osChildname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
err = walk(osChildname, deChild, options)
|
||||
debug("osChildname: %q; error: %v\n", osChildname, err)
|
||||
if err == nil || err == SkipThis {
|
||||
continue
|
||||
}
|
||||
if err != filepath.SkipDir {
|
||||
return err
|
||||
}
|
||||
// If received skipdir on a directory, stop processing that
|
||||
// directory, but continue to its siblings. If received skipdir on a
|
||||
// non-directory, stop processing remaining siblings.
|
||||
if deChild.IsSymlink() {
|
||||
// Only need to Stat entry if platform did not already have
|
||||
// os.ModeDir set, such as would be the case for unix like
|
||||
// operating systems. (This guard eliminates extra os.Stat check
|
||||
// on Windows.)
|
||||
if !deChild.IsDir() {
|
||||
// Resolve symbolic link referent to determine whether node
|
||||
// is directory or not.
|
||||
referent, err := os.Readlink(osChildname)
|
||||
// When received SkipDir on a directory or a symbolic link to a
|
||||
// directory, stop processing that directory but continue processing
|
||||
// siblings. When received on a non-directory, stop processing
|
||||
// remaining siblings.
|
||||
isDir, err := deChild.IsDirOrSymlinkToDir()
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Readlink")
|
||||
if action := options.ErrorCallback(osChildname, err); action == SkipNode {
|
||||
continue // with next child
|
||||
continue // ignore and continue with next sibling
|
||||
}
|
||||
return err // caller does not approve of this error
|
||||
}
|
||||
if !isDir {
|
||||
break // stop processing remaining siblings, but allow post children callback
|
||||
}
|
||||
// continue processing remaining siblings
|
||||
}
|
||||
if err = ds.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var osp string
|
||||
if filepath.IsAbs(referent) {
|
||||
osp = referent
|
||||
} else {
|
||||
osp = filepath.Join(osPathname, referent)
|
||||
}
|
||||
|
||||
fi, err := os.Stat(osp)
|
||||
if err != nil {
|
||||
err = errors.Wrap(err, "cannot Stat")
|
||||
if action := options.ErrorCallback(osp, err); action == SkipNode {
|
||||
continue // with next child
|
||||
}
|
||||
return err
|
||||
}
|
||||
deChild.modeType = fi.Mode() & os.ModeType
|
||||
}
|
||||
}
|
||||
if !deChild.IsDir() {
|
||||
// If not directory, return immediately, thus skipping remainder
|
||||
// of siblings.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if options.PostChildrenCallback == nil {
|
||||
return nil
|
||||
}
|
||||
|
|
@ -359,7 +372,6 @@ func walk(osPathname string, dirent *Dirent, options *Options) error {
|
|||
return err
|
||||
}
|
||||
|
||||
err = errors.Wrap(err, "PostChildrenCallback") // wrap potential errors returned by callback
|
||||
if action := options.ErrorCallback(osPathname, err); action == SkipNode {
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -373,6 +373,9 @@ github.com/google/go-querystring/query
|
|||
github.com/google/gofuzz
|
||||
# github.com/google/martian v2.1.1-0.20190517191504-25dcb96d9e51+incompatible
|
||||
## explicit
|
||||
# github.com/google/slowjam v1.0.0
|
||||
## explicit
|
||||
github.com/google/slowjam/pkg/stacklog
|
||||
# github.com/googleapis/gax-go/v2 v2.0.5
|
||||
github.com/googleapis/gax-go/v2
|
||||
# github.com/googleapis/gnostic v0.2.2
|
||||
|
|
@ -405,7 +408,7 @@ github.com/json-iterator/go
|
|||
github.com/jstemmer/go-junit-report
|
||||
github.com/jstemmer/go-junit-report/formatter
|
||||
github.com/jstemmer/go-junit-report/parser
|
||||
# github.com/karrick/godirwalk v1.7.7
|
||||
# github.com/karrick/godirwalk v1.16.1
|
||||
## explicit
|
||||
github.com/karrick/godirwalk
|
||||
# github.com/kevinburke/ssh_config v0.0.0-20190725054713-01f96b0aa0cd
|
||||
|
|
|
|||
Loading…
Reference in New Issue