From ec3ca84ad92645d60ecb92fb79cf7726e589748a Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Thu, 4 Jun 2020 22:02:13 -0700 Subject: [PATCH 1/4] add another redo snapshotter --- Makefile | 4 ++-- pkg/constants/constants.go | 1 + pkg/executor/build.go | 11 +++++++---- pkg/util/util.go | 22 ++++++++++++++++++++++ 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 0966fe693..1d2e4aeb7 100644 --- a/Makefile +++ b/Makefile @@ -88,8 +88,8 @@ integration-test-misc: .PHONY: images images: - docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:latest -f deploy/Dockerfile . - docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:debug -f deploy/Dockerfile_debug . + docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:perf-latest -f deploy/Dockerfile . + docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:perf-debug -f deploy/Dockerfile_debug . docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/warmer:latest -f deploy/Dockerfile_warmer . .PHONY: push diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index aaa0c815a..0d6ddc9f5 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -47,6 +47,7 @@ const ( // Various snapshot modes: SnapshotModeTime = "time" SnapshotModeFull = "full" + SnapshotModeRedo = "redo" // NoBaseImage is the scratch image NoBaseImage = "scratch" diff --git a/pkg/executor/build.go b/pkg/executor/build.go index d05ddecaa..87c8cad43 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -808,14 +808,17 @@ func saveStageAsTarball(path string, image v1.Image) error { } func getHasher(snapshotMode string) (func(string) (string, error), error) { - if snapshotMode == constants.SnapshotModeTime { + switch snapshotMode { + case constants.SnapshotModeTime: logrus.Info("Only file modification time will be considered when snapshotting") return util.MtimeHasher(), nil - } - if snapshotMode == constants.SnapshotModeFull { + case constants.SnapshotModeFull: return util.Hasher(), nil + case constants.SnapshotModeRedo: + return util.RedoHasher(), nil + default: + return nil, fmt.Errorf("%s is not a valid snapshot mode", snapshotMode) } - return nil, fmt.Errorf("%s is not a valid snapshot mode", snapshotMode) } func resolveOnBuild(stage *config.KanikoStage, config *v1.Config, stageNameToIdx map[string]string) error { diff --git a/pkg/util/util.go b/pkg/util/util.go index a1dd77f4f..72fac9df5 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -115,6 +115,28 @@ func MtimeHasher() func(string) (string, error) { return hasher } +// RedoHasher returns a hash function, which looks at mtime, size, filemode, owner uid and gid +// Note that the mtime can lag, so it's possible that a file will have changed but the mtime may look the same. +func RedoHasher() func(string) (string, error) { + hasher := func(p string) (string, error) { + h := md5.New() + fi, err := os.Lstat(p) + if err != nil { + return "", err + } + h.Write([]byte(fi.Mode().String())) + h.Write([]byte(fi.ModTime().String())) + h.Write([]byte(strconv.FormatInt(fi.Size(), 64))) + h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Uid), 36))) + h.Write([]byte(",")) + h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Gid), 36))) + + return hex.EncodeToString(h.Sum(nil)), nil + } + return hasher +} + + // SHA256 returns the shasum of the contents of r func SHA256(r io.Reader) (string, error) { hasher := sha256.New() From bea020f34f3c34db0fbf2c32aecda048eaee0d08 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Fri, 5 Jun 2020 02:54:32 -0700 Subject: [PATCH 2/4] add another snapshotter --- cmd/executor/cmd/root.go | 23 ++++-- .../{Dockerfile_fs_benchmark => Dockerfile} | 0 integration/benchmark_fs/cloudbuild.yaml | 12 ++++ integration/benchmark_fs/make.sh | 3 +- integration/benchmark_test.go | 72 ++++++++++++++++++- pkg/buildcontext/gcs.go | 21 ++++++ pkg/filesystem/resolve.go | 1 - pkg/util/util.go | 3 +- 8 files changed, 123 insertions(+), 12 deletions(-) rename integration/benchmark_fs/{Dockerfile_fs_benchmark => Dockerfile} (100%) create mode 100644 integration/benchmark_fs/cloudbuild.yaml diff --git a/cmd/executor/cmd/root.go b/cmd/executor/cmd/root.go index 77afc92dd..8b60da706 100644 --- a/cmd/executor/cmd/root.go +++ b/cmd/executor/cmd/root.go @@ -115,16 +115,27 @@ var RootCmd = &cobra.Command{ benchmarkFile := os.Getenv("BENCHMARK_FILE") // false is a keyword for integration tests to turn off benchmarking if benchmarkFile != "" && benchmarkFile != "false" { - f, err := os.Create(benchmarkFile) - if err != nil { - logrus.Warnf("Unable to create benchmarking file %s: %s", benchmarkFile, err) - } - defer f.Close() s, err := timing.JSON() if err != nil { logrus.Warnf("Unable to write benchmark file: %s", err) + return + } + if strings.HasPrefix(benchmarkFile, "gs://") { + logrus.Info("uploading to gcs") + if err := buildcontext.UploadToBucket(strings.NewReader(s), benchmarkFile); err != nil { + logrus.Infof("Unable to upload %s due to %v", benchmarkFile, err) + } + logrus.Infof("benchmark file written at %s", benchmarkFile) + } else { + f, err := os.Create(benchmarkFile) + if err != nil { + logrus.Warnf("Unable to create benchmarking file %s: %s", benchmarkFile, err) + return + } + defer f.Close() + f.WriteString(s) + logrus.Infof("benchmark file written at %s", benchmarkFile) } - f.WriteString(s) } }, } diff --git a/integration/benchmark_fs/Dockerfile_fs_benchmark b/integration/benchmark_fs/Dockerfile similarity index 100% rename from integration/benchmark_fs/Dockerfile_fs_benchmark rename to integration/benchmark_fs/Dockerfile diff --git a/integration/benchmark_fs/cloudbuild.yaml b/integration/benchmark_fs/cloudbuild.yaml new file mode 100644 index 000000000..9f2e7204e --- /dev/null +++ b/integration/benchmark_fs/cloudbuild.yaml @@ -0,0 +1,12 @@ +steps: +- name: 'gcr.io/kaniko-project/executor:perf-latest' + args: + - --build-arg=NUM=${_COUNT} + - --no-push + - --snapshotMode=redo + env: + - 'BENCHMARK_FILE=gs://tejal-test/redo_gcb/benchmark_file_${_COUNT}' + timeout: 2400s +timeout: 2400s +substitutions: + _COUNT: "10000" # default value \ No newline at end of file diff --git a/integration/benchmark_fs/make.sh b/integration/benchmark_fs/make.sh index 828f420ba..e336af448 100755 --- a/integration/benchmark_fs/make.sh +++ b/integration/benchmark_fs/make.sh @@ -17,7 +17,8 @@ mkdir /workdir i=1 -while [ $i -le $1 ] +targetCnt=$(( $1 + 0 )) +while [ $i -le $targetCnt ] do cat context.txt > /workdir/somefile$i i=$(( $i + 1 )) diff --git a/integration/benchmark_test.go b/integration/benchmark_test.go index 036e96beb..fe3286e35 100644 --- a/integration/benchmark_test.go +++ b/integration/benchmark_test.go @@ -21,6 +21,7 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "strconv" "sync" @@ -32,6 +33,7 @@ type result struct { totalBuildTime float64 resolvingFiles float64 walkingFiles float64 + hashingFiles float64 } func TestSnapshotBenchmark(t *testing.T) { @@ -44,7 +46,7 @@ func TestSnapshotBenchmark(t *testing.T) { } contextDir := filepath.Join(cwd, "benchmark_fs") - nums := []int{10000, 50000, 100000, 200000, 300000, 500000, 700000, 800000} + nums := []int{10000, 50000, 100000, 200000, 300000, 500000, 700000} var timeMap sync.Map var wg sync.WaitGroup @@ -53,7 +55,7 @@ func TestSnapshotBenchmark(t *testing.T) { wg.Add(1) var err error go func(num int, err *error) { - dockerfile := "Dockerfile_fs_benchmark" + dockerfile := "Dockerfile" kanikoImage := fmt.Sprintf("%s_%d", GetKanikoImage(config.imageRepo, dockerfile), num) buildArgs := []string{"--build-arg", fmt.Sprintf("NUM=%d", num)} var benchmarkDir string @@ -106,5 +108,71 @@ func newResult(t *testing.T, f string) result { if c, ok := current["Total Build Time"]; ok { r.totalBuildTime = c.Seconds() } + if c, ok := current["Hashing files"]; ok { + r.hashingFiles = c.Seconds() + } + fmt.Println(r) return r } + +func TestSnapshotBenchmarkGcloud(t *testing.T) { + if b, err := strconv.ParseBool(os.Getenv("BENCHMARK")); err != nil || !b { + t.SkipNow() + } + cwd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + contextDir := filepath.Join(cwd, "benchmark_fs") + + nums := []int{10000, 50000, 100000, 200000, 300000, 500000, 700000} + + var wg sync.WaitGroup + fmt.Println("Number of Files,Total Build Time,Walking Filesystem, Resolving Files") + for _, num := range nums { + t.Run(fmt.Sprintf("test_benchmark_%d", num), func(t *testing.T) { + wg.Add(1) + var err error + go func(num int, err error) { + dir, err := runInGcloud(contextDir, num) + if err != nil { + return + } + r := newResult(t, filepath.Join(dir, "results")) + fmt.Println(fmt.Sprintf("%d,%f,%f,%f, %f", num, r.totalBuildTime, r.walkingFiles, r.resolvingFiles, r.hashingFiles)) + wg.Done() + defer os.Remove(dir) + defer os.Chdir(cwd) + }(num, err) + if err != nil { + t.Errorf("could not run benchmark results for num %d due to %s", num, err) + } + }) + } + wg.Wait() +} + +func runInGcloud(dir string, num int) (string, error) { + os.Chdir(dir) + cmd := exec.Command("gcloud", "builds", + "submit", "--config=cloudbuild.yaml", + fmt.Sprintf("--substitutions=_COUNT=%d", num)) + _, err := RunCommandWithoutTest(cmd) + if err != nil { + return "", err + } + + // grab gcs and to temp dir and return + tmpDir, err := ioutil.TempDir("", fmt.Sprintf("%d", num)) + if err != nil { + return "", err + } + src := fmt.Sprintf("gs://tejal-test/redo_gcb/benchmark_file_%d", num) + dest := filepath.Join(tmpDir, "results") + copyCommand := exec.Command("gsutil", "cp", src, dest) + _, err = RunCommandWithoutTest(copyCommand) + if err != nil { + return "", fmt.Errorf("failed to download file to GCS bucket %s: %s", src, err) + } + return tmpDir, nil +} diff --git a/pkg/buildcontext/gcs.go b/pkg/buildcontext/gcs.go index 920028b62..f4e98920d 100644 --- a/pkg/buildcontext/gcs.go +++ b/pkg/buildcontext/gcs.go @@ -17,8 +17,10 @@ limitations under the License. package buildcontext import ( + "io" "os" "path/filepath" + "strings" "cloud.google.com/go/storage" "github.com/GoogleContainerTools/kaniko/pkg/constants" @@ -37,6 +39,25 @@ func (g *GCS) UnpackTarFromBuildContext() (string, error) { return constants.BuildContextDir, unpackTarFromGCSBucket(bucket, item, constants.BuildContextDir) } +func UploadToBucket(r io.Reader, dest string) error { + ctx := context.Background() + context := strings.SplitAfter(dest, "://")[1] + bucketName, item := util.GetBucketAndItem(context) + client, err := storage.NewClient(ctx) + if err != nil { + return err + } + bucket := client.Bucket(bucketName) + w := bucket.Object(item).NewWriter(ctx) + if _, err := io.Copy(w, r); err != nil { + return err + } + if err := w.Close(); err != nil { + return err + } + return nil +} + // unpackTarFromGCSBucket unpacks the context.tar.gz file in the given bucket to the given directory func unpackTarFromGCSBucket(bucketName, item, directory string) error { // Get the tar from the bucket diff --git a/pkg/filesystem/resolve.go b/pkg/filesystem/resolve.go index 64121146a..30a79dc05 100644 --- a/pkg/filesystem/resolve.go +++ b/pkg/filesystem/resolve.go @@ -35,7 +35,6 @@ import ( // output set. // * Add all ancestors of each path to the output set. func ResolvePaths(paths []string, wl []util.IgnoreListEntry) (pathsToAdd []string, err error) { - logrus.Infof("Resolving %d paths", len(paths)) logrus.Tracef("Resolving paths %s", paths) fileSet := make(map[string]bool) diff --git a/pkg/util/util.go b/pkg/util/util.go index 72fac9df5..790520e90 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -126,7 +126,7 @@ func RedoHasher() func(string) (string, error) { } h.Write([]byte(fi.Mode().String())) h.Write([]byte(fi.ModTime().String())) - h.Write([]byte(strconv.FormatInt(fi.Size(), 64))) + h.Write([]byte(strconv.FormatInt(fi.Size(), 16))) h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Uid), 36))) h.Write([]byte(",")) h.Write([]byte(strconv.FormatUint(uint64(fi.Sys().(*syscall.Stat_t).Gid), 36))) @@ -136,7 +136,6 @@ func RedoHasher() func(string) (string, error) { return hasher } - // SHA256 returns the shasum of the contents of r func SHA256(r io.Reader) (string, error) { hasher := sha256.New() From fc1b70d1952fff3d424303a950aab9b99a6e8cf6 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 00:40:20 -0700 Subject: [PATCH 3/4] makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1d2e4aeb7..0966fe693 100644 --- a/Makefile +++ b/Makefile @@ -88,8 +88,8 @@ integration-test-misc: .PHONY: images images: - docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:perf-latest -f deploy/Dockerfile . - docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:perf-debug -f deploy/Dockerfile_debug . + docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:latest -f deploy/Dockerfile . + docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/executor:debug -f deploy/Dockerfile_debug . docker build ${BUILD_ARG} --build-arg=GOARCH=$(GOARCH) -t $(REGISTRY)/warmer:latest -f deploy/Dockerfile_warmer . .PHONY: push From 35d235848259f6675e517f2630053a8f140a3371 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Sat, 6 Jun 2020 00:53:08 -0700 Subject: [PATCH 4/4] add integration tests --- .../dockerfiles/Dockerfile_test_run_redo | 26 +++++++++++++++++++ integration/images.go | 2 ++ 2 files changed, 28 insertions(+) create mode 100644 integration/dockerfiles/Dockerfile_test_run_redo diff --git a/integration/dockerfiles/Dockerfile_test_run_redo b/integration/dockerfiles/Dockerfile_test_run_redo new file mode 100644 index 000000000..e6aa5ef31 --- /dev/null +++ b/integration/dockerfiles/Dockerfile_test_run_redo @@ -0,0 +1,26 @@ +# Copyright 2020 Google, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM debian:9.11 +RUN echo "hey" > /etc/foo +RUN echo "baz" > /etc/baz +RUN cp /etc/baz /etc/bar +RUN rm /etc/baz + +# Test with ARG +ARG file +RUN echo "run" > $file + +RUN echo "test home" > $HOME/file +COPY context/foo $HOME/foo diff --git a/integration/images.go b/integration/images.go index 8865e836a..b149294c8 100644 --- a/integration/images.go +++ b/integration/images.go @@ -48,6 +48,7 @@ const ( // Arguments to build Dockerfiles with, used for both docker and kaniko builds var argsMap = map[string][]string{ "Dockerfile_test_run": {"file=/file"}, + "Dockerfile_test_run_redo": {"file=/file"}, "Dockerfile_test_workdir": {"workdir=/arg/workdir"}, "Dockerfile_test_add": {"file=context/foo"}, "Dockerfile_test_arg_secret": {"SSH_PRIVATE_KEY", "SSH_PUBLIC_KEY=Pµbl1cK€Y"}, @@ -74,6 +75,7 @@ var additionalDockerFlagsMap = map[string][]string{ // Arguments to build Dockerfiles with when building with kaniko var additionalKanikoFlagsMap = map[string][]string{ "Dockerfile_test_add": {"--single-snapshot"}, + "Dockerfile_test_run_redo": {"--snapshotMode=redo"}, "Dockerfile_test_scratch": {"--single-snapshot"}, "Dockerfile_test_maintainer": {"--single-snapshot"}, "Dockerfile_test_target": {"--target=second"},