From 6ab97e2b5f07319a856a5fd1f9d10ff60ba14878 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Fri, 5 Jun 2020 12:46:47 -0700 Subject: [PATCH 1/5] ADD GCB benchmark code --- cmd/executor/cmd/root.go | 23 ++++-- .../{Dockerfile_fs_benchmark => Dockerfile} | 0 integration/benchmark_fs/cloudbuild.yaml | 12 ++++ integration/benchmark_fs/make.sh | 3 +- integration/benchmark_test.go | 72 ++++++++++++++++++- pkg/buildcontext/gcs.go | 21 ++++++ pkg/filesystem/resolve.go | 1 - 7 files changed, 122 insertions(+), 10 deletions(-) rename integration/benchmark_fs/{Dockerfile_fs_benchmark => Dockerfile} (100%) create mode 100644 integration/benchmark_fs/cloudbuild.yaml diff --git a/cmd/executor/cmd/root.go b/cmd/executor/cmd/root.go index 77afc92dd..8b60da706 100644 --- a/cmd/executor/cmd/root.go +++ b/cmd/executor/cmd/root.go @@ -115,16 +115,27 @@ var RootCmd = &cobra.Command{ benchmarkFile := os.Getenv("BENCHMARK_FILE") // false is a keyword for integration tests to turn off benchmarking if benchmarkFile != "" && benchmarkFile != "false" { - f, err := os.Create(benchmarkFile) - if err != nil { - logrus.Warnf("Unable to create benchmarking file %s: %s", benchmarkFile, err) - } - defer f.Close() s, err := timing.JSON() if err != nil { logrus.Warnf("Unable to write benchmark file: %s", err) + return + } + if strings.HasPrefix(benchmarkFile, "gs://") { + logrus.Info("uploading to gcs") + if err := buildcontext.UploadToBucket(strings.NewReader(s), benchmarkFile); err != nil { + logrus.Infof("Unable to upload %s due to %v", benchmarkFile, err) + } + logrus.Infof("benchmark file written at %s", benchmarkFile) + } else { + f, err := os.Create(benchmarkFile) + if err != nil { + logrus.Warnf("Unable to create benchmarking file %s: %s", benchmarkFile, err) + return + } + defer f.Close() + f.WriteString(s) + logrus.Infof("benchmark file written at %s", benchmarkFile) } - f.WriteString(s) } }, } diff --git a/integration/benchmark_fs/Dockerfile_fs_benchmark b/integration/benchmark_fs/Dockerfile similarity index 100% rename from integration/benchmark_fs/Dockerfile_fs_benchmark rename to integration/benchmark_fs/Dockerfile diff --git a/integration/benchmark_fs/cloudbuild.yaml b/integration/benchmark_fs/cloudbuild.yaml new file mode 100644 index 000000000..9f2e7204e --- /dev/null +++ b/integration/benchmark_fs/cloudbuild.yaml @@ -0,0 +1,12 @@ +steps: +- name: 'gcr.io/kaniko-project/executor:perf-latest' + args: + - --build-arg=NUM=${_COUNT} + - --no-push + - --snapshotMode=redo + env: + - 'BENCHMARK_FILE=gs://tejal-test/redo_gcb/benchmark_file_${_COUNT}' + timeout: 2400s +timeout: 2400s +substitutions: + _COUNT: "10000" # default value \ No newline at end of file diff --git a/integration/benchmark_fs/make.sh b/integration/benchmark_fs/make.sh index 828f420ba..e336af448 100755 --- a/integration/benchmark_fs/make.sh +++ b/integration/benchmark_fs/make.sh @@ -17,7 +17,8 @@ mkdir /workdir i=1 -while [ $i -le $1 ] +targetCnt=$(( $1 + 0 )) +while [ $i -le $targetCnt ] do cat context.txt > /workdir/somefile$i i=$(( $i + 1 )) diff --git a/integration/benchmark_test.go b/integration/benchmark_test.go index 036e96beb..fe3286e35 100644 --- a/integration/benchmark_test.go +++ b/integration/benchmark_test.go @@ -21,6 +21,7 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "strconv" "sync" @@ -32,6 +33,7 @@ type result struct { totalBuildTime float64 resolvingFiles float64 walkingFiles float64 + hashingFiles float64 } func TestSnapshotBenchmark(t *testing.T) { @@ -44,7 +46,7 @@ func TestSnapshotBenchmark(t *testing.T) { } contextDir := filepath.Join(cwd, "benchmark_fs") - nums := []int{10000, 50000, 100000, 200000, 300000, 500000, 700000, 800000} + nums := []int{10000, 50000, 100000, 200000, 300000, 500000, 700000} var timeMap sync.Map var wg sync.WaitGroup @@ -53,7 +55,7 @@ func TestSnapshotBenchmark(t *testing.T) { wg.Add(1) var err error go func(num int, err *error) { - dockerfile := "Dockerfile_fs_benchmark" + dockerfile := "Dockerfile" kanikoImage := fmt.Sprintf("%s_%d", GetKanikoImage(config.imageRepo, dockerfile), num) buildArgs := []string{"--build-arg", fmt.Sprintf("NUM=%d", num)} var benchmarkDir string @@ -106,5 +108,71 @@ func newResult(t *testing.T, f string) result { if c, ok := current["Total Build Time"]; ok { r.totalBuildTime = c.Seconds() } + if c, ok := current["Hashing files"]; ok { + r.hashingFiles = c.Seconds() + } + fmt.Println(r) return r } + +func TestSnapshotBenchmarkGcloud(t *testing.T) { + if b, err := strconv.ParseBool(os.Getenv("BENCHMARK")); err != nil || !b { + t.SkipNow() + } + cwd, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + contextDir := filepath.Join(cwd, "benchmark_fs") + + nums := []int{10000, 50000, 100000, 200000, 300000, 500000, 700000} + + var wg sync.WaitGroup + fmt.Println("Number of Files,Total Build Time,Walking Filesystem, Resolving Files") + for _, num := range nums { + t.Run(fmt.Sprintf("test_benchmark_%d", num), func(t *testing.T) { + wg.Add(1) + var err error + go func(num int, err error) { + dir, err := runInGcloud(contextDir, num) + if err != nil { + return + } + r := newResult(t, filepath.Join(dir, "results")) + fmt.Println(fmt.Sprintf("%d,%f,%f,%f, %f", num, r.totalBuildTime, r.walkingFiles, r.resolvingFiles, r.hashingFiles)) + wg.Done() + defer os.Remove(dir) + defer os.Chdir(cwd) + }(num, err) + if err != nil { + t.Errorf("could not run benchmark results for num %d due to %s", num, err) + } + }) + } + wg.Wait() +} + +func runInGcloud(dir string, num int) (string, error) { + os.Chdir(dir) + cmd := exec.Command("gcloud", "builds", + "submit", "--config=cloudbuild.yaml", + fmt.Sprintf("--substitutions=_COUNT=%d", num)) + _, err := RunCommandWithoutTest(cmd) + if err != nil { + return "", err + } + + // grab gcs and to temp dir and return + tmpDir, err := ioutil.TempDir("", fmt.Sprintf("%d", num)) + if err != nil { + return "", err + } + src := fmt.Sprintf("gs://tejal-test/redo_gcb/benchmark_file_%d", num) + dest := filepath.Join(tmpDir, "results") + copyCommand := exec.Command("gsutil", "cp", src, dest) + _, err = RunCommandWithoutTest(copyCommand) + if err != nil { + return "", fmt.Errorf("failed to download file to GCS bucket %s: %s", src, err) + } + return tmpDir, nil +} diff --git a/pkg/buildcontext/gcs.go b/pkg/buildcontext/gcs.go index 920028b62..f4e98920d 100644 --- a/pkg/buildcontext/gcs.go +++ b/pkg/buildcontext/gcs.go @@ -17,8 +17,10 @@ limitations under the License. package buildcontext import ( + "io" "os" "path/filepath" + "strings" "cloud.google.com/go/storage" "github.com/GoogleContainerTools/kaniko/pkg/constants" @@ -37,6 +39,25 @@ func (g *GCS) UnpackTarFromBuildContext() (string, error) { return constants.BuildContextDir, unpackTarFromGCSBucket(bucket, item, constants.BuildContextDir) } +func UploadToBucket(r io.Reader, dest string) error { + ctx := context.Background() + context := strings.SplitAfter(dest, "://")[1] + bucketName, item := util.GetBucketAndItem(context) + client, err := storage.NewClient(ctx) + if err != nil { + return err + } + bucket := client.Bucket(bucketName) + w := bucket.Object(item).NewWriter(ctx) + if _, err := io.Copy(w, r); err != nil { + return err + } + if err := w.Close(); err != nil { + return err + } + return nil +} + // unpackTarFromGCSBucket unpacks the context.tar.gz file in the given bucket to the given directory func unpackTarFromGCSBucket(bucketName, item, directory string) error { // Get the tar from the bucket diff --git a/pkg/filesystem/resolve.go b/pkg/filesystem/resolve.go index 64121146a..30a79dc05 100644 --- a/pkg/filesystem/resolve.go +++ b/pkg/filesystem/resolve.go @@ -35,7 +35,6 @@ import ( // output set. // * Add all ancestors of each path to the output set. func ResolvePaths(paths []string, wl []util.IgnoreListEntry) (pathsToAdd []string, err error) { - logrus.Infof("Resolving %d paths", len(paths)) logrus.Tracef("Resolving paths %s", paths) fileSet := make(map[string]bool) From 66c82666279e34eaa6b23b8c99873ff87b7c62d5 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Fri, 5 Jun 2020 13:58:30 -0700 Subject: [PATCH 2/5] remove private buckets --- integration/benchmark_fs/cloudbuild.yaml | 2 +- integration/benchmark_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/benchmark_fs/cloudbuild.yaml b/integration/benchmark_fs/cloudbuild.yaml index 9f2e7204e..01a83440b 100644 --- a/integration/benchmark_fs/cloudbuild.yaml +++ b/integration/benchmark_fs/cloudbuild.yaml @@ -5,7 +5,7 @@ steps: - --no-push - --snapshotMode=redo env: - - 'BENCHMARK_FILE=gs://tejal-test/redo_gcb/benchmark_file_${_COUNT}' + - 'BENCHMARK_FILE=gs://$PROJECT_ID/gcb/benchmark_file_${_COUNT}' timeout: 2400s timeout: 2400s substitutions: diff --git a/integration/benchmark_test.go b/integration/benchmark_test.go index fe3286e35..8503315a1 100644 --- a/integration/benchmark_test.go +++ b/integration/benchmark_test.go @@ -167,7 +167,7 @@ func runInGcloud(dir string, num int) (string, error) { if err != nil { return "", err } - src := fmt.Sprintf("gs://tejal-test/redo_gcb/benchmark_file_%d", num) + src := fmt.Sprintf("%s/gcb/benchmark_file_%d", config.gcsBucket, num) dest := filepath.Join(tmpDir, "results") copyCommand := exec.Command("gsutil", "cp", src, dest) _, err = RunCommandWithoutTest(copyCommand) From 95a8ecc200385f77b1fd11ba59af482c4c2318b8 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Fri, 5 Jun 2020 14:58:36 -0700 Subject: [PATCH 3/5] fix lint --- integration/benchmark_fs/Dockerfile | 13 +++++++++++++ integration/benchmark_test.go | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/integration/benchmark_fs/Dockerfile b/integration/benchmark_fs/Dockerfile index 4065467c4..9a70889f1 100644 --- a/integration/benchmark_fs/Dockerfile +++ b/integration/benchmark_fs/Dockerfile @@ -1,3 +1,16 @@ +# Copyright 2020 Google, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. FROM bash:4.4 ARG NUM diff --git a/integration/benchmark_test.go b/integration/benchmark_test.go index 8503315a1..7c09f6438 100644 --- a/integration/benchmark_test.go +++ b/integration/benchmark_test.go @@ -134,7 +134,8 @@ func TestSnapshotBenchmarkGcloud(t *testing.T) { wg.Add(1) var err error go func(num int, err error) { - dir, err := runInGcloud(contextDir, num) + var dir string + dir, err = runInGcloud(contextDir, num) if err != nil { return } From ba00c9fb7d373b386742341b25a4a246fcb6433b Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Fri, 5 Jun 2020 15:10:28 -0700 Subject: [PATCH 4/5] add benchmarking for gcb --- DEVELOPMENT.md | 15 +++++++++++++++ integration/benchmark_fs/cloudbuild.yaml | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 18b69b3e3..330d9ba56 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -166,6 +166,21 @@ Additionally, the integration tests can output benchmarking information to a `be BENCHMARK=true go test -v --bucket $GCS_BUCKET --repo $IMAGE_REPO ``` +#### Benchmarking your GCB runs +If you are GCB builds are slow, you can check which phases in kaniko are bottlenecks or taking more time. +To do this, add "BENCHMARK_ENV" to your cloudbuild.yaml like this. +```shell script +steps: +- name: 'gcr.io/kaniko-project/executor:latest' + args: + - --build-arg=NUM=${_COUNT} + - --no-push + - --snapshotMode=redo + env: + - 'BENCHMARK_FILE=gs://$PROJECT_ID/gcb/benchmark_file' +``` +You can download the file `gs://$PROJECT_ID/gcb/benchmark_file` using `gsutil cp` command. + ## Creating a PR When you have changes you would like to propose to kaniko, you will need to: diff --git a/integration/benchmark_fs/cloudbuild.yaml b/integration/benchmark_fs/cloudbuild.yaml index 01a83440b..829fb946b 100644 --- a/integration/benchmark_fs/cloudbuild.yaml +++ b/integration/benchmark_fs/cloudbuild.yaml @@ -1,5 +1,5 @@ steps: -- name: 'gcr.io/kaniko-project/executor:perf-latest' +- name: 'gcr.io/kaniko-project/executor:latest' args: - --build-arg=NUM=${_COUNT} - --no-push From 57818cfb7997fea58a7c4c502116326f90c50146 Mon Sep 17 00:00:00 2001 From: Tejal Desai Date: Fri, 5 Jun 2020 16:07:32 -0700 Subject: [PATCH 5/5] lint --- integration/benchmark_test.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/integration/benchmark_test.go b/integration/benchmark_test.go index 7c09f6438..f7364114d 100644 --- a/integration/benchmark_test.go +++ b/integration/benchmark_test.go @@ -132,11 +132,10 @@ func TestSnapshotBenchmarkGcloud(t *testing.T) { for _, num := range nums { t.Run(fmt.Sprintf("test_benchmark_%d", num), func(t *testing.T) { wg.Add(1) - var err error - go func(num int, err error) { - var dir string - dir, err = runInGcloud(contextDir, num) + go func(num int) { + dir, err := runInGcloud(contextDir, num) if err != nil { + t.Errorf("error when running in gcloud %v", err) return } r := newResult(t, filepath.Join(dir, "results")) @@ -144,10 +143,7 @@ func TestSnapshotBenchmarkGcloud(t *testing.T) { wg.Done() defer os.Remove(dir) defer os.Chdir(cwd) - }(num, err) - if err != nil { - t.Errorf("could not run benchmark results for num %d due to %s", num, err) - } + }(num) }) } wg.Wait()