diff --git a/README.md b/README.md index 10fb58ab6..037af172c 100644 --- a/README.md +++ b/README.md @@ -852,6 +852,11 @@ Cache timeout in hours. Defaults to two weeks. Set this flag to clean the filesystem at the end of the build. +#### Flag `--retry-cleanup` + +Set this flag to retry the filesystem cleanup when it fails. +The failure can happen when a volume mounted into the image receives writes during the cleanup. + #### Flag `--compressed-caching` Set this to false in order to prevent tar compression for cached layers. This diff --git a/cmd/executor/cmd/root.go b/cmd/executor/cmd/root.go index 7f0339c5d..c6c124448 100644 --- a/cmd/executor/cmd/root.go +++ b/cmd/executor/cmd/root.go @@ -259,6 +259,7 @@ func addKanikoOptionsFlags() { RootCmd.PersistentFlags().BoolVarP(&opts.Cache, "cache", "", false, "Use cache when building image") RootCmd.PersistentFlags().BoolVarP(&opts.CompressedCaching, "compressed-caching", "", true, "Compress the cached layers. Decreases build time, but increases memory usage.") RootCmd.PersistentFlags().BoolVarP(&opts.Cleanup, "cleanup", "", false, "Clean the filesystem at the end") + RootCmd.PersistentFlags().BoolVarP(&opts.RetryCleanup, "retry-cleanup", "", false, "Add retries to filesystem cleanup between stages and after the build is done") RootCmd.PersistentFlags().DurationVarP(&opts.CacheTTL, "cache-ttl", "", time.Hour*336, "Cache timeout, requires value and unit of duration -> ex: 6h. Defaults to two weeks.") RootCmd.PersistentFlags().VarP(&opts.InsecureRegistries, "insecure-registry", "", "Insecure registry using plain HTTP to push and pull. Set it repeatedly for multiple registries.") RootCmd.PersistentFlags().VarP(&opts.SkipTLSVerifyRegistries, "skip-tls-verify-registry", "", "Insecure registry ignoring TLS verify to push and pull. 
Set it repeatedly for multiple registries.") diff --git a/pkg/config/options.go b/pkg/config/options.go index dbc1e0297..03df3d8b2 100644 --- a/pkg/config/options.go +++ b/pkg/config/options.go @@ -82,6 +82,7 @@ type KanikoOptions struct { NoPushCache bool Cache bool Cleanup bool + RetryCleanup bool CompressedCaching bool IgnoreVarRun bool SkipUnusedStages bool diff --git a/pkg/executor/build.go b/pkg/executor/build.go index 73b2f0df2..43ab6d854 100644 --- a/pkg/executor/build.go +++ b/pkg/executor/build.go @@ -787,7 +787,7 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { } } if opts.Cleanup { - if err = util.DeleteFilesystem(); err != nil { + if err = util.DeleteFilesystem(opts.RetryCleanup); err != nil { return nil, err } } @@ -819,7 +819,7 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { } // Delete the filesystem - if err := util.DeleteFilesystem(); err != nil { + if err := util.DeleteFilesystem(opts.RetryCleanup); err != nil { return nil, errors.Wrap(err, fmt.Sprintf("deleting file system after stage %d", index)) } } diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index c8d5a613a..0f433b009 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -230,7 +230,7 @@ func GetFSFromLayers(root string, layers []v1.Layer, opts ...FSOpt) ([]string, e } // DeleteFilesystem deletes the extracted image file system -func DeleteFilesystem() error { +func DeleteFilesystem(retry bool) error { logrus.Info("Deleting filesystem...") return filepath.Walk(config.RootDir, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -256,7 +256,19 @@ func DeleteFilesystem() error { if path == config.RootDir { return nil } - return os.RemoveAll(path) + if retry { + for i := 0; i < 3; i++ { // Retry 3 times + err := os.RemoveAll(path) + if err == nil { + return nil + } + logrus.Warnf("Error deleting %s, retrying (%d/3): %v", path, i+1, err) + time.Sleep(1 * time.Second) // Wait before retrying + } + return fmt.Errorf("failed to 
delete %s after 3 retries", path) + } else { + return os.RemoveAll(path) + } }) }