Stop caching COPY layers (#1408)

Cached COPY layers are expensive in that they both need to be retrieved
over the network and occupy space in the layer cache.

They are unnecessary in that we already have all resources needed to
execute the COPY locally, and doing so is a trivial file-system
operation.  This is in contrast to RUN layers, which can do
arbitrary and unbounded work.

The end result is that cached COPY commands were more expensive when
cached, not less.  Remove them.

Resolves #1357
This commit is contained in:
Ian Kerins 2020-09-30 20:18:50 -04:00 committed by GitHub
parent 60d2e981d9
commit 1240333657
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 23 additions and 356 deletions

View File

@ -384,7 +384,7 @@ as a remote image destination:
### Caching
#### Caching Layers
kaniko can cache layers created by `RUN` and `COPY` commands in a remote repository.
kaniko can cache layers created by `RUN` commands in a remote repository.
Before executing a command, kaniko checks the cache for the layer.
If it exists, kaniko will pull and extract the cached layer instead of executing the command.
If not, kaniko will execute the command and then push the newly created layer to the cache.

View File

@ -51,6 +51,8 @@ type DockerCommand interface {
RequiresUnpackedFS() bool
// Whether the output layer of this command should be cached in and
// retrieved from the layer cache.
ShouldCacheOutput() bool
// ShouldDetectDeletedFiles returns true if the command could delete files.

View File

@ -17,7 +17,6 @@ limitations under the License.
package commands
import (
"fmt"
"os"
"path/filepath"
"strings"
@ -142,90 +141,10 @@ func (c *CopyCommand) RequiresUnpackedFS() bool {
return true
}
func (c *CopyCommand) ShouldCacheOutput() bool {
return true
}
// CacheCommand returns true since this command should be cached
func (c *CopyCommand) CacheCommand(img v1.Image) DockerCommand {
return &CachingCopyCommand{
img: img,
cmd: c.cmd,
buildcontext: c.buildcontext,
extractFn: util.ExtractFile,
}
}
func (c *CopyCommand) From() string {
return c.cmd.From
}
type CachingCopyCommand struct {
BaseCommand
caching
img v1.Image
extractedFiles []string
cmd *instructions.CopyCommand
buildcontext string
extractFn util.ExtractFunction
}
func (cr *CachingCopyCommand) ExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error {
logrus.Infof("Found cached layer, extracting to filesystem")
var err error
if cr.img == nil {
return errors.New(fmt.Sprintf("cached command image is nil %v", cr.String()))
}
layers, err := cr.img.Layers()
if err != nil {
return errors.Wrapf(err, "retrieve image layers")
}
if len(layers) != 1 {
return errors.New(fmt.Sprintf("expected %d layers but got %d", 1, len(layers)))
}
cr.layer = layers[0]
cr.extractedFiles, err = util.GetFSFromLayers(kConfig.RootDir, layers, util.ExtractFunc(cr.extractFn), util.IncludeWhiteout())
logrus.Debugf("extractedFiles: %s", cr.extractedFiles)
if err != nil {
return errors.Wrap(err, "extracting fs from image")
}
return nil
}
func (cr *CachingCopyCommand) FilesUsedFromContext(config *v1.Config, buildArgs *dockerfile.BuildArgs) ([]string, error) {
return copyCmdFilesUsedFromContext(config, buildArgs, cr.cmd, cr.buildcontext)
}
func (cr *CachingCopyCommand) FilesToSnapshot() []string {
f := cr.extractedFiles
logrus.Debugf("%d files extracted by caching copy command", len(f))
logrus.Tracef("Extracted files: %s", f)
return f
}
func (cr *CachingCopyCommand) MetadataOnly() bool {
return false
}
func (cr *CachingCopyCommand) String() string {
if cr.cmd == nil {
return "nil command"
}
return cr.cmd.String()
}
func (cr *CachingCopyCommand) From() string {
return cr.cmd.From
}
func resolveIfSymlink(destPath string) (string, error) {
if !filepath.IsAbs(destPath) {
return "", errors.New("dest path must be abs")

View File

@ -16,7 +16,6 @@ limitations under the License.
package commands
import (
"archive/tar"
"fmt"
"io"
"io/ioutil"
@ -236,159 +235,6 @@ func Test_resolveIfSymlink(t *testing.T) {
}
}
func Test_CachingCopyCommand_ExecuteCommand(t *testing.T) {
tempDir := setupTestTemp()
tarContent, err := prepareTarFixture([]string{"foo.txt"})
if err != nil {
t.Errorf("couldn't prepare tar fixture %v", err)
}
config := &v1.Config{}
buildArgs := &dockerfile.BuildArgs{}
type testCase struct {
desctiption string
expectLayer bool
expectErr bool
count *int
expectedCount int
command *CachingCopyCommand
extractedFiles []string
contextFiles []string
}
testCases := []testCase{
func() testCase {
err = ioutil.WriteFile(filepath.Join(tempDir, "foo.txt"), []byte("meow"), 0644)
if err != nil {
t.Errorf("couldn't write tempfile %v", err)
t.FailNow()
}
c := &CachingCopyCommand{
img: fakeImage{
ImageLayers: []v1.Layer{
fakeLayer{TarContent: tarContent},
},
},
buildcontext: tempDir,
cmd: &instructions.CopyCommand{
SourcesAndDest: []string{
"foo.txt", "foo.txt",
},
},
}
count := 0
tc := testCase{
desctiption: "with valid image and valid layer",
count: &count,
expectedCount: 1,
expectLayer: true,
extractedFiles: []string{"/foo.txt"},
contextFiles: []string{"foo.txt"},
}
c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error {
*tc.count++
return nil
}
tc.command = c
return tc
}(),
func() testCase {
c := &CachingCopyCommand{}
tc := testCase{
desctiption: "with no image",
expectErr: true,
}
c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error {
return nil
}
tc.command = c
return tc
}(),
func() testCase {
c := &CachingCopyCommand{
img: fakeImage{},
}
c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error {
return nil
}
return testCase{
desctiption: "with image containing no layers",
expectErr: true,
command: c,
}
}(),
func() testCase {
c := &CachingCopyCommand{
img: fakeImage{
ImageLayers: []v1.Layer{
fakeLayer{},
},
},
}
c.extractFn = func(_ string, _ *tar.Header, _ io.Reader) error {
return nil
}
tc := testCase{
desctiption: "with image one layer which has no tar content",
expectErr: false, // this one probably should fail but doesn't because of how ExecuteCommand and util.GetFSFromLayers are implemented - cvgw- 2019-11-25
expectLayer: true,
}
tc.command = c
return tc
}(),
}
for _, tc := range testCases {
t.Run(tc.desctiption, func(t *testing.T) {
c := tc.command
err := c.ExecuteCommand(config, buildArgs)
if !tc.expectErr && err != nil {
t.Errorf("Expected err to be nil but was %v", err)
} else if tc.expectErr && err == nil {
t.Error("Expected err but was nil")
}
if tc.count != nil {
if *tc.count != tc.expectedCount {
t.Errorf("Expected extractFn to be called %v times but was called %v times", tc.expectedCount, *tc.count)
}
for _, file := range tc.extractedFiles {
match := false
cFiles := c.FilesToSnapshot()
for _, cFile := range cFiles {
if file == cFile {
match = true
break
}
}
if !match {
t.Errorf("Expected extracted files to include %v but did not %v", file, cFiles)
}
}
cmdFiles, err := c.FilesUsedFromContext(
config, buildArgs,
)
if err != nil {
t.Errorf("failed to get files used from context from command %v", err)
}
if len(cmdFiles) != len(tc.contextFiles) {
t.Errorf("expected files used from context to equal %v but was %v", tc.contextFiles, cmdFiles)
}
}
if c.layer == nil && tc.expectLayer {
t.Error("expected the command to have a layer set but instead was nil")
} else if c.layer != nil && !tc.expectLayer {
t.Error("expected the command to have no layer set but instead found a layer")
}
})
}
}
func TestCopyCommand_ExecuteCommand_Extended(t *testing.T) {
setupDirs := func(t *testing.T) (string, string) {
testDir, err := ioutil.TempDir("", "")

View File

@ -185,8 +185,6 @@ func (s *stageBuilder) populateCompositeKey(command fmt.Stringer, files []string
switch v := command.(type) {
case *commands.CopyCommand:
compositeKey = s.populateCopyCmdCompositeKey(command, v.From(), compositeKey)
case *commands.CachingCopyCommand:
compositeKey = s.populateCopyCmdCompositeKey(command, v.From(), compositeKey)
}
srcCtx := s.opts.SrcContext

View File

@ -794,89 +794,6 @@ func Test_stageBuilder_build(t *testing.T) {
retrieve: true,
},
},
func() testcase {
dir, filenames := tempDirAndFile(t)
filename := filenames[0]
filepath := filepath.Join(dir, filename)
tarContent := generateTar(t, dir, filename)
ch := NewCompositeCache("", "")
ch.AddPath(filepath, "")
hash, err := ch.Hash()
if err != nil {
t.Errorf("couldn't create hash %v", err)
}
copyCommandCacheKey := hash
return testcase{
description: "copy command cache enabled and key in cache",
opts: &config.KanikoOptions{Cache: true},
layerCache: &fakeLayerCache{
retrieve: true,
img: fakeImage{
ImageLayers: []v1.Layer{
fakeLayer{
TarContent: tarContent,
},
},
},
},
rootDir: dir,
expectedCacheKeys: []string{copyCommandCacheKey},
// CachingCopyCommand is not pushed to the cache
pushedCacheKeys: []string{},
commands: getCommands(dir, []instructions.Command{
&instructions.CopyCommand{
SourcesAndDest: []string{
filename, "foo.txt",
},
},
}),
fileName: filename,
}
}(),
func() testcase {
dir, filenames := tempDirAndFile(t)
filename := filenames[0]
tarContent := []byte{}
destDir, err := ioutil.TempDir("", "baz")
if err != nil {
t.Errorf("could not create temp dir %v", err)
}
filePath := filepath.Join(dir, filename)
ch := NewCompositeCache("", "")
ch.AddPath(filePath, "")
hash, err := ch.Hash()
if err != nil {
t.Errorf("couldn't create hash %v", err)
}
return testcase{
description: "copy command cache enabled and key is not in cache",
opts: &config.KanikoOptions{Cache: true},
config: &v1.ConfigFile{Config: v1.Config{WorkingDir: destDir}},
layerCache: &fakeLayerCache{},
image: fakeImage{
ImageLayers: []v1.Layer{
fakeLayer{
TarContent: tarContent,
},
},
},
rootDir: dir,
expectedCacheKeys: []string{hash},
pushedCacheKeys: []string{hash},
commands: getCommands(dir, []instructions.Command{
&instructions.CopyCommand{
SourcesAndDest: []string{
filename, "foo.txt",
},
},
}),
fileName: filename,
}
}(),
func() testcase {
dir, filenames := tempDirAndFile(t)
filename := filenames[0]
@ -887,8 +804,6 @@ func Test_stageBuilder_build(t *testing.T) {
t.Errorf("could not create temp dir %v", err)
}
filePath := filepath.Join(dir, filename)
ch := NewCompositeCache("", fmt.Sprintf("RUN foobar"))
hash1, err := ch.Hash()
@ -896,17 +811,6 @@ func Test_stageBuilder_build(t *testing.T) {
t.Errorf("couldn't create hash %v", err)
}
ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename))
ch.AddPath(filePath, "")
hash2, err := ch.Hash()
if err != nil {
t.Errorf("couldn't create hash %v", err)
}
ch = NewCompositeCache("", fmt.Sprintf("COPY %s foo.txt", filename))
ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename))
ch.AddPath(filePath, "")
image := fakeImage{
ImageLayers: []v1.Layer{
fakeLayer{
@ -940,7 +844,7 @@ COPY %s bar.txt
cmds := stage.Commands
return testcase{
description: "cached run command followed by uncached copy command result in consistent read and write hashes",
description: "cached run command followed by copy command results in consistent read and write hashes",
opts: &config.KanikoOptions{Cache: true},
rootDir: dir,
config: &v1.ConfigFile{Config: v1.Config{WorkingDir: destDir}},
@ -950,9 +854,8 @@ COPY %s bar.txt
},
image: image,
// hash1 is the read cachekey for the first layer
// hash2 is the read cachekey for the second layer
expectedCacheKeys: []string{hash1, hash2},
pushedCacheKeys: []string{hash2},
expectedCacheKeys: []string{hash1},
pushedCacheKeys: []string{},
commands: getCommands(dir, cmds),
}
}(),
@ -960,28 +863,30 @@ COPY %s bar.txt
dir, filenames := tempDirAndFile(t)
filename := filenames[0]
tarContent := generateTar(t, filename)
destDir, err := ioutil.TempDir("", "baz")
if err != nil {
t.Errorf("could not create temp dir %v", err)
}
filePath := filepath.Join(dir, filename)
ch := NewCompositeCache("", fmt.Sprintf("COPY %s foo.txt", filename))
ch := NewCompositeCache("", fmt.Sprintf("COPY %s bar.txt", filename))
ch.AddPath(filePath, "")
hash1, err := ch.Hash()
// copy hash
_, err = ch.Hash()
if err != nil {
t.Errorf("couldn't create hash %v", err)
}
ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename))
ch.AddPath(filePath, "")
hash2, err := ch.Hash()
ch.AddKey(fmt.Sprintf("RUN foobar"))
// run hash
runHash, err := ch.Hash()
if err != nil {
t.Errorf("couldn't create hash %v", err)
}
ch = NewCompositeCache("", fmt.Sprintf("COPY %s foo.txt", filename))
ch.AddKey(fmt.Sprintf("COPY %s bar.txt", filename))
ch.AddPath(filePath, "")
image := fakeImage{
ImageLayers: []v1.Layer{
@ -993,9 +898,9 @@ COPY %s bar.txt
dockerFile := fmt.Sprintf(`
FROM ubuntu:16.04
COPY %s foo.txt
COPY %s bar.txt
`, filename, filename)
RUN foobar
`, filename)
f, _ := ioutil.TempFile("", "")
ioutil.WriteFile(f.Name(), []byte(dockerFile), 0755)
opts := &config.KanikoOptions{
@ -1012,24 +917,21 @@ COPY %s bar.txt
t.Errorf("Failed to parse stages to Kaniko Stages: %s", err)
}
_ = ResolveCrossStageInstructions(kanikoStages)
stage := kanikoStages[0]
cmds := stage.Commands
return testcase{
description: "cached copy command followed by uncached copy command result in consistent read and write hashes",
description: "copy command followed by cached run command results in consistent read and write hashes",
opts: &config.KanikoOptions{Cache: true},
rootDir: dir,
config: &v1.ConfigFile{Config: v1.Config{WorkingDir: destDir}},
layerCache: &fakeLayerCache{
keySequence: []string{hash1},
keySequence: []string{runHash},
img: image,
},
image: image,
// hash1 is the read cachekey for the first layer
// hash2 is the read cachekey for the second layer
expectedCacheKeys: []string{hash1, hash2},
pushedCacheKeys: []string{hash2},
image: image,
expectedCacheKeys: []string{runHash},
pushedCacheKeys: []string{},
commands: getCommands(dir, cmds),
}
}(),