From 5bdb87e0e7f48958c151819aaf245592554cff42 Mon Sep 17 00:00:00 2001 From: Priya Wadhwa Date: Wed, 29 Aug 2018 15:36:25 -0700 Subject: [PATCH 1/3] Extract filesystem in order rather than in reverse Extracting the layers of the filesystem in order will make it easier to extract cached layers and deal with hardlinks. This PR implements extracting in order and adds an integration tests to make sure hardlinks are extracted properly. It also fixes two bugs I found when extracting symlinks: 1. We'd get a "file exists" error when trying to symlink to an existing file with a whiteout later in the layer tarball 2. We'd get a "file exists" error when trying to create a symlink from a file that was created in a prior layer (perhaps as a regular file or as a symlink pointing to someting else) To fix both of these, we resolve all symlinks in a layer at the end. I also added logic to delete any existing paths before creating the symlink. --- .../dockerfiles/Dockerfile_hardlink_base | 3 ++ .../dockerfiles/Dockerfile_test_hardlink | 5 +++ .../dockerfiles/Dockerfile_test_mv_add | 1 + integration/integration_test.go | 32 ++++++++++++-- pkg/util/fs_util.go | 44 +++++++++---------- pkg/util/fs_util_test.go | 43 +++++++++++++++--- pkg/util/tar_util.go | 32 +++++++++----- 7 files changed, 116 insertions(+), 44 deletions(-) create mode 100644 integration/dockerfiles/Dockerfile_hardlink_base create mode 100644 integration/dockerfiles/Dockerfile_test_hardlink diff --git a/integration/dockerfiles/Dockerfile_hardlink_base b/integration/dockerfiles/Dockerfile_hardlink_base new file mode 100644 index 000000000..deaec7d60 --- /dev/null +++ b/integration/dockerfiles/Dockerfile_hardlink_base @@ -0,0 +1,3 @@ +FROM alpine@sha256:5ce5f501c457015c4b91f91a15ac69157d9b06f1a75cf9107bf2b62e0843983a AS stage1 +RUN apk --no-cache add git +RUN rm /usr/bin/git && ln -s /usr/libexec/git-core/git /usr/bin/git diff --git a/integration/dockerfiles/Dockerfile_test_hardlink b/integration/dockerfiles/Dockerfile_test_hardlink new file mode 100644 index 000000000..fd2968ee8 --- /dev/null +++ b/integration/dockerfiles/Dockerfile_test_hardlink @@ -0,0 +1,5 @@ +FROM gcr.io/kaniko-test/hardlink-base:latest +RUN ls -al /usr/libexec/git-core/git /usr/bin/git /usr/libexec/git-core/git-diff +RUN stat /usr/bin/git +RUN stat /usr/libexec/git-core/git +RUN git --version > /git-version diff --git a/integration/dockerfiles/Dockerfile_test_mv_add b/integration/dockerfiles/Dockerfile_test_mv_add index a81e2318a..d01d8c55d 100644 --- a/integration/dockerfiles/Dockerfile_test_mv_add +++ b/integration/dockerfiles/Dockerfile_test_mv_add @@ -1,4 +1,5 @@ FROM busybox@sha256:1bd6df27274fef1dd36eb529d0f4c8033f61c675d6b04213dd913f902f7cafb5 ADD context/tars /tmp/tars +RUN stat /bin/sh RUN mv /tmp/tars /foo RUN echo "hi" diff --git a/integration/integration_test.go b/integration/integration_test.go index 372922a43..bc0d40d8c 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -37,9 +37,10 @@ var config = initGCPConfig() var imageBuilder *DockerFileBuilder type gcpConfig struct { - gcsBucket string - imageRepo string - onbuildBaseImage string + gcsBucket string + imageRepo string + onbuildBaseImage string + hardlinkBaseImage string } type imageDetails struct { @@ -65,6 +66,7 @@ func initGCPConfig() *gcpConfig { c.imageRepo = c.imageRepo + "/" } c.onbuildBaseImage = c.imageRepo + "onbuild-base:latest" + c.hardlinkBaseImage = c.imageRepo + "hardlink-base:latest" return &c } @@ -141,6 +143,30 @@ func TestMain(m *testing.M) { os.Exit(1) } + fmt.Println("Building onbuild base image") + buildOnbuildBase := exec.Command("docker", "build", "-t", config.onbuildBaseImage, "-f", "dockerfiles/Dockerfile_onbuild_base", ".") + if err := buildOnbuildBase.Run(); err != nil { + fmt.Printf("error building onbuild base: %v", err) + os.Exit(1) + } + pushOnbuildBase := exec.Command("docker", "push", config.onbuildBaseImage) + if err := pushOnbuildBase.Run(); err != nil { + fmt.Printf("error pushing onbuild base %s: %v", config.onbuildBaseImage, err) + os.Exit(1) + } + + fmt.Println("Building hardlink base image") + buildHardlinkBase := exec.Command("docker", "build", "-t", config.hardlinkBaseImage, "-f", "dockerfiles/Dockerfile_hardlink_base", ".") + if err := buildHardlinkBase.Run(); err != nil { + fmt.Printf("error building hardlink base: %v", err) + os.Exit(1) + } + pushHardlinkBase := exec.Command("docker", "push", config.hardlinkBaseImage) + if err := pushHardlinkBase.Run(); err != nil { + fmt.Printf("error pushing hardlink base %s: %v", config.hardlinkBaseImage, err) + os.Exit(1) + } + dockerfiles, err := FindDockerFiles(dockerfilesPath) if err != nil { fmt.Printf("Coudn't create map of dockerfiles: %s", err) diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index 67a972002..b63a41111 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -28,6 +28,7 @@ import ( "time" "github.com/google/go-containerregistry/pkg/v1" + "github.com/pkg/errors" "github.com/GoogleContainerTools/kaniko/pkg/constants" "github.com/sirupsen/logrus" @@ -53,17 +54,14 @@ func GetFSFromImage(root string, img v1.Image) error { return err } - fs := map[string]struct{}{} - whiteouts := map[string]struct{}{} - - for i := len(layers) - 1; i >= 0; i-- { - logrus.Infof("Unpacking layer: %d", i) - l := layers[i] + for i, l := range layers { + logrus.Infof("Extracting layer %d", i) r, err := l.Uncompressed() if err != nil { return err } tr := tar.NewReader(r) + symlinks := []*tar.Header{} for { hdr, err := tr.Next() if err == io.EOF { @@ -78,16 +76,9 @@ func GetFSFromImage(root string, img v1.Image) error { if strings.HasPrefix(base, ".wh.") { logrus.Infof("Whiting out %s", path) name := strings.TrimPrefix(base, ".wh.") - whiteouts[filepath.Join(dir, name)] = struct{}{} - continue - } - - if checkWhiteouts(path, whiteouts) { - logrus.Infof("Not adding %s because it is whited out", path) - continue - } - if _, ok := fs[path]; ok { - logrus.Infof("Not adding %s because it was added by a prior layer", path) + if err := os.RemoveAll(filepath.Join(dir, name)); err != nil { + return errors.Wrapf(err, "removing whiteout %s", hdr.Name) + } continue } whitelisted, err := CheckWhitelist(path) @@ -107,13 +98,18 @@ func GetFSFromImage(root string, img v1.Image) error { logrus.Debugf("skipping symlink from %s to %s because %s is whitelisted", hdr.Linkname, path, hdr.Linkname) continue } + symlinks = append(symlinks, hdr) + continue } - fs[path] = struct{}{} - if err := extractFile(root, hdr, tr); err != nil { return err } } + for _, s := range symlinks { + if err := extractFile(root, s, nil); err != nil { + return errors.Wrapf(err, "extracting symlink %s", s.Name) + } + } } return nil } @@ -224,25 +220,29 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error { case tar.TypeLink: logrus.Debugf("link from %s to %s", hdr.Linkname, path) // The base directory for a link may not exist before it is created. - dir := filepath.Dir(path) if err := os.MkdirAll(dir, 0755); err != nil { return err } - if err := os.Symlink(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil { + if err := os.Link(filepath.Clean(filepath.Join("/", hdr.Linkname)), path); err != nil { return err } case tar.TypeSymlink: logrus.Debugf("symlink from %s to %s", hdr.Linkname, path) // The base directory for a symlink may not exist before it is created. - dir := filepath.Dir(path) if err := os.MkdirAll(dir, 0755); err != nil { return err } + // Check if something already exists at path + // If so, delete it + if FilepathExists(path) { + if err := os.Remove(path); err != nil { + return errors.Wrapf(err, "error removing %s to make way for new symlink", hdr.Name) + } + } if err := os.Symlink(hdr.Linkname, path); err != nil { return err } - } return nil } diff --git a/pkg/util/fs_util_test.go b/pkg/util/fs_util_test.go index 2ffd0f61d..3810f5d55 100644 --- a/pkg/util/fs_util_test.go +++ b/pkg/util/fs_util_test.go @@ -343,7 +343,7 @@ func fileExists(p string) checker { return func(root string, t *testing.T) { _, err := os.Stat(filepath.Join(root, p)) if err != nil { - t.Fatalf("File does not exist") + t.Fatalf("File %s does not exist", filepath.Join(root, p)) } } } @@ -385,6 +385,24 @@ func linkPointsTo(src, dst string) checker { } } +func filesAreHardlinks(first, second string) checker { + return func(root string, t *testing.T) { + fi1, err := os.Stat(filepath.Join(root, first)) + if err != nil { + t.Fatalf("error getting file %s", first) + } + fi2, err := os.Stat(filepath.Join(second)) + if err != nil { + t.Fatalf("error getting file %s", second) + } + stat1 := getSyscallStat_t(fi1) + stat2 := getSyscallStat_t(fi2) + if stat1.Ino != stat2.Ino { + t.Errorf("%s and %s aren't hardlinks as they dont' have the same inode", first, second) + } + } +} + func fileHeader(name string, contents string, mode int64) *tar.Header { return &tar.Header{ Name: name, @@ -429,6 +447,7 @@ func TestExtractFile(t *testing.T) { type tc struct { name string hdrs []*tar.Header + tmpdir string contents []byte checkers []checker } @@ -500,13 +519,15 @@ func TestExtractFile(t *testing.T) { }, }, { - name: "hardlink", + name: "hardlink", + tmpdir: "/tmp/hardlink", hdrs: []*tar.Header{ fileHeader("/bin/gzip", "gzip-binary", 0751), - hardlinkHeader("/bin/uncompress", "/bin/gzip"), + hardlinkHeader("/bin/uncompress", "/tmp/hardlink/bin/gzip"), }, checkers: []checker{ - linkPointsTo("/bin/uncompress", "/bin/gzip"), + fileExists("/bin/gzip"), + filesAreHardlinks("/bin/uncompress", "/tmp/hardlink/bin/gzip"), }, }, } @@ -515,11 +536,19 @@ func TestExtractFile(t *testing.T) { t.Run(tc.name, func(t *testing.T) { tc := tc t.Parallel() - r, err := ioutil.TempDir("", "") - if err != nil { - t.Fatal(err) + r := "" + var err error + + if tc.tmpdir != "" { + r = tc.tmpdir + } else { + r, err = ioutil.TempDir("", "") + if err != nil { + t.Fatal(err) + } } defer os.RemoveAll(r) + for _, hdr := range tc.hdrs { if err := extractFile(r, hdr, bytes.NewReader(tc.contents)); err != nil { t.Fatal(err) diff --git a/pkg/util/tar_util.go b/pkg/util/tar_util.go index 898844784..5f662d962 100644 --- a/pkg/util/tar_util.go +++ b/pkg/util/tar_util.go @@ -118,24 +118,32 @@ func (t *Tar) Whiteout(p string) error { func (t *Tar) checkHardlink(p string, i os.FileInfo) (bool, string) { hardlink := false linkDst := "" - if sys := i.Sys(); sys != nil { - if stat, ok := sys.(*syscall.Stat_t); ok { - nlinks := stat.Nlink - if nlinks > 1 { - inode := stat.Ino - if original, exists := t.hardlinks[inode]; exists && original != p { - hardlink = true - logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original) - linkDst = original - } else { - t.hardlinks[inode] = p - } + stat := getSyscallStat_t(i) + if stat != nil { + nlinks := stat.Nlink + if nlinks > 1 { + inode := stat.Ino + if original, exists := t.hardlinks[inode]; exists && original != p { + hardlink = true + logrus.Debugf("%s inode exists in hardlinks map, linking to %s", p, original) + linkDst = original + } else { + t.hardlinks[inode] = p } } } return hardlink, linkDst } +func getSyscallStat_t(i os.FileInfo) *syscall.Stat_t { + if sys := i.Sys(); sys != nil { + if stat, ok := sys.(*syscall.Stat_t); ok { + return stat + } + } + return nil +} + // UnpackLocalTarArchive unpacks the tar archive at path to the directory dest // Returns the files extracted from the tar archive func UnpackLocalTarArchive(path, dest string) ([]string, error) { From 85393a60c200b9c7429af84360ec9f58374e6b5e Mon Sep 17 00:00:00 2001 From: Priya Wadhwa Date: Wed, 29 Aug 2018 16:11:03 -0700 Subject: [PATCH 2/3] Fixed unit tests --- pkg/util/fs_util.go | 14 ----------- pkg/util/fs_util_test.go | 52 ---------------------------------------- 2 files changed, 66 deletions(-) diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index b63a41111..0c020bdb5 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -247,20 +247,6 @@ func extractFile(dest string, hdr *tar.Header, tr io.Reader) error { return nil } -func checkWhiteouts(path string, whiteouts map[string]struct{}) bool { - // Don't add the file if it or it's directory are whited out. - if _, ok := whiteouts[path]; ok { - return true - } - for wd := range whiteouts { - if HasFilepathPrefix(path, wd) { - logrus.Infof("Not adding %s because it's directory is whited out", path) - return true - } - } - return false -} - func CheckWhitelist(path string) (bool, error) { abs, err := filepath.Abs(path) if err != nil { diff --git a/pkg/util/fs_util_test.go b/pkg/util/fs_util_test.go index 3810f5d55..5bfb1a403 100644 --- a/pkg/util/fs_util_test.go +++ b/pkg/util/fs_util_test.go @@ -164,58 +164,6 @@ func Test_ParentDirectories(t *testing.T) { } } -func Test_checkWhiteouts(t *testing.T) { - type args struct { - path string - whiteouts map[string]struct{} - } - tests := []struct { - name string - args args - want bool - }{ - { - name: "file whited out", - args: args{ - path: "/foo", - whiteouts: map[string]struct{}{"/foo": {}}, - }, - want: true, - }, - { - name: "directory whited out", - args: args{ - path: "/foo/bar", - whiteouts: map[string]struct{}{"/foo": {}}, - }, - want: true, - }, - { - name: "grandparent whited out", - args: args{ - path: "/foo/bar/baz", - whiteouts: map[string]struct{}{"/foo": {}}, - }, - want: true, - }, - { - name: "sibling whited out", - args: args{ - path: "/foo/bar/baz", - whiteouts: map[string]struct{}{"/foo/bat": {}}, - }, - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := checkWhiteouts(tt.args.path, tt.args.whiteouts); got != tt.want { - t.Errorf("checkWhiteouts() = %v, want %v", got, tt.want) - } - }) - } -} - func Test_CheckWhitelist(t *testing.T) { type args struct { path string From e0130c59420ea9ee6426aa9e5252a7c4b08c2693 Mon Sep 17 00:00:00 2001 From: Priya Wadhwa Date: Thu, 30 Aug 2018 10:13:06 -0700 Subject: [PATCH 3/3] Remove extra symlink check --- pkg/util/fs_util.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pkg/util/fs_util.go b/pkg/util/fs_util.go index 0c020bdb5..8cbd19782 100644 --- a/pkg/util/fs_util.go +++ b/pkg/util/fs_util.go @@ -61,7 +61,6 @@ func GetFSFromImage(root string, img v1.Image) error { return err } tr := tar.NewReader(r) - symlinks := []*tar.Header{} for { hdr, err := tr.Next() if err == io.EOF { @@ -98,18 +97,11 @@ func GetFSFromImage(root string, img v1.Image) error { logrus.Debugf("skipping symlink from %s to %s because %s is whitelisted", hdr.Linkname, path, hdr.Linkname) continue } - symlinks = append(symlinks, hdr) - continue } if err := extractFile(root, hdr, tr); err != nil { return err } } - for _, s := range symlinks { - if err := extractFile(root, s, nil); err != nil { - return errors.Wrapf(err, "extracting symlink %s", s.Name) - } - } } return nil }