From 029ee153ccc6431e743491aed1d3087f71bd1200 Mon Sep 17 00:00:00 2001 From: fedor Date: Sun, 4 Aug 2024 17:16:39 -0400 Subject: [PATCH] Optimize DiskV2 deduplication logic If `disk.img` was cloned from a local image, check whether the data at the layer's offset already has the expected contents, and skip rewriting it if so. --- Sources/tart/LocalLayerCache.swift | 22 +++++++++++++++------- Sources/tart/OCI/Layerizer/DiskV2.swift | 14 +++++++++++--- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/Sources/tart/LocalLayerCache.swift b/Sources/tart/LocalLayerCache.swift index 58c21ba..74d0edf 100644 --- a/Sources/tart/LocalLayerCache.swift +++ b/Sources/tart/LocalLayerCache.swift @@ -1,12 +1,17 @@ import Foundation struct LocalLayerCache { + struct DigestInfo { + let range: Range + let uncompressedContentDigest: String? + } + let name: String let deduplicatedBytes: UInt64 let diskURL: URL private let mappedDisk: Data - private var digestToRange: [String : Range] = [:] + private var digestToRange: [String : DigestInfo] = [:] init?(_ name: String, _ deduplicatedBytes: UInt64, _ diskURL: URL, _ manifest: OCIManifest) throws { self.name = name @@ -24,17 +29,20 @@ struct LocalLayerCache { return nil } - self.digestToRange[layer.digest] = Int(offset).. Data? { - guard let foundRange = self.digestToRange[digest] else { - return nil - } + func findInfo(_ digest: String) -> DigestInfo? 
{ + return self.digestToRange[digest] + } - return self.mappedDisk.subdata(in: foundRange) + func subdata(_ range: Range) -> Data { + return self.mappedDisk.subdata(in: range) } } diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift b/Sources/tart/OCI/Layerizer/DiskV2.swift index f315d2f..f306ba3 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -140,9 +140,17 @@ class DiskV2: Disk { } // Check if we already have this layer contents in the local layer cache - if let localLayerCache = localLayerCache, let data = localLayerCache.find(diskLayer.digest), Digest.hash(data) == uncompressedLayerContentDigest { - // Fulfil the layer contents from the local blob cache - _ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data) + if let localLayerCache = localLayerCache, let localLayerInfo = localLayerCache.findInfo(diskLayer.digest) { + // indicates that the locally cloned disk image has the same content at the given offset + let localHit = localLayerInfo.uncompressedContentDigest == uncompressedLayerContentDigest + && localLayerInfo.range.lowerBound == diskWritingOffset + // doesn't seem that localHit can ever be false if the localLayerCache is not nil + // but let's just add extra safety here and check it + if !localHit { + // Fulfil the layer contents from the local blob cache + let data = localLayerCache.subdata(localLayerInfo.range) + _ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data) + } try disk.close() // Update the progress