Optimize DiskV2 deduplication logic

If `disk.img` was cloned from a local image, check whether the data at the target offset already has the expected contents, and skip rewriting it if so.
This commit is contained in:
fedor 2024-08-04 17:16:39 -04:00
parent e74e9c845a
commit 029ee153cc
2 changed files with 26 additions and 10 deletions

View File

@ -1,12 +1,17 @@
import Foundation
struct LocalLayerCache {
/// Describes where a layer lives inside the mapped disk, together with the
/// digest of its uncompressed contents.
struct DigestInfo {
/// Byte range of the layer within the mapped disk data.
let range: Range<Data.Index>
/// Digest of the layer's uncompressed contents; `nil` when none is available.
let uncompressedContentDigest: String?
}
let name: String
let deduplicatedBytes: UInt64
let diskURL: URL
private let mappedDisk: Data
private var digestToRange: [String : Range<Data.Index>] = [:]
private var digestToRange: [String : DigestInfo] = [:]
init?(_ name: String, _ deduplicatedBytes: UInt64, _ diskURL: URL, _ manifest: OCIManifest) throws {
self.name = name
@ -24,17 +29,20 @@ struct LocalLayerCache {
return nil
}
self.digestToRange[layer.digest] = Int(offset)..<Int(offset+uncompressedSize)
self.digestToRange[layer.digest] = DigestInfo(
range: Int(offset)..<Int(offset+uncompressedSize),
uncompressedContentDigest: layer.uncompressedContentDigest()!
)
offset += uncompressedSize
}
}
func find(_ digest: String) -> Data? {
guard let foundRange = self.digestToRange[digest] else {
return nil
}
/// Looks up the cached layer information recorded for the given layer digest.
///
/// - Parameter digest: Digest of the layer to look up.
/// - Returns: The recorded `DigestInfo`, or `nil` when the digest is unknown.
func findInfo(_ digest: String) -> DigestInfo? {
digestToRange[digest]
}
return self.mappedDisk.subdata(in: foundRange)
/// Copies the bytes of the mapped disk that fall within `range`.
///
/// - Parameter range: Byte range inside the mapped disk data.
/// - Returns: A new `Data` value holding the bytes in that range.
func subdata(_ range: Range<Data.Index>) -> Data {
let slice = mappedDisk.subdata(in: range)
return slice
}
}

View File

@ -140,9 +140,17 @@ class DiskV2: Disk {
}
// Check if we already have this layer contents in the local layer cache
if let localLayerCache = localLayerCache, let data = localLayerCache.find(diskLayer.digest), Digest.hash(data) == uncompressedLayerContentDigest {
// Fulfil the layer contents from the local blob cache
_ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data)
if let localLayerCache = localLayerCache, let localLayerInfo = localLayerCache.findInfo(diskLayer.digest) {
// indicates that the locally cloned disk image has the same content at the given offset
let localHit = localLayerInfo.uncompressedContentDigest == uncompressedLayerContentDigest
&& localLayerInfo.range.lowerBound == diskWritingOffset
// It doesn't seem that localHit can ever be false when localLayerCache is not nil,
// but check it anyway for extra safety.
if !localHit {
// Fulfil the layer contents from the local blob cache
let data = localLayerCache.subdata(localLayerInfo.range)
_ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data)
}
try disk.close()
// Update the progress