mirror of https://github.com/cirruslabs/tart.git
Optimize DiskV2 Deduplication (#878)
* Revert "Lowercase `tart.app` (#751)" This reverts commit a9e2a19015. * Optimize DiskV2 deduplication logic In case we cloned `disk.img` from a local image, check if data at offset has the expected contents already. * Hole punch only if needed * Calculate hash only if needed * subdataChunks optimization * Reapply "Lowercase `tart.app` (#751)" This reverts commit e74e9c845a. * format * Save at least 1GB on deduplication logic * Build separately * Revert "subdataChunks optimization" This reverts commit e59382aeba. * Another optimization * Removed debug log * reformat * Revert "Hole punch only if needed" This reverts commit 8c569fc5
This commit is contained in:
parent
33b5cfe2ed
commit
ff928ad77d
|
|
@ -8,11 +8,11 @@ task:
|
|||
name: dev-mini
|
||||
resources:
|
||||
tart-vms: 1
|
||||
build_script:
|
||||
- swift build
|
||||
test_script:
|
||||
- swift test
|
||||
integration_test_script:
|
||||
# Build Tart
|
||||
- swift build
|
||||
- codesign --sign - --entitlements Resources/tart-dev.entitlements --force .build/debug/tart
|
||||
- export PATH=$(pwd)/.build/arm64-apple-macosx/debug:$PATH
|
||||
# Run integration tests
|
||||
|
|
|
|||
|
|
@ -1,12 +1,19 @@
|
|||
import Foundation
|
||||
|
||||
struct LocalLayerCache {
|
||||
struct DigestInfo {
|
||||
let range: Range<Data.Index>
|
||||
let compressedDigest: String
|
||||
let uncompressedContentDigest: String?
|
||||
}
|
||||
|
||||
let name: String
|
||||
let deduplicatedBytes: UInt64
|
||||
let diskURL: URL
|
||||
|
||||
private let mappedDisk: Data
|
||||
private var digestToRange: [String : Range<Data.Index>] = [:]
|
||||
private var digestToRange: [String: DigestInfo] = [:]
|
||||
private var offsetToRange: [UInt64: DigestInfo] = [:]
|
||||
|
||||
init?(_ name: String, _ deduplicatedBytes: UInt64, _ diskURL: URL, _ manifest: OCIManifest) throws {
|
||||
self.name = name
|
||||
|
|
@ -24,17 +31,27 @@ struct LocalLayerCache {
|
|||
return nil
|
||||
}
|
||||
|
||||
self.digestToRange[layer.digest] = Int(offset)..<Int(offset+uncompressedSize)
|
||||
let info = DigestInfo(
|
||||
range: Int(offset)..<Int(offset + uncompressedSize),
|
||||
compressedDigest: layer.digest,
|
||||
uncompressedContentDigest: layer.uncompressedContentDigest()!
|
||||
)
|
||||
self.digestToRange[layer.digest] = info
|
||||
self.offsetToRange[offset] = info
|
||||
|
||||
offset += uncompressedSize
|
||||
}
|
||||
}
|
||||
|
||||
func find(_ digest: String) -> Data? {
|
||||
guard let foundRange = self.digestToRange[digest] else {
|
||||
return nil
|
||||
func findInfo(digest: String, offsetHint: UInt64) -> DigestInfo? {
|
||||
// Layers can have the same digests, for example, empty ones. Let's use the offset hint to make a better guess.
|
||||
if let info = self.offsetToRange[offsetHint], info.compressedDigest == digest {
|
||||
return info
|
||||
}
|
||||
return self.digestToRange[digest]
|
||||
}
|
||||
|
||||
return self.mappedDisk.subdata(in: foundRange)
|
||||
func subdata(_ range: Range<Data.Index>) -> Data {
|
||||
return self.mappedDisk.subdata(in: range)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,11 +121,14 @@ class DiskV2: Disk {
|
|||
// Launch a fetching and decompression task
|
||||
group.addTask {
|
||||
// No need to fetch and decompress anything if we've already done so
|
||||
if try pullResumed && Digest.hash(diskURL, offset: diskWritingOffset, size: uncompressedLayerSize) == uncompressedLayerContentDigest {
|
||||
// Update the progress
|
||||
progress.completedUnitCount += Int64(diskLayer.size)
|
||||
if pullResumed {
|
||||
// do not check hash in the condition above to make it lazy e.g. only do expensive calculations if needed
|
||||
if try Digest.hash(diskURL, offset: diskWritingOffset, size: uncompressedLayerSize) == uncompressedLayerContentDigest {
|
||||
// Update the progress
|
||||
progress.completedUnitCount += Int64(diskLayer.size)
|
||||
|
||||
return
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Open the disk file for writing
|
||||
|
|
@ -140,9 +143,17 @@ class DiskV2: Disk {
|
|||
}
|
||||
|
||||
// Check if we already have this layer contents in the local layer cache
|
||||
if let localLayerCache = localLayerCache, let data = localLayerCache.find(diskLayer.digest), Digest.hash(data) == uncompressedLayerContentDigest {
|
||||
// Fulfil the layer contents from the local blob cache
|
||||
_ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data)
|
||||
if let localLayerCache = localLayerCache, let localLayerInfo = localLayerCache.findInfo(digest: diskLayer.digest, offsetHint: diskWritingOffset) {
|
||||
// indicates that the locally cloned disk image has the same content at the given offset
|
||||
let localHit = localLayerInfo.uncompressedContentDigest == uncompressedLayerContentDigest
|
||||
&& localLayerInfo.range.lowerBound == diskWritingOffset
|
||||
// doesn't seem that localHit can ever be false if the localLayerCache is not nil
|
||||
// but let's just add extra safety here and check it
|
||||
if !localHit {
|
||||
// Fulfil the layer contents from the local blob cache
|
||||
let data = localLayerCache.subdata(localLayerInfo.range)
|
||||
_ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data)
|
||||
}
|
||||
try disk.close()
|
||||
|
||||
// Update the progress
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ extension URL: Prunable {
|
|||
func deduplicatedSizeBytes() throws -> Int {
|
||||
let values = try resourceValues(forKeys: [.totalFileAllocatedSizeKey, .mayShareFileContentKey])
|
||||
// make sure the file's origin file is there and duplication works
|
||||
var dedublicatedSize = 0
|
||||
if values.mayShareFileContent == true {
|
||||
return Int(deduplicatedBytes())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -297,7 +297,7 @@ class VMStorageOCI: PrunableStorage {
|
|||
|
||||
// Now, find the best match based on how many bytes we'll deduplicate
|
||||
let choosen = candidates.filter {
|
||||
$0.deduplicatedBytes > 0
|
||||
$0.deduplicatedBytes > 1024 * 1024 * 1024 // save at least 1GB
|
||||
}.max { left, right in
|
||||
return left.deduplicatedBytes < right.deduplicatedBytes
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue