mirror of https://github.com/cirruslabs/tart.git
tart pull: choose across multiple VM images to deduplicate against (#862)
This is accomplished by saving the OCI VM image manifests on "tart pull" in "manifest.json" file and then using them on successive "tart pull"'s to find the best candidate that results in the most de-duplication, measured in bytes.
This commit is contained in:
parent
ea5313698e
commit
ababe8cefc
|
|
@ -78,7 +78,7 @@ struct OCIManifestConfig: Codable, Equatable {
|
|||
var digest: String
|
||||
}
|
||||
|
||||
struct OCIManifestLayer: Codable, Equatable {
|
||||
struct OCIManifestLayer: Codable, Equatable, Hashable {
|
||||
var mediaType: String
|
||||
var size: Int
|
||||
var digest: String
|
||||
|
|
@ -113,6 +113,14 @@ struct OCIManifestLayer: Codable, Equatable {
|
|||
func uncompressedContentDigest() -> String? {
|
||||
annotations?[uncompressedContentDigestAnnotation]
|
||||
}
|
||||
|
||||
static func == (lhs: Self, rhs: Self) -> Bool {
|
||||
return lhs.digest == rhs.digest
|
||||
}
|
||||
|
||||
func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(digest)
|
||||
}
|
||||
}
|
||||
|
||||
struct Descriptor: Equatable {
|
||||
|
|
|
|||
|
|
@ -79,6 +79,9 @@ extension VMDirectory {
|
|||
nvram.write(data)
|
||||
}
|
||||
try nvram.close()
|
||||
|
||||
// Serialize VM's manifest to enable better de-duplication on subsequent "tart pull"'s
|
||||
try manifest.toJSON().write(to: manifestURL)
|
||||
}
|
||||
|
||||
func pushToRegistry(registry: Registry, references: [String], chunkSizeMb: Int, diskFormat: String) async throws -> RemoteName {
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ struct VMDirectory: Prunable {
|
|||
var stateURL: URL {
|
||||
baseURL.appendingPathComponent("state.vzvmsave")
|
||||
}
|
||||
var manifestURL: URL {
|
||||
baseURL.appendingPathComponent("manifest.json")
|
||||
}
|
||||
|
||||
var explicitlyPulledMark: URL {
|
||||
baseURL.appendingPathComponent(".explicitly-pulled")
|
||||
|
|
|
|||
|
|
@ -197,14 +197,8 @@ class VMStorageOCI: PrunableStorage {
|
|||
|
||||
try await withTaskCancellationHandler(operation: {
|
||||
try await retry(maxAttempts: 5, backoff: .exponentialWithFullJitter(baseDelay: .seconds(5), maxDelay: .seconds(60))) {
|
||||
var localLayerCache: LocalLayerCache? = nil
|
||||
|
||||
if name.reference.type == .Tag,
|
||||
let vmDir = try? open(name),
|
||||
let digest = try? digest(name),
|
||||
let (manifest, _) = try? await registry.pullManifest(reference: digest) {
|
||||
localLayerCache = try LocalLayerCache(vmDir.diskURL, manifest)
|
||||
}
|
||||
// Choose the best base image which has the most deduplication ratio
|
||||
let localLayerCache = try await chooseLocalLayerCache(name, manifest, registry)
|
||||
|
||||
try await tmpVMDir.pullFromRegistry(registry: registry, manifest: manifest, concurrency: concurrency, localLayerCache: localLayerCache)
|
||||
} recoverFromFailure: { error in
|
||||
|
|
@ -249,6 +243,57 @@ class VMStorageOCI: PrunableStorage {
|
|||
|
||||
try gc()
|
||||
}
|
||||
|
||||
func chooseLocalLayerCache(_ name: RemoteName, _ manifest: OCIManifest, _ registry: Registry) async throws -> LocalLayerCache? {
|
||||
// Establish a closure that will calculate how much bytes
|
||||
// we'll de-duplicate if we re-use the given manifest
|
||||
let target = Swift.Set(manifest.layers)
|
||||
|
||||
let calculateDeduplicatedBytes = { (manifest: OCIManifest) -> Int in
|
||||
target.intersection(manifest.layers).map({ $0.size }).reduce(0, +)
|
||||
}
|
||||
|
||||
// Load OCI VM images and their manifests (if present)
|
||||
var candidates: [(name: String, vmDir: VMDirectory, manifest: OCIManifest, deduplicatedBytes: Int)] = []
|
||||
|
||||
for (name, vmDir, isSymlink) in try list() {
|
||||
if isSymlink {
|
||||
continue
|
||||
}
|
||||
|
||||
guard let manifestJSON = try? Data(contentsOf: vmDir.manifestURL) else {
|
||||
continue
|
||||
}
|
||||
|
||||
guard let manifest = try? OCIManifest(fromJSON: manifestJSON) else {
|
||||
continue
|
||||
}
|
||||
|
||||
candidates.append((name, vmDir, manifest, calculateDeduplicatedBytes(manifest)))
|
||||
}
|
||||
|
||||
// Previously we haven't stored the OCI VM image manifests, but still fetched the VM image manifest if
|
||||
// what the user was trying to pull was a tagged image, and we already had that image in the OCI VM cache
|
||||
//
|
||||
// Keep supporting this behavior for backwards comaptibility, but only communicate
|
||||
// with the registry if we haven't already retrieved the manifest for that OCI VM image.
|
||||
if name.reference.type == .Tag,
|
||||
let vmDir = try? open(name),
|
||||
let digest = try? digest(name),
|
||||
try !candidates.contains(where: {try $0.manifest.digest() == digest}),
|
||||
let (manifest, _) = try? await registry.pullManifest(reference: digest) {
|
||||
candidates.append((name.description, vmDir, manifest, calculateDeduplicatedBytes(manifest)))
|
||||
}
|
||||
|
||||
// Now, find the best match based on how many bytes we'll de-duplicate
|
||||
let choosen = candidates.max { left, right in
|
||||
return left.deduplicatedBytes < right.deduplicatedBytes
|
||||
}
|
||||
|
||||
return try choosen.flatMap({ choosen in
|
||||
try LocalLayerCache(choosen.vmDir.diskURL, choosen.manifest)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
extension URL {
|
||||
|
|
|
|||
Loading…
Reference in New Issue