From 10bf706653e0d21ed588ad28896fd971c0e30ea5 Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Fri, 9 Aug 2024 17:26:20 +0400 Subject: [PATCH] tart push: avoid uploading blobs if they are already present (#887) By issuing HEAD requests to the registry before doing the actual upload. --- Sources/tart/OCI/Layerizer/DiskV2.swift | 7 +++++-- Sources/tart/OCI/Registry.swift | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift b/Sources/tart/OCI/Layerizer/DiskV2.swift index c91972a..7c2217e 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -26,8 +26,11 @@ class DiskV2: Disk { // Launch a disk layer pushing task group.addTask { let compressedData = try (data as NSData).compressed(using: .lz4) as Data + let compressedDataDigest = Digest.hash(compressedData) - let layerDigest = try await registry.pushBlob(fromData: compressedData, chunkSizeMb: chunkSizeMb) + if try await !registry.blobExists(compressedDataDigest) { + _ = try await registry.pushBlob(fromData: compressedData, chunkSizeMb: chunkSizeMb, digest: compressedDataDigest) + } // Update progress using a relative value progress.completedUnitCount += Int64(data.count) @@ -35,7 +38,7 @@ class DiskV2: Disk { return (index, OCIManifestLayer( mediaType: diskV2MediaType, size: compressedData.count, - digest: layerDigest, + digest: compressedDataDigest, uncompressedSize: UInt64(data.count), uncompressedContentDigest: Digest.hash(data) )) diff --git a/Sources/tart/OCI/Registry.swift b/Sources/tart/OCI/Registry.swift index 4e0923f..92b614b 100644 --- a/Sources/tart/OCI/Registry.swift +++ b/Sources/tart/OCI/Registry.swift @@ -10,6 +10,7 @@ enum RegistryError: Error { } enum HTTPMethod: String { + case HEAD = "HEAD" case GET = "GET" case POST = "POST" case PUT = "PUT" @@ -21,6 +22,7 @@ enum HTTPCode: Int { case Created = 201 case Accepted = 202 case Unauthorized = 401 + case NotFound = 404 } extension Data { @@ -189,7 +191,7 @@ class Registry { return URLComponents(url: uploadLocation.absolutize(baseURL), resolvingAgainstBaseURL: true)! } - public func pushBlob(fromData: Data, chunkSizeMb: Int = 0) async throws -> String { + public func pushBlob(fromData: Data, chunkSizeMb: Int = 0, digest: String? = nil) async throws -> String { // Initiate a blob upload let (data, postResponse) = try await dataRequest(.POST, endpointURL("\(namespace)/blobs/uploads/"), headers: ["Content-Length": "0"]) @@ -201,7 +203,7 @@ class Registry { // Figure out where to upload the blob var uploadLocation = try uploadLocationFromResponse(postResponse) - let digest = Digest.hash(fromData) + let digest = digest ?? Digest.hash(fromData) if chunkSizeMb == 0 { // monolithic upload @@ -249,6 +251,19 @@ class Registry { return digest } + public func blobExists(_ digest: String) async throws -> Bool { + let (data, response) = try await dataRequest(.HEAD, endpointURL("\(namespace)/blobs/\(digest)")) + + switch response.statusCode { + case HTTPCode.Ok.rawValue: + return true + case HTTPCode.NotFound.rawValue: + return false + default: + throw RegistryError.UnexpectedHTTPStatusCode(when: "checking blob", code: response.statusCode, details: data.asText()) + } + } + public func pullBlob(_ digest: String, handler: (Data) async throws -> Void) async throws { let (channel, response) = try await channelRequest(.GET, endpointURL("\(namespace)/blobs/\(digest)"), viaFile: true) if response.statusCode != HTTPCode.Ok.rawValue {