From e59382aebab5a82661cbb3cc7a9fc9a1423b211d Mon Sep 17 00:00:00 2001 From: fedor Date: Sun, 4 Aug 2024 23:38:00 -0400 Subject: [PATCH] subdataChunks optimization --- Sources/tart/Data+Chunks.swift | 18 ++++++++++++++++++ Sources/tart/Fetcher.swift | 4 ++-- Sources/tart/OCI/Layerizer/DiskV2.swift | 4 ++-- Sources/tart/OCI/Registry.swift | 2 +- 4 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 Sources/tart/Data+Chunks.swift diff --git a/Sources/tart/Data+Chunks.swift b/Sources/tart/Data+Chunks.swift new file mode 100644 index 0000000..a385718 --- /dev/null +++ b/Sources/tart/Data+Chunks.swift @@ -0,0 +1,18 @@ +import Foundation + +extension Data { + /* + * Performant version of splitting a Data into chunks of a given size. + * It appears that `Data.chunks` is not as performant as chunking the range of the data + * into subranges and getting subdata directly. + */ + func subdataChunks(ofCount: Int) -> [Data] { + var chunks: [Data] = [] + + for subrange in (0..= concurrency { if let (index, pushedLayer) = try await group.next() { @@ -209,7 +209,7 @@ class DiskV2: Disk { var offset = offset - for chunk in data.chunks(ofCount: holeGranularityBytes) { + for chunk in data.subdataChunks(ofCount: holeGranularityBytes) { // If the local layer cache is used, only write chunks that differ // since the base disk can contain anything at any position if let rdisk = rdisk { diff --git a/Sources/tart/OCI/Registry.swift b/Sources/tart/OCI/Registry.swift index 4e0923f..b4d63f1 100644 --- a/Sources/tart/OCI/Registry.swift +++ b/Sources/tart/OCI/Registry.swift @@ -223,7 +223,7 @@ class Registry { // chunked upload var uploadedBytes = 0 - let chunks = fromData.chunks(ofCount: chunkSizeMb == 0 ? fromData.count : chunkSizeMb * 1_000_000) + let chunks = fromData.subdataChunks(ofCount: chunkSizeMb == 0 ? 
fromData.count : chunkSizeMb * 1_000_000) for (index, chunk) in chunks.enumerated() { let lastChunk = index == (chunks.count - 1) let (data, response) = try await dataRequest(