subdataChunks optimization

This commit is contained in:
fedor 2024-08-04 23:38:00 -04:00
parent 91f3d3288a
commit e59382aeba
4 changed files with 23 additions and 5 deletions

View File

@ -0,0 +1,18 @@
import Foundation
extension Data {
  /// Splits the data into consecutive chunks of at most `ofCount` bytes each.
  ///
  /// Performant alternative to chunking the `Data` collection itself: it appears
  /// that `Data.chunks(ofCount:)` is not as performant as computing the index
  /// subranges and extracting each one directly with `subdata(in:)`.
  ///
  /// The ranges are derived from `startIndex`/`endIndex` rather than `0..<count`,
  /// because a `Data` slice keeps the indices of the buffer it was sliced from
  /// and therefore does not necessarily start at index zero.
  ///
  /// - Parameter ofCount: Maximum number of bytes per chunk. Must be greater than zero.
  /// - Returns: The chunks in their original order. Every chunk holds exactly
  ///   `ofCount` bytes, except the last one, which may be shorter. Empty data
  ///   yields an empty array.
  func subdataChunks(ofCount chunkSize: Int) -> [Data] {
    // A non-positive chunk size can never make progress through the data.
    precondition(chunkSize > 0, "Cannot chunk with count <= 0!")

    var chunks: [Data] = []
    // Ceiling division written so that it cannot overflow for huge chunk sizes.
    chunks.reserveCapacity(count / chunkSize + (count % chunkSize == 0 ? 0 : 1))

    var lowerBound = startIndex
    while lowerBound < endIndex {
      // Clamp the final chunk to the bytes that are left; `Swift.min` is spelled
      // out to avoid resolving to the collection's own `min()` method.
      let upperBound = lowerBound + Swift.min(chunkSize, endIndex - lowerBound)
      chunks.append(subdata(in: lowerBound..<upperBound))
      lowerBound = upperBound
    }

    return chunks
  }
}

View File

@ -52,8 +52,8 @@ class Fetcher {
try FileManager.default.removeItem(at: fileURL)
Task {
for chunk in (0 ..< mappedFile.count).chunks(ofCount: 64 * 1024 * 1024) {
await dataCh.send(mappedFile.subdata(in: chunk))
for chunk in mappedFile.subdataChunks(ofCount: 64 * 1024 * 1024) {
await dataCh.send(chunk)
}
dataCh.finish()

View File

@ -15,7 +15,7 @@ class DiskV2: Disk {
// Compress the disk file as multiple individually decompressible streams,
// each equal ``Self.layerLimitBytes`` bytes or less due to LZ4 compression
try await withThrowingTaskGroup(of: (Int, OCIManifestLayer).self) { group in
for (index, data) in mappedDisk.chunks(ofCount: layerLimitBytes).enumerated() {
for (index, data) in mappedDisk.subdataChunks(ofCount: layerLimitBytes).enumerated() {
// Respect the concurrency limit
if index >= concurrency {
if let (index, pushedLayer) = try await group.next() {
@ -209,7 +209,7 @@ class DiskV2: Disk {
var offset = offset
for chunk in data.chunks(ofCount: holeGranularityBytes) {
for chunk in data.subdataChunks(ofCount: holeGranularityBytes) {
// If the local layer cache is used, only write chunks that differ
// since the base disk can contain anything at any position
if let rdisk = rdisk {

View File

@ -223,7 +223,7 @@ class Registry {
// chunked upload
var uploadedBytes = 0
let chunks = fromData.chunks(ofCount: chunkSizeMb == 0 ? fromData.count : chunkSizeMb * 1_000_000)
let chunks = fromData.subdataChunks(ofCount: chunkSizeMb == 0 ? fromData.count : chunkSizeMb * 1_000_000)
for (index, chunk) in chunks.enumerated() {
let lastChunk = index == (chunks.count - 1)
let (data, response) = try await dataRequest(