metal : retain src and dst buffers during async ops (llama/17101)

This commit is contained in:
Georgi Gerganov 2025-11-09 08:28:51 +02:00
parent 6de3404773
commit a4339e2ea7
1 changed files with 4 additions and 2 deletions

View File

@ -289,7 +289,7 @@ void ggml_metal_set_tensor_async(ggml_metal_t ctx, struct ggml_tensor * tensor,
// queue the copy operation into the queue of the Metal context
// this will be queued at the end, after any currently ongoing GPU operations
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBuffer];
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
[encoder copyFromBuffer:buf_src
@ -300,6 +300,7 @@ void ggml_metal_set_tensor_async(ggml_metal_t ctx, struct ggml_tensor * tensor,
[encoder endEncoding];
[cmd_buf commit];
[buf_src release];
// do not wait here for completion
//[cmd_buf waitUntilCompleted];
@ -330,7 +331,7 @@ void ggml_metal_get_tensor_async(ggml_metal_t ctx, const struct ggml_tensor * te
// queue the copy operation into the queue of the Metal context
// this will be queued at the end, after any currently ongoing GPU operations
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBuffer];
id<MTLBlitCommandEncoder> encoder = [cmd_buf blitCommandEncoder];
[encoder copyFromBuffer:bid_src.metal
@ -341,6 +342,7 @@ void ggml_metal_get_tensor_async(ggml_metal_t ctx, const struct ggml_tensor * te
[encoder endEncoding];
[cmd_buf commit];
[buf_dst release];
// do not wait here for completion
//[cmd_buf waitUntilCompleted];