From 0c7c3ba570cb0b6f03da762d53ba211022cfb89a Mon Sep 17 00:00:00 2001 From: Ruben Ortlam Date: Thu, 30 Apr 2026 17:37:13 +0200 Subject: [PATCH] vulkan: add get/set tensor 2d functions (llama/22514) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * vulkan: add get/set_tensor_2d functions * fix backend interface comments * Update ggml/src/ggml-metal/ggml-metal.cpp Co-authored-by: Sigbjørn Skjæret --- ggml/src/ggml-backend-meta.cpp | 2 +- ggml/src/ggml-blas/ggml-blas.cpp | 4 +- ggml/src/ggml-cann/ggml-cann.cpp | 2 +- ggml/src/ggml-cpu/ggml-cpu.cpp | 2 +- ggml/src/ggml-cuda/ggml-cuda.cu | 4 +- ggml/src/ggml-hexagon/ggml-hexagon.cpp | 2 +- ggml/src/ggml-metal/ggml-metal.cpp | 6 +- ggml/src/ggml-opencl/ggml-opencl.cpp | 4 +- ggml/src/ggml-rpc/ggml-rpc.cpp | 4 +- ggml/src/ggml-sycl/ggml-sycl.cpp | 2 +- ggml/src/ggml-virtgpu/ggml-backend.cpp | 2 +- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 215 +++++++++++++++++++------ ggml/src/ggml-webgpu/ggml-webgpu.cpp | 2 +- ggml/src/ggml-zdnn/ggml-zdnn.cpp | 2 +- ggml/src/ggml-zendnn/ggml-zendnn.cpp | 2 +- 15 files changed, 182 insertions(+), 73 deletions(-) diff --git a/ggml/src/ggml-backend-meta.cpp b/ggml/src/ggml-backend-meta.cpp index fbc02d64..c0ffd9a0 100644 --- a/ggml/src/ggml-backend-meta.cpp +++ b/ggml/src/ggml-backend-meta.cpp @@ -2100,8 +2100,8 @@ static const ggml_backend_i ggml_backend_meta_i = { /* .free = */ ggml_backend_meta_free, /* .set_tensor_async = */ ggml_backend_meta_set_tensor_async, /* .get_tensor_async = */ ggml_backend_meta_get_tensor_async, - /* .get_tensor_2d_async = */ nullptr, /* .set_tensor_2d_async = */ nullptr, + /* .get_tensor_2d_async = */ nullptr, /* .cpy_tensor_async = */ nullptr, /* .synchronize = */ ggml_backend_meta_synchronize, /* .graph_plan_create = */ nullptr, diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp index 05245b69..b4c73526 100644 --- a/ggml/src/ggml-blas/ggml-blas.cpp +++ b/ggml/src/ggml-blas/ggml-blas.cpp @@ -262,9 +262,9 @@ static struct ggml_backend_i blas_backend_i = { /* .get_name = */ ggml_backend_blas_get_name, /* .free = */ ggml_backend_blas_free, /* .set_tensor_async = */ NULL, - /* .get_tensor_2d_async = */ NULL, - /* .set_tensor_2d_async = */ NULL, /* .get_tensor_async = */ NULL, + /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, /* .synchronize = */ NULL, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 3618ba7f..5f51ea3b 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -2746,8 +2746,8 @@ static const ggml_backend_i ggml_backend_cann_interface = { /* .free = */ ggml_backend_cann_free, /* .set_tensor_async = */ ggml_backend_cann_set_tensor_async, /* .get_tensor_async = */ ggml_backend_cann_get_tensor_async, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ ggml_backend_cann_cpy_tensor_async, /* .synchronize = */ ggml_backend_cann_synchronize, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp index 49f840be..128883b4 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp @@ -195,8 +195,8 @@ static const struct ggml_backend_i ggml_backend_cpu_i = { /* .free = */ ggml_backend_cpu_free, /* .set_tensor_async = */ NULL, /* .get_tensor_async = */ NULL, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, /* .synchronize = */ NULL, /* .graph_plan_create = */ ggml_backend_cpu_graph_plan_create, diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 0e6f7468..fbe0fa06 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -4588,8 +4588,8 @@ static const ggml_backend_i ggml_backend_cuda_interface = { /* .free = */ ggml_backend_cuda_free, /* .set_tensor_async = */ ggml_backend_cuda_set_tensor_async, /* .get_tensor_async = */ ggml_backend_cuda_get_tensor_async, - /* .get_tensor_2d_async = */ ggml_backend_cuda_set_tensor_2d_async, - /* .set_tensor_2d_async = */ ggml_backend_cuda_get_tensor_2d_async, + /* .set_tensor_2d_async = */ ggml_backend_cuda_set_tensor_2d_async, + /* .get_tensor_2d_async = */ ggml_backend_cuda_get_tensor_2d_async, /* .cpy_tensor_async = */ ggml_backend_cuda_cpy_tensor_async, /* .synchronize = */ ggml_backend_cuda_synchronize, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index 9345da62..17ac083f 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -3036,8 +3036,8 @@ static struct ggml_backend_i hexagon_backend_i = { /* .free = */ ggml_backend_hexagon_free, /* .set_tensor_async = */ NULL, /* .get_tensor_async = */ NULL, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, /* .synchronize = */ ggml_backend_hexagon_synchronize, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp index 6a836e45..cc329d67 100644 --- a/ggml/src/ggml-metal/ggml-metal.cpp +++ b/ggml/src/ggml-metal/ggml-metal.cpp @@ -166,8 +166,8 @@ static ggml_backend_buffer_i ggml_backend_metal_buffer_private_i = { /* .memset_tensor = */ ggml_backend_metal_buffer_private_memset_tensor, /* .set_tensor = */ ggml_backend_metal_buffer_private_set_tensor, /* .get_tensor = */ ggml_backend_metal_buffer_private_get_tensor, - /* .get_tensor_2d_async = */ NULL, - /* .set_tensor_2d_async = */ NULL, + /* .set_tensor_2d = */ NULL, + /* .get_tensor_2d = */ NULL, /* .cpy_tensor = */ ggml_backend_metal_buffer_private_cpy_tensor, /* .clear = */ ggml_backend_metal_buffer_private_clear, /* .reset = */ NULL, @@ -567,8 +567,8 @@ static ggml_backend_i ggml_backend_metal_i = { /* .free = */ ggml_backend_metal_free, /* .set_tensor_async = */ ggml_backend_metal_set_tensor_async, /* .get_tensor_async = */ ggml_backend_metal_get_tensor_async, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ ggml_backend_metal_cpy_tensor_async, // only needed for multi-GPU setups /* .synchronize = */ ggml_backend_metal_synchronize, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index 4d31591a..11f72a51 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -4343,9 +4343,9 @@ static ggml_backend_i ggml_backend_opencl_i = { /* .free = */ ggml_backend_opencl_free, /* .set_tensor_async = */ NULL, /* ggml_backend_opencl_set_tensor_async */ /* .get_tensor_async = */ NULL, /* ggml_backend_opencl_get_tensor_async */ - /* .cpy_tensor_async = */ NULL, /* ggml_backend_opencl_cpy_tensor_async */ - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, + /* .cpy_tensor_async = */ NULL, /* ggml_backend_opencl_cpy_tensor_async */ /* .synchronize = */ ggml_backend_opencl_synchronize, /* .graph_plan_create = */ NULL, /* .graph_plan_free = */ NULL, diff --git a/ggml/src/ggml-rpc/ggml-rpc.cpp b/ggml/src/ggml-rpc/ggml-rpc.cpp index 505bec73..7176d2fe 100644 --- a/ggml/src/ggml-rpc/ggml-rpc.cpp +++ b/ggml/src/ggml-rpc/ggml-rpc.cpp @@ -740,9 +740,9 @@ static ggml_backend_i ggml_backend_rpc_interface = { /* .free = */ ggml_backend_rpc_free, /* .set_tensor_async = */ NULL, /* .get_tensor_async = */ NULL, - /* .cpy_tensor_async = */ NULL, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, + /* .cpy_tensor_async = */ NULL, /* .synchronize = */ ggml_backend_rpc_synchronize, /* .graph_plan_create = */ NULL, /* .graph_plan_free = */ NULL, diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 1eead625..f06147ee 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -4700,8 +4700,8 @@ static ggml_backend_i ggml_backend_sycl_interface = { /* .free = */ ggml_backend_sycl_free, /* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async, /* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, // ggml_backend_sycl_cpy_tensor_async, // // TODO: update for the new // interface diff --git a/ggml/src/ggml-virtgpu/ggml-backend.cpp b/ggml/src/ggml-virtgpu/ggml-backend.cpp index 2b978556..12756c92 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend.cpp @@ -34,8 +34,8 @@ static ggml_backend_i ggml_backend_remoting_interface = { /* .free = */ ggml_backend_remoting_free, /* .set_tensor_async = */ NULL, // ggml_backend_remoting_set_tensor_async, /* .get_tensor_async = */ NULL, // ggml_backend_remoting_get_tensor_async, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, // ggml_backend_remoting_cpy_tensor_async, /* .synchronize = */ NULL, // ggml_backend_remoting_synchronize, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 69c24bb5..10b73317 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -6845,7 +6845,7 @@ static void ggml_vk_buffer_write_nc_async(ggml_backend_vk_context * ctx, vk_cont } } -static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height, bool sync_staging = false) { +static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, size_t offset, const void * src, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) { VK_LOG_DEBUG("ggml_vk_buffer_write_2d_async(" << width << ", " << height << ")"); // Check if src is pinned memory vk_buffer buf = nullptr; @@ -6855,7 +6855,7 @@ static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, siz if (buf != nullptr) { // Memory is pinned, use as staging buffer std::vector slices(1); - if (width == spitch) { + if (width == spitch && width == dpitch) { // Only do single write if stride is equal slices[0].srcOffset = buf_offset; slices[0].dstOffset = offset; @@ -6864,7 +6864,7 @@ static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, siz slices.resize(height); for (size_t i = 0; i < height; i++) { slices[i].srcOffset = buf_offset + i * spitch; - slices[i].dstOffset = offset + i * width; + slices[i].dstOffset = offset + i * dpitch; slices[i].size = width; } } @@ -6881,21 +6881,30 @@ static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, siz } // Staging buffer required - const size_t copy_size = width*height; - ggml_vk_ensure_sync_staging_buffer(dst->device, copy_size); + const size_t staging_size = width * height; + ggml_vk_ensure_sync_staging_buffer(dst->device, staging_size); vk_buffer& staging_buffer = dst->device->sync_staging; - VkBufferCopy buf_copy = { - 0, - offset, - copy_size}; + std::vector slices(1); + if (width == dpitch) { + slices[0].srcOffset = 0; + slices[0].dstOffset = offset; + slices[0].size = staging_size; + } else { + slices.resize(height); + for (size_t i = 0; i < height; i++) { + slices[i].srcOffset = i * width; + slices[i].dstOffset = offset + i * dpitch; + slices[i].size = width; + } + } ggml_vk_sync_buffers(nullptr, subctx); - vkCmdCopyBuffer(subctx->s->buffer->buf, (VkBuffer)staging_buffer->buffer, (VkBuffer)dst->buffer, 1, &buf_copy); + subctx->s->buffer->buf.copyBuffer((VkBuffer)staging_buffer->buffer, (VkBuffer)dst->buffer, slices); if (width == spitch) { - deferred_memcpy((uint8_t *)staging_buffer->ptr, src, width * height, &subctx->in_memcpys); + deferred_memcpy((uint8_t *)staging_buffer->ptr, src, staging_size, &subctx->in_memcpys); } else { for (size_t i = 0; i < height; i++) { deferred_memcpy((uint8_t *)staging_buffer->ptr + i * width, (const uint8_t *) src + i * spitch, width, &subctx->in_memcpys); @@ -6906,24 +6915,24 @@ static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, siz static bool ggml_vk_buffer_write_async(vk_context subctx, vk_buffer& dst, size_t offset, const void * src, size_t size, bool sync_staging = false) { VK_LOG_DEBUG("ggml_vk_buffer_write_async(" << size << ")"); - return ggml_vk_buffer_write_2d_async(subctx, dst, offset, src, size, size, 1, sync_staging); + return ggml_vk_buffer_write_2d_async(subctx, dst, offset, src, size, size, size, 1, sync_staging); } -static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void * src, size_t spitch, size_t width, size_t height) { +static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void * src, size_t spitch, size_t dpitch, size_t width, size_t height) { VK_LOG_DEBUG("ggml_vk_buffer_write_2d(" << width << ", " << height << ")"); // Buffer is already mapped if(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible) { GGML_ASSERT(dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostCoherent); for (size_t i = 0; i < height; i++) { - memcpy((uint8_t *)dst->ptr + offset + i * width, (const uint8_t *) src + i * spitch, width); + memcpy((uint8_t *)dst->ptr + offset + i * dpitch, (const uint8_t *) src + i * spitch, width); } } else { std::lock_guard guard(dst->device->mutex); vk_context subctx = ggml_vk_create_temporary_context(dst->device->transfer_queue.cmd_pool); ggml_vk_ctx_begin(dst->device, subctx); - bool ret = ggml_vk_buffer_write_2d_async(subctx, dst, offset, src, spitch, width, height, true); + bool ret = ggml_vk_buffer_write_2d_async(subctx, dst, offset, src, spitch, dpitch, width, height, true); GGML_ASSERT(ret); ggml_vk_ctx_end(subctx); @@ -6944,7 +6953,7 @@ static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void * static void ggml_vk_buffer_write(vk_buffer& dst, size_t offset, const void * src, size_t size) { VK_LOG_DEBUG("ggml_vk_buffer_write(" << size << ")"); - ggml_vk_buffer_write_2d(dst, offset, src, 0, size, 1); + ggml_vk_buffer_write_2d(dst, offset, src, size, size, size, 1); } static bool ggml_vk_buffer_read_2d_async(vk_context subctx, vk_buffer& src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height, bool sync_staging = false) { @@ -6990,15 +6999,35 @@ static bool ggml_vk_buffer_read_2d_async(vk_context subctx, vk_buffer& src, size } // Fall back to staging buffer - const size_t copy_size = dpitch * height; - ggml_vk_ensure_sync_staging_buffer(src->device, copy_size); + const size_t staging_size = width * height; + ggml_vk_ensure_sync_staging_buffer(src->device, staging_size); vk_buffer& staging_buffer = src->device->sync_staging; - ggml_vk_sync_buffers(nullptr, subctx); - subctx->s->buffer->buf.copyBuffer(src->buffer, staging_buffer->buffer, slices); + std::vector staging_slices(1); + if (width == spitch) { + staging_slices[0].srcOffset = offset; + staging_slices[0].dstOffset = 0; + staging_slices[0].size = staging_size; + } else { + staging_slices.resize(height); + for (size_t i = 0; i < height; i++) { + staging_slices[i].srcOffset = offset + i * spitch; + staging_slices[i].dstOffset = i * width; + staging_slices[i].size = width; + } + } - deferred_memcpy(dst, staging_buffer->ptr, copy_size, &subctx->out_memcpys); + ggml_vk_sync_buffers(nullptr, subctx); + subctx->s->buffer->buf.copyBuffer(src->buffer, staging_buffer->buffer, staging_slices); + + if (width == dpitch) { + deferred_memcpy(dst, staging_buffer->ptr, staging_size, &subctx->out_memcpys); + } else { + for (size_t i = 0; i < height; i++) { + deferred_memcpy((uint8_t *) dst + i * dpitch, (const uint8_t *) staging_buffer->ptr + i * width, width, &subctx->out_memcpys); + } + } return true; } @@ -7006,8 +7035,8 @@ static bool ggml_vk_buffer_read_async(vk_context subctx, vk_buffer& src, size_t return ggml_vk_buffer_read_2d_async(subctx, src, offset, dst, size, size, size, 1, sync_staging); } -static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_t size) { - VK_LOG_DEBUG("ggml_vk_buffer_read(" << src->buffer << ", " << offset << ", " << size << ")"); +static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height) { + VK_LOG_DEBUG("ggml_vk_buffer_read_2d(" << src->buffer << ", " << offset << ", " << width << ", " << height << ")"); // If the device is not an UMA device the memory is host-accessible through rebar. While writing // through PCIe is sufficient fast reading back data from PCIe is slower than going through @@ -7015,18 +7044,20 @@ static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_ if(src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible && src->device->uma) { GGML_ASSERT(src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostCoherent); - memcpy(dst, (uint8_t *) src->ptr + offset, size); + for (size_t i = 0; i < height; i++) { + memcpy((uint8_t *) dst + i * dpitch, (const uint8_t *) src->ptr + offset + i * spitch, width); + } } else { std::lock_guard guard(src->device->mutex); vk_context subctx = ggml_vk_create_temporary_context(src->device->transfer_queue.cmd_pool); ggml_vk_ctx_begin(src->device, subctx); - bool ret = ggml_vk_buffer_read_async(subctx, src, offset, dst, size, true); + bool ret = ggml_vk_buffer_read_2d_async(subctx, src, offset, dst, spitch, dpitch, width, height, true); GGML_ASSERT(ret); ggml_vk_ctx_end(subctx); ggml_vk_submit(subctx, src->device->fence); - VK_CHECK(src->device->device.waitForFences({ src->device->fence }, true, UINT64_MAX), "vk_buffer_read waitForFences"); + VK_CHECK(src->device->device.waitForFences({ src->device->fence }, true, UINT64_MAX), "vk_buffer_read_2d waitForFences"); src->device->device.resetFences({ src->device->fence }); ggml_vk_queue_command_pools_cleanup(src->device); @@ -7036,6 +7067,11 @@ static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_ } } +static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_t size) { + VK_LOG_DEBUG("ggml_vk_buffer_read(" << src->buffer << ", " << offset << ", " << size << ")"); + ggml_vk_buffer_read_2d(src, offset, dst, size, size, size, 1); +} + static void ggml_vk_buffer_copy_async(vk_context& ctx, vk_buffer& dst, size_t dst_offset, vk_buffer& src, size_t src_offset, size_t size) { VK_LOG_DEBUG("ggml_vk_buffer_copy_async(" << size << ")"); // Make sure both buffers are on same device @@ -7067,7 +7103,7 @@ static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& sr // Copy to src staging buffer ggml_vk_buffer_copy(src->device->sync_staging, 0, src, src_offset, size); // Copy to dst buffer - ggml_vk_buffer_write_2d(dst, dst_offset, src->device->sync_staging->ptr, 0, size, 1); + ggml_vk_buffer_write(dst, dst_offset, src->device->sync_staging->ptr, size); } } @@ -13615,6 +13651,20 @@ static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml ggml_vk_buffer_write(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size); } +static void ggml_backend_vk_buffer_set_tensor_2d(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, + size_t size, size_t n_copies, size_t stride_tensor, size_t stride_data) { + VK_LOG_DEBUG("ggml_backend_vk_buffer_set_tensor_2d(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ", " << + n_copies << ", " << stride_tensor << ", " << stride_data << ")"); + ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context; + vk_buffer buf = buf_ctx->dev_buffer; + + if (size == 0) { + return; + } + + ggml_vk_buffer_write_2d(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, stride_data, stride_tensor, size, n_copies); +} + static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { VK_LOG_DEBUG("ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")"); ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context; @@ -13628,6 +13678,21 @@ static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, cons ggml_vk_buffer_read(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size); } +static void ggml_backend_vk_buffer_get_tensor_2d(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, + size_t size, size_t n_copies, size_t stride_tensor, size_t stride_data) { + VK_LOG_DEBUG("ggml_backend_vk_buffer_get_tensor_2d(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ", " << + n_copies << ", " << stride_tensor << ", " << stride_data << ")"); + ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context; + + if (size == 0) { + return; + } + + vk_buffer buf = buf_ctx->dev_buffer; + + ggml_vk_buffer_read_2d(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, stride_tensor, stride_data, size, n_copies); +} + static bool ggml_backend_vk_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { if (ggml_nbytes(src) == 0) { return true; @@ -13662,8 +13727,8 @@ static ggml_backend_buffer_i ggml_backend_vk_buffer_interface = { /* .memset_tensor = */ ggml_backend_vk_buffer_memset_tensor, /* .set_tensor = */ ggml_backend_vk_buffer_set_tensor, /* .get_tensor = */ ggml_backend_vk_buffer_get_tensor, - /* .set_tensor_2d = */ NULL, - /* .get_tensor_2d = */ NULL, + /* .set_tensor_2d = */ ggml_backend_vk_buffer_set_tensor_2d, + /* .get_tensor_2d = */ ggml_backend_vk_buffer_get_tensor_2d, /* .cpy_tensor = */ ggml_backend_vk_buffer_cpy_tensor, /* .clear = */ ggml_backend_vk_buffer_clear, /* .reset = */ NULL, @@ -13819,8 +13884,9 @@ static ggml_backend_buffer_type_t ggml_backend_vk_get_default_buffer_type(ggml_b return &ctx->device->buffer_type; } -static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { - VK_LOG_DEBUG("ggml_backend_vk_set_tensor_async(" << size << ")"); +static void ggml_backend_vk_set_tensor_2d_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, + size_t size, size_t n_copies, size_t stride_tensor, size_t stride_data) { + VK_LOG_DEBUG("ggml_backend_vk_set_tensor_2d_async(" << size << ", " << n_copies << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_get_default_buffer_type(backend) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); @@ -13834,7 +13900,6 @@ static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor if (ctx->device->async_use_transfer_queue) { if (ctx->transfer_ctx.expired()) { - // Initialize new transfer context cpy_ctx = ggml_vk_create_context(ctx, ctx->transfer_cmd_pool); ctx->transfer_ctx = cpy_ctx; ggml_vk_ctx_begin(ctx->device, cpy_ctx); @@ -13849,25 +13914,48 @@ static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor auto dst_offset = vk_tensor_offset(tensor) + tensor->view_offs + offset; - bool ret = ggml_vk_buffer_write_async(cpy_ctx, buf, dst_offset, data, size); + bool ret = ggml_vk_buffer_write_2d_async(cpy_ctx, buf, dst_offset, data, stride_data, stride_tensor, size, n_copies); if (!ret) { - ggml_vk_ensure_sync_staging_buffer(ctx, size); + const size_t staging_size = size * n_copies; + ggml_vk_ensure_sync_staging_buffer(ctx, staging_size); ggml_vk_sync_buffers(nullptr, cpy_ctx); - vk::BufferCopy buffer_cpy; - buffer_cpy.srcOffset = 0; - buffer_cpy.dstOffset = dst_offset; - buffer_cpy.size = size; + std::vector slices(1); + if (size == stride_tensor) { + slices[0].srcOffset = 0; + slices[0].dstOffset = dst_offset; + slices[0].size = staging_size; + } else { + slices.resize(n_copies); + for (size_t i = 0; i < n_copies; i++) { + slices[i].srcOffset = i * size; + slices[i].dstOffset = dst_offset + i * stride_tensor; + slices[i].size = size; + } + } - cpy_ctx->s->buffer->buf.copyBuffer(ctx->sync_staging->buffer, buf->buffer, { buffer_cpy }); - deferred_memcpy(ctx->sync_staging->ptr, data, size, &cpy_ctx->in_memcpys); + cpy_ctx->s->buffer->buf.copyBuffer(ctx->sync_staging->buffer, buf->buffer, slices); + + if (size == stride_data) { + deferred_memcpy(ctx->sync_staging->ptr, data, staging_size, &cpy_ctx->in_memcpys); + } else { + for (size_t i = 0; i < n_copies; i++) { + deferred_memcpy((uint8_t *)ctx->sync_staging->ptr + i * size, (const uint8_t *)data + i * stride_data, size, &cpy_ctx->in_memcpys); + } + } ggml_vk_synchronize(ctx); } } -static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { - VK_LOG_DEBUG("ggml_backend_vk_get_tensor_async(" << size << ")"); +static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { + VK_LOG_DEBUG("ggml_backend_vk_set_tensor_async(" << size << ")"); + ggml_backend_vk_set_tensor_2d_async(backend, tensor, data, offset, size, 1, size, size); +} + +static void ggml_backend_vk_get_tensor_2d_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, + size_t size, size_t n_copies, size_t stride_tensor, size_t stride_data) { + VK_LOG_DEBUG("ggml_backend_vk_get_tensor_2d_async(" << size << ", " << n_copies << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_get_default_buffer_type(backend) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type"); @@ -13882,24 +13970,45 @@ static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_ vk_buffer buf = buf_ctx->dev_buffer; auto src_offset = vk_tensor_offset(tensor) + tensor->view_offs + offset; - bool ret = ggml_vk_buffer_read_async(compute_ctx, buf, src_offset, data, size); + bool ret = ggml_vk_buffer_read_2d_async(compute_ctx, buf, src_offset, data, stride_tensor, stride_data, size, n_copies); - // If that failed, copy synchronously through a staging buffer if (!ret) { - ggml_vk_ensure_sync_staging_buffer(ctx, size); + const size_t staging_size = size * n_copies; + ggml_vk_ensure_sync_staging_buffer(ctx, staging_size); ggml_vk_sync_buffers(nullptr, compute_ctx); - vk::BufferCopy buffer_cpy; - buffer_cpy.srcOffset = src_offset; - buffer_cpy.dstOffset = 0; - buffer_cpy.size = size; + std::vector slices(1); + if (size == stride_tensor) { + slices[0].srcOffset = src_offset; + slices[0].dstOffset = 0; + slices[0].size = staging_size; + } else { + slices.resize(n_copies); + for (size_t i = 0; i < n_copies; i++) { + slices[i].srcOffset = src_offset + i * stride_tensor; + slices[i].dstOffset = i * size; + slices[i].size = size; + } + } - compute_ctx->s->buffer->buf.copyBuffer(buf->buffer, ctx->sync_staging->buffer, { buffer_cpy }); - deferred_memcpy(data, ctx->sync_staging->ptr, size, &compute_ctx->out_memcpys); + compute_ctx->s->buffer->buf.copyBuffer(buf->buffer, ctx->sync_staging->buffer, slices); + + if (size == stride_data) { + deferred_memcpy(data, ctx->sync_staging->ptr, staging_size, &compute_ctx->out_memcpys); + } else { + for (size_t i = 0; i < n_copies; i++) { + deferred_memcpy((uint8_t *)data + i * stride_data, (const uint8_t *)ctx->sync_staging->ptr + i * size, size, &compute_ctx->out_memcpys); + } + } ggml_vk_synchronize(ctx); } } +static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { + VK_LOG_DEBUG("ggml_backend_vk_get_tensor_async(" << size << ")"); + ggml_backend_vk_get_tensor_2d_async(backend, tensor, data, offset, size, 1, size, size); +} + static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) { VK_LOG_DEBUG("ggml_backend_vk_cpy_tensor_async(" << src << " -> " << dst << ", size=" << ggml_nbytes(src) << ")"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend_dst->context; @@ -15123,8 +15232,8 @@ static ggml_backend_i ggml_backend_vk_interface = { /* .free = */ ggml_backend_vk_free, /* .set_tensor_async = */ ggml_backend_vk_set_tensor_async, /* .get_tensor_async = */ ggml_backend_vk_get_tensor_async, - /* .get_tensor_2d_async = */ NULL, - /* .set_tensor_2d_async = */ NULL, + /* .set_tensor_2d_async = */ ggml_backend_vk_set_tensor_2d_async, + /* .get_tensor_2d_async = */ ggml_backend_vk_get_tensor_2d_async, /* .cpy_tensor_async = */ ggml_backend_vk_cpy_tensor_async, /* .synchronize = */ ggml_backend_vk_synchronize, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 5e55a2a1..a1dccfc0 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -3107,8 +3107,8 @@ static ggml_backend_i ggml_backend_webgpu_i = { /* .free = */ ggml_backend_webgpu_free, /* .set_tensor_async = */ ggml_backend_webgpu_set_tensor_async, /* .get_tensor_async = */ NULL, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, /* .synchronize = */ ggml_backend_webgpu_synchronize, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-zdnn/ggml-zdnn.cpp b/ggml/src/ggml-zdnn/ggml-zdnn.cpp index e6b6fc24..639b818d 100644 --- a/ggml/src/ggml-zdnn/ggml-zdnn.cpp +++ b/ggml/src/ggml-zdnn/ggml-zdnn.cpp @@ -423,8 +423,8 @@ static ggml_backend_i ggml_backend_zdnn_i = { /* .free = */ ggml_backend_zdnn_free, /* .set_tensor_async = */ NULL, /* .get_tensor_async = */ NULL, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, /* .synchronize = */ NULL, /* .graph_plan_create = */ NULL, diff --git a/ggml/src/ggml-zendnn/ggml-zendnn.cpp b/ggml/src/ggml-zendnn/ggml-zendnn.cpp index fc1df4db..2b82c7c1 100644 --- a/ggml/src/ggml-zendnn/ggml-zendnn.cpp +++ b/ggml/src/ggml-zendnn/ggml-zendnn.cpp @@ -407,8 +407,8 @@ static struct ggml_backend_i ggml_backend_zendnn_i = { /* .free = */ ggml_backend_zendnn_free, /* .set_tensor_async = */ NULL, /* .get_tensor_async = */ NULL, - /* .get_tensor_2d_async = */ NULL, /* .set_tensor_2d_async = */ NULL, + /* .get_tensor_2d_async = */ NULL, /* .cpy_tensor_async = */ NULL, /* .synchronize = */ NULL, /* .graph_plan_create = */ NULL,