vulkan: reduce host memory lock contention (llama/23376)

* vulkan: reduces lock contention

* replace unique_lock with lock_guard
This commit is contained in:
Winston Ma 2026-06-01 20:03:32 +08:00 committed by Georgi Gerganov
parent e815b264eb
commit c471bcce1b
1 changed file with 5 additions and 3 deletions

View File

@@ -62,6 +62,7 @@ typedef struct VkPhysicalDeviceCooperativeMatrixDecodeVectorFeaturesNV {
#include <map>
#include <set>
#include <unordered_map>
+#include <shared_mutex>
#include <mutex>
#include <future>
#include <thread>
@@ -618,6 +619,7 @@ static constexpr std::initializer_list<std::array<int, 3>> rms_norm_mul_rope_vie
struct vk_device_struct {
std::recursive_mutex mutex;
+mutable std::shared_mutex pinned_memory_mutex;
vk::PhysicalDevice physical_device;
vk::PhysicalDeviceProperties properties;
@@ -7010,7 +7012,7 @@ static void * ggml_vk_host_malloc(vk_device& device, size_t size) {
return nullptr;
}
-std::lock_guard<std::recursive_mutex> guard(device->mutex);
+std::lock_guard<std::shared_mutex> guard(device->pinned_memory_mutex);
device->pinned_memory.push_back(std::make_tuple(buf->ptr, size, buf));
return buf->ptr;
@@ -7021,7 +7023,7 @@ static void ggml_vk_host_free(vk_device& device, void* ptr) {
return;
}
VK_LOG_MEMORY("ggml_vk_host_free(" << ptr << ")");
-std::lock_guard<std::recursive_mutex> guard(device->mutex);
+std::lock_guard<std::shared_mutex> guard(device->pinned_memory_mutex);
vk_buffer buf;
size_t index;
@@ -7045,7 +7047,7 @@ static void ggml_vk_host_free(vk_device& device, void* ptr) {
}
static void ggml_vk_host_get(const vk_device& device, const void * ptr, vk_buffer& buf, size_t& buf_offset) {
-std::lock_guard<std::recursive_mutex> guard(device->mutex);
+std::shared_lock<std::shared_mutex> guard(device->pinned_memory_mutex);
buf = nullptr;
buf_offset = 0;
for (size_t i = 0; i < device->pinned_memory.size(); i++) {