From 9d6e561f692b9b6353a33fa63e8b8a6998a41cb1 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 4 Jun 2026 08:05:32 +0300 Subject: [PATCH] metal : reduce rset heartbeat from 500ms -> 5ms (llama/24074) --- ggml/src/ggml-metal/ggml-metal-device.m | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 196af1026..05d7f4305 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -547,6 +547,8 @@ struct ggml_metal_rsets { // number of seconds since the last graph computation // keep the residency sets wired for that amount of time to avoid being collected by the OS int keep_alive_s; + int loops_per_s; + int time_per_loop_ms; // background heartbeat thread to keep the residency sets alive atomic_bool d_stop; @@ -573,10 +575,13 @@ ggml_metal_rsets_t ggml_metal_rsets_init(void) { res->keep_alive_s = 3*60; } + res->time_per_loop_ms = 5; + res->loops_per_s = 1000/res->time_per_loop_ms; + GGML_LOG_INFO("%s: creating a residency set collection (keep_alive = %d s)\n", __func__, res->keep_alive_s); atomic_store_explicit(&res->d_stop, false, memory_order_relaxed); - atomic_store_explicit(&res->d_loop, 2*res->keep_alive_s, memory_order_relaxed); + atomic_store_explicit(&res->d_loop, res->loops_per_s*res->keep_alive_s, memory_order_relaxed); res->d_group = dispatch_group_create(); @@ -599,8 +604,7 @@ ggml_metal_rsets_t ggml_metal_rsets_init(void) { [res->lock unlock]; } - // half a second - usleep(500 * 1000); + usleep(res->time_per_loop_ms * 1000); } } #endif @@ -979,7 +983,7 @@ void ggml_metal_device_rsets_keep_alive(ggml_metal_device_t dev) { return; } - atomic_store_explicit(&dev->rsets->d_loop, 2*dev->rsets->keep_alive_s, memory_order_relaxed); + atomic_store_explicit(&dev->rsets->d_loop, dev->rsets->loops_per_s*dev->rsets->keep_alive_s, memory_order_relaxed); } struct ggml_metal_event {