From 642a88013568d949f8c0c5ca8e324ce9bc05c176 Mon Sep 17 00:00:00 2001
From: shaihi <shaihi@icloud.com>
Date: Mon, 9 Mar 2026 10:21:15 +0200
Subject: [PATCH] metal: increase main-thread graph chunk to reduce CB overhead

---
 ggml/src/ggml-metal/ggml-metal-context.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-metal/ggml-metal-context.m b/ggml/src/ggml-metal/ggml-metal-context.m
index 5d3a8ce41..3b836f36a 100644
--- a/ggml/src/ggml-metal/ggml-metal-context.m
+++ b/ggml/src/ggml-metal/ggml-metal-context.m
@@ -415,7 +415,7 @@ bool ggml_metal_cpy_tensor_async(ggml_metal_t ctx_src, ggml_metal_t ctx_dst, con
 
 enum ggml_status ggml_metal_graph_compute(ggml_metal_t ctx, struct ggml_cgraph * gf) {
     // number of nodes encoded by the main thread (empirically determined)
-    const int n_main = MAX(64, 0.1*gf->n_nodes);
+    const int n_main = MAX(64, 0.2*gf->n_nodes);
 
     // number of threads in addition to the main thread
     const int n_cb = ctx->n_cb;