From 45be2cd27a02f0fbe3354e9d07efc403b97da341 Mon Sep 17 00:00:00 2001
From: Michael Wand <michael@baybridgeaquarium.com>
Date: Sat, 10 Jan 2026 01:16:07 -0800
Subject: [PATCH] Corrected: changed s13 = src1->nb[3] instead of nb[2]
 (llama/18724)

---
 ggml/src/ggml-cuda/mmq.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu
index ceb95758..c9aa7024 100644
--- a/ggml/src/ggml-cuda/mmq.cu
+++ b/ggml/src/ggml-cuda/mmq.cu
@@ -190,7 +190,7 @@ void ggml_cuda_mul_mat_q(
     {
         const int64_t s11 = src1->nb[1] / ts_src1;
         const int64_t s12 = src1->nb[2] / ts_src1;
-        const int64_t s13 = src1->nb[2] / ts_src1;
+        const int64_t s13 = src1->nb[3] / ts_src1;
 
         if (use_native_mxfp4) {
             quantize_mmq_mxfp4_cuda(src1_d, ids_src1.get(), src1_q8_1.get(), src0->type, ne10, s11, s12, s13,