cuda : fix "V is K view" check for non-unified KV cache (llama/19145)

2026-01-28 09:15:27 +02:00 · 2026-01-28 09:15:27 +02:00 · 7fb0f823de
parent f28a733025
commit 7fb0f823de
2 changed files with 2 additions and 2 deletions
--- a/ggml/src/ggml-cuda/fattn-common.cuh
+++ b/ggml/src/ggml-cuda/fattn-common.cuh
@@ -789,7 +789,7 @@ void launch_fattn(
    const ggml_tensor * K = dst->src[1];
    const ggml_tensor * V = dst->src[2];

-    const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
+    const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));

    const ggml_tensor * mask  = dst->src[3];
    const ggml_tensor * sinks = dst->src[4];
--- a/ggml/src/ggml-cuda/fattn.cu
+++ b/ggml/src/ggml-cuda/fattn.cu
@@ -310,7 +310,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
        }
    }

-    const bool V_is_K_view = V->view_src && V->view_offs == 0 && (V->view_src == K || V->view_src == K->view_src);
+    const bool V_is_K_view = V->view_src && (V->view_src == K || (V->view_src == K->view_src && V->view_offs == K->view_offs));

    const int cc = ggml_cuda_info().devices[device].cc;