ggml-webgpu: Fix vectorized handling in mul-mat and mul-mat-id (llama/22578)

* Fix vectorized condition of mul-mat-fast pipeline and add vectorized variant to mul-mat-id * Apply suggestion from @CISC Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
2026-05-01 23:55:01 +09:00 · 2026-05-01 23:55:01 +09:00 · 9623c1203b
parent 95053f68e4
commit 9623c1203b
1 changed files with 9 additions and 6 deletions
--- a/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp
+++ b/ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp
@ -1779,12 +1779,12 @@ class ggml_webgpu_shader_lib {

    webgpu_pipeline get_mul_mat_fast_pipeline(const ggml_webgpu_shader_lib_context & context) {
        ggml_webgpu_mul_mat_pipeline_key key = {};
-        key.src0_type                        = context.src0->type;
-        key.src1_type                        = context.src1->type;
-        key.vectorized = (context.src0->ne[0] % 4 == 0 && context.dst->ne[0] % 4 == 0 && context.dst->ne[1] % 4 == 0 &&
-                          (context.src0->type == GGML_TYPE_F32 || context.src0->type == GGML_TYPE_F16)) ?
-                             1 :
-                             0;
+        key.src0_type           = context.src0->type;
+        key.src1_type           = context.src1->type;
+        key.vectorized          = (context.src0->ne[0] % 4 == 0 && context.dst->ne[0] % 4 == 0 &&
+                                   (context.src0->type == GGML_TYPE_F32 || context.src0->type == GGML_TYPE_F16)) ?
+                                      1 :
+                                      0;
        key.use_subgroup_matrix = context.supports_subgroup_matrix;

        auto it = mul_mat_fast_pipelines.find(key);
@ -2143,6 +2143,9 @@ class ggml_webgpu_shader_lib {

        // variant suffix for src1 type
        variant += std::string("_") + (context.src1->type == GGML_TYPE_F32 ? "f32" : "f16");
+        if (key.vectorized) {
+            variant += "_vectorized";
+        }

        auto processed = preprocessor.preprocess(wgsl_mul_mat_id, defines);