ggml-webgpu: Fix vectorized handling in mul-mat and mul-mat-id (llama/22578)

* Fix vectorized condition of mul-mat-fast pipeline and add vectorized variant to mul-mat-id

* Apply suggestion from @CISC

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
Masashi Yoshimura 2026-05-01 23:55:01 +09:00 committed by Georgi Gerganov
parent 95053f68e4
commit 9623c1203b
1 changed file with 9 additions and 6 deletions

View File

@@ -1779,12 +1779,12 @@ class ggml_webgpu_shader_lib {
webgpu_pipeline get_mul_mat_fast_pipeline(const ggml_webgpu_shader_lib_context & context) {
ggml_webgpu_mul_mat_pipeline_key key = {};
key.src0_type = context.src0->type;
key.src1_type = context.src1->type;
key.vectorized = (context.src0->ne[0] % 4 == 0 && context.dst->ne[0] % 4 == 0 && context.dst->ne[1] % 4 == 0 &&
(context.src0->type == GGML_TYPE_F32 || context.src0->type == GGML_TYPE_F16)) ?
1 :
0;
key.src0_type = context.src0->type;
key.src1_type = context.src1->type;
key.vectorized = (context.src0->ne[0] % 4 == 0 && context.dst->ne[0] % 4 == 0 &&
(context.src0->type == GGML_TYPE_F32 || context.src0->type == GGML_TYPE_F16)) ?
1 :
0;
key.use_subgroup_matrix = context.supports_subgroup_matrix;
auto it = mul_mat_fast_pipelines.find(key);
@@ -2143,6 +2143,9 @@ class ggml_webgpu_shader_lib {
// variant suffix for src1 type
variant += std::string("_") + (context.src1->type == GGML_TYPE_F32 ? "f32" : "f16");
if (key.vectorized) {
variant += "_vectorized";
}
auto processed = preprocessor.preprocess(wgsl_mul_mat_id, defines);