Corrected: s13 is now computed from src1->nb[3] instead of src1->nb[2] (llama/18724)
This commit is contained in:
parent
4af27bf2da
commit
45be2cd27a
|
|
@@ -190,7 +190,7 @@ void ggml_cuda_mul_mat_q(
|
|||
{
|
||||
const int64_t s11 = src1->nb[1] / ts_src1;
|
||||
const int64_t s12 = src1->nb[2] / ts_src1;
|
||||
- const int64_t s13 = src1->nb[2] / ts_src1;
|
||||
+ const int64_t s13 = src1->nb[3] / ts_src1;
|
||||
|
||||
if (use_native_mxfp4) {
|
||||
quantize_mmq_mxfp4_cuda(src1_d, ids_src1.get(), src1_q8_1.get(), src0->type, ne10, s11, s12, s13,
|
||||
|
|
|
|||
Loading…
Reference in New Issue