From b0c631cfb692480716fba2941eea22ad7cd8664f Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Tue, 15 Jul 2025 04:01:25 -0700 Subject: [PATCH] Templating to differentiate the block_q4_0 --- ggml/src/ggml-cpu/repack.cpp | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index 396cc3056..c77c330d6 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -1419,7 +1419,14 @@ template type) { case GGML_TYPE_Q4_0: { - ggml_compute_forward_get_rows_q4_0x8(params, dst); + if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) { + if (src0->ne[1] % 8 == 0) { + ggml_compute_forward_get_rows_q4_0(params, dst, 8); + } + } else { + GGML_ABORT("Unsupported block interleaved size for get_rows function"); + } + } break; default: GGML_ABORT("fatal error"); @@ -1427,9 +1434,11 @@ template + static void ggml_compute_forward_get_rows_q4_0( const ggml_compute_params * params, - ggml_tensor * dst) { + ggml_tensor * dst, + int nrows_interleaved) { const ggml_tensor * src0 = dst->src[0]; const ggml_tensor * src1 = dst->src[1]; @@ -1453,8 +1462,7 @@ template data + i11 * nb02 + i12 * nb03; - // Pointer to the first block_q4_0x8 of the identified row_group_idx - const block_q4_0x8 * p_first_repacked_block_of_group_x8 = (const block_q4_0x8 *)(base_ptr_for_higher_dims_in_src0 + row_group_idx * stride_between_actual_row_groups); + // Pointer to the first BLOCK_TYPE block of the identified row_group_idx + const BLOCK_TYPE * p_first_repacked_block_of_group_block_type = (const BLOCK_TYPE *)(base_ptr_for_higher_dims_in_src0 + row_group_idx * stride_between_actual_row_groups); - dequantize_row_q4_0x8( - p_first_repacked_block_of_group_x8, + dequantize_row_q4_0( + p_first_repacked_block_of_group_block_type, (float *)((char *)dst->data + i10 * nb1 + i11 * nb2 + i12 * nb3), nc, row_idx_in_group); } } @@ -1490,8 +1498,9 @@ template 
+ static void dequantize_row_q4_0( + const BLOCK_TYPE * GGML_RESTRICT p_repacked_group_column_blocks, float * GGML_RESTRICT y, int64_t k, int row_idx_in_group) {