ggml-cuda: tune RDNA3 Q6_K MMVQ nwarps (llama/23349)
This commit is contained in:
parent
37f17208c2
commit
0a0a34287e
|
|
@@ -359,7 +359,9 @@ static constexpr __host__ __device__ int calc_nwarps(ggml_type type, int ncols_d
|
|||
case GGML_TYPE_Q5_1:
|
||||
case GGML_TYPE_Q8_0:
|
||||
case GGML_TYPE_Q4_K:
|
||||
return 8;
|
||||
case GGML_TYPE_Q6_K:
|
||||
return 2;
|
||||
case GGML_TYPE_IQ4_NL:
|
||||
return 8;
|
||||
default:
|
||||
|
|
|
|||
Loading…
Reference in New Issue