HIP: fix RDNA3 FP16/BF16 matrix multiplication (llama/17817)

This commit is contained in:
Johannes Gäßler 2025-12-06 13:45:36 +01:00 committed by Georgi Gerganov
parent b67e3abdb2
commit 94be71911f
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
1 changed file with 2 additions and 2 deletions

View File

@@ -160,9 +160,9 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const
 case GGML_TYPE_F32:
 return ampere_mma_available(cc);
 case GGML_TYPE_F16:
-return volta_mma_available(cc) || turing_mma_available(cc) || amd_wmma_available(cc);
+return volta_mma_available(cc) || turing_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc));
 case GGML_TYPE_BF16:
-return ampere_mma_available(cc) || amd_wmma_available(cc);
+return ampere_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc));
 default:
 return false;
 }