From a2839b4404de473bc7af127b7b308d530afda024 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Tue, 12 May 2026 07:59:24 +0200 Subject: [PATCH] ggml-cuda : add explicit casts to -INFINITY for float and half2 types This commit adds explicit casts to float for -INFINITY. The motivation for this is that in CUDA 11.8.0, the -INFINITY macro is defined as a double (in a header provided by NVCC). This triggers a warning and hence causes a CI failure in whisper.cpp. I believe that this header might have been updated in CUDA 12, which is why we don't see this warning there. Refs: https://github.com/ggml-org/whisper.cpp/actions/runs/25713948217/job/75500081939?pr=3803 Refs: https://github.com/ggml-org/llama.cpp/issues/22824 --- ggml/src/ggml-cuda/common.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 10817505d..246a76193 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -582,9 +582,9 @@ template struct block_reduce_policy { static __device__ T sentinel() { if constexpr (std::is_same_v) { - return -INFINITY; + return -(float)INFINITY; } else if constexpr (std::is_same_v) { - return make_half2(-INFINITY, -INFINITY); + return make_half2(__float2half(-(float)INFINITY), __float2half(-(float)INFINITY)); } else { static_assert(ggml_cuda_dependent_false_v, "Unsupported type for block reduce max"); }