diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 10817505d..246a76193 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -582,9 +582,9 @@ template struct block_reduce_policy { static __device__ T sentinel() { if constexpr (std::is_same_v) { - return -INFINITY; + return -(float)INFINITY; } else if constexpr (std::is_same_v) { - return make_half2(-INFINITY, -INFINITY); + return make_half2(__float2half(-(float)INFINITY), __float2half(-(float)INFINITY)); } else { static_assert(ggml_cuda_dependent_false_v, "Unsupported type for block reduce max"); }