Fix for issue #22974: cast both operands to float before adding them, then cast the sum to the destination type. This avoids the half + half operator ambiguity. (llama/22994)

This commit is contained in:
scutler-nv 2026-05-13 13:36:14 -07:00 committed by Georgi Gerganov
parent d4a4d87f0e
commit 97371e9285
1 changed file with 7 additions and 4 deletions

View File

@@ -184,13 +184,15 @@ static __global__ void ggml_cuda_ar_kernel(
#pragma unroll
for (int k = 0; k < ELEMS_PER_VEC; ++k) {
const T_wire d_low = ggml_cuda_cast<T_wire>(sendbuf[off + k]);
-recvbuf[off + k] = ggml_cuda_cast<T_dst>(d_low) + ggml_cuda_cast<T_dst>(wire[k]);
+recvbuf[off + k] = ggml_cuda_cast<T_dst>(
+ggml_cuda_cast<float>(d_low) + ggml_cuda_cast<float>(wire[k]));
}
}
if (bid == 0 && tid < count - tail) {
const T_wire d_low = ggml_cuda_cast<T_wire>(sendbuf[tail + tid]);
-recvbuf[tail + tid] =
-ggml_cuda_cast<T_dst>(d_low) + ggml_cuda_cast<T_dst>(host_other[tail + tid]);
+recvbuf[tail + tid] = ggml_cuda_cast<T_dst>(
+ggml_cuda_cast<float>(d_low) +
+ggml_cuda_cast<float>(host_other[tail + tid]));
}
}
}
@@ -210,7 +212,8 @@ static __global__ void ggml_cuda_ar_add_kernel(
const int nt = gridDim.x * blockDim.x;
for (int i = tid; i < count; i += nt) {
const T_src d_low = ggml_cuda_cast<T_src>(dst[i]);
-dst[i] = ggml_cuda_cast<T_dst>(d_low) + ggml_cuda_cast<T_dst>(src[i]);
+dst[i] = ggml_cuda_cast<T_dst>(
+ggml_cuda_cast<float>(d_low) + ggml_cuda_cast<float>(src[i]));
}
}