From 2d68a3066f601e95f189dd8468b3e9fe73ac445e Mon Sep 17 00:00:00 2001
From: Yash Raj Pandey <55940078+devYRPauli@users.noreply.github.com>
Date: Tue, 9 Jun 2026 03:24:27 -0400
Subject: [PATCH] ggml-cpu : fix rms_norm_back wrong output under in-place aliasing (llama/24305)

* ggml-cpu : fix rms_norm_back wrong output under in-place aliasing

* cont : clean-up comment

---------

Co-authored-by: Georgi Gerganov
---
 ggml/src/ggml-cpu/ops.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index 3a1912ae9..becac9d6e 100644
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -4008,12 +4008,12 @@ static void ggml_compute_forward_rms_norm_back_f32(
                 // dx := scale(dx, rrms)
                 float * dx = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
 
-                // dx[i00] = (x*(-sum_xdz/sum_eps) + dz) / sqrtf(mean_eps)
-                ggml_vec_cpy_f32 (ne00, dx, x);
-                // ggml_vec_scale_f32(ne00, dx, -mean_xdz/mean_eps);
-                ggml_vec_scale_f32(ne00, dx, (float)(-sum_xdz)/sum_eps);
-                ggml_vec_acc_f32 (ne00, dx, dz);
-                ggml_vec_scale_f32(ne00, dx, rrms);
+                // dx[i00] = (dz + x*(-sum_xdz/sum_eps)) * rrms
+                // note: https://github.com/ggml-org/ggml/issues/1491
+                const float scale_x = (float) (-sum_xdz) / sum_eps;
+                for (int64_t i00 = 0; i00 < ne00; i00++) {
+                    dx[i00] = (dz[i00] + x[i00] * scale_x) * rrms;
+                }
             }
         }
     }