ggml-cpu : fix rms_norm_back wrong output under in-place aliasing (llama/24305)
* ggml-cpu : fix rms_norm_back wrong output under in-place aliasing * cont : clean-up comment --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
72894aa250
commit
2d68a3066f
|
|
@ -4008,12 +4008,12 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
|||
// dx := scale(dx, rrms)
|
||||
float * dx = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
||||
|
||||
// dx[i00] = (x*(-sum_xdz/sum_eps) + dz) / sqrtf(mean_eps)
|
||||
ggml_vec_cpy_f32 (ne00, dx, x);
|
||||
// ggml_vec_scale_f32(ne00, dx, -mean_xdz/mean_eps);
|
||||
ggml_vec_scale_f32(ne00, dx, (float)(-sum_xdz)/sum_eps);
|
||||
ggml_vec_acc_f32 (ne00, dx, dz);
|
||||
ggml_vec_scale_f32(ne00, dx, rrms);
|
||||
// dx[i00] = (dz + x*(-sum_xdz/sum_eps)) * rrms
|
||||
// note: https://github.com/ggml-org/ggml/issues/1491
|
||||
const float scale_x = (float) (-sum_xdz) / sum_eps;
|
||||
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
||||
dx[i00] = (dz[i00] + x[i00] * scale_x) * rrms;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue