vulkan: fix l2_norm epsilon handling (llama/20350)

This commit is contained in:
Jeff Bolz 2026-03-12 00:39:41 -05:00 committed by Georgi Gerganov
parent 6c5e3aac3e
commit 86e312d61d
2 changed files with 2 additions and 2 deletions

View File

@@ -16061,7 +16061,7 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph *
tensor_clone = ggml_arange(ggml_ctx, start, stop, step);
} else if (tensor->op == GGML_OP_FILL) {
const float value = ggml_get_op_params_f32(tensor, 0);
tensor_clone = ggml_fill(ggml_ctx, tensor_clone, value);
tensor_clone = ggml_fill(ggml_ctx, src_clone[0], value);
} else if (tensor->op == GGML_OP_SQR) {
tensor_clone = ggml_sqr(ggml_ctx, src_clone[0]);
} else if (tensor->op == GGML_OP_SQRT) {

View File

@@ -36,7 +36,7 @@ void main() {
barrier();
}
const FLOAT_TYPE scale = inversesqrt(max(sum[0], FLOAT_TYPE(p.param1)));
const FLOAT_TYPE scale = 1.0f / max(sqrt(sum[0]), FLOAT_TYPE(p.param1));
[[unroll]] for (uint i0 = tid; i0 < p.ne00; i0 += BLOCK_SIZE) {
data_d[i3*p.nb13 + i2*p.nb12 + i1*p.nb11 + i0] = D_TYPE(scale * FLOAT_TYPE(data_a[i3*p.nb03 + i2*p.nb02 + i1*p.nb01 + i0]));