docs: more extensive RoPE documentation [no ci] (llama/21953)

* more extensive ggml_rope documentation

* add more docs

* nits
This commit is contained in:
Xuan-Son Nguyen 2026-04-15 14:45:16 +02:00 committed by Georgi Gerganov
parent 24cc89e477
commit 86d94cd95b
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
1 changed files with 55 additions and 1 deletions

View File

@ -1773,8 +1773,32 @@ extern "C" {
int n_dims,
int mode);
// custom RoPE
// RoPE operations with extended options
// a is the input tensor to apply RoPE to, shape [n_embd, n_head, n_token]
// b is an int32 vector with size n_token
// c is freq factors (e.g. phi3-128k), (optional)
// mode can be GGML_ROPE_TYPE_NORMAL or NEOX; for MROPE and VISION mode, use ggml_rope_multi
//
// pseudo-code for computing theta:
// for i in [0, n_dims/2):
// theta[i] = b[i] * powf(freq_base, -2.0 * i / n_dims);
// theta[i] = theta[i] / c[i]; # if c is provided, divide theta by c
// theta[i] = rope_yarn(theta[i], ...); # note: theta = theta * freq_scale is applied here
//
// other params are used by YaRN RoPE scaling, these default values will disable YaRN:
// freq_scale = 1.0f
// ext_factor = 0.0f
// attn_factor = 1.0f
// beta_fast = 0.0f
// beta_slow = 0.0f
//
// example:
// (marking: c = cos, s = sin, 0 = unrotated)
// given a single head with size = 8 --> [00000000]
// GGML_ROPE_TYPE_NORMAL n_dims = 4 --> [cscs0000]
// GGML_ROPE_TYPE_NORMAL n_dims = 8 --> [cscscscs]
// GGML_ROPE_TYPE_NEOX n_dims = 4 --> [ccss0000]
// GGML_ROPE_TYPE_NEOX n_dims = 8 --> [ccccssss]
GGML_API struct ggml_tensor * ggml_rope_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
@ -1790,6 +1814,36 @@ extern "C" {
float beta_fast,
float beta_slow);
// multi-dimensional RoPE, for Qwen-VL and similar vision models
// mode can be either VISION, MROPE, IMROPE, cannot be combined with NORMAL or NEOX
// sections specify how many dimensions to rotate in each section:
// section length is equivalent to number of cos/sin pairs, NOT the number of dims
// (i.e. sum of 4 sections are expected to be n_dims/2)
// last sections can be 0, means ignored
// all other options are identical to ggml_rope_ext
//
// important note:
// - NEOX ordering is automatically applied and cannot be disabled for MROPE and VISION
// if you need normal ordering, there are 2 methods:
// (1) split the tensor manually using ggml_view
// (2) permute the weight upon conversion
// - for VISION, n_dims must be head_size/2
//
// example M-RoPE:
// given sections = [t=4, y=2, x=2, 0]
// given a single head with size = 18 --> [000000000000000000]
// GGML_ROPE_TYPE_MROPE n_dims = 16 --> [ttttyyxxttttyyxx00] (cos/sin are applied in NEOX ordering)
// GGML_ROPE_TYPE_IMROPE n_dims = 16 --> [ttyxttyxttyxttyx00] (interleaved M-RoPE, still NEOX ordering)
// note: the theta for each dim is computed the same way as ggml_rope_ext, no matter the section
// in other words, idx used for theta: [0123456789... until n_dims/2], not reset for each section
//
// example vision RoPE:
// given sections = [y=4, x=4, 0, 0] (last 2 sections are ignored)
// given a single head with size = 8 --> [00000000]
// GGML_ROPE_TYPE_VISION n_dims = 4 --> [yyyyxxxx]
// other values of n_dims are untested and is undefined behavior
// note: unlike MROPE, the theta for each dim is computed differently for each section
// in other words, idx used for theta: [0123] for y section, then [0123] for x section
GGML_API struct ggml_tensor * ggml_rope_multi(
struct ggml_context * ctx,
struct ggml_tensor * a,