@@ -308,6 +308,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,

     LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_DIMENSION_SECTIONS,
     LLM_KV_ROPE_FREQ_BASE,
     LLM_KV_ROPE_SCALE_LINEAR,
     LLM_KV_ROPE_SCALING_TYPE,
@@ -424,6 +425,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE,            "%s.attention.scale" },

     { LLM_KV_ROPE_DIMENSION_COUNT,       "%s.rope.dimension_count" },
+    { LLM_KV_ROPE_DIMENSION_SECTIONS,    "%s.rope.dimension_sections" },
     { LLM_KV_ROPE_FREQ_BASE,             "%s.rope.freq_base" },
     { LLM_KV_ROPE_SCALE_LINEAR,          "%s.rope.scale_linear" },
     { LLM_KV_ROPE_SCALING_TYPE,          "%s.rope.scaling.type" },
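The "%s" in these key templates is filled in with the architecture name when the metadata key is built, so a converted Qwen2-VL GGUF would carry the new sections under a key like the one printed below. A minimal sketch, assuming the architecture string is "qwen2vl"; the stored values would mirror the previously hard-coded {16, 24, 24, 0}:

#include <cstdio>

int main() {
    char key[128];
    // llama.cpp substitutes the architecture name into the "%s" template;
    // "qwen2vl" is assumed here as the Qwen2-VL architecture string.
    std::snprintf(key, sizeof(key), "%s.rope.dimension_sections", "qwen2vl");
    std::printf("%s\n", key); // -> qwen2vl.rope.dimension_sections
    return 0;
}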
@@ -2407,11 +2409,12 @@ struct llama_hparams {
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;

-    float rope_attn_factor = 1.0f;
-    float rope_freq_base_train;
-    float rope_freq_scale_train;
-    uint32_t n_ctx_orig_yarn;
-    float rope_yarn_log_mul;
+    float    rope_attn_factor = 1.0f;
+    float    rope_freq_base_train;
+    float    rope_freq_scale_train;
+    uint32_t n_ctx_orig_yarn;
+    float    rope_yarn_log_mul;
+    std::array<uint32_t, 4> rope_mrope_sections;

     // for State Space Models
     uint32_t ssm_d_conv = 0;
@@ -2466,8 +2469,9 @@ struct llama_hparams {
         if (this->n_ff_shexp      != other.n_ff_shexp)      return true;
         if (this->n_expert_shared != other.n_expert_shared) return true;

-        if (this->rope_finetuned  != other.rope_finetuned)  return true;
-        if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
+        if (this->rope_finetuned      != other.rope_finetuned)      return true;
+        if (this->n_ctx_orig_yarn     != other.n_ctx_orig_yarn)     return true;
+        if (this->rope_mrope_sections != other.rope_mrope_sections) return true;

         if (this->ssm_d_conv  != other.ssm_d_conv)  return true;
         if (this->ssm_d_inner != other.ssm_d_inner) return true;
@@ -5620,8 +5624,12 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_QWEN2:
         case LLM_ARCH_QWEN2VL:
+            {
+                std::fill(hparams.rope_mrope_sections.begin(), hparams.rope_mrope_sections.end(), 0);
+                ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_mrope_sections, 4, true);
+            }
+        case LLM_ARCH_QWEN2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
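In the hunk above, LLM_ARCH_QWEN2VL zero-fills the four-entry array, reads rope.dimension_sections from the GGUF (the trailing true presumably marks the key as required), and then, since there is no break, falls through into the shared LLM_ARCH_QWEN2 handling. A self-contained sketch of the intended fill-then-overwrite behavior, not the llama_model_loader API:

#include <algorithm>
#include <array>
#include <cstdint>
#include <vector>

// Pre-zero the fixed-size array, then copy however many section values the
// metadata provides (at most 4), so missing trailing entries stay 0.
static void load_sections(const std::vector<uint32_t> & from_gguf,
                          std::array<uint32_t, 4> & sections) {
    std::fill(sections.begin(), sections.end(), 0);
    std::copy_n(from_gguf.begin(),
                std::min<size_t>(from_gguf.size(), sections.size()),
                sections.begin());
}

int main() {
    std::array<uint32_t, 4> sections{};
    load_sections({16, 24, 24}, sections); // a Qwen2-VL style entry with three values
    // sections is now {16, 24, 24, 0}
    return 0;
}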
@@ -12398,7 +12406,7 @@ struct llm_build_context {

         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
-        int sections[4] = {16, 24, 24, 0}; // TODO: move this into gguf model file.
+        int * sections = (int *)hparams.rope_mrope_sections.data();

         for (int il = 0; il < n_layer; ++il) {
             struct ggml_tensor * inpSA = inpL;
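The sections pointer above replaces the hard-coded {16, 24, 24, 0} and is what the Qwen2-VL graph hands to the multi-section (M-RoPE) rope op. A rough standalone illustration of how such a sections array is interpreted, not the ggml kernel itself: the entries give the number of rotary dimension pairs assigned to the temporal, height, width, and spare position channels in that order (an assumption based on Qwen2-VL's M-RoPE; the exact mapping lives in ggml):

#include <array>
#include <cstdio>
#include <initializer_list>

// Return which position channel (0 = temporal, 1 = height, 2 = width, 3 = spare)
// a given rotary dimension pair falls into, walking the sections cumulatively.
static int channel_for_pair(const std::array<int, 4> & sections, int pair_idx) {
    int acc = 0;
    for (int c = 0; c < 4; ++c) {
        acc += sections[c];
        if (pair_idx < acc) {
            return c;
        }
    }
    return 0; // past the covered range: fall back to the temporal channel
}

int main() {
    const std::array<int, 4> sections = {16, 24, 24, 0}; // the old hard-coded default
    // With a 128-dim head there are 64 rotary pairs:
    // pairs 0..15 -> temporal, 16..39 -> height, 40..63 -> width.
    for (int pair : {0, 15, 16, 39, 40, 63}) {
        std::printf("pair %2d -> channel %d\n", pair, channel_for_pair(sections, pair));
    }
    return 0;
}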