@@ -310,6 +310,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_DIMENSION_SECTIONS,
     LLM_KV_ROPE_FREQ_BASE,
     LLM_KV_ROPE_SCALE_LINEAR,
     LLM_KV_ROPE_SCALING_TYPE,
@@ -426,6 +427,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
 
     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
+    { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
     { LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
     { LLM_KV_ROPE_SCALE_LINEAR, "%s.rope.scale_linear" },
     { LLM_KV_ROPE_SCALING_TYPE, "%s.rope.scaling.type" },
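
Note (not part of the patch): the "%s" prefix in LLM_KV_NAMES is formatted with the architecture name, so -- assuming the Qwen2-VL architecture string is "qwen2vl" -- the GGUF metadata key resolves roughly as in this minimal sketch:

#include <cstdio>

int main() {
    // Hypothetical illustration: substitute the arch name into the format
    // string the same way the llm_kv name lookup does for the other rope keys.
    char key[128];
    snprintf(key, sizeof(key), "%s.rope.dimension_sections", "qwen2vl");
    printf("%s\n", key); // prints: qwen2vl.rope.dimension_sections
    return 0;
}
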
@@ -2429,11 +2431,12 @@ struct llama_hparams {
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;
 
-    float rope_attn_factor = 1.0f;
-    float rope_freq_base_train;
-    float rope_freq_scale_train;
-    uint32_t n_ctx_orig_yarn;
-    float rope_yarn_log_mul;
+    float    rope_attn_factor = 1.0f;
+    float    rope_freq_base_train;
+    float    rope_freq_scale_train;
+    uint32_t n_ctx_orig_yarn;
+    float    rope_yarn_log_mul;
+    std::array<uint32_t, 4> rope_mrope_sections;
 
     // for State Space Models
     uint32_t ssm_d_conv = 0;
@@ -2488,8 +2491,9 @@ struct llama_hparams {
         if (this->n_ff_shexp != other.n_ff_shexp) return true;
         if (this->n_expert_shared != other.n_expert_shared) return true;
 
-        if (this->rope_finetuned  != other.rope_finetuned)  return true;
-        if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
+        if (this->rope_finetuned      != other.rope_finetuned)      return true;
+        if (this->n_ctx_orig_yarn     != other.n_ctx_orig_yarn)     return true;
+        if (this->rope_mrope_sections != other.rope_mrope_sections) return true;
 
         if (this->ssm_d_conv != other.ssm_d_conv) return true;
         if (this->ssm_d_inner != other.ssm_d_inner) return true;
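
Note (not part of the patch): the new inequality check relies on std::array's element-wise comparison operators, so any single differing section size marks the hparams as changed -- a minimal stand-alone sketch:

#include <array>
#include <cassert>
#include <cstdint>

int main() {
    // Example values taken from the hardcoded table this patch removes below.
    std::array<uint32_t, 4> a = {16, 24, 24, 0};
    std::array<uint32_t, 4> b = a;
    assert(a == b);  // element-wise equality
    b[3] = 1;        // change a single section size
    assert(a != b);  // the whole comparison now reports a difference
    return 0;
}
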
@@ -5710,8 +5714,12 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_QWEN2:
         case LLM_ARCH_QWEN2VL:
+            {
+                std::fill(hparams.rope_mrope_sections.begin(), hparams.rope_mrope_sections.end(), 0);
+                ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_mrope_sections, 4, true);
+            }
+        case LLM_ARCH_QWEN2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
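
Note (not part of the patch): after this hunk, LLM_ARCH_QWEN2VL zero-fills the sections, reads them from the GGUF key, and then falls through into the shared LLM_ARCH_QWEN2 block. Assuming converted Qwen2-VL models carry the split that used to be hardcoded in the graph builder below, the loaded values would look like this sketch:

#include <array>
#include <cstdint>

// Assumed example only: 16 temporal, 24 height and 24 width rotary sections,
// with the fourth slot unused; models without the key keep the zero fill.
std::array<uint32_t, 4> rope_mrope_sections = {16, 24, 24, 0};
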
@@ -12532,7 +12540,7 @@ struct llm_build_context {
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
-        int sections[4] = {16, 24, 24, 0}; // TODO: move this into gguf model file.
+        int * sections = (int *)hparams.rope_mrope_sections.data();
 
         for (int il = 0; il < n_layer; ++il) {
             struct ggml_tensor * inpSA = inpL;
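
Note (not part of the patch): the replacement line reinterprets the uint32_t storage of rope_mrope_sections as int through a cast, which works on the usual targets where int is 32-bit but technically type-puns. A more conservative sketch would copy the values into a plain int array instead:

#include <array>
#include <cstdint>

// Hypothetical helper (not in the patch): copy the loaded section sizes into
// an int[4] before handing them to the rope operator, avoiding the
// uint32_t* -> int* cast.
static void copy_rope_sections(const std::array<uint32_t, 4> & src, int dst[4]) {
    for (int i = 0; i < 4; ++i) {
        dst[i] = (int) src[i];
    }
}
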