
Commit 07553cf

update llama_hparams
1 parent 241bb45

File tree: 5 files changed, +31 -10 lines changed

convert_hf_to_gguf.py
examples/llava/qwen2_vl_surgery.py
gguf-py/gguf/constants.py
gguf-py/gguf/gguf_writer.py
src/llama.cpp

convert_hf_to_gguf.py

Lines changed: 6 additions & 0 deletions
@@ -1980,6 +1980,12 @@ def set_vocab(self):
 class Qwen2VLModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN2VL

+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        mrope_section = self.hparams["rope_scaling"]["mrope_section"]
+        mrope_section += [0] * max(0, 4 - len(mrope_section))
+        self.gguf_writer.add_rope_dimension_sections(mrope_section)
+
     def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
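
The padding above simply extends `mrope_section` to a fixed length of four so the GGUF array always carries four entries. Below is a minimal standalone sketch of that step; the three section values are illustrative assumptions, chosen to match the defaults that llama.cpp previously hardcoded.

# Minimal sketch of the padding in Qwen2VLModel.set_gguf_parameters(); the
# "rope_scaling" values are illustrative assumptions, not read from a real config.
hparams = {"rope_scaling": {"mrope_section": [16, 24, 24]}}

mrope_section = hparams["rope_scaling"]["mrope_section"]
mrope_section += [0] * max(0, 4 - len(mrope_section))  # pad to exactly four entries

assert mrope_section == [16, 24, 24, 0]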

examples/llava/qwen2_vl_surgery.py

Lines changed: 4 additions & 1 deletion
@@ -133,7 +133,10 @@ def main(args):
     fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), vcfg.depth)
     fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), 0) # BUG: not sure what this does
     fout.add_name(model_name)
-    # fout.add_string("clip.vision.mm_patch_merge_type", v_hparams["mm_patch_merge_type"])
+    """
+    HACK: Since the vision RoPE-related parameters aren't stored in `Qwen2VLConfig`,
+    they are hardcoded in `clip_image_build_graph` in `clip.cpp`.
+    """

     processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_name)
     # breakpoint()

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
@@ -131,6 +131,7 @@ class Attention:

     class Rope:
         DIMENSION_COUNT    = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE          = "{arch}.rope.freq_base"
         SCALING_TYPE       = "{arch}.rope.scaling.type"
         SCALING_FACTOR     = "{arch}.rope.scaling.factor"
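
For reference, this Python-side key template expands to the same GGUF key string that the `%s.rope.dimension_sections` entry added to `LLM_KV_NAMES` in `src/llama.cpp` (below) resolves to. A tiny sketch; the `qwen2vl` architecture string is an assumption used only to show the expansion.

# Hypothetical expansion of the new key template; "qwen2vl" is assumed here purely
# to illustrate the "{arch}" placeholder.
DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
print(DIMENSION_SECTIONS.format(arch="qwen2vl"))  # -> qwen2vl.rope.dimension_sections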

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions
@@ -750,6 +750,9 @@ def add_pooling_type(self, value: PoolingType) -> None:

     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
+
+    def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
+        self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)

     def add_rope_freq_base(self, value: float) -> None:
         self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
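
A hedged usage sketch of the new writer method (not part of this commit): the GGUFWriter constructor arguments, the `qwen2vl` architecture string, the output filename, and the section values are all assumptions for illustration.

import gguf

# Sketch only: emit a minimal GGUF file that carries the new rope dimension-sections
# array; everything here beyond add_rope_dimension_sections() is assumed usage.
writer = gguf.GGUFWriter("qwen2vl-rope-test.gguf", "qwen2vl")
writer.add_rope_dimension_sections([16, 24, 24, 0])  # already padded to four entries

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()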

src/llama.cpp

Lines changed: 17 additions & 9 deletions
@@ -310,6 +310,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,

     LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_DIMENSION_SECTIONS,
     LLM_KV_ROPE_FREQ_BASE,
     LLM_KV_ROPE_SCALE_LINEAR,
     LLM_KV_ROPE_SCALING_TYPE,

@@ -426,6 +427,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE,           "%s.attention.scale" },

     { LLM_KV_ROPE_DIMENSION_COUNT,      "%s.rope.dimension_count" },
+    { LLM_KV_ROPE_DIMENSION_SECTIONS,   "%s.rope.dimension_sections" },
     { LLM_KV_ROPE_FREQ_BASE,            "%s.rope.freq_base" },
     { LLM_KV_ROPE_SCALE_LINEAR,         "%s.rope.scale_linear" },
     { LLM_KV_ROPE_SCALING_TYPE,         "%s.rope.scaling.type" },

@@ -2429,11 +2431,12 @@ struct llama_hparams {
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;

-    float    rope_attn_factor = 1.0f;
-    float    rope_freq_base_train;
-    float    rope_freq_scale_train;
-    uint32_t n_ctx_orig_yarn;
-    float    rope_yarn_log_mul;
+    float    rope_attn_factor = 1.0f;
+    float    rope_freq_base_train;
+    float    rope_freq_scale_train;
+    uint32_t n_ctx_orig_yarn;
+    float    rope_yarn_log_mul;
+    std::array<uint32_t, 4> rope_mrope_sections;

     // for State Space Models
     uint32_t ssm_d_conv = 0;

@@ -2488,8 +2491,9 @@ struct llama_hparams {
         if (this->n_ff_shexp      != other.n_ff_shexp)      return true;
         if (this->n_expert_shared != other.n_expert_shared) return true;

-        if (this->rope_finetuned  != other.rope_finetuned)  return true;
-        if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
+        if (this->rope_finetuned  != other.rope_finetuned)  return true;
+        if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
+        if (this->rope_mrope_sections != other.rope_mrope_sections) return true;

         if (this->ssm_d_conv  != other.ssm_d_conv)  return true;
         if (this->ssm_d_inner != other.ssm_d_inner) return true;

@@ -5710,8 +5714,12 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_QWEN2:
         case LLM_ARCH_QWEN2VL:
+            {
+                std::fill(hparams.rope_mrope_sections.begin(), hparams.rope_mrope_sections.end(), 0);
+                ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_mrope_sections, 4, true);
+            }
+        case LLM_ARCH_QWEN2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {

@@ -12532,7 +12540,7 @@ struct llm_build_context {

         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
-        int sections[4] = {16, 24, 24, 0}; // TODO: move this into gguf model file.
+        int * sections = (int *)hparams.rope_mrope_sections.data();

        for (int il = 0; il < n_layer; ++il) {
            struct ggml_tensor * inpSA = inpL;
