
Commit 1f67ba0

update llama_hparams
1 parent d93fa48 commit 1f67ba0

File tree

5 files changed: +31 -10 lines changed

  convert_hf_to_gguf.py
  examples/llava/qwen2_vl_surgery.py
  gguf-py/gguf/constants.py
  gguf-py/gguf/gguf_writer.py
  src/llama.cpp

convert_hf_to_gguf.py

Lines changed: 6 additions & 0 deletions

@@ -1980,6 +1980,12 @@ def set_vocab(self):
 class Qwen2VLModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN2VL

+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        mrope_section = self.hparams["rope_scaling"]["mrope_section"]
+        mrope_section += [0] * max(0, 4 - len(mrope_section))
+        self.gguf_writer.add_rope_dimension_sections(mrope_section)
+
     def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
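The padding line guarantees that the GGUF array always has exactly four entries, which is what the loader in llama.cpp expects. A minimal standalone sketch of that behaviour, assuming a three-element mrope_section of [16, 24, 24] (matching the {16, 24, 24, 0} that llama.cpp previously hardcoded):

mrope_section = [16, 24, 24]                            # e.g. rope_scaling["mrope_section"] from a Qwen2-VL config
mrope_section += [0] * max(0, 4 - len(mrope_section))   # pad with zeros up to 4 entries
print(mrope_section)                                    # [16, 24, 24, 0]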

examples/llava/qwen2_vl_surgery.py

Lines changed: 4 additions & 1 deletion

@@ -133,7 +133,10 @@ def main(args):
     fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), vcfg.depth)
     fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), 0)  # BUG: not sure what this does
     fout.add_name(model_name)
-    # fout.add_string("clip.vision.mm_patch_merge_type", v_hparams["mm_patch_merge_type"])
+    """
+    HACK: Since the vision rope related parameters aren't stored in the `Qwen2VLConfig`,
+    they will be hardcoded in `clip_image_build_graph` in `clip.cpp`.
+    """

     processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_name)
     # breakpoint()

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions

@@ -119,6 +119,7 @@ class Attention:

    class Rope:
        DIMENSION_COUNT    = "{arch}.rope.dimension_count"
+       DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
        FREQ_BASE          = "{arch}.rope.freq_base"
        SCALING_TYPE       = "{arch}.rope.scaling.type"
        SCALING_FACTOR     = "{arch}.rope.scaling.factor"
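The "{arch}" placeholder is filled in with the model architecture name when the key is written. A quick sketch of the resulting key string, assuming the architecture name is "qwen2vl" (which would match the "%s.rope.dimension_sections" entry added to llama.cpp below):

# Stand-in for gguf.constants.Keys.Rope.DIMENSION_SECTIONS (illustration only).
DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
print(DIMENSION_SECTIONS.format(arch="qwen2vl"))  # qwen2vl.rope.dimension_sections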

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions

@@ -720,6 +720,9 @@ def add_pooling_type(self, value: PoolingType) -> None:

    def add_rope_dimension_count(self, count: int) -> None:
        self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
+
+   def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
+       self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)

    def add_rope_freq_base(self, value: float) -> None:
        self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
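From the conversion side the call is a one-liner and the section sizes land as an array-valued KV entry. A hedged usage sketch (the file name and values are placeholders, and a real conversion would still perform the usual header/KV/tensor writes afterwards):

import gguf

# Sketch only: record the four M-RoPE section sizes for a Qwen2-VL conversion.
writer = gguf.GGUFWriter("qwen2vl-sketch.gguf", arch="qwen2vl")
writer.add_rope_dimension_sections([16, 24, 24, 0])  # stored as "qwen2vl.rope.dimension_sections"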

src/llama.cpp

Lines changed: 17 additions & 9 deletions

@@ -308,6 +308,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,

     LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_DIMENSION_SECTIONS,
     LLM_KV_ROPE_FREQ_BASE,
     LLM_KV_ROPE_SCALE_LINEAR,
     LLM_KV_ROPE_SCALING_TYPE,

@@ -424,6 +425,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE,           "%s.attention.scale" },

     { LLM_KV_ROPE_DIMENSION_COUNT,      "%s.rope.dimension_count" },
+    { LLM_KV_ROPE_DIMENSION_SECTIONS,   "%s.rope.dimension_sections" },
     { LLM_KV_ROPE_FREQ_BASE,            "%s.rope.freq_base" },
     { LLM_KV_ROPE_SCALE_LINEAR,         "%s.rope.scale_linear" },
     { LLM_KV_ROPE_SCALING_TYPE,         "%s.rope.scaling.type" },

@@ -2407,11 +2409,12 @@ struct llama_hparams {
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;

-    float    rope_attn_factor = 1.0f;
-    float    rope_freq_base_train;
-    float    rope_freq_scale_train;
-    uint32_t n_ctx_orig_yarn;
-    float    rope_yarn_log_mul;
+    float    rope_attn_factor = 1.0f;
+    float    rope_freq_base_train;
+    float    rope_freq_scale_train;
+    uint32_t n_ctx_orig_yarn;
+    float    rope_yarn_log_mul;
+    std::array<uint32_t, 4> rope_mrope_sections;

     // for State Space Models
     uint32_t ssm_d_conv = 0;

@@ -2466,8 +2469,9 @@ struct llama_hparams {
         if (this->n_ff_shexp      != other.n_ff_shexp)      return true;
         if (this->n_expert_shared != other.n_expert_shared) return true;

-        if (this->rope_finetuned  != other.rope_finetuned)  return true;
-        if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
+        if (this->rope_finetuned      != other.rope_finetuned)      return true;
+        if (this->n_ctx_orig_yarn     != other.n_ctx_orig_yarn)     return true;
+        if (this->rope_mrope_sections != other.rope_mrope_sections) return true;

         if (this->ssm_d_conv  != other.ssm_d_conv)  return true;
         if (this->ssm_d_inner != other.ssm_d_inner) return true;

@@ -5620,8 +5624,12 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_QWEN2:
         case LLM_ARCH_QWEN2VL:
+            {
+                std::fill(hparams.rope_mrope_sections.begin(), hparams.rope_mrope_sections.end(), 0);
+                ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_mrope_sections, 4, true);
+            }
+        case LLM_ARCH_QWEN2:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {

@@ -12398,7 +12406,7 @@ struct llm_build_context {

         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
-        int sections[4] = {16, 24, 24, 0}; // TODO: move this into gguf model file.
+        int * sections = (int *)hparams.rope_mrope_sections.data();

         for (int il = 0; il < n_layer; ++il) {
             struct ggml_tensor * inpSA = inpL;
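On the loading side, llm_load_hparams zero-fills the four section slots and then overwrites them from the "%s.rope.dimension_sections" array, and the Qwen2-VL build graph now reads the sections from hparams instead of the old hardcoded {16, 24, 24, 0}. A rough Python mirror of that load-with-default behaviour (illustrative only, not the llama.cpp API):

def load_mrope_sections(metadata: dict, arch: str = "qwen2vl") -> list[int]:
    # Mirror of std::fill(...) followed by ml.get_key_or_arr(...) above:
    # start from four zeros, then copy whatever the GGUF metadata provides.
    sections = [0, 0, 0, 0]
    for i, v in enumerate(metadata.get(f"{arch}.rope.dimension_sections", [])[:4]):
        sections[i] = int(v)
    return sections

print(load_mrope_sections({"qwen2vl.rope.dimension_sections": [16, 24, 24, 0]}))
# [16, 24, 24, 0]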
