Skip to content

Commit 44cd468

Browse files
committed
add pixtral text model (vision is wip)
1 parent 82e3381 commit 44cd468

File tree

6 files changed

+177
-3
lines changed

6 files changed

+177
-3
lines changed

convert_hf_to_gguf.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
776776
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
777777
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
778778
res = "glm4"
779+
if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3":
780+
# ref: https://huggingface.co/mistral-community/pixtral-12b
781+
res = "pixtral"
779782

780783
if res is None:
781784
logger.warning("\n")
@@ -1724,7 +1727,8 @@ def prepare_tensors(self):
17241727
"MistralForCausalLM",
17251728
"MixtralForCausalLM",
17261729
"Idefics3ForConditionalGeneration",
1727-
"SmolVLMForConditionalGeneration")
1730+
"SmolVLMForConditionalGeneration",
1731+
"LlavaForConditionalGeneration")
17281732
class LlamaModel(TextModel):
17291733
model_arch = gguf.MODEL_ARCH.LLAMA
17301734
undo_permute = True
@@ -1734,6 +1738,10 @@ def __init__(self, *args, **kwargs):
17341738
# fix for SmolVLM2, missing `num_attention_heads` in config.json
17351739
if self.hparams["architectures"][0] == "SmolVLMForConditionalGeneration":
17361740
self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
1741+
# fix for Pixtral, missing `num_attention_heads` in config.json
1742+
if self.hparams["architectures"][0] == "LlavaForConditionalGeneration" \
1743+
and self.hparams.get("model_type") == "mistral":
1744+
self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
17371745

17381746
def set_vocab(self):
17391747
try:
@@ -1797,12 +1805,17 @@ def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
17971805
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
17981806
n_head = self.hparams["num_attention_heads"]
17991807
n_kv_head = self.hparams.get("num_key_value_heads")
1800-
is_vision_tensor = "vision_tower" in name or "vision_model" in name or "model.connector" in name
1808+
is_vision_tensor = "vision_tower" in name \
1809+
or "vision_model" in name \
1810+
or "model.connector" in name \
1811+
or "multi_modal_projector" in name
18011812

18021813
if is_vision_tensor:
18031814
return [] # skip vision tensors
18041815
elif name.startswith("model.text_model"):
18051816
name = name.replace("text_model.", "") # for SmolVLM
1817+
elif name.startswith("language_model."):
1818+
name = name.replace("language_model.", "") # for the rest
18061819

18071820
if self.undo_permute:
18081821
if name.endswith(("q_proj.weight", "q_proj.bias")):

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ class TOKENIZER_TYPE(IntEnum):
115115
{"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
116116
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
117117
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", },
118+
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
118119
]
119120

120121

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ extern "C" {
111111
LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
112112
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
113113
LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
114+
LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
114115
};
115116

116117
enum llama_rope_type {

models/ggml-vocab-pixtral.gguf.inp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
ied 4 ½ months
2+
__ggml_vocab_test__
3+
Führer
4+
__ggml_vocab_test__
5+
6+
__ggml_vocab_test__
7+
8+
__ggml_vocab_test__
9+
10+
__ggml_vocab_test__
11+
12+
__ggml_vocab_test__
13+
14+
__ggml_vocab_test__
15+
16+
17+
__ggml_vocab_test__
18+
19+
20+
21+
__ggml_vocab_test__
22+
23+
24+
25+
26+
__ggml_vocab_test__
27+
28+
29+
__ggml_vocab_test__
30+
Hello world
31+
__ggml_vocab_test__
32+
Hello world
33+
__ggml_vocab_test__
34+
Hello World
35+
__ggml_vocab_test__
36+
Hello World
37+
__ggml_vocab_test__
38+
Hello World!
39+
__ggml_vocab_test__
40+
Hello, world!
41+
__ggml_vocab_test__
42+
Hello, world!
43+
__ggml_vocab_test__
44+
this is 🦙.cpp
45+
__ggml_vocab_test__
46+
w048 7tuijk dsdfhu
47+
__ggml_vocab_test__
48+
нещо на Български
49+
__ggml_vocab_test__
50+
កាន់តែពិសេសអាចខលចេញ
51+
__ggml_vocab_test__
52+
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
53+
__ggml_vocab_test__
54+
Hello
55+
__ggml_vocab_test__
56+
Hello
57+
__ggml_vocab_test__
58+
Hello
59+
__ggml_vocab_test__
60+
Hello
61+
__ggml_vocab_test__
62+
Hello
63+
__ggml_vocab_test__
64+
Hello
65+
Hello
66+
__ggml_vocab_test__
67+
(
68+
__ggml_vocab_test__
69+
70+
=
71+
__ggml_vocab_test__
72+
' era
73+
__ggml_vocab_test__
74+
Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
75+
__ggml_vocab_test__
76+
!!!!!!
77+
__ggml_vocab_test__
78+
3
79+
__ggml_vocab_test__
80+
33
81+
__ggml_vocab_test__
82+
333
83+
__ggml_vocab_test__
84+
3333
85+
__ggml_vocab_test__
86+
33333
87+
__ggml_vocab_test__
88+
333333
89+
__ggml_vocab_test__
90+
3333333
91+
__ggml_vocab_test__
92+
33333333
93+
__ggml_vocab_test__
94+
333333333
95+
__ggml_vocab_test__
96+
Cửa Việt
97+
__ggml_vocab_test__
98+
discards
99+
__ggml_vocab_test__
100+
101+
102+
103+
104+
105+
106+
107+
108+
109+
110+
111+
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
112+
__ggml_vocab_test__

models/ggml-vocab-pixtral.gguf.out

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
2014 1032 1052 1032 28504 6972
2+
1070 7088 1258
3+
4+
1032
5+
1256
6+
1293
7+
1009
8+
1010
9+
1267
10+
4688
11+
1009 1010
12+
22177 4304
13+
45383 4304
14+
22177 5325
15+
45383 5325
16+
45383 5325 1033
17+
22177 1044 4304 1033
18+
45383 1044 4304 1033
19+
1593 1395 119685 1166 1153 1046 51228
20+
1119 1048 1052 1056 1032 1055 17391 23216 30203 7785 17279
21+
3337 30757 1902 4200 63073 3671
22+
1225 1158 1128 1225 1158 1182 1225 1158 1147 1225 1159 1139 1225 1158 1143 1225 1159 1130 1225 1158 1150 1225 1158 1183 1225 1158 1159 1225 21359 1225 1158 1159 1225 1158 1162 1225 1158 1182 1225 1158 1133 1225 1158 1129 1225 1158 1155 1225 1158 1133 1225 21359 1225 1158 1137
23+
1240 1159 1154 1128 1319 13052 1041 119685 1152 1182 29568 1240 1159 1140 1171 1239 1184 1143 1319 88181 1873 3659 1275 56421 1621 1041 126241 1133 1319 11234 1873 26303 1455 1934 2246 3754 10835 1041
24+
22177
25+
45383
26+
1032 45383
27+
1256 45383
28+
1293 45383
29+
1293 45383 1010 1293 45383
30+
1319
31+
1010 1376
32+
1039 4033
33+
22177 1044 1404 48054 1033 3075 1584 1636 119685 1152 1129 3082 26060 2998 63614 82278 1049 1051 1049 1052 1049 1053 1049 6434 6749
34+
7290 7290 7290
35+
1051
36+
1051 1051
37+
1051 1051 1051
38+
1051 1051 1051 1051
39+
1051 1051 1051 1051 1051
40+
1051 1051 1051 1051 1051 1051
41+
1051 1051 1051 1051 1051 1051 1051
42+
1051 1051 1051 1051 1051 1051 1051 1051
43+
1051 1051 1051 1051 1051 1051 1051 1051 1051
44+
1067 59503 28783
45+
3724 4058
46+
1010 1032 1267 1032 4688 1032 17152 1458 29356 1010 1256 1010 1293 1010 1260 1010 1652 1010 1240 1159 1154 1128 1319 13052 1041 119685 1152 1182 29568 1240 1159 1140 1171 1239 1184 1143 1319 88181 1873 3659 1275 56421 1621 1041 126241 1133 119685 1166 1153 1240 1159 1166 1153 1032 1051 1032 1051 1051 1032 1051 1051 1051 1032 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1051 1051 1032 1051 1051 1051 1051 1051 1051 1051 1051 1032 1051 1046 1051 1032 1051 1791 1051 1032 1051 2880 1051 71881 1158 1128 1225 1158 1182 1225 1158 1147 1225 1159 1139 1225 1158 1143 1225 1159 1130 1225 1158 1150 1225 1158 1183 1225 1158 1159 1225 21359 1225 1158 1159 1225 1158 1162 1225 1158 1182 1225 1158 1133 1240 1159 1152 1129 3082 26060 2998 63614 82278 1049 1051 1049 1052 1049 1053 1049 6434 6749 45577 1045 6626 43555 2843 30757 1902 4200 63073 3671 14931 20040 20040 1657 1657 1975 14135 14135 83923 7290 7290 7290 45509 45509 45509 1362 6483 2151 1576 1116 2189 1514 1681 2156 1044 1576 3609 1636 5257 1063 1576 1077 1605 5257 1362 7534 3180 1494 1044 1576 1068 1636 2479 2269 26883 1063 2837 1039 45654 1261 54297 1076

src/llama-vocab.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1506,7 +1506,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
15061506
tokenizer_pre == "llama3" ||
15071507
tokenizer_pre == "llama-v3" ||
15081508
tokenizer_pre == "llama-bpe"||
1509-
tokenizer_pre == "falcon3") {
1509+
tokenizer_pre == "falcon3" ||
1510+
tokenizer_pre == "pixtral") {
15101511
pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
15111512
ignore_merges = true;
15121513
add_bos = true;

0 commit comments

Comments (0)