Skip to content

Commit bf572ca

Browse files
committed
https://github.com/ggerganov/llama.cpp/pull/11310
1 parent 7844a11 commit bf572ca

File tree

3 files changed

+179
-1
lines changed

3 files changed

+179
-1
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
ied 4 ½ months
2+
__ggml_vocab_test__
3+
Führer
4+
__ggml_vocab_test__
5+
6+
__ggml_vocab_test__
7+
8+
__ggml_vocab_test__
9+
10+
__ggml_vocab_test__
11+
12+
__ggml_vocab_test__
13+
14+
__ggml_vocab_test__
15+
16+
17+
__ggml_vocab_test__
18+
19+
20+
21+
__ggml_vocab_test__
22+
23+
24+
25+
26+
__ggml_vocab_test__
27+
28+
29+
__ggml_vocab_test__
30+
Hello world
31+
__ggml_vocab_test__
32+
Hello world
33+
__ggml_vocab_test__
34+
Hello World
35+
__ggml_vocab_test__
36+
Hello World
37+
__ggml_vocab_test__
38+
Hello World!
39+
__ggml_vocab_test__
40+
Hello, world!
41+
__ggml_vocab_test__
42+
Hello, world!
43+
__ggml_vocab_test__
44+
this is 🦙.cpp
45+
__ggml_vocab_test__
46+
w048 7tuijk dsdfhu
47+
__ggml_vocab_test__
48+
нещо на Български
49+
__ggml_vocab_test__
50+
កាន់តែពិសេសអាចខលចេញ
51+
__ggml_vocab_test__
52+
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
53+
__ggml_vocab_test__
54+
Hello
55+
__ggml_vocab_test__
56+
Hello
57+
__ggml_vocab_test__
58+
Hello
59+
__ggml_vocab_test__
60+
Hello
61+
__ggml_vocab_test__
62+
Hello
63+
__ggml_vocab_test__
64+
Hello
65+
Hello
66+
__ggml_vocab_test__
67+
(
68+
__ggml_vocab_test__
69+
70+
=
71+
__ggml_vocab_test__
72+
' era
73+
__ggml_vocab_test__
74+
Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
75+
__ggml_vocab_test__
76+
!!!!!!
77+
__ggml_vocab_test__
78+
3
79+
__ggml_vocab_test__
80+
33
81+
__ggml_vocab_test__
82+
333
83+
__ggml_vocab_test__
84+
3333
85+
__ggml_vocab_test__
86+
33333
87+
__ggml_vocab_test__
88+
333333
89+
__ggml_vocab_test__
90+
3333333
91+
__ggml_vocab_test__
92+
33333333
93+
__ggml_vocab_test__
94+
333333333
95+
__ggml_vocab_test__
96+
Cửa Việt
97+
__ggml_vocab_test__
98+
discards
99+
__ggml_vocab_test__
100+
101+
102+
103+
104+
105+
106+
107+
108+
109+
110+
111+
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
112+
__ggml_vocab_test__
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
1122 220 19 220 26062 3951
2+
37 50753 261
3+
4+
220
5+
256
6+
262
7+
197
8+
198
9+
271
10+
1406
11+
1572
12+
9707 1879
13+
21927 1879
14+
9707 4337
15+
21927 4337
16+
21927 4337 0
17+
9707 11 1879 0
18+
21927 11 1879 0
19+
419 374 11162 99 247 13 10821
20+
86 15 19 23 220 22 83 1963 41808 11472 2940 16739
21+
78762 14144 1456 13073 63471 33594 3038 133178 79012
22+
146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 147805 148301 147270 44258 223 146848
23+
145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 320 3243 42365 429 702 1181 1828 3950 8
24+
9707
25+
21927
26+
220 21927
27+
256 21927
28+
262 21927
29+
262 21927 198 262 21927
30+
320
31+
198 284
32+
6 11385
33+
9707 11 379 64848 0 2585 525 498 26525 223 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216
34+
17085 2928
35+
18
36+
18 18
37+
18 18 18
38+
18 18 18 18
39+
18 18 18 18 18
40+
18 18 18 18 18 18
41+
18 18 18 18 18 18 18
42+
18 18 18 18 18 18 18 18
43+
18 18 18 18 18 18 18 18 18
44+
34 90063 128324
45+
2560 2347
46+
198 4710 14731 65497 7847 1572 2303 78672 10947 145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 11162 99 247 149955 220 18 220 18 18 220 18 18 18 220 18 18 18 18 220 18 18 18 18 18 220 18 18 18 18 18 18 220 18 18 18 18 18 18 18 220 18 18 18 18 18 18 18 18 220 18 13 18 220 18 496 18 220 18 1112 18 220 146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 144534 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216 55460 53237 18658 14144 1456 13073 63471 33594 3038 133178 79012 3355 4605 4605 13874 13874 73594 3014 3014 28149 17085 2928 26610 7646 358 3003 1012 364 83 813 566 594 1052 11 364 787 498 2704 30 364 44 537 2704 358 3278 1281 432 11 364 35 498 1075 1045 15243 30 1205 6 42612 264 63866 43

src/llama.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,7 @@ enum llm_chat_template {
15731573
LLM_CHAT_TEMPLATE_VICUNA_ORCA,
15741574
LLM_CHAT_TEMPLATE_DEEPSEEK,
15751575
LLM_CHAT_TEMPLATE_DEEPSEEK_2,
1576+
LLM_CHAT_TEMPLATE_DEEPSEEK_3,
15761577
LLM_CHAT_TEMPLATE_COMMAND_R,
15771578
LLM_CHAT_TEMPLATE_LLAMA_3,
15781579
LLM_CHAT_TEMPLATE_CHATGML_3,
@@ -1606,6 +1607,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
16061607
{ "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA },
16071608
{ "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK },
16081609
{ "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 },
1610+
{ "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 },
16091611
{ "command-r", LLM_CHAT_TEMPLATE_COMMAND_R },
16101612
{ "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 },
16111613
{ "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 },
@@ -6421,7 +6423,8 @@ static void llm_load_vocab(
64216423
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
64226424
vocab.tokenizer_clean_spaces = false;
64236425
} else if (
6424-
tokenizer_pre == "qwen2") {
6426+
tokenizer_pre == "qwen2" ||
6427+
tokenizer_pre == "deepseek-r1-qwen") {
64256428
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
64266429
vocab.tokenizer_clean_spaces = false;
64276430
} else if (
@@ -21845,6 +21848,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2184521848
return LLM_CHAT_TEMPLATE_MINICPM;
2184621849
} else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
2184721850
return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
21851+
} else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
21852+
return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
2184821853
} else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
2184921854
// ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
2185021855
// EXAONE-3.0-7.8B-Instruct
@@ -22154,6 +22159,21 @@ static int32_t llama_chat_apply_template_internal(
2215422159
if (add_ass) {
2215522160
ss << "Assistant:";
2215622161
}
22162+
} else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
22163+
// DeepSeek-V3
22164+
for (auto message : chat) {
22165+
std::string role(message->role);
22166+
if (role == "system") {
22167+
ss << message->content << "\n\n";
22168+
} else if (role == "user") {
22169+
ss << LU8("<|User|>") << message->content;
22170+
} else if (role == "assistant") {
22171+
ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
22172+
}
22173+
}
22174+
if (add_ass) {
22175+
ss << LU8("<|Assistant|>");
22176+
}
2215722177
} else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
2215822178
// ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
2215922179
// EXAONE-3.0-7.8B-Instruct

0 commit comments

Comments (0)