
Commit de014bc

rebase

ggml-ci

1 parent: e428393

File tree

4 files changed: +55 additions, -0 deletions

src/llama-arch.cpp (27 additions, 0 deletions)

@@ -6,6 +6,7 @@
 
 static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_LLAMA,  "llama"  },
+    { LLM_ARCH_DECI,   "deci"   },
     { LLM_ARCH_FALCON, "falcon" },
     { LLM_ARCH_GROK,   "grok"   },
     { LLM_ARCH_GPT2,   "gpt2"   },
@@ -214,6 +215,32 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP_EXPS,   "blk.%d.ffn_up_exps" },
         },
     },
+    {
+        LLM_ARCH_DECI,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
+            { LLM_TENSOR_OUTPUT,         "output" },
+            { LLM_TENSOR_ROPE_FREQS,     "rope_freqs" },
+            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_ROT_EMBD,  "blk.%d.attn_rot_embd" },
+            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXP,   "blk.%d.ffn_gate.%d" },
+            { LLM_TENSOR_FFN_DOWN_EXP,   "blk.%d.ffn_down.%d" },
+            { LLM_TENSOR_FFN_UP_EXP,     "blk.%d.ffn_up.%d" },
+            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" },
+        },
+    },
     {
         LLM_ARCH_BAICHUAN,
         {
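Each "%d" in the per-block patterns above is substituted with the block (layer) index when tensor names are resolved at load time; the per-expert *_EXP patterns carry a second "%d" for the expert index. A minimal sketch of that substitution, using a hypothetical helper name of our own (the actual resolution in llama.cpp goes through its own name-formatting machinery):

// Hypothetical illustration, not part of this commit: expand a
// per-block tensor-name pattern such as "blk.%d.attn_q" for block 3.
#include <cstdio>
#include <string>

static std::string tensor_name(const char * pattern, int block) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), pattern, block); // fills the "%d" with the block index
    return buf;
}

int main() {
    // Prints "blk.3.attn_q"
    std::printf("%s\n", tensor_name("blk.%d.attn_q", 3).c_str());
}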

src/llama-arch.h (1 addition, 0 deletions)

@@ -10,6 +10,7 @@
 
 enum llm_arch {
     LLM_ARCH_LLAMA,
+    LLM_ARCH_DECI,
     LLM_ARCH_FALCON,
     LLM_ARCH_BAICHUAN,
     LLM_ARCH_GROK,

src/llama-chat.cpp (25 additions, 0 deletions)

@@ -35,6 +35,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
     { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7 },
     { "phi3",              LLM_CHAT_TEMPLATE_PHI_3 },
+    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3 },
     { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR },
     { "monarch",           LLM_CHAT_TEMPLATE_MONARCH },
     { "gemma",             LLM_CHAT_TEMPLATE_GEMMA },
@@ -53,6 +54,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
     { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD },
     { "granite",           LLM_CHAT_TEMPLATE_GRANITE },
     { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT },
+    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ },
 };
 
 llm_chat_template llm_chat_template_from_str(const std::string & name) {
@@ -108,6 +110,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         }
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
         return LLM_CHAT_TEMPLATE_PHI_3;
+    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
+        return LLM_CHAT_TEMPLATE_FALCON_3;
     } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
         return LLM_CHAT_TEMPLATE_ZEPHYR;
     } else if (tmpl_contains("bos_token + message['role']")) {
@@ -154,6 +158,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_GRANITE;
     } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
         return LLM_CHAT_TEMPLATE_GIGACHAT;
+    } else if (tmpl_contains("<|role_start|>")) {
+        return LLM_CHAT_TEMPLATE_MEGREZ;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
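Branch order in the detector matters: Phi-3 templates also contain "<|assistant|>", so the new Falcon 3 check only fires when the earlier Phi-3 check (which additionally requires "<|end|>") did not match. A minimal stand-alone sketch of the substring heuristic, with an assumed template fragment that is not taken from the commit:

// Hypothetical illustration of the substring-based detection above.
#include <cstdio>
#include <string>

static bool contains(const std::string & haystack, const std::string & needle) {
    return haystack.find(needle) != std::string::npos;
}

int main() {
    // Assumed Falcon-3-style Jinja fragment (example input only):
    const std::string tmpl = "<|user|>\n{{ content }}\n<|assistant|>\n";
    if (contains(tmpl, "<|assistant|>") && contains(tmpl, "<|end|>")) {
        std::printf("phi3\n");
    } else if (contains(tmpl, "<|assistant|>") && contains(tmpl, "<|user|>")) {
        std::printf("falcon3\n"); // this branch fires for the fragment above
    }
}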
@@ -260,6 +266,15 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "<|assistant|>\n";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
+        // Falcon 3
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>\n" << message->content << "\n";
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
         // zephyr template
         for (auto message : chat) {
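For concreteness, a minimal stand-alone sketch of what the Falcon 3 branch produces, with an assumed two-message chat (the struct and sample messages are illustrative, not from the commit):

// Hypothetical illustration of the Falcon 3 formatting above.
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

struct msg { std::string role, content; };

int main() {
    const std::vector<msg> chat = { { "user", "Hi" }, { "assistant", "Hello!" } };
    std::ostringstream ss;
    for (const auto & m : chat) {
        ss << "<|" << m.role << "|>\n" << m.content << "\n"; // same formatting as the branch above
    }
    ss << "<|assistant|>\n"; // appended when add_ass is true
    std::printf("%s", ss.str().c_str());
    // Output:
    // <|user|>
    // Hi
    // <|assistant|>
    // Hello!
    // <|assistant|>
}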
@@ -503,6 +518,16 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "assistant<|role_sep|>";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
+        // Megrez template
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
+        }
+
+        if (add_ass) {
+            ss << "<|role_start|>assistant<|role_end|>";
+        }
     } else {
         // template not supported
         return -1;
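Applied to the same assumed two-message chat as the Falcon 3 sketch above, the Megrez branch renders everything on one line, since no newline follows "<|turn_end|>"; with add_ass it then appends the trailing assistant header:

<|role_start|>user<|role_end|>Hi<|turn_end|><|role_start|>assistant<|role_end|>Hello!<|turn_end|><|role_start|>assistant<|role_end|>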

src/llama-chat.h (2 additions, 0 deletions)

@@ -15,6 +15,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
     LLM_CHAT_TEMPLATE_MISTRAL_V7,
     LLM_CHAT_TEMPLATE_PHI_3,
+    LLM_CHAT_TEMPLATE_FALCON_3,
     LLM_CHAT_TEMPLATE_ZEPHYR,
     LLM_CHAT_TEMPLATE_MONARCH,
     LLM_CHAT_TEMPLATE_GEMMA,
@@ -33,6 +34,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_RWKV_WORLD,
     LLM_CHAT_TEMPLATE_GRANITE,
     LLM_CHAT_TEMPLATE_GIGACHAT,
+    LLM_CHAT_TEMPLATE_MEGREZ,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
