Skip to content

Commit aad44e6

Browse files
dixyes authored and NeoZhangJianyu committed
llama : support InfiniAI Megrez 3b (ggml-org#10893)
* Support InfiniAI Megrez 3b
* Fix tokenizer_clean_spaces for megrez
1 parent 791dfeb commit aad44e6

File tree

4 files changed

+25
-0
lines changed

4 files changed

+25
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
684684
if chkhsh == "ad851be1dba641f2e3711822f816db2c265f788b37c63b4e1aeacb9ee92de8eb":
685685
# ref: https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct
686686
res = "gigachat"
687+
if chkhsh == "d4c8f286ea6b520b3d495c4455483cfa2302c0cfcd4be05d781b6a8a0a7cdaf1":
688+
# ref: https://huggingface.co/Infinigence/Megrez-3B-Instruct
689+
res = "megrez"
687690

688691
if res is None:
689692
logger.warning("\n")

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class TOKENIZER_TYPE(IntEnum):
106106
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
107107
{"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
108108
{"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
109+
{"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
109110
]
110111

111112

src/llama.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,6 +1724,7 @@ enum llm_chat_template {
17241724
LLM_CHAT_TEMPLATE_RWKV_WORLD,
17251725
LLM_CHAT_TEMPLATE_GRANITE,
17261726
LLM_CHAT_TEMPLATE_GIGACHAT,
1727+
LLM_CHAT_TEMPLATE_MEGREZ,
17271728
LLM_CHAT_TEMPLATE_UNKNOWN,
17281729
};
17291730

@@ -1757,6 +1758,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
17571758
{ "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD },
17581759
{ "granite", LLM_CHAT_TEMPLATE_GRANITE },
17591760
{ "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT },
1761+
{ "megrez", LLM_CHAT_TEMPLATE_MEGREZ },
17601762
};
17611763

17621764
static llm_arch llm_arch_from_string(const std::string & name) {
@@ -6707,6 +6709,9 @@ static void llm_load_vocab(
67076709
} else if (
67086710
tokenizer_pre == "minerva-7b") {
67096711
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MINERVA;
6712+
} else if (
6713+
tokenizer_pre == "megrez") {
6714+
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
67106715
} else {
67116716
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
67126717
}
@@ -22940,6 +22945,8 @@ static llm_chat_template llama_chat_detect_template(const std::string & tmpl) {
2294022945
return LLM_CHAT_TEMPLATE_GRANITE;
2294122946
} else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
2294222947
return LLM_CHAT_TEMPLATE_GIGACHAT;
22948+
} else if (tmpl_contains("<|role_start|>")) {
22949+
return LLM_CHAT_TEMPLATE_MEGREZ;
2294322950
}
2294422951
return LLM_CHAT_TEMPLATE_UNKNOWN;
2294522952
}
@@ -23298,6 +23305,16 @@ static int32_t llama_chat_apply_template_internal(
2329823305
if (add_ass) {
2329923306
ss << "assistant<|role_sep|>";
2330023307
}
23308+
} else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
23309+
// Megrez template
23310+
for (auto message : chat) {
23311+
std::string role(message->role);
23312+
ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
23313+
}
23314+
23315+
if (add_ass) {
23316+
ss << "<|role_start|>assistant<|role_end|>";
23317+
}
2330123318
} else {
2330223319
// template not supported
2330323320
return -1;

tests/test-chat-template.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ int main(void) {
7777
"{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'system' %}{{ '[SYSTEM_PROMPT] ' + message['content'] + '[/SYSTEM_PROMPT]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token }}{% else %}{{ raise_exception('Only user, system and assistant roles are supported!') }}{% endif %}{% endfor %}",
7878
// ai-sage/GigaChat-20B-A3B-instruct
7979
"{% if messages[0]['role'] == 'system' -%}\n {%- set loop_messages = messages[1:] -%}\n {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n {%- set loop_messages = messages -%}\n {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n \n {%- if loop.index0 == 0 -%}\n {{ system_message -}}\n {%- endif -%}\n {%- if message['role'] == 'user' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if message['role'] == 'assistant' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if loop.last and add_generation_prompt -%}\n {{ 'assistant' + additional_special_tokens[0] -}}\n {%- endif -%}\n{%- endfor %}",
80+
// Infinigence/Megrez-3B-Instruct
81+
u8"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"
8082
};
8183
std::vector<std::string> expected_output = {
8284
// teknium/OpenHermes-2.5-Mistral-7B
@@ -133,6 +135,8 @@ int main(void) {
133135
"[SYSTEM_PROMPT] You are a helpful assistant[/SYSTEM_PROMPT][INST] Hello[/INST] Hi there</s>[INST] Who are you[/INST] I am an assistant </s>[INST] Another question[/INST]",
134136
// ai-sage/GigaChat-20B-A3B-instruct
135137
"<s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|> I am an assistant <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>",
138+
// Infinigence/Megrez-3B-Instruct
139+
"<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
136140
};
137141
std::vector<char> formatted_chat(1024);
138142
int32_t res;

0 commit comments

Comments
 (0)