Skip to content

Commit 73f3d01

Browse files
committed
Fix tokenizer_clean_spaces for megrez
1 parent a02c63d commit 73f3d01

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

src/llama.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6604,8 +6604,7 @@ static void llm_load_vocab(
66046604
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
66056605
vocab.tokenizer_clean_spaces = false;
66066606
} else if (
6607-
tokenizer_pre == "qwen2" ||
6608-
tokenizer_pre == "megrez") {
6607+
tokenizer_pre == "qwen2") {
66096608
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
66106609
vocab.tokenizer_clean_spaces = false;
66116610
} else if (
@@ -6665,6 +6664,9 @@ static void llm_load_vocab(
66656664
} else if (
66666665
tokenizer_pre == "minerva-7b") {
66676666
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MINERVA;
6667+
} else if (
6668+
tokenizer_pre == "megrez") {
6669+
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
66686670
} else {
66696671
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
66706672
}

0 commit comments

Comments
 (0)