Skip to content

Commit 5e732bd

Browse files
committed
fix: copy to fix fallthrough
1 parent 5a163eb commit 5e732bd

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

llama.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12208,7 +12208,6 @@ struct llm_tokenizer_bpe {
12208 12208
switch (vocab.type_pre) {
12209 12209
case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
12210 12210
ignore_merges = true;
12211-
case LLAMA_VOCAB_PRE_TYPE_DBRX:
12212 12211
word_collection = unicode_regex_split(text, {
12213 12212
// original regex from tokenizer.json
12214 12213
//"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
@@ -12217,6 +12216,12 @@ struct llm_tokenizer_bpe {
12217 12216
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
12218 12217
});
12219 12218
break;
12219+
case LLAMA_VOCAB_PRE_TYPE_DBRX:
12220+
word_collection = unicode_regex_split(text, {
12221+
// same as llama3
12222+
"(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
12223+
});
12224+
break;
12220 12225
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM:
12221 12226
word_collection = unicode_regex_split(text, {
12222 12227
"[\r\n]",

0 commit comments

Comments
 (0)