We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3a461db, commit 3275e60. Copy full SHA for 3275e60
llama.cpp
@@ -12212,14 +12212,13 @@ struct llm_tokenizer_bpe {
12212
"\\s?\\p{L}+",
12213
"\\s?\\p{P}+",
12214
"[一-龥ࠀ-一가-]+",
12215
- "\\p{N}+",
+ "\\p{N}",
12216
});
12217
break;
12218
case LLAMA_VOCAB_PRE_TYPE_FALCON:
12219
word_collection = unicode_regex_split(text, {
12220
"[\\p{P}\\$\\+<=>\\^~\\|]+",
12221
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
12222
12223
"[0-9][0-9][0-9]",
12224
12225
0 commit comments