File tree Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Original file line number Diff line number Diff line change @@ -411,7 +411,8 @@ struct llm_tokenizer_bpe : llm_tokenizer {
411
411
regex_exprs = {
412
412
// original regex from tokenizer.json
413
413
// "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+"
414
- " '(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\ r\\ n\\ p{L}\\ p{N}]?+\\ p{L}+|\\ p{N}| ?[^\\ s\\ p{L}\\ p{N}]++[\\ r\\ n]*|\\ s*[\\ r\\ n]|\\ s+(?!\\ S)|\\ s+" ,
414
+ // FIXME? Replaced the possessive quantifiers (?+ and ++) with their greedy forms (? and +) to avoid regex errors and imatrix hanging (atomic grouping was tried as an alternative, but it does not appear to be supported)
415
+ " '(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\ r\\ n\\ p{L}\\ p{N}]?\\ p{L}+|\\ p{N}| ?[^\\ s\\ p{L}\\ p{N}]+[\\ r\\ n]*|\\ s*[\\ r\\ n]|\\ s+(?!\\ S)|\\ s+" ,
415
416
};
416
417
break ;
417
418
default :
You can’t perform that action at this time.
0 commit comments