Skip to content

Commit 0c991be

Browse files
committed
Adding unicode regex function
1 parent 021643f commit 0c991be

File tree

4 files changed

16 additions and 41 deletions (lines changed)

4 files changed

16 additions and 41 deletions (lines changed)

unicode-data.cpp

Lines changed: 11 additions & 1 deletion
Large diffs are not rendered by default.

unicode-data.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,5 +14,4 @@ extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_symbol;
14 14
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_control;
15 15
extern const std::multimap<uint32_t, uint32_t> unicode_map_nfd;
16 16
extern const std::map<char32_t, char32_t> unicode_map_lowercase;
17-
extern const std::map<std::string, std::wstring> unicode_regex_to_wregex;
18-
extern const std::map<std::string, std::string> unicode_regex_to_regex;
17+
extern const std::map<std::string, std::wstring> unicode_regex_to_wregex;

unicode.cpp

Lines changed: 3 additions & 21 deletions
Large diffs are not rendered by default.

unicode.h

Lines changed: 1 addition & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -28,21 +28,5 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
28 28

29 29
char32_t unicode_tolower(char32_t cp);
30 30

31-
std::vector<std::wstring> get_gpt2_regex();
32-
std::vector<std::wstring> get_deepseek_coder_regex();
33-
std::vector<std::wstring> get_deepseek_llm_regex();
34-
35-
inline std::wstring from_utf8(const std::string & s)
36-
{
37-
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
38-
return conv.from_bytes(s);
39-
}
40-
41-
inline std::string to_utf8(const std::wstring & ws)
42-
{
43-
// code to convert from utf32/utf16 to utf8
44-
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
45-
std::string utf8 = converter.to_bytes(ws);
46-
return utf8;
47-
}
31+
bool unicode_wregex_exists(const std::string & regex);
48 32
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::wstring> & regex_exprs);

0 commit comments

Comments (0)