Skip to content

Commit abd7e7f

Browse files
Use UTF-16 as input on Windows, since UTF-8 does not work and reads multibyte characters as zeros.
1 parent 698f7b5 commit abd7e7f

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

examples/common.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,19 @@
1616
#endif
1717

1818
#if defined (_WIN32)
19+
#include <fcntl.h>
20+
#include <io.h>
1921
#pragma comment(lib,"kernel32.lib")
2022
extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle);
2123
extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);
2224
extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
2325
extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
2426
extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
27+
// Hand-written prototype for the Win32 WideCharToMultiByte function, declared here
// alongside the other kernel32 imports above.
// lpUsedDefaultChar is LPBOOL in the Win32 API, i.e. a pointer to a BOOL (int),
// not to a C++ bool - declaring it as bool* would let a caller pass storage that is too small.
extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int CodePage, unsigned long dwFlags,
                                                                   const wchar_t * lpWideCharStr, int cchWideChar,
                                                                   char * lpMultiByteStr, int cbMultiByte,
                                                                   const char * lpDefaultChar, int * lpUsedDefaultChar);
31+
#define CP_UTF8 65001
2532
#endif
2633

2734
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
@@ -307,12 +314,25 @@ void win32_console_init(bool enable_color) {
307314
SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4)
308315
}
309316
// Set console output codepage to UTF8
310-
SetConsoleOutputCP(65001); // CP_UTF8
317+
SetConsoleOutputCP(CP_UTF8);
311318
}
312319
void* hConIn = GetStdHandle((unsigned long)-10); // STD_INPUT_HANDLE (-10)
313320
if (hConIn && hConIn != (void*)-1 && GetConsoleMode(hConIn, &dwMode)) {
321+
#if 0
314322
// Set console input codepage to UTF8
315-
SetConsoleCP(65001); // CP_UTF8
323+
SetConsoleCP(CP_UTF8);
324+
#else
325+
// Set console input codepage to UTF16
326+
_setmode(_fileno(stdin), _O_WTEXT);
327+
#endif
316328
}
317329
}
330+
331+
// Convert a wide Unicode string to an UTF8 string
332+
void win32_utf8_encode(const std::wstring & wstr, std::string & str) {
333+
int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
334+
std::string strTo(size_needed, 0);
335+
WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL);
336+
str = strTo;
337+
}
318338
#endif

examples/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,5 @@ void set_console_color(console_state & con_st, console_color_t color);
9292

9393
#if defined (_WIN32)
9494
void win32_console_init(bool enable_color);
95+
void win32_utf8_encode(const std::wstring & wstr, std::string & str);
9596
#endif

examples/main/main.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,19 @@ int main(int argc, char ** argv) {
386386
std::string line;
387387
bool another_line = true;
388388
do {
389+
#if defined(_WIN32)
390+
std::wstring wline;
391+
if (!std::getline(std::wcin, wline)) {
392+
// input stream is bad or EOF received
393+
return 0;
394+
}
395+
win32_utf8_encode(wline, line);
396+
#else
389397
if (!std::getline(std::cin, line)) {
390398
// input stream is bad or EOF received
391399
return 0;
392400
}
401+
#endif
393402
if (line.empty() || line.back() != '\\') {
394403
another_line = false;
395404
} else {

0 commit comments

Comments
 (0)