@@ -134,6 +134,18 @@ char _PyIO_get_console_type(PyObject *path_or_fd) {
134
134
return m ;
135
135
}
136
136
137
/* Return the length of the longest prefix of buf[0..len) that does not end
 * in the middle of a UTF-8 multi-byte sequence.
 *
 * buf: bytes to inspect (only read, never modified).
 * len: number of bytes of buf to consider.
 *
 * At most the last three bytes are examined, since a UTF-8 sequence has at
 * most three bytes before its final one.  The result is:
 *   - len, if the buffer ends with an ASCII byte or with a complete
 *     multi-byte sequence, or if the tail is not valid UTF-8 (stray
 *     continuation bytes, illegal lead byte) -- such input is left alone
 *     for the consumer to deal with;
 *   - the offset of the lead byte, if the buffer ends with a truncated
 *     multi-byte sequence.
 *
 * This function never returns 0 for a non-zero len: when the truncated
 * sequence starts at offset 0 the original len is returned instead, so a
 * caller that shrinks its buffer with this function always makes progress.
 */
DWORD
_find_last_utf8_boundary(const char *buf, DWORD len)
{
    DWORD count;

    for (count = 1; count <= 3 && count <= len; count++) {
        /* Read as unsigned so the range checks below do not depend on
         * whether plain char is signed on this platform. */
        unsigned char c = (unsigned char)buf[len - count];
        DWORD seq_len;

        if (c < 0x80) {
            /* ASCII byte.  Either it is the last byte (count == 1) or it
             * is followed by stray continuation bytes (invalid UTF-8);
             * in both cases there is nothing to trim. */
            return len;
        }
        if (c < 0xc0) {
            /* Continuation byte (10xxxxxx): keep looking for the lead. */
            continue;
        }

        /* Lead byte: work out how long its sequence is supposed to be. */
        if (c < 0xe0) {
            seq_len = 2;
        }
        else if (c < 0xf0) {
            seq_len = 3;
        }
        else if (c < 0xf8) {
            seq_len = 4;
        }
        else {
            seq_len = 0;    /* 0xf8..0xff can never start a sequence */
        }

        /* Trim only a genuinely truncated sequence, and never down to an
         * empty buffer. */
        if (count < seq_len && count < len) {
            return len - count;
        }
        return len;
    }

    /* Three continuation bytes with no lead among them: either the tail
     * of a complete 4-byte sequence or invalid data.  Also reached when
     * len is 0 or the whole (short) buffer is continuation bytes. */
    return len;
}
137
149
138
150
/*[clinic input]
139
151
module _io
@@ -975,7 +987,7 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
975
987
{
976
988
BOOL res = TRUE;
977
989
wchar_t * wbuf ;
978
- DWORD len , wlen , orig_len , n = 0 ;
990
+ DWORD len , wlen , n = 0 ;
979
991
HANDLE handle ;
980
992
981
993
if (self -> fd == -1 )
@@ -1007,21 +1019,8 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
1007
1019
have to reduce and recalculate. */
1008
1020
while (wlen > 32766 / sizeof (wchar_t )) {
1009
1021
len /= 2 ;
1010
- orig_len = len ;
1011
- /* Reduce the length until we hit the final byte of a UTF-8 sequence
1012
- * (top bit is unset). Fix for github issue 82052.
1013
- */
1014
- while (len > 0 && (((char * )b -> buf )[len - 1 ] & 0x80 ) != 0 )
1015
- -- len ;
1016
- /* If we hit a length of 0, something has gone wrong. This shouldn't
1017
- * be possible, as valid UTF-8 can have at most 3 non-final bytes
1018
- * before a final one, and our buffer is way longer than that.
1019
- * But to be on the safe side, if we hit this issue we just restore
1020
- * the original length and let the console API sort it out.
1021
- */
1022
- if (len == 0 ) {
1023
- len = orig_len ;
1024
- }
1022
+ /* Fix for github issues gh-110913 and gh-82052. */
1023
+ len = _find_last_utf8_boundary (b -> buf , len );
1025
1024
wlen = MultiByteToWideChar (CP_UTF8 , 0 , b -> buf , len , NULL , 0 );
1026
1025
}
1027
1026
Py_END_ALLOW_THREADS
0 commit comments