@@ -14,8 +14,18 @@ use crate::sys::handle::Handle;
14
14
/// Standard-input state for this platform.
pub struct Stdin {
    // NOTE(review): presumably a pending UTF-16 surrogate code unit kept between
    // reads; the code that uses it is not visible here — confirm.
    surrogate: u16,
}
17
- pub struct Stdout ;
18
- pub struct Stderr ;
17
+ pub struct Stdout {
18
+ incomplete_utf8 : IncompleteUtf8 ,
19
+ }
20
+
21
+ pub struct Stderr {
22
+ incomplete_utf8 : IncompleteUtf8 ,
23
+ }
24
+
25
/// Buffer for a UTF-8 sequence whose bytes arrive over several `write` calls.
struct IncompleteUtf8 {
    /// Storage for the bytes collected so far (a UTF-8 sequence is at most 4 bytes).
    bytes: [u8; 4],
    /// Number of leading entries of `bytes` that are in use; 0 means nothing is pending.
    len: u8,
}
19
29
20
30
// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
21
31
// #13304 for details).
@@ -50,7 +60,27 @@ fn is_console(handle: c::HANDLE) -> bool {
50
60
unsafe { c:: GetConsoleMode ( handle, & mut mode) != 0 }
51
61
}
52
62
53
- fn write ( handle_id : c:: DWORD , data : & [ u8 ] ) -> io:: Result < usize > {
63
/// Returns the total byte length of the UTF-8 sequence introduced by lead byte `b`.
///
/// Yields 0 for bytes that cannot start a sequence (`0x80..=0xC1` and `0xF5..=0xFF`).
/// Simple reimplementation of `std::str::utf8_char_width()`, which is feature-gated.
fn utf8_char_width(b: u8) -> usize {
    // Thresholds are checked in ascending order, so each branch covers the
    // range between the previous bound (exclusive) and its own (inclusive).
    if b <= 0x7F {
        1
    } else if b <= 0xC1 {
        0
    } else if b <= 0xDF {
        2
    } else if b <= 0xEF {
        3
    } else if b <= 0xF4 {
        4
    } else {
        0
    }
}
74
+
75
+ fn write (
76
+ handle_id : c:: DWORD ,
77
+ data : & [ u8 ] ,
78
+ incomplete_utf8 : & mut IncompleteUtf8 ,
79
+ ) -> io:: Result < usize > {
80
+ if data. is_empty ( ) {
81
+ return Ok ( 0 ) ;
82
+ }
83
+
54
84
let handle = get_handle ( handle_id) ?;
55
85
if !is_console ( handle) {
56
86
let handle = Handle :: new ( handle) ;
@@ -59,22 +89,74 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
59
89
return ret;
60
90
}
61
91
62
- // As the console is meant for presenting text, we assume bytes of `data` come from a string
63
- // and are encoded as UTF-8, which needs to be encoded as UTF-16.
92
+ match incomplete_utf8. len {
93
+ 0 => { }
94
+ 1 ..=3 => {
95
+ if data[ 0 ] >> 6 != 0b10 {
96
+ incomplete_utf8. len = 0 ;
97
+ // not a continuation byte - reject
98
+ return Err ( io:: Error :: new (
99
+ io:: ErrorKind :: InvalidData ,
100
+ "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
101
+ ) ) ;
102
+ }
103
+ incomplete_utf8. bytes [ incomplete_utf8. len as usize ] = data[ 0 ] ;
104
+ incomplete_utf8. len += 1 ;
105
+ let char_width = utf8_char_width ( incomplete_utf8. bytes [ 0 ] ) ;
106
+ if ( incomplete_utf8. len as usize ) < char_width {
107
+ // more bytes needed
108
+ return Ok ( 1 ) ;
109
+ }
110
+ let s = str:: from_utf8 ( & incomplete_utf8. bytes [ 0 ..incomplete_utf8. len as usize ] ) ;
111
+ incomplete_utf8. len = 0 ;
112
+ match s {
113
+ Ok ( s) => {
114
+ assert_eq ! ( char_width, s. len( ) ) ;
115
+ let written = write_valid_utf8 ( handle, s) ?;
116
+ assert_eq ! ( written, s. len( ) ) ; // guaranteed by write0() for single codepoint writes
117
+ return Ok ( 1 ) ;
118
+ }
119
+ Err ( _) => {
120
+ return Err ( io:: Error :: new (
121
+ io:: ErrorKind :: InvalidData ,
122
+ "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
123
+ ) ) ;
124
+ }
125
+ }
126
+ }
127
+ _ => {
128
+ panic ! ( "Unexpected number of incomplete UTF-8 chars." ) ;
129
+ }
130
+ }
131
+
132
+ // As the console is meant for presenting text, we assume bytes of `data` are encoded as UTF-8,
133
+ // which needs to be encoded as UTF-16.
64
134
//
65
135
// If the data is not valid UTF-8 we write out as many bytes as are valid.
66
- // Only when there are no valid bytes (which will happen on the next call), return an error.
136
+ // If the first byte is invalid it is either first byte of a multi-byte sequence but the
137
+ // provided byte slice is too short or it is the first byte of an invalid multi-byte sequence.
67
138
let len = cmp:: min ( data. len ( ) , MAX_BUFFER_SIZE / 2 ) ;
68
139
let utf8 = match str:: from_utf8 ( & data[ ..len] ) {
69
140
Ok ( s) => s,
70
141
Err ( ref e) if e. valid_up_to ( ) == 0 => {
71
- return Err ( io:: Error :: new_const (
72
- io:: ErrorKind :: InvalidData ,
73
- & "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
74
- ) ) ;
142
+ if data. len ( ) < utf8_char_width ( data[ 0 ] ) {
143
+ incomplete_utf8. bytes [ 0 ] = data[ 0 ] ;
144
+ incomplete_utf8. len = 1 ;
145
+ return Ok ( 1 ) ;
146
+ } else {
147
+ return Err ( io:: Error :: new_const (
148
+ io:: ErrorKind :: InvalidData ,
149
+ & "Windows stdio in console mode does not support writing non-UTF-8 byte sequences" ,
150
+ ) ) ;
151
+ }
75
152
}
76
153
Err ( e) => str:: from_utf8 ( & data[ ..e. valid_up_to ( ) ] ) . unwrap ( ) ,
77
154
} ;
155
+
156
+ write_valid_utf8 ( handle, utf8)
157
+ }
158
+
159
+ fn write_valid_utf8 ( handle : c:: HANDLE , utf8 : & str ) -> io:: Result < usize > {
78
160
let mut utf16 = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
79
161
let mut len_utf16 = 0 ;
80
162
for ( chr, dest) in utf8. encode_utf16 ( ) . zip ( utf16. iter_mut ( ) ) {
@@ -254,15 +336,21 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
254
336
Ok ( written)
255
337
}
256
338
339
+ impl IncompleteUtf8 {
340
+ pub const fn new ( ) -> IncompleteUtf8 {
341
+ IncompleteUtf8 { bytes : [ 0 ; 4 ] , len : 0 }
342
+ }
343
+ }
344
+
257
345
impl Stdout {
258
346
pub const fn new ( ) -> Stdout {
259
- Stdout
347
+ Stdout { incomplete_utf8 : IncompleteUtf8 :: new ( ) }
260
348
}
261
349
}
262
350
263
351
impl io:: Write for Stdout {
264
352
fn write ( & mut self , buf : & [ u8 ] ) -> io:: Result < usize > {
265
- write ( c:: STD_OUTPUT_HANDLE , buf)
353
+ write ( c:: STD_ERROR_HANDLE , buf, & mut self . incomplete_utf8 )
266
354
}
267
355
268
356
fn flush ( & mut self ) -> io:: Result < ( ) > {
@@ -272,13 +360,13 @@ impl io::Write for Stdout {
272
360
273
361
impl Stderr {
274
362
pub const fn new ( ) -> Stderr {
275
- Stderr
363
+ Stderr { incomplete_utf8 : IncompleteUtf8 :: new ( ) }
276
364
}
277
365
}
278
366
279
367
impl io:: Write for Stderr {
280
368
fn write ( & mut self , buf : & [ u8 ] ) -> io:: Result < usize > {
281
- write ( c:: STD_ERROR_HANDLE , buf)
369
+ write ( c:: STD_ERROR_HANDLE , buf, & mut self . incomplete_utf8 )
282
370
}
283
371
284
372
fn flush ( & mut self ) -> io:: Result < ( ) > {
0 commit comments