@@ -15,7 +15,9 @@ use core::str::utf8_char_width;
15
15
// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
16
16
pub struct Stdin {
17
17
surrogate : u16 ,
18
+ incomplete_utf8 : IncompleteUtf8 ,
18
19
}
20
+
19
21
pub struct Stdout {
20
22
incomplete_utf8 : IncompleteUtf8 ,
21
23
}
@@ -29,6 +31,25 @@ struct IncompleteUtf8 {
29
31
len : u8 ,
30
32
}
31
33
34
+ impl IncompleteUtf8 {
35
+ // Implemented for use in Stdin::read.
36
+ fn read ( & mut self , buf : & mut [ u8 ] ) -> usize {
37
+ // Write to buffer until the buffer is full or we run out of bytes.
38
+ let to_write = cmp:: min ( buf. len ( ) , self . len as usize ) ;
39
+ buf[ ..to_write] . copy_from_slice ( & self . bytes [ ..to_write] ) ;
40
+
41
+ // Rotate the remaining bytes if not enough remaining space in buffer.
42
+ if usize:: from ( self . len ) > buf. len ( ) {
43
+ self . bytes . copy_within ( to_write.., 0 ) ;
44
+ self . len -= to_write as u8 ;
45
+ } else {
46
+ self . len = 0 ;
47
+ }
48
+
49
+ to_write
50
+ }
51
+ }
52
+
32
53
// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
33
54
// #13304 for details).
34
55
//
@@ -205,7 +226,7 @@ fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
205
226
206
227
impl Stdin {
207
228
pub const fn new ( ) -> Stdin {
208
- Stdin { surrogate : 0 }
229
+ Stdin { surrogate : 0 , incomplete_utf8 : IncompleteUtf8 :: new ( ) }
209
230
}
210
231
}
211
232
@@ -221,24 +242,39 @@ impl io::Read for Stdin {
221
242
}
222
243
}
223
244
224
- if buf. len ( ) == 0 {
225
- return Ok ( 0 ) ;
226
- } else if buf. len ( ) < 4 {
227
- return Err ( io:: Error :: new_const (
228
- io:: ErrorKind :: InvalidInput ,
229
- & "Windows stdin in console mode does not support a buffer too small to \
230
- guarantee holding one arbitrary UTF-8 character (4 bytes)",
231
- ) ) ;
245
+ // If there are bytes in the incomplete utf-8, start with those.
246
+ // (No-op if there is nothing in the buffer.)
247
+ let mut bytes_copied = self . incomplete_utf8 . read ( buf) ;
248
+
249
+ if bytes_copied == buf. len ( ) {
250
+ return Ok ( bytes_copied) ;
251
+ } else if buf. len ( ) - bytes_copied < 4 {
252
+ // Not enough space left to guarantee holding one UTF-8 character (up to 4 bytes). Decode into the incomplete UTF-8 buffer instead.
253
+ let mut utf16_buf = [ 0u16 ; 1 ] ;
254
+ // Read one u16 character.
255
+ let read = read_u16s_fixup_surrogates ( handle, & mut utf16_buf, 1 , & mut self . surrogate ) ?;
256
+ // Convert to UTF-8, using the (now-empty) self.incomplete_utf8 as scratch space.
257
+ let read_bytes = utf16_to_utf8 ( & utf16_buf[ ..read] , & mut self . incomplete_utf8 . bytes ) ?;
258
+
259
+ // Read in the bytes from incomplete_utf8 until the buffer is full.
260
+ self . incomplete_utf8 . len = read_bytes as u8 ;
261
+ // No-op if no bytes.
262
+ bytes_copied += self . incomplete_utf8 . read ( & mut buf[ bytes_copied..] ) ;
263
+ Ok ( bytes_copied)
264
+ } else {
265
+ let mut utf16_buf = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
266
+ // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
267
+ // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
268
+ // lost.
269
+ let amount = cmp:: min ( buf. len ( ) / 3 , utf16_buf. len ( ) ) ;
270
+ let read =
271
+ read_u16s_fixup_surrogates ( handle, & mut utf16_buf, amount, & mut self . surrogate ) ?;
272
+
273
+ match utf16_to_utf8 ( & utf16_buf[ ..read] , buf) {
274
+ Ok ( value) => return Ok ( bytes_copied + value) ,
275
+ Err ( e) => return Err ( e) ,
276
+ }
232
277
}
233
-
234
- let mut utf16_buf = [ 0u16 ; MAX_BUFFER_SIZE / 2 ] ;
235
- // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
236
- // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
237
- // lost.
238
- let amount = cmp:: min ( buf. len ( ) / 3 , utf16_buf. len ( ) ) ;
239
- let read = read_u16s_fixup_surrogates ( handle, & mut utf16_buf, amount, & mut self . surrogate ) ?;
240
-
241
- utf16_to_utf8 ( & utf16_buf[ ..read] , buf)
242
278
}
243
279
}
244
280
0 commit comments