Skip to content

Commit d49d1d4

Browse files
Modifications to buffer UTF-16 internally so that there is no longer a 4-byte buffer minimum. Include suggestions from @agausmann and @Mark-Simulacrum.
1 parent 404c847 commit d49d1d4

File tree

1 file changed

+54
-18
lines changed

1 file changed

+54
-18
lines changed

Diff for: library/std/src/sys/windows/stdio.rs

+54-18
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ use core::str::utf8_char_width;
1515
// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
1616
pub struct Stdin {
1717
surrogate: u16,
18+
incomplete_utf8: IncompleteUtf8,
1819
}
20+
1921
pub struct Stdout {
2022
incomplete_utf8: IncompleteUtf8,
2123
}
@@ -29,6 +31,25 @@ struct IncompleteUtf8 {
2931
len: u8,
3032
}
3133

34+
impl IncompleteUtf8 {
35+
// Implemented for use in Stdin::read.
36+
fn read(&mut self, buf: &mut [u8]) -> usize {
37+
// Write to buffer until the buffer is full or we run out of bytes.
38+
let to_write = cmp::min(buf.len(), self.len as usize);
39+
buf[..to_write].copy_from_slice(&self.bytes[..to_write]);
40+
41+
// Rotate the remaining bytes if not enough remaining space in buffer.
42+
if usize::from(self.len) > buf.len() {
43+
self.bytes.copy_within(to_write.., 0);
44+
self.len -= to_write as u8;
45+
} else {
46+
self.len = 0;
47+
}
48+
49+
to_write
50+
}
51+
}
52+
3253
// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
3354
// #13304 for details).
3455
//
@@ -205,7 +226,7 @@ fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
205226

206227
impl Stdin {
207228
pub const fn new() -> Stdin {
208-
Stdin { surrogate: 0 }
229+
Stdin { surrogate: 0, incomplete_utf8: IncompleteUtf8::new() }
209230
}
210231
}
211232

@@ -221,24 +242,39 @@ impl io::Read for Stdin {
221242
}
222243
}
223244

224-
if buf.len() == 0 {
225-
return Ok(0);
226-
} else if buf.len() < 4 {
227-
return Err(io::Error::new_const(
228-
io::ErrorKind::InvalidInput,
229-
&"Windows stdin in console mode does not support a buffer too small to \
230-
guarantee holding one arbitrary UTF-8 character (4 bytes)",
231-
));
245+
// If there are bytes in the incomplete utf-8, start with those.
246+
// (No-op if there is nothing in the buffer.)
247+
let mut bytes_copied = self.incomplete_utf8.read(buf);
248+
249+
if bytes_copied == buf.len() {
250+
return Ok(bytes_copied);
251+
} else if buf.len() - bytes_copied < 4 {
252+
// Not enough space to get a UTF-8 byte. We will use the incomplete UTF8.
253+
let mut utf16_buf = [0u16; 1];
254+
// Read one u16 character.
255+
let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?;
256+
// Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
257+
let read_bytes = utf16_to_utf8(&utf16_buf[..read], &mut self.incomplete_utf8.bytes)?;
258+
259+
// Read in the bytes from incomplete_utf8 until the buffer is full.
260+
self.incomplete_utf8.len = read_bytes as u8;
261+
// No-op if no bytes.
262+
bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]);
263+
Ok(bytes_copied)
264+
} else {
265+
let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2];
266+
// In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
267+
// we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
268+
// lost.
269+
let amount = cmp::min(buf.len() / 3, utf16_buf.len());
270+
let read =
271+
read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
272+
273+
match utf16_to_utf8(&utf16_buf[..read], buf) {
274+
Ok(value) => return Ok(bytes_copied + value),
275+
Err(e) => return Err(e),
276+
}
232277
}
233-
234-
let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2];
235-
// In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
236-
// we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
237-
// lost.
238-
let amount = cmp::min(buf.len() / 3, utf16_buf.len());
239-
let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
240-
241-
utf16_to_utf8(&utf16_buf[..read], buf)
242278
}
243279
}
244280

0 commit comments

Comments
 (0)