Skip to content

Commit efaf429

Browse files
authored
Rollup merge of #91754 - Patrick-Poitras:rm-4byte-minimum-stdio-windows, r=Mark-Simulacrum
Modifications to `std::io::Stdin` on Windows so that there is no longer a 4-byte buffer minimum in read(). This is an attempted fix of issue #91722, where a too-small buffer was passed to the read function of stdio on Windows. This caused an error to be returned when `read_to_end` or `read_to_string` were called. Both delegate to `std::io::default_read_to_end`, which creates a buffer that is of length >0, and forwards it to `std::io::Stdin::read()`. The latter method returns an error if the length of the buffer is less than 4, as there might not be enough space to allocate a UTF-16 character. This creates a problem when the buffer length is in `0 < N < 4`, causing the bug. The current modification creates an internal buffer, much like the one used for the write functions I'd also like to acknowledge the help of `@agausmann` and `@hkratz` in detecting and isolating the bug, and for suggestions that made the fix possible. Couple disclaimers: - Firstly, I didn't know where to put code to replicate the bug found in the issue. It would probably be wise to add that case to the testing suite, but I'm afraid that I don't know _where_ that test should be added. - Secondly, the code is fairly fundamental to IO operations, so my fears are that this may cause some undesired side effects ~or performance loss in benchmarks.~ The testing suite runs on my computer, and it does fix the issue noted in #91722. - Thirdly, I left the "surrogate" field in the Stdin struct, but from a cursory glance, it seems to be serving the same purpose for other functions. Perhaps merging the two would be appropriate. Finally, this is my first pull request to the rust language, and as such some things may be weird/unidiomatic/plain out bad. If there are any obvious improvements I could do to the code, or any other suggestions, I would appreciate them. Edit: Closes #91722
2 parents ddabe07 + d49d1d4 commit efaf429

File tree

1 file changed

+54
-18
lines changed

1 file changed

+54
-18
lines changed

Diff for: library/std/src/sys/windows/stdio.rs

+54-18
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ use core::str::utf8_char_width;
1515
// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
1616
pub struct Stdin {
1717
surrogate: u16,
18+
incomplete_utf8: IncompleteUtf8,
1819
}
20+
1921
pub struct Stdout {
2022
incomplete_utf8: IncompleteUtf8,
2123
}
@@ -29,6 +31,25 @@ struct IncompleteUtf8 {
2931
len: u8,
3032
}
3133

34+
impl IncompleteUtf8 {
35+
// Implemented for use in Stdin::read.
36+
fn read(&mut self, buf: &mut [u8]) -> usize {
37+
// Write to buffer until the buffer is full or we run out of bytes.
38+
let to_write = cmp::min(buf.len(), self.len as usize);
39+
buf[..to_write].copy_from_slice(&self.bytes[..to_write]);
40+
41+
// Rotate the remaining bytes if not enough remaining space in buffer.
42+
if usize::from(self.len) > buf.len() {
43+
self.bytes.copy_within(to_write.., 0);
44+
self.len -= to_write as u8;
45+
} else {
46+
self.len = 0;
47+
}
48+
49+
to_write
50+
}
51+
}
52+
3253
// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
3354
// #13304 for details).
3455
//
@@ -205,7 +226,7 @@ fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
205226

206227
impl Stdin {
207228
pub const fn new() -> Stdin {
208-
Stdin { surrogate: 0 }
229+
Stdin { surrogate: 0, incomplete_utf8: IncompleteUtf8::new() }
209230
}
210231
}
211232

@@ -221,24 +242,39 @@ impl io::Read for Stdin {
221242
}
222243
}
223244

224-
if buf.len() == 0 {
225-
return Ok(0);
226-
} else if buf.len() < 4 {
227-
return Err(io::Error::new_const(
228-
io::ErrorKind::InvalidInput,
229-
&"Windows stdin in console mode does not support a buffer too small to \
230-
guarantee holding one arbitrary UTF-8 character (4 bytes)",
231-
));
245+
// If there are bytes in the incomplete utf-8, start with those.
246+
// (No-op if there is nothing in the buffer.)
247+
let mut bytes_copied = self.incomplete_utf8.read(buf);
248+
249+
if bytes_copied == buf.len() {
250+
return Ok(bytes_copied);
251+
} else if buf.len() - bytes_copied < 4 {
252+
// Not enough space to get a UTF-8 byte. We will use the incomplete UTF8.
253+
let mut utf16_buf = [0u16; 1];
254+
// Read one u16 character.
255+
let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?;
256+
// Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
257+
let read_bytes = utf16_to_utf8(&utf16_buf[..read], &mut self.incomplete_utf8.bytes)?;
258+
259+
// Read in the bytes from incomplete_utf8 until the buffer is full.
260+
self.incomplete_utf8.len = read_bytes as u8;
261+
// No-op if no bytes.
262+
bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]);
263+
Ok(bytes_copied)
264+
} else {
265+
let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2];
266+
// In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
267+
// we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
268+
// lost.
269+
let amount = cmp::min(buf.len() / 3, utf16_buf.len());
270+
let read =
271+
read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
272+
273+
match utf16_to_utf8(&utf16_buf[..read], buf) {
274+
Ok(value) => return Ok(bytes_copied + value),
275+
Err(e) => return Err(e),
276+
}
232277
}
233-
234-
let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2];
235-
// In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
236-
// we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
237-
// lost.
238-
let amount = cmp::min(buf.len() / 3, utf16_buf.len());
239-
let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
240-
241-
utf16_to_utf8(&utf16_buf[..read], buf)
242278
}
243279
}
244280

0 commit comments

Comments
 (0)