Skip to content

Commit f19783d

Browse files
authored
Rollup merge of rust-lang#101193 - thomcc:win-stdio-nozero, r=ChrisDenton
Avoid zeroing large stack buffers in stdio on Windows. Does what it says on the tin, using `[MaybeUninit<u16>; N]` instead of `[0u16; N]`. These buffers seem to be around 8 KB, which is big enough that this is likely to be a very nice perf boost to stdio-heavy Windows code. r? ``@ChrisDenton`` *(Note: this PR also has a commit that adds Windows to CI, but as it mentions, I'll revert that after it comes out green -- I can only do a check build on the machine I'm typing this on.)*
2 parents 42a1901 + 1b8b2dc commit f19783d

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

library/std/src/sys/windows/stdio.rs

+27-14
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
use crate::char::decode_utf16;
44
use crate::cmp;
55
use crate::io;
6+
use crate::mem::MaybeUninit;
67
use crate::os::windows::io::{FromRawHandle, IntoRawHandle};
78
use crate::ptr;
89
use crate::str;
@@ -169,13 +170,14 @@ fn write(
169170
}
170171

171172
fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usize> {
172-
let mut utf16 = [0u16; MAX_BUFFER_SIZE / 2];
173+
let mut utf16 = [MaybeUninit::<u16>::uninit(); MAX_BUFFER_SIZE / 2];
173174
let mut len_utf16 = 0;
174175
for (chr, dest) in utf8.encode_utf16().zip(utf16.iter_mut()) {
175-
*dest = chr;
176+
*dest = MaybeUninit::new(chr);
176177
len_utf16 += 1;
177178
}
178-
let utf16 = &utf16[..len_utf16];
179+
// Safety: We've initialized `len_utf16` values.
180+
let utf16: &[u16] = unsafe { MaybeUninit::slice_assume_init_ref(&utf16[..len_utf16]) };
179181

180182
let mut written = write_u16s(handle, &utf16)?;
181183

@@ -250,27 +252,33 @@ impl io::Read for Stdin {
250252
return Ok(bytes_copied);
251253
} else if buf.len() - bytes_copied < 4 {
252254
// Not enough space to get a UTF-8 byte. We will use the incomplete UTF8.
253-
let mut utf16_buf = [0u16; 1];
255+
let mut utf16_buf = [MaybeUninit::new(0); 1];
254256
// Read one u16 character.
255257
let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?;
256258
// Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
257-
let read_bytes = utf16_to_utf8(&utf16_buf[..read], &mut self.incomplete_utf8.bytes)?;
259+
let read_bytes = utf16_to_utf8(
260+
unsafe { MaybeUninit::slice_assume_init_ref(&utf16_buf[..read]) },
261+
&mut self.incomplete_utf8.bytes,
262+
)?;
258263

259264
// Read in the bytes from incomplete_utf8 until the buffer is full.
260265
self.incomplete_utf8.len = read_bytes as u8;
261266
// No-op if no bytes.
262267
bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]);
263268
Ok(bytes_copied)
264269
} else {
265-
let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2];
270+
let mut utf16_buf = [MaybeUninit::<u16>::uninit(); MAX_BUFFER_SIZE / 2];
271+
266272
// In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
267273
// we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
268274
// lost.
269275
let amount = cmp::min(buf.len() / 3, utf16_buf.len());
270276
let read =
271277
read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
272-
273-
match utf16_to_utf8(&utf16_buf[..read], buf) {
278+
// Safety: `read_u16s_fixup_surrogates` returns the number of items
279+
// initialized.
280+
let utf16s = unsafe { MaybeUninit::slice_assume_init_ref(&utf16_buf[..read]) };
281+
match utf16_to_utf8(utf16s, buf) {
274282
Ok(value) => return Ok(bytes_copied + value),
275283
Err(e) => return Err(e),
276284
}
@@ -283,14 +291,14 @@ impl io::Read for Stdin {
283291
// This is a best effort, and might not work if we are not the only reader on Stdin.
284292
fn read_u16s_fixup_surrogates(
285293
handle: c::HANDLE,
286-
buf: &mut [u16],
294+
buf: &mut [MaybeUninit<u16>],
287295
mut amount: usize,
288296
surrogate: &mut u16,
289297
) -> io::Result<usize> {
290298
// Insert possibly remaining unpaired surrogate from last read.
291299
let mut start = 0;
292300
if *surrogate != 0 {
293-
buf[0] = *surrogate;
301+
buf[0] = MaybeUninit::new(*surrogate);
294302
*surrogate = 0;
295303
start = 1;
296304
if amount == 1 {
@@ -303,7 +311,10 @@ fn read_u16s_fixup_surrogates(
303311
let mut amount = read_u16s(handle, &mut buf[start..amount])? + start;
304312

305313
if amount > 0 {
306-
let last_char = buf[amount - 1];
314+
// Safety: The returned `amount` is the number of values initialized,
315+
// and it is not 0, so we know that `buf[amount - 1]` has been
316+
// initialized.
317+
let last_char = unsafe { buf[amount - 1].assume_init() };
307318
if last_char >= 0xD800 && last_char <= 0xDBFF {
308319
// high surrogate
309320
*surrogate = last_char;
@@ -313,7 +324,8 @@ fn read_u16s_fixup_surrogates(
313324
Ok(amount)
314325
}
315326

316-
fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> {
327+
// Returns `Ok(n)` if it initialized `n` values in `buf`.
328+
fn read_u16s(handle: c::HANDLE, buf: &mut [MaybeUninit<u16>]) -> io::Result<usize> {
317329
// Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the
318330
// traditional DOS method to indicate end of character stream / user input (SUB).
319331
// See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole.
@@ -346,8 +358,9 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> {
346358
}
347359
break;
348360
}
349-
350-
if amount > 0 && buf[amount as usize - 1] == CTRL_Z {
361+
// Safety: if `amount > 0`, then that many `u16` values were written, so
362+
// `buf[amount as usize - 1]` has been initialized.
363+
if amount > 0 && unsafe { buf[amount as usize - 1].assume_init() } == CTRL_Z {
351364
amount -= 1;
352365
}
353366
Ok(amount as usize)

0 commit comments

Comments
 (0)