Skip to content

Commit f74fe8b

Browse files
committed
Limit read size in File::read_to_end loop
This works around performance issues on Windows by limiting reads the size of reads when the expected size is known.
1 parent fa4cc63 commit f74fe8b

File tree

4 files changed

+43
-24
lines changed

4 files changed

+43
-24
lines changed

library/std/src/fs.rs

+22-18
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,9 @@ pub struct DirBuilder {
249249
pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
250250
fn inner(path: &Path) -> io::Result<Vec<u8>> {
251251
let mut file = File::open(path)?;
252-
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
253-
let mut bytes = Vec::with_capacity(size as usize);
254-
io::default_read_to_end(&mut file, &mut bytes)?;
252+
let size = file.metadata().map(|m| m.len() as usize).ok();
253+
let mut bytes = Vec::with_capacity(size.unwrap_or(0));
254+
io::default_read_to_end(&mut file, &mut bytes, size)?;
255255
Ok(bytes)
256256
}
257257
inner(path.as_ref())
@@ -289,9 +289,9 @@ pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
289289
pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
290290
fn inner(path: &Path) -> io::Result<String> {
291291
let mut file = File::open(path)?;
292-
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
293-
let mut string = String::with_capacity(size as usize);
294-
io::default_read_to_string(&mut file, &mut string)?;
292+
let size = file.metadata().map(|m| m.len() as usize).ok();
293+
let mut string = String::with_capacity(size.unwrap_or(0));
294+
io::default_read_to_string(&mut file, &mut string, size)?;
295295
Ok(string)
296296
}
297297
inner(path.as_ref())
@@ -732,12 +732,12 @@ impl fmt::Debug for File {
732732
}
733733

734734
/// Indicates how much extra capacity is needed to read the rest of the file.
735-
fn buffer_capacity_required(mut file: &File) -> usize {
736-
let size = file.metadata().map(|m| m.len()).unwrap_or(0);
737-
let pos = file.stream_position().unwrap_or(0);
735+
fn buffer_capacity_required(mut file: &File) -> Option<usize> {
736+
let size = file.metadata().map(|m| m.len()).ok()?;
737+
let pos = file.stream_position().ok()?;
738738
// Don't worry about `usize` overflow because reading will fail regardless
739739
// in that case.
740-
size.saturating_sub(pos) as usize
740+
Some(size.saturating_sub(pos) as usize)
741741
}
742742

743743
#[stable(feature = "rust1", since = "1.0.0")]
@@ -761,14 +761,16 @@ impl Read for File {
761761

762762
// Reserves space in the buffer based on the file size when available.
763763
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
764-
buf.reserve(buffer_capacity_required(self));
765-
io::default_read_to_end(self, buf)
764+
let size = buffer_capacity_required(self);
765+
buf.reserve(size.unwrap_or(0));
766+
io::default_read_to_end(self, buf, size)
766767
}
767768

768769
// Reserves space in the buffer based on the file size when available.
769770
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
770-
buf.reserve(buffer_capacity_required(self));
771-
io::default_read_to_string(self, buf)
771+
let size = buffer_capacity_required(self);
772+
buf.reserve(size.unwrap_or(0));
773+
io::default_read_to_string(self, buf, size)
772774
}
773775
}
774776
#[stable(feature = "rust1", since = "1.0.0")]
@@ -817,14 +819,16 @@ impl Read for &File {
817819

818820
// Reserves space in the buffer based on the file size when available.
819821
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
820-
buf.reserve(buffer_capacity_required(self));
821-
io::default_read_to_end(self, buf)
822+
let size = buffer_capacity_required(self);
823+
buf.reserve(size.unwrap_or(0));
824+
io::default_read_to_end(self, buf, size)
822825
}
823826

824827
// Reserves space in the buffer based on the file size when available.
825828
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
826-
buf.reserve(buffer_capacity_required(self));
827-
io::default_read_to_string(self, buf)
829+
let size = buffer_capacity_required(self);
830+
buf.reserve(size.unwrap_or(0));
831+
io::default_read_to_string(self, buf, size)
828832
}
829833
}
830834
#[stable(feature = "rust1", since = "1.0.0")]

library/std/src/io/mod.rs

+19-5
Original file line numberDiff line numberDiff line change
@@ -357,17 +357,30 @@ where
357357
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
358358
// time is 4,500 times (!) slower than a default reservation size of 32 if the
359359
// reader has a very small amount of data to return.
360-
pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
360+
pub(crate) fn default_read_to_end<R: Read + ?Sized>(
361+
r: &mut R,
362+
buf: &mut Vec<u8>,
363+
size_hint: Option<usize>,
364+
) -> Result<usize> {
361365
let start_len = buf.len();
362366
let start_cap = buf.capacity();
367+
// Optionally limit the maximum bytes read on each iteration.
368+
// This adds an arbitrary fiddle factor to allow for more data than we expect.
369+
let max_read_size =
370+
size_hint.and_then(|s| s.checked_add(1024)?.checked_next_multiple_of(DEFAULT_BUF_SIZE));
363371

364372
let mut initialized = 0; // Extra initialized bytes from previous loop iteration
365373
loop {
366374
if buf.len() == buf.capacity() {
367375
buf.reserve(32); // buf is full, need more space
368376
}
369377

370-
let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into();
378+
let mut spare = buf.spare_capacity_mut();
379+
if let Some(size) = max_read_size {
380+
let len = cmp::min(spare.len(), size);
381+
spare = &mut spare[..len]
382+
}
383+
let mut read_buf: BorrowedBuf<'_> = spare.into();
371384

372385
// SAFETY: These bytes were initialized but not filled in the previous loop
373386
unsafe {
@@ -419,6 +432,7 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>
419432
pub(crate) fn default_read_to_string<R: Read + ?Sized>(
420433
r: &mut R,
421434
buf: &mut String,
435+
size_hint: Option<usize>,
422436
) -> Result<usize> {
423437
// Note that we do *not* call `r.read_to_end()` here. We are passing
424438
// `&mut Vec<u8>` (the raw contents of `buf`) into the `read_to_end`
@@ -429,7 +443,7 @@ pub(crate) fn default_read_to_string<R: Read + ?Sized>(
429443
// To prevent extraneously checking the UTF-8-ness of the entire buffer
430444
// we pass it to our hardcoded `default_read_to_end` implementation which
431445
// we know is guaranteed to only read data into the end of the buffer.
432-
unsafe { append_to_string(buf, |b| default_read_to_end(r, b)) }
446+
unsafe { append_to_string(buf, |b| default_read_to_end(r, b, size_hint)) }
433447
}
434448

435449
pub(crate) fn default_read_vectored<F>(read: F, bufs: &mut [IoSliceMut<'_>]) -> Result<usize>
@@ -709,7 +723,7 @@ pub trait Read {
709723
/// [`std::fs::read`]: crate::fs::read
710724
#[stable(feature = "rust1", since = "1.0.0")]
711725
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
712-
default_read_to_end(self, buf)
726+
default_read_to_end(self, buf, None)
713727
}
714728

715729
/// Read all bytes until EOF in this source, appending them to `buf`.
@@ -752,7 +766,7 @@ pub trait Read {
752766
/// [`std::fs::read_to_string`]: crate::fs::read_to_string
753767
#[stable(feature = "rust1", since = "1.0.0")]
754768
fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
755-
default_read_to_string(self, buf)
769+
default_read_to_string(self, buf, None)
756770
}
757771

758772
/// Read the exact number of bytes required to fill `buf`.

library/std/src/io/tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ fn bench_read_to_end(b: &mut test::Bencher) {
314314
b.iter(|| {
315315
let mut lr = repeat(1).take(10000000);
316316
let mut vec = Vec::with_capacity(1024);
317-
super::default_read_to_end(&mut lr, &mut vec)
317+
super::default_read_to_end(&mut lr, &mut vec, None)
318318
});
319319
}
320320

library/std/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@
289289
#![feature(float_next_up_down)]
290290
#![feature(hasher_prefixfree_extras)]
291291
#![feature(hashmap_internals)]
292+
#![feature(int_roundings)]
292293
#![feature(ip)]
293294
#![feature(ip_in_core)]
294295
#![feature(maybe_uninit_slice)]

0 commit comments

Comments
 (0)