Commit 50166d5
Auto merge of rust-lang#98748 - saethlin:optimize-bufreader, r=Mark-Simulacrum
Remove some redundant checks from BufReader

The implementation of BufReader contains a lot of redundant checks. While no single one of these checks is particularly expensive to execute, taken together they dramatically inhibit LLVM's ability to make subsequent optimizations by confusing data flow, and they increase the code size of anything that uses BufReader. In particular, these changes produce a ~2x speedup on the benchmark that this PR adds a `black_box` to. I'm adding that `black_box` here just in case LLVM gets clever enough to remove the reads entirely. Right now it can't, but these optimizations are really setting it up to do so.

We get this optimization by factoring all the actual buffer management and bounds-checking logic into a new module inside `bufreader` with a new `Buffer` type. This makes it much easier to ensure that we have correctly encapsulated the management of the region of the buffer that we have read bytes into, and it lets us provide a new, faster way to do small reads. `Buffer::consume_with` lets a caller do a read from the buffer with a single bounds check, instead of the double check that's required to use `buffer` + `consume`.

Unfortunately I'm not aware of much open-source usage of `BufReader` in perf-critical environments. Some time ago I tweaked this code because I saw `BufReader` in a profile at work, and I contributed some benchmarks to the `bincode` crate which exercise `BufReader::buffer`. These changes appear to help those benchmarks a little, but benchmarks of this sort are fragile, so I'm wary of quoting anything specific.
2 parents: ff693dc + 5fa1926

3 files changed: +134 −53 lines changed
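To make the `consume_with` point concrete, here is a small sketch of my own (not part of the commit) contrasting the two read paths, using only stable `std` APIs; the buffer contents and sizes are arbitrary:

```rust
use std::io::{BufRead, BufReader, Cursor, Read};

fn main() -> std::io::Result<()> {
    let mut reader = BufReader::new(Cursor::new(vec![1u8, 2, 3, 4]));

    // `buffer` + `consume`: the slice indexing is one bounds check, and
    // `consume` clamps the internal position again, a second check.
    let mut first = [0u8; 2];
    first.copy_from_slice(&reader.fill_buf()?[..2]);
    reader.consume(2);

    // `read_exact` now routes through `Buffer::consume_with`, which does the
    // same copy behind a single `get(..amt)` bounds check.
    let mut second = [0u8; 2];
    reader.read_exact(&mut second)?;

    assert_eq!((first, second), ([1, 2], [3, 4]));
    Ok(())
}
```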

Diff for: library/std/src/io/buffered/bufreader.rs (+28 −53)

```diff
@@ -1,9 +1,10 @@
-use crate::cmp;
+mod buffer;
+
 use crate::fmt;
 use crate::io::{
     self, BufRead, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
 };
-use crate::mem::MaybeUninit;
+use buffer::Buffer;
 
 /// The `BufReader<R>` struct adds buffering to any reader.
 ///
@@ -48,10 +49,7 @@ use crate::mem::MaybeUninit;
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct BufReader<R> {
     inner: R,
-    buf: Box<[MaybeUninit<u8>]>,
-    pos: usize,
-    cap: usize,
-    init: usize,
+    buf: Buffer,
 }
 
 impl<R: Read> BufReader<R> {
@@ -93,8 +91,7 @@ impl<R: Read> BufReader<R> {
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn with_capacity(capacity: usize, inner: R) -> BufReader<R> {
-        let buf = Box::new_uninit_slice(capacity);
-        BufReader { inner, buf, pos: 0, cap: 0, init: 0 }
+        BufReader { inner, buf: Buffer::with_capacity(capacity) }
     }
 }
 
@@ -170,8 +167,7 @@ impl<R> BufReader<R> {
     /// ```
     #[stable(feature = "bufreader_buffer", since = "1.37.0")]
     pub fn buffer(&self) -> &[u8] {
-        // SAFETY: self.cap is always <= self.init, so self.buf[self.pos..self.cap] is always init
-        unsafe { MaybeUninit::slice_assume_init_ref(&self.buf[self.pos..self.cap]) }
+        self.buf.buffer()
     }
 
     /// Returns the number of bytes the internal buffer can hold at once.
@@ -194,7 +190,7 @@ impl<R> BufReader<R> {
     /// ```
     #[stable(feature = "buffered_io_capacity", since = "1.46.0")]
     pub fn capacity(&self) -> usize {
-        self.buf.len()
+        self.buf.capacity()
     }
 
     /// Unwraps this `BufReader<R>`, returning the underlying reader.
@@ -224,8 +220,7 @@ impl<R> BufReader<R> {
     /// Invalidates all data in the internal buffer.
     #[inline]
     fn discard_buffer(&mut self) {
-        self.pos = 0;
-        self.cap = 0;
+        self.buf.discard_buffer()
     }
 }
 
@@ -236,15 +231,15 @@ impl<R: Seek> BufReader<R> {
     /// must track this information themselves if it is required.
     #[stable(feature = "bufreader_seek_relative", since = "1.53.0")]
    pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> {
-        let pos = self.pos as u64;
+        let pos = self.buf.pos() as u64;
         if offset < 0 {
-            if let Some(new_pos) = pos.checked_sub((-offset) as u64) {
-                self.pos = new_pos as usize;
+            if let Some(_) = pos.checked_sub((-offset) as u64) {
+                self.buf.unconsume((-offset) as usize);
                 return Ok(());
             }
         } else if let Some(new_pos) = pos.checked_add(offset as u64) {
-            if new_pos <= self.cap as u64 {
-                self.pos = new_pos as usize;
+            if new_pos <= self.buf.filled() as u64 {
+                self.buf.consume(offset as usize);
                 return Ok(());
             }
         }
@@ -259,7 +254,7 @@ impl<R: Read> Read for BufReader<R> {
         // If we don't have any buffered data and we're doing a massive read
         // (larger than our internal buffer), bypass our internal buffer
         // entirely.
-        if self.pos == self.cap && buf.len() >= self.buf.len() {
+        if self.buf.pos() == self.buf.filled() && buf.len() >= self.capacity() {
             self.discard_buffer();
             return self.inner.read(buf);
         }
@@ -275,7 +270,7 @@ impl<R: Read> Read for BufReader<R> {
         // If we don't have any buffered data and we're doing a massive read
         // (larger than our internal buffer), bypass our internal buffer
         // entirely.
-        if self.pos == self.cap && buf.remaining() >= self.buf.len() {
+        if self.buf.pos() == self.buf.filled() && buf.remaining() >= self.capacity() {
             self.discard_buffer();
             return self.inner.read_buf(buf);
         }
@@ -295,9 +290,7 @@ impl<R: Read> Read for BufReader<R> {
     // generation for the common path where the buffer has enough bytes to fill the passed-in
     // buffer.
     fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
-        if self.buffer().len() >= buf.len() {
-            buf.copy_from_slice(&self.buffer()[..buf.len()]);
-            self.consume(buf.len());
+        if self.buf.consume_with(buf.len(), |claimed| buf.copy_from_slice(claimed)) {
             return Ok(());
         }
 
@@ -306,7 +299,7 @@ impl<R: Read> Read for BufReader<R> {
 
     fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
         let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
-        if self.pos == self.cap && total_len >= self.buf.len() {
+        if self.buf.pos() == self.buf.filled() && total_len >= self.capacity() {
             self.discard_buffer();
             return self.inner.read_vectored(bufs);
         }
@@ -325,8 +318,9 @@ impl<R: Read> Read for BufReader<R> {
     // The inner reader might have an optimized `read_to_end`. Drain our buffer and then
     // delegate to the inner implementation.
     fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
-        let nread = self.cap - self.pos;
-        buf.extend_from_slice(&self.buffer());
+        let inner_buf = self.buffer();
+        buf.extend_from_slice(inner_buf);
+        let nread = inner_buf.len();
         self.discard_buffer();
         Ok(nread + self.inner.read_to_end(buf)?)
     }
@@ -371,33 +365,11 @@ impl<R: Read> Read for BufReader<R> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<R: Read> BufRead for BufReader<R> {
     fn fill_buf(&mut self) -> io::Result<&[u8]> {
-        // If we've reached the end of our internal buffer then we need to fetch
-        // some more data from the underlying reader.
-        // Branch using `>=` instead of the more correct `==`
-        // to tell the compiler that the pos..cap slice is always valid.
-        if self.pos >= self.cap {
-            debug_assert!(self.pos == self.cap);
-
-            let mut readbuf = ReadBuf::uninit(&mut self.buf);
-
-            // SAFETY: `self.init` is either 0 or set to `readbuf.initialized_len()`
-            // from the last time this function was called
-            unsafe {
-                readbuf.assume_init(self.init);
-            }
-
-            self.inner.read_buf(&mut readbuf)?;
-
-            self.cap = readbuf.filled_len();
-            self.init = readbuf.initialized_len();
-
-            self.pos = 0;
-        }
-        Ok(self.buffer())
+        self.buf.fill_buf(&mut self.inner)
     }
 
     fn consume(&mut self, amt: usize) {
-        self.pos = cmp::min(self.pos + amt, self.cap);
+        self.buf.consume(amt)
     }
 }
 
@@ -409,7 +381,10 @@ where
     fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
         fmt.debug_struct("BufReader")
             .field("reader", &self.inner)
-            .field("buffer", &format_args!("{}/{}", self.cap - self.pos, self.buf.len()))
+            .field(
+                "buffer",
+                &format_args!("{}/{}", self.buf.filled() - self.buf.pos(), self.capacity()),
+            )
             .finish()
     }
 }
@@ -441,7 +416,7 @@ impl<R: Seek> Seek for BufReader<R> {
     fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
         let result: u64;
         if let SeekFrom::Current(n) = pos {
-            let remainder = (self.cap - self.pos) as i64;
+            let remainder = (self.buf.filled() - self.buf.pos()) as i64;
             // it should be safe to assume that remainder fits within an i64 as the alternative
             // means we managed to allocate 8 exbibytes and that's absurd.
             // But it's not out of the realm of possibility for some weird underlying reader to
@@ -499,7 +474,7 @@ impl<R: Seek> Seek for BufReader<R> {
     /// }
     /// ```
     fn stream_position(&mut self) -> io::Result<u64> {
-        let remainder = (self.cap - self.pos) as u64;
+        let remainder = (self.buf.filled() - self.buf.pos()) as u64;
         self.inner.stream_position().map(|pos| {
             pos.checked_sub(remainder).expect(
                 "overflow when subtracting remaining buffer size from inner stream position",
```

Diff for: library/std/src/io/buffered/bufreader/buffer.rs (+105 −0)

```diff
@@ -0,0 +1,105 @@
+///! An encapsulation of `BufReader`'s buffer management logic.
+///
+/// This module factors out the basic functionality of `BufReader` in order to protect two core
+/// invariants:
+/// * `filled` bytes of `buf` are always initialized
+/// * `pos` is always <= `filled`
+/// Since this module encapsulates the buffer management logic, we can ensure that the range
+/// `pos..filled` is always a valid index into the initialized region of the buffer. This means
+/// that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so
+/// without encountering any runtime bounds checks.
+use crate::cmp;
+use crate::io::{self, Read, ReadBuf};
+use crate::mem::MaybeUninit;
+
+pub struct Buffer {
+    // The buffer.
+    buf: Box<[MaybeUninit<u8>]>,
+    // The current seek offset into `buf`, must always be <= `filled`.
+    pos: usize,
+    // Each call to `fill_buf` sets `filled` to indicate how many bytes at the start of `buf` are
+    // initialized with bytes from a read.
+    filled: usize,
+}
+
+impl Buffer {
+    #[inline]
+    pub fn with_capacity(capacity: usize) -> Self {
+        let buf = Box::new_uninit_slice(capacity);
+        Self { buf, pos: 0, filled: 0 }
+    }
+
+    #[inline]
+    pub fn buffer(&self) -> &[u8] {
+        // SAFETY: self.pos and self.cap are valid, and self.cap => self.pos, and
+        // that region is initialized because those are all invariants of this type.
+        unsafe { MaybeUninit::slice_assume_init_ref(self.buf.get_unchecked(self.pos..self.filled)) }
+    }
+
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        self.buf.len()
+    }
+
+    #[inline]
+    pub fn filled(&self) -> usize {
+        self.filled
+    }
+
+    #[inline]
+    pub fn pos(&self) -> usize {
+        self.pos
+    }
+
+    #[inline]
+    pub fn discard_buffer(&mut self) {
+        self.pos = 0;
+        self.filled = 0;
+    }
+
+    #[inline]
+    pub fn consume(&mut self, amt: usize) {
+        self.pos = cmp::min(self.pos + amt, self.filled);
+    }
+
+    /// If there are `amt` bytes available in the buffer, pass a slice containing those bytes to
+    /// `visitor` and return true. If there are not enough bytes available, return false.
+    #[inline]
+    pub fn consume_with<V>(&mut self, amt: usize, mut visitor: V) -> bool
+    where
+        V: FnMut(&[u8]),
+    {
+        if let Some(claimed) = self.buffer().get(..amt) {
+            visitor(claimed);
+            // If the indexing into self.buffer() succeeds, amt must be a valid increment.
+            self.pos += amt;
+            true
+        } else {
+            false
+        }
+    }
+
+    #[inline]
+    pub fn unconsume(&mut self, amt: usize) {
+        self.pos = self.pos.saturating_sub(amt);
+    }
+
+    #[inline]
+    pub fn fill_buf(&mut self, mut reader: impl Read) -> io::Result<&[u8]> {
+        // If we've reached the end of our internal buffer then we need to fetch
+        // some more data from the reader.
+        // Branch using `>=` instead of the more correct `==`
+        // to tell the compiler that the pos..cap slice is always valid.
+        if self.pos >= self.filled {
+            debug_assert!(self.pos == self.filled);
+
+            let mut readbuf = ReadBuf::uninit(&mut self.buf);
+
+            reader.read_buf(&mut readbuf)?;
+
+            self.filled = readbuf.filled_len();
+            self.pos = 0;
+        }
+        Ok(self.buffer())
+    }
+}
```
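Since `Buffer` is private to `std`, here is a standalone miniature of my own (the `MiniBuffer` name is hypothetical, and it is simplified to always-initialized bytes) showing the `consume_with` idea the new module is built around: one successful `get(..amt)` both yields the bytes and proves the cursor advance is in bounds, so no second check is needed:

```rust
// `MiniBuffer` is a hypothetical stand-in for std's private `Buffer` type.
struct MiniBuffer {
    data: Vec<u8>,
    pos: usize, // invariant: pos <= data.len()
}

impl MiniBuffer {
    /// Single-bounds-check read: if `amt` bytes are available, hand them to
    /// `visitor` and advance the cursor; otherwise report failure.
    fn consume_with(&mut self, amt: usize, mut visitor: impl FnMut(&[u8])) -> bool {
        if let Some(claimed) = self.data[self.pos..].get(..amt) {
            visitor(claimed);
            // `get` returning `Some` proves `pos + amt <= data.len()`.
            self.pos += amt;
            true
        } else {
            false
        }
    }
}

fn main() {
    let mut buf = MiniBuffer { data: b"hello world".to_vec(), pos: 0 };
    let mut out = [0u8; 5];
    assert!(buf.consume_with(5, |claimed| out.copy_from_slice(claimed)));
    assert_eq!(&out, b"hello");
    assert_eq!(buf.pos, 5);
}
```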

Diff for: library/std/src/io/buffered/tests.rs (+1 −0)

```diff
@@ -523,6 +523,7 @@ fn bench_buffered_reader_small_reads(b: &mut test::Bencher) {
         let mut buf = [0u8; 4];
         for _ in 0..1024 {
             reader.read_exact(&mut buf).unwrap();
+            core::hint::black_box(&buf);
         }
     });
 }
```
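On the `black_box` added above: it acts as an optimization barrier, forcing the compiler to assume the value is observed so the benchmarked reads cannot be deleted as dead code. A minimal illustration of my own (using `std::hint::black_box`, which re-exports the `core::hint` function the diff calls):

```rust
use std::hint::black_box;

fn main() {
    let mut acc = 0u64;
    for i in 0..1024u64 {
        // Without `black_box`, this whole loop could be folded to a constant;
        // the barrier forces each iteration's value to be materialized.
        acc += black_box(i);
    }
    black_box(acc);
}
```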
