Skip to content

Commit e68f935

Browse files
committed
Auto merge of rust-lang#98943 - WilliamVenner:feat/bufread_skip_until, r=dtolnay
Add `BufRead::skip_until`

Alternative version of `BufRead::read_until` that simply discards data, rather than copying it into a buffer.

Useful for situations like skipping irrelevant data in a binary file format that is NUL-terminated.

<details>
<summary>Benchmark</summary>

```
running 2 tests
test bench_read_until ... bench: 123 ns/iter (+/- 6)
test bench_skip_until ... bench: 66 ns/iter (+/- 3)
```

```rs
#![feature(test)]
extern crate test;
use test::Bencher;

use std::io::{ErrorKind, BufRead};

fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize, std::io::Error> {
    let mut read = 0;
    loop {
        let (done, used) = {
            let available = match r.fill_buf() {
                Ok(n) => n,
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            match memchr::memchr(delim, available) {
                Some(i) => (true, i + 1),
                None => (false, available.len()),
            }
        };
        r.consume(used);
        read += used;
        if done || used == 0 {
            return Ok(read);
        }
    }
}

const STR: &[u8] = b"Ferris\0Hello, world!\0";

#[bench]
fn bench_skip_until(b: &mut Bencher) {
    b.iter(|| {
        let mut io = std::io::Cursor::new(test::black_box(STR));
        skip_until(&mut io, b'\0').unwrap();
        let mut hello = Vec::with_capacity(b"Hello, world!\0".len());
        let num_bytes = io.read_until(b'\0', &mut hello).unwrap();
        assert_eq!(num_bytes, b"Hello, world!\0".len());
        assert_eq!(hello, b"Hello, world!\0");
    });
}

#[bench]
fn bench_read_until(b: &mut Bencher) {
    b.iter(|| {
        let mut io = std::io::Cursor::new(test::black_box(STR));
        io.read_until(b'\0', &mut Vec::new()).unwrap();
        let mut hello = Vec::with_capacity(b"Hello, world!\0".len());
        let num_bytes = io.read_until(b'\0', &mut hello).unwrap();
        assert_eq!(num_bytes, b"Hello, world!\0".len());
        assert_eq!(hello, b"Hello, world!\0");
    });
}
```

</details>
2 parents a1a3773 + 7c1ab71 commit e68f935

File tree

2 files changed

+114
-0
lines changed

2 files changed

+114
-0
lines changed

Diff for: library/std/src/io/mod.rs

+84
Original file line numberDiff line numberDiff line change
@@ -2044,6 +2044,28 @@ fn read_until<R: BufRead + ?Sized>(r: &mut R, delim: u8, buf: &mut Vec<u8>) -> R
20442044
}
20452045
}
20462046

2047+
fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize> {
2048+
let mut read = 0;
2049+
loop {
2050+
let (done, used) = {
2051+
let available = match r.fill_buf() {
2052+
Ok(n) => n,
2053+
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
2054+
Err(e) => return Err(e),
2055+
};
2056+
match memchr::memchr(delim, available) {
2057+
Some(i) => (true, i + 1),
2058+
None => (false, available.len()),
2059+
}
2060+
};
2061+
r.consume(used);
2062+
read += used;
2063+
if done || used == 0 {
2064+
return Ok(read);
2065+
}
2066+
}
2067+
}
2068+
20472069
/// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it
20482070
/// to perform extra ways of reading.
20492071
///
@@ -2247,6 +2269,68 @@ pub trait BufRead: Read {
22472269
read_until(self, byte, buf)
22482270
}
22492271

2272+
/// Skip all bytes until the delimiter `byte` or EOF is reached.
2273+
///
2274+
/// This function will read (and discard) bytes from the underlying stream until the
2275+
/// delimiter or EOF is found.
2276+
///
2277+
/// If successful, this function will return the total number of bytes read,
2278+
/// including the delimiter byte.
2279+
///
2280+
/// This is useful for efficiently skipping data such as NUL-terminated strings
2281+
/// in binary file formats without buffering.
2282+
///
2283+
/// This function is blocking and should be used carefully: it is possible for
2284+
/// an attacker to continuously send bytes without ever sending the delimiter
2285+
/// or EOF.
2286+
///
2287+
/// # Errors
2288+
///
2289+
/// This function will ignore all instances of [`ErrorKind::Interrupted`] and
2290+
/// will otherwise return any errors returned by [`fill_buf`].
2291+
///
2292+
/// If an I/O error is encountered then all bytes read so far will be
2293+
/// present in `buf` and its length will have been adjusted appropriately.
2294+
///
2295+
/// [`fill_buf`]: BufRead::fill_buf
2296+
///
2297+
/// # Examples
2298+
///
2299+
/// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
2300+
/// this example, we use [`Cursor`] to read some NUL-terminated information
2301+
/// about Ferris from a binary string, skipping the fun fact:
2302+
///
2303+
/// ```
2304+
/// #![feature(bufread_skip_until)]
2305+
///
2306+
/// use std::io::{self, BufRead};
2307+
///
2308+
/// let mut cursor = io::Cursor::new(b"Ferris\0Likes long walks on the beach\0Crustacean\0");
2309+
///
2310+
/// // read name
2311+
/// let mut name = Vec::new();
2312+
/// let num_bytes = cursor.read_until(b'\0', &mut name)
2313+
/// .expect("reading from cursor won't fail");
2314+
/// assert_eq!(num_bytes, 7);
2315+
/// assert_eq!(name, b"Ferris\0");
2316+
///
2317+
/// // skip fun fact
2318+
/// let num_bytes = cursor.skip_until(b'\0')
2319+
/// .expect("reading from cursor won't fail");
2320+
/// assert_eq!(num_bytes, 30);
2321+
///
2322+
/// // read animal type
2323+
/// let mut animal = Vec::new();
2324+
/// let num_bytes = cursor.read_until(b'\0', &mut animal)
2325+
/// .expect("reading from cursor won't fail");
2326+
/// assert_eq!(num_bytes, 11);
2327+
/// assert_eq!(animal, b"Crustacean\0");
2328+
/// ```
2329+
#[unstable(feature = "bufread_skip_until", issue = "111735")]
2330+
fn skip_until(&mut self, byte: u8) -> Result<usize> {
2331+
skip_until(self, byte)
2332+
}
2333+
22502334
/// Read all bytes until a newline (the `0xA` byte) is reached, and append
22512335
/// them to the provided `String` buffer.
22522336
///

Diff for: library/std/src/io/tests.rs

+30
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,36 @@ fn read_until() {
2525
assert_eq!(v, []);
2626
}
2727

28+
#[test]
fn skip_until() {
    let input: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore\0";
    let mut reader = BufReader::new(input);

    // Walk the input as alternating records: every `read\0` is collected with
    // `read_until`, every `ignore\0` is thrown away with `skip_until`.
    loop {
        // collect the next `read\0` record
        let mut kept = Vec::new();
        let kept_len = reader.read_until(0, &mut kept).unwrap();
        if kept_len == 0 {
            // nothing left: EOF
            break;
        }
        assert_eq!(kept, b"read\0");
        assert_eq!(kept_len, b"read\0".len());

        // discard the following `ignore\0` record
        let skipped = reader.skip_until(0).unwrap();
        assert_eq!(skipped, b"ignore\0".len());
    }

    // Once the slice is exhausted, skipping must report zero bytes, matching
    // what `read_until` reports at EOF.
    let skipped = reader.skip_until(0).unwrap();
    assert_eq!(skipped, 0);
}
57+
2858
#[test]
2959
fn split() {
3060
let buf = Cursor::new(&b"12"[..]);

0 commit comments

Comments
 (0)