Skip to content

Commit 66df86a

Browse files
committed
auto merge of #10466 : alexcrichton/rust/issue-10334, r=cmr
These commits create a `Buffer` trait in the `io` module which represents an I/O reader which is internally buffered. This abstraction is used to reasonably implement `read_line` and `read_until` along with at least an ok implementation of `read_char` (although I certainly haven't benchmarked `read_char`).
2 parents 0a577f3 + 01343d3 commit 66df86a

File tree

4 files changed

+157
-102
lines changed

4 files changed

+157
-102
lines changed

src/libstd/io/buffered.rs

Lines changed: 26 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ use prelude::*;
5555

5656
use num;
5757
use vec;
58-
use str;
59-
use super::{Reader, Writer, Stream, Decorator};
58+
use super::{Stream, Decorator};
6059

6160
// libuv recommends 64k buffers to maximize throughput
6261
// https://groups.google.com/forum/#!topic/libuv/oQO1HJAIDdA
@@ -93,45 +92,10 @@ impl<R: Reader> BufferedReader<R> {
9392
pub fn new(inner: R) -> BufferedReader<R> {
9493
BufferedReader::with_capacity(DEFAULT_CAPACITY, inner)
9594
}
95+
}
9696

97-
/// Reads the next line of input, interpreted as a sequence of utf-8
98-
/// encoded unicode codepoints. If a newline is encountered, then the
99-
/// newline is contained in the returned string.
100-
pub fn read_line(&mut self) -> Option<~str> {
101-
self.read_until('\n' as u8).map(str::from_utf8_owned)
102-
}
103-
104-
/// Reads a sequence of bytes leading up to a specified delimeter. Once the
105-
/// specified byte is encountered, reading ceases and the bytes up to and
106-
/// including the delimiter are returned.
107-
pub fn read_until(&mut self, byte: u8) -> Option<~[u8]> {
108-
let mut res = ~[];
109-
let mut used;
110-
loop {
111-
{
112-
let available = self.fill_buffer();
113-
match available.iter().position(|&b| b == byte) {
114-
Some(i) => {
115-
res.push_all(available.slice_to(i + 1));
116-
used = i + 1;
117-
break
118-
}
119-
None => {
120-
res.push_all(available);
121-
used = available.len();
122-
}
123-
}
124-
}
125-
if used == 0 {
126-
break
127-
}
128-
self.pos += used;
129-
}
130-
self.pos += used;
131-
return if res.len() == 0 {None} else {Some(res)};
132-
}
133-
134-
fn fill_buffer<'a>(&'a mut self) -> &'a [u8] {
97+
impl<R: Reader> Buffer for BufferedReader<R> {
98+
fn fill<'a>(&'a mut self) -> &'a [u8] {
13599
if self.pos == self.cap {
136100
match self.inner.read(self.buf) {
137101
Some(cap) => {
@@ -143,12 +107,17 @@ impl<R: Reader> BufferedReader<R> {
143107
}
144108
return self.buf.slice(self.pos, self.cap);
145109
}
110+
111+
fn consume(&mut self, amt: uint) {
112+
self.pos += amt;
113+
assert!(self.pos <= self.cap);
114+
}
146115
}
147116

148117
impl<R: Reader> Reader for BufferedReader<R> {
149118
fn read(&mut self, buf: &mut [u8]) -> Option<uint> {
150119
let nread = {
151-
let available = self.fill_buffer();
120+
let available = self.fill();
152121
if available.len() == 0 {
153122
return None;
154123
}
@@ -166,17 +135,9 @@ impl<R: Reader> Reader for BufferedReader<R> {
166135
}
167136

168137
impl<R: Reader> Decorator<R> for BufferedReader<R> {
169-
fn inner(self) -> R {
170-
self.inner
171-
}
172-
173-
fn inner_ref<'a>(&'a self) -> &'a R {
174-
&self.inner
175-
}
176-
177-
fn inner_mut_ref<'a>(&'a mut self) -> &'a mut R {
178-
&mut self.inner
179-
}
138+
fn inner(self) -> R { self.inner }
139+
fn inner_ref<'a>(&'a self) -> &'a R { &self.inner }
140+
fn inner_mut_ref<'a>(&'a mut self) -> &'a mut R { &mut self.inner }
180141
}
181142

182143
/// Wraps a Writer and buffers output to it
@@ -279,13 +240,8 @@ impl<W: Writer> Decorator<W> for LineBufferedWriter<W> {
279240
struct InternalBufferedWriter<W>(BufferedWriter<W>);
280241

281242
impl<W: Reader> Reader for InternalBufferedWriter<W> {
282-
fn read(&mut self, buf: &mut [u8]) -> Option<uint> {
283-
self.inner.read(buf)
284-
}
285-
286-
fn eof(&mut self) -> bool {
287-
self.inner.eof()
288-
}
243+
fn read(&mut self, buf: &mut [u8]) -> Option<uint> { self.inner.read(buf) }
244+
fn eof(&mut self) -> bool { self.inner.eof() }
289245
}
290246

291247
/// Wraps a Stream and buffers input and output to and from it
@@ -311,35 +267,24 @@ impl<S: Stream> BufferedStream<S> {
311267
}
312268
}
313269

314-
impl<S: Stream> Reader for BufferedStream<S> {
315-
fn read(&mut self, buf: &mut [u8]) -> Option<uint> {
316-
self.inner.read(buf)
317-
}
270+
impl<S: Stream> Buffer for BufferedStream<S> {
271+
fn fill<'a>(&'a mut self) -> &'a [u8] { self.inner.fill() }
272+
fn consume(&mut self, amt: uint) { self.inner.consume(amt) }
273+
}
318274

319-
fn eof(&mut self) -> bool {
320-
self.inner.eof()
321-
}
275+
impl<S: Stream> Reader for BufferedStream<S> {
276+
fn read(&mut self, buf: &mut [u8]) -> Option<uint> { self.inner.read(buf) }
277+
fn eof(&mut self) -> bool { self.inner.eof() }
322278
}
323279

324280
impl<S: Stream> Writer for BufferedStream<S> {
325-
fn write(&mut self, buf: &[u8]) {
326-
self.inner.inner.write(buf)
327-
}
328-
329-
fn flush(&mut self) {
330-
self.inner.inner.flush()
331-
}
281+
fn write(&mut self, buf: &[u8]) { self.inner.inner.write(buf) }
282+
fn flush(&mut self) { self.inner.inner.flush() }
332283
}
333284

334285
impl<S: Stream> Decorator<S> for BufferedStream<S> {
335-
fn inner(self) -> S {
336-
self.inner.inner.inner()
337-
}
338-
339-
fn inner_ref<'a>(&'a self) -> &'a S {
340-
self.inner.inner.inner_ref()
341-
}
342-
286+
fn inner(self) -> S { self.inner.inner.inner() }
287+
fn inner_ref<'a>(&'a self) -> &'a S { self.inner.inner.inner_ref() }
343288
fn inner_mut_ref<'a>(&'a mut self) -> &'a mut S {
344289
self.inner.inner.inner_mut_ref()
345290
}

src/libstd/io/mem.rs

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -123,29 +123,18 @@ impl Reader for MemReader {
123123

124124
impl Seek for MemReader {
125125
fn tell(&self) -> u64 { self.pos as u64 }
126-
127126
fn seek(&mut self, _pos: i64, _style: SeekStyle) { fail!() }
128127
}
129128

130-
impl Decorator<~[u8]> for MemReader {
131-
132-
fn inner(self) -> ~[u8] {
133-
match self {
134-
MemReader { buf: buf, _ } => buf
135-
}
136-
}
137-
138-
fn inner_ref<'a>(&'a self) -> &'a ~[u8] {
139-
match *self {
140-
MemReader { buf: ref buf, _ } => buf
141-
}
142-
}
129+
impl Buffer for MemReader {
130+
fn fill<'a>(&'a mut self) -> &'a [u8] { self.buf.slice_from(self.pos) }
131+
fn consume(&mut self, amt: uint) { self.pos += amt; }
132+
}
143133

144-
fn inner_mut_ref<'a>(&'a mut self) -> &'a mut ~[u8] {
145-
match *self {
146-
MemReader { buf: ref mut buf, _ } => buf
147-
}
148-
}
134+
impl Decorator<~[u8]> for MemReader {
135+
fn inner(self) -> ~[u8] { self.buf }
136+
fn inner_ref<'a>(&'a self) -> &'a ~[u8] { &self.buf }
137+
fn inner_mut_ref<'a>(&'a mut self) -> &'a mut ~[u8] { &mut self.buf }
149138
}
150139

151140

@@ -244,6 +233,11 @@ impl<'self> Seek for BufReader<'self> {
244233
fn seek(&mut self, _pos: i64, _style: SeekStyle) { fail!() }
245234
}
246235

236+
impl<'self> Buffer for BufReader<'self> {
237+
fn fill<'a>(&'a mut self) -> &'a [u8] { self.buf.slice_from(self.pos) }
238+
fn consume(&mut self, amt: uint) { self.pos += amt; }
239+
}
240+
247241
///Calls a function with a MemWriter and returns
248242
///the writer's stored vector.
249243
pub fn with_mem_writer(writeFn:&fn(&mut MemWriter)) -> ~[u8] {
@@ -394,4 +388,20 @@ mod test {
394388
let buf = with_mem_writer(|wr| wr.write([1,2,3,4,5,6,7]));
395389
assert_eq!(buf, ~[1,2,3,4,5,6,7]);
396390
}
391+
392+
#[test]
393+
fn test_read_char() {
394+
let mut r = BufReader::new(bytes!("Việt"));
395+
assert_eq!(r.read_char(), Some('V'));
396+
assert_eq!(r.read_char(), Some('i'));
397+
assert_eq!(r.read_char(), Some('ệ'));
398+
assert_eq!(r.read_char(), Some('t'));
399+
assert_eq!(r.read_char(), None);
400+
}
401+
402+
#[test]
403+
fn test_read_bad_char() {
404+
let mut r = BufReader::new(bytes!(0x80));
405+
assert_eq!(r.read_char(), None);
406+
}
397407
}

src/libstd/io/mod.rs

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,11 +247,12 @@ use iter::Iterator;
247247
use option::{Option, Some, None};
248248
use path::Path;
249249
use result::{Ok, Err, Result};
250+
use str;
250251
use str::{StrSlice, OwnedStr};
251252
use to_str::ToStr;
252253
use uint;
253254
use unstable::finally::Finally;
254-
use vec::{OwnedVector, MutableVector};
255+
use vec::{OwnedVector, MutableVector, ImmutableVector, OwnedCopyableVector};
255256
use vec;
256257

257258
// Reexports
@@ -960,6 +961,105 @@ pub trait Stream: Reader + Writer { }
960961

961962
impl<T: Reader + Writer> Stream for T {}
962963

964+
/// A Buffer is a type of reader which has some form of internal buffering to
965+
/// allow certain kinds of reading operations to be more optimized than others.
966+
/// This type extends the `Reader` trait with a few methods that are not
967+
/// possible to reasonably implement with purely a read interface.
968+
pub trait Buffer: Reader {
969+
/// Fills the internal buffer of this object, returning the buffer contents.
970+
/// Note that none of the contents will be "read" in the sense that later
971+
/// calling `read` may return the same contents.
972+
///
973+
/// The `consume` function must be called with the number of bytes that are
974+
/// consumed from the returned buffer to ensure that the bytes are never
975+
/// returned twice.
976+
///
977+
/// # Failure
978+
///
979+
/// This function will raise on the `io_error` condition if a read error is
980+
/// encountered.
981+
fn fill<'a>(&'a mut self) -> &'a [u8];
982+
983+
/// Tells this buffer that `amt` bytes have been consumed from the buffer,
984+
/// so they should no longer be returned in calls to `fill` or `read`.
985+
fn consume(&mut self, amt: uint);
986+
987+
/// Reads the next line of input, interpreted as a sequence of utf-8
988+
/// encoded unicode codepoints. If a newline is encountered, then the
989+
/// newline is contained in the returned string.
990+
///
991+
/// # Failure
992+
///
993+
/// This function will raise on the `io_error` condition if a read error is
994+
/// encountered. The task will also fail if the sequence of bytes leading up
995+
/// to the newline character is not valid utf-8.
996+
fn read_line(&mut self) -> Option<~str> {
997+
self.read_until('\n' as u8).map(str::from_utf8_owned)
998+
}
999+
1000+
/// Reads a sequence of bytes leading up to a specified delimiter. Once the
1001+
/// specified byte is encountered, reading ceases and the bytes up to and
1002+
/// including the delimiter are returned.
1003+
///
1004+
/// # Failure
1005+
///
1006+
/// This function will raise on the `io_error` condition if a read error is
1007+
/// encountered.
1008+
fn read_until(&mut self, byte: u8) -> Option<~[u8]> {
1009+
let mut res = ~[];
1010+
let mut used;
1011+
loop {
1012+
{
1013+
let available = self.fill();
1014+
match available.iter().position(|&b| b == byte) {
1015+
Some(i) => {
1016+
res.push_all(available.slice_to(i + 1));
1017+
used = i + 1;
1018+
break
1019+
}
1020+
None => {
1021+
res.push_all(available);
1022+
used = available.len();
1023+
}
1024+
}
1025+
}
1026+
if used == 0 {
1027+
break
1028+
}
1029+
self.consume(used);
1030+
}
1031+
self.consume(used);
1032+
return if res.len() == 0 {None} else {Some(res)};
1033+
}
1034+
1035+
/// Reads the next utf8-encoded character from the underlying stream.
1036+
///
1037+
/// This will return `None` if the following sequence of bytes in the
1038+
/// stream is not a valid utf8 sequence, or if an I/O error is encountered.
1039+
///
1040+
/// # Failure
1041+
///
1042+
/// This function will raise on the `io_error` condition if a read error is
1043+
/// encountered.
1044+
fn read_char(&mut self) -> Option<char> {
1045+
let width = {
1046+
let available = self.fill();
1047+
if available.len() == 0 { return None } // read error or end of input
1048+
str::utf8_char_width(available[0])
1049+
};
1050+
if width == 0 { return None } // not utf8
1051+
let mut buf = [0, ..4];
1052+
match self.read(buf.mut_slice_to(width)) {
1053+
Some(n) if n == width => {}
1054+
Some(*) | None => return None // read error
1055+
}
1056+
match str::from_utf8_slice_opt(buf.slice_to(width)) {
1057+
Some(s) => Some(s.char_at(0)),
1058+
None => None
1059+
}
1060+
}
1061+
}
1062+
9631063
pub enum SeekStyle {
9641064
/// Seek from the beginning of the stream
9651065
SeekSet,

src/libstd/prelude.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ pub use num::{Orderable, Signed, Unsigned, Round};
6767
pub use num::{Primitive, Int, Float, ToStrRadix, ToPrimitive, FromPrimitive};
6868
pub use path::{GenericPath, Path, PosixPath, WindowsPath};
6969
pub use ptr::RawPtr;
70-
pub use io::{Writer, Reader, Seek};
70+
pub use io::{Buffer, Writer, Reader, Seek};
7171
pub use send_str::{SendStr, SendStrOwned, SendStrStatic, IntoSendStr};
7272
pub use str::{Str, StrVector, StrSlice, OwnedStr};
7373
pub use to_bytes::IterBytes;

0 commit comments

Comments
 (0)