-
Notifications
You must be signed in to change notification settings - Fork 24
Add Lines, Chars and CharsMut iterators #31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
e5364ad
6f34fc4
89d041a
a2575d2
e50d786
4d858f6
404b08d
76f538f
a60cbe1
b9c525a
9ccb6e7
e5f0aab
9caa7e8
cec0181
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ extern crate core; | |
|
||
use self::core::{fmt, mem}; | ||
use self::core::ops::{Index, IndexMut, Range, RangeTo, RangeFrom, RangeFull}; | ||
use self::core::slice::{Iter, IterMut}; | ||
#[cfg(feature = "std")] | ||
use std::error::Error; | ||
#[cfg(feature = "std")] | ||
|
@@ -62,6 +63,32 @@ impl AsciiStr { | |
self.as_slice().as_ptr() | ||
} | ||
|
||
/// Returns an iterator over the characters of the `AsciiStr`. | ||
#[inline] | ||
pub fn chars(&self) -> Chars { | ||
self.slice.iter() | ||
} | ||
|
||
/// Returns an iterator over the characters of the `AsciiStr` which allows you to modify the | ||
/// value of each `AsciiChar`. | ||
#[inline] | ||
pub fn chars_mut(&mut self) -> CharsMut { | ||
self.slice.iter_mut() | ||
} | ||
|
||
/// Returns an iterator over the lines of the `AsciiStr`, which are themselves `AsciiStr`s. | ||
/// | ||
/// Lines are ended with either `LineFeed` (`\n`), or `CarriageReturn` then `LineFeed` (`\r\n`). | ||
/// | ||
/// The final line ending is optional. | ||
#[inline] | ||
pub fn lines(&self) -> Lines { | ||
Lines { | ||
current_index: 0, | ||
string: self | ||
} | ||
} | ||
|
||
/// Returns an unsafe mutable pointer to the `AsciiStr`'s buffer. | ||
/// | ||
/// The caller must ensure that the slice outlives the pointer this function returns, or else it | ||
|
@@ -389,6 +416,97 @@ impl AsciiExt for AsciiStr { | |
} | ||
|
||
|
||
impl<'a> IntoIterator for &'a AsciiStr { | ||
type Item = &'a AsciiChar; | ||
type IntoIter = Chars<'a>; | ||
#[inline] | ||
fn into_iter(self) -> Self::IntoIter { | ||
self.chars() | ||
} | ||
} | ||
|
||
impl<'a> IntoIterator for &'a mut AsciiStr { | ||
type Item = &'a mut AsciiChar; | ||
type IntoIter = CharsMut<'a>; | ||
#[inline] | ||
fn into_iter(self) -> Self::IntoIter { | ||
self.chars_mut() | ||
} | ||
} | ||
|
||
/// An immutable iterator over the characters of an `AsciiStr`. | ||
pub type Chars<'a> = Iter<'a, AsciiChar>; | ||
|
||
/// A mutable iterator over the characters of an `AsciiStr`. | ||
pub type CharsMut<'a> = IterMut<'a, AsciiChar>; | ||
|
||
/// An iterator over the lines of the internal character array. | ||
#[derive(Debug)] | ||
pub struct Lines<'a> { | ||
// TODO: should this use `core::slice::Split` internally? | ||
current_index: usize, | ||
string: &'a AsciiStr | ||
} | ||
|
||
impl<'a> Iterator for Lines<'a> { | ||
type Item = &'a AsciiStr; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
let curr_idx = self.current_index; | ||
let len = self.string.len(); | ||
if curr_idx >= len { | ||
return None; | ||
} | ||
|
||
let mut next_idx = None; | ||
let mut linebreak_skip = 0; | ||
|
||
for i in curr_idx..(len-1) { | ||
match (self.string[i], self.string[i + 1]) { | ||
(AsciiChar::CarriageReturn, AsciiChar::LineFeed) => { | ||
next_idx = Some(i); | ||
linebreak_skip = 2; | ||
break; | ||
} | ||
(AsciiChar::LineFeed, _) => { | ||
next_idx = Some(i); | ||
linebreak_skip = 1; | ||
break; | ||
} | ||
_ => {} | ||
} | ||
} | ||
|
||
let next_idx = match next_idx { | ||
Some(i) => i, | ||
None => return None | ||
}; | ||
let line: &AsciiStr = From::from(&self.string[curr_idx..next_idx]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
self.current_index = next_idx + linebreak_skip; | ||
|
||
if line.is_empty() && self.current_index == self.string.len() { | ||
// This is a trailing line break | ||
None | ||
} else { | ||
Some(line) | ||
} | ||
} | ||
|
||
#[inline] | ||
fn size_hint(&self) -> (usize, Option<usize>) { | ||
(self.len(), Some(self.len())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess the compiler is smart enough to optimize this such that the entire string is only once iterated over instead of twice. I will check this, but it's only a minor observation. |
||
} | ||
} | ||
|
||
impl<'a> ExactSizeIterator for Lines<'a> { | ||
#[inline] | ||
fn len(&self) -> usize { | ||
self.string.chars().skip(self.current_index).filter(|&&c| c == AsciiChar::LineFeed).count() | ||
} | ||
} | ||
|
||
|
||
/// Error that is returned when a sequence of `u8` are not all ASCII. | ||
/// | ||
/// Is used by `As[Mut]AsciiStr` and the `from_ascii` method on `AsciiStr` and `AsciiString`. | ||
|
@@ -604,6 +722,54 @@ mod tests { | |
assert_eq!(b, "A@A"); | ||
} | ||
|
||
/// `chars()` must yield the characters in order, each equal to the byte it was built from.
#[test]
fn chars_iter() {
    let bytes = &[b'h', b'e', b'l', b'l', b'o', b' ', b'w', b'o', b'r', b'l', b'd', b'\0'];
    let ascii = AsciiStr::from_ascii(bytes).unwrap();
    let pairs = ascii.chars().zip(bytes.iter());
    for (ascii_char, raw_byte) in pairs {
        assert_eq!(ascii_char, raw_byte);
    }
}
|
||
/// Writing through `chars_mut()` must change the underlying string.
#[test]
fn chars_iter_mut() {
    let mut bytes = &mut [b'h', b'e', b'l', b'l', b'o', b' ', b'w', b'o', b'r', b'l', b'd', b'\0'];
    let mut ascii = bytes.as_mut_ascii_str().unwrap();

    // Walk the whole iterator, but only overwrite the first character.
    for (position, slot) in ascii.chars_mut().enumerate() {
        if position != 0 {
            continue;
        }
        *slot = AsciiChar::H;
    }

    assert_eq!(ascii[0], b'H');
}
|
||
/// Exercises `lines()` with mixed terminators, a lone line break, and runs of line breaks.
#[test]
fn lines_iter() {
    use super::core::iter::Iterator;

    // Mixed `\n` and `\r\n` terminators, ending with a trailing line break.
    let expected: [&str; 3] = ["great work", "cool beans", "awesome stuff"];
    let text = "great work\ncool beans\r\nawesome stuff\n";
    let ascii = AsciiStr::from_ascii(text.as_bytes()).unwrap();
    assert_eq!(ascii.lines().len(), 3);
    for (actual, wanted) in ascii.lines().zip(&expected) {
        assert_eq!(actual, *wanted);
    }

    // A lone line break must not produce any line.
    let lone_break = b"\n";
    for _ in AsciiStr::from_ascii(&lone_break).unwrap().lines() {
        unreachable!();
    }

    // Consecutive line breaks produce at least one line, and every produced line is empty.
    let only_breaks = b"\n\r\n\n\r\n";
    let mut yielded = 0;
    for line in AsciiStr::from_ascii(&only_breaks).unwrap().lines() {
        assert!(line.is_empty());
        yielded += 1;
    }
    assert!(yielded > 0);
}
|
||
#[test] | ||
#[cfg(feature = "std")] | ||
fn fmt_ascii_str() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`core::slice::Split` only allows a single byte at a time to be checked. We would check for `\n` and then see if there was a `\r` right before it, I guess. In this case I think using the `memchr` crate would give a nice speedup.

Would you like to try implementing this? If not, no problem, I'll just open an issue so we don't forget it.