Skip to content

Commit 6c0d7a8

Browse files
committed
Impl String::into_chars
Signed-off-by: tison <[email protected]>
1 parent c113247 commit 6c0d7a8

File tree

1 file changed

+182
-2
lines changed

1 file changed

+182
-2
lines changed

library/alloc/src/string.rs

+182-2
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ use crate::alloc::Allocator;
6262
use crate::borrow::{Cow, ToOwned};
6363
use crate::boxed::Box;
6464
use crate::collections::TryReserveError;
65-
use crate::str::{self, Chars, Utf8Error, from_utf8_unchecked_mut};
65+
use crate::str::{self, CharIndices, Chars, Utf8Error, from_utf8_unchecked_mut};
6666
#[cfg(not(no_global_oom_handling))]
6767
use crate::str::{FromStr, from_boxed_utf8_unchecked};
68-
use crate::vec::Vec;
68+
use crate::vec::{self, Vec};
6969

7070
/// A UTF-8–encoded, growable string.
7171
///
@@ -1952,6 +1952,61 @@ impl String {
19521952
Drain { start, end, iter: chars_iter, string: self_ptr }
19531953
}
19541954

1955+
/// Converts a `String` into an iterator over the [`char`]s of the string.
1956+
///
1957+
/// As a string consists of valid UTF-8, we can iterate through a string
1958+
/// by [`char`]. This method consumes the `String` and returns such an iterator.
1959+
///
1960+
/// It's important to remember that [`char`] represents a Unicode Scalar
1961+
/// Value, and might not match your idea of what a 'character' is. Iteration
1962+
/// over grapheme clusters may be what you actually want. That functionality
1963+
/// is not provided by Rust's standard library; check crates.io instead.
1964+
///
1965+
/// # Examples
1966+
///
1967+
/// Basic usage:
1968+
///
1969+
/// ```
1970+
/// #![feature(string_into_chars)]
1971+
///
1972+
/// let word = String::from("goodbye");
1973+
///
1974+
/// let mut chars = word.into_chars();
1975+
///
1976+
/// assert_eq!(Some('g'), chars.next());
1977+
/// assert_eq!(Some('o'), chars.next());
1978+
/// assert_eq!(Some('o'), chars.next());
1979+
/// assert_eq!(Some('d'), chars.next());
1980+
/// assert_eq!(Some('b'), chars.next());
1981+
/// assert_eq!(Some('y'), chars.next());
1982+
/// assert_eq!(Some('e'), chars.next());
1983+
///
1984+
/// assert_eq!(None, chars.next());
1985+
/// ```
1986+
///
1987+
/// Remember, [`char`]s might not match your intuition about characters:
1988+
///
1989+
/// ```
1990+
/// #![feature(string_into_chars)]
1991+
///
1992+
/// let y = String::from("y̆");
1993+
///
1994+
/// let mut chars = y.into_chars();
1995+
///
1996+
/// assert_eq!(Some('y'), chars.next()); // not 'y̆'
1997+
/// assert_eq!(Some('\u{0306}'), chars.next());
1998+
///
1999+
/// assert_eq!(None, chars.next());
2000+
/// ```
2001+
///
2002+
/// [`char`]: prim@char
2003+
#[inline]
2004+
#[must_use = "`self` will be dropped if the result is not used"]
2005+
#[unstable(feature = "string_into_chars", issue = "133125")]
2006+
pub fn into_chars(self) -> IntoChars {
2007+
// Move the string's bytes into a `vec::IntoIter`; `char`s are decoded from it on demand.
IntoChars { bytes: self.into_bytes().into_iter() }
2008+
}
2009+
19552010
/// Removes the specified range in the string,
19562011
/// and replaces it with the given string.
19572012
/// The given string doesn't need to be the same length as the range.
@@ -3090,6 +3145,131 @@ impl fmt::Write for String {
30903145
}
30913146
}
30923147

3148+
/// An owning iterator over the [`char`]s of a string.
3149+
///
3150+
/// This struct is created by the [`into_chars`] method on [`String`].
3151+
/// See its documentation for more.
3152+
///
3153+
/// [`char`]: prim@char
3154+
/// [`into_chars`]: String::into_chars
3155+
// `Clone` is derived only when `no_global_oom_handling` is not set.
#[cfg_attr(not(no_global_oom_handling), derive(Clone))]
3156+
#[must_use = "iterators are lazy and do nothing unless consumed"]
3157+
#[unstable(feature = "string_into_chars", issue = "133125")]
3158+
pub struct IntoChars {
3159+
// The bytes of the string that have not been yielded yet. The `unsafe` code in
// `as_str` and `into_string` relies on this always being valid UTF-8.
bytes: vec::IntoIter<u8>,
3160+
}
3161+
3162+
#[unstable(feature = "string_into_chars", issue = "133125")]
3163+
impl fmt::Debug for IntoChars {
3164+
// Renders as a tuple struct named `IntoChars` wrapping the remaining
// (not yet yielded) part of the string, as returned by `as_str`.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3165+
f.debug_tuple("IntoChars").field(&self.as_str()).finish()
3166+
}
3167+
}
3168+
3169+
impl IntoChars {
3170+
/// Views the remaining (not yet yielded) data as a subslice of the original data.
3171+
///
3172+
/// # Examples
3173+
///
3174+
/// ```
3175+
/// #![feature(string_into_chars)]
3176+
///
3177+
/// let mut chars = String::from("abc").into_chars();
3178+
///
3179+
/// assert_eq!(chars.as_str(), "abc");
3180+
/// chars.next();
3181+
/// assert_eq!(chars.as_str(), "bc");
3182+
/// chars.next();
3183+
/// chars.next();
3184+
/// assert_eq!(chars.as_str(), "");
3185+
/// ```
3186+
#[must_use]
3187+
#[inline]
3188+
// NOTE(review): the struct and the trait impls for `IntoChars` carry
// `#[unstable(feature = "string_into_chars", ..)]`, but this public method has no
// stability attribute — confirm whether one is required here.
pub fn as_str(&self) -> &str {
3189+
// SAFETY: `bytes` starts out as the bytes of a `String` and is only ever advanced by whole `char`s, so what remains is valid UTF-8.
3190+
unsafe { str::from_utf8_unchecked(self.bytes.as_slice()) }
3191+
}
3192+
3193+
/// Consumes the `IntoChars`, returning the remaining string.
3194+
///
3195+
/// # Examples
3196+
///
3197+
/// ```
3198+
/// #![feature(string_into_chars)]
3199+
///
3200+
/// let chars = String::from("abc").into_chars();
3201+
/// assert_eq!(chars.into_string(), "abc");
3202+
///
3203+
/// let mut chars = String::from("def").into_chars();
3204+
/// chars.next();
3205+
/// assert_eq!(chars.into_string(), "ef");
3206+
/// ```
3207+
#[inline]
3208+
// NOTE(review): like `as_str`, this public method has no `#[unstable(..)]` attribute.
// It also collects into a `Vec`; presumably it needs the same
// `#[cfg(not(no_global_oom_handling))]` gate as the `Clone` derive — TODO confirm.
pub fn into_string(self) -> String {
3209+
// SAFETY: `bytes` are kept in UTF-8 form, only removing whole `char`s at a time.
3210+
unsafe { String::from_utf8_unchecked(self.bytes.collect()) }
3211+
}
3212+
3213+
/// Borrowed [`CharIndices`] over the remaining string. The iterator impls use it
/// to decode a single `char` and to learn the byte offsets around it.
#[inline]
3214+
fn iter(&self) -> CharIndices<'_> {
3215+
self.as_str().char_indices()
3216+
}
3217+
}
3218+
3219+
#[unstable(feature = "string_into_chars", issue = "133125")]
3220+
impl Iterator for IntoChars {
3221+
type Item = char;
3222+
3223+
// Decodes the first remaining `char` through a temporary `CharIndices`, then drops
// exactly that `char`'s bytes from the front of `bytes`.
#[inline]
3224+
fn next(&mut self) -> Option<char> {
3225+
let mut iter = self.iter();
3226+
match iter.next() {
3227+
None => None,
3228+
Some((_, ch)) => {
3229+
let offset = iter.offset();
3230+
// `offset` is the byte position just past the decoded `char`, so it never exceeds the remaining length and the `advance_by` result can be ignored.
3231+
let _ = self.bytes.advance_by(offset);
3232+
Some(ch)
3233+
}
3234+
}
3235+
}
3236+
3237+
// Counts the `char`s left by decoding the remaining string.
#[inline]
3238+
fn count(self) -> usize {
3239+
self.iter().count()
3240+
}
3241+
3242+
// Delegates to the `CharIndices` size hint for the remaining string.
#[inline]
3243+
fn size_hint(&self) -> (usize, Option<usize>) {
3244+
self.iter().size_hint()
3245+
}
3246+
3247+
// The last `char` is the first one seen from the back, so no forward scan is needed.
#[inline]
3248+
fn last(mut self) -> Option<char> {
3249+
self.next_back()
3250+
}
3251+
}
3252+
3253+
#[unstable(feature = "string_into_chars", issue = "133125")]
3254+
impl DoubleEndedIterator for IntoChars {
3255+
// Decodes the last remaining `char` through a temporary `CharIndices`, then drops
// exactly that `char`'s bytes from the back of `bytes`.
#[inline]
3256+
fn next_back(&mut self) -> Option<char> {
3257+
let len = self.as_str().len();
3258+
let mut iter = self.iter();
3259+
match iter.next_back() {
3260+
None => None,
3261+
Some((idx, ch)) => {
3262+
// `idx` is the byte offset at which the last `char` starts, so `len - idx` is its width in bytes and cannot exceed the remaining length.
3263+
let _ = self.bytes.advance_back_by(len - idx);
3264+
Some(ch)
3265+
}
3266+
}
3267+
}
3268+
}
3269+
3270+
// Once no bytes remain, `next` finds nothing to decode and returns `None` without
// changing any state, so every later call returns `None` as well.
#[unstable(feature = "string_into_chars", issue = "133125")]
3271+
impl FusedIterator for IntoChars {}
3272+
30933273
/// A draining iterator for `String`.
30943274
///
30953275
/// This struct is created by the [`drain`] method on [`String`]. See its

0 commit comments

Comments
 (0)