Skip to content

Commit 1916e3c

Browse files
Copy AsciiExt methods to str directly
This is done in order to deprecate AsciiExt eventually. Note that this commit contains a bunch of `cfg(stage0)` statements. This is due to a new compiler feature this commit depends on: the `slice_u8` lang item. Once this lang item is available in the stage0 compiler, all those cfg flags (and more) can be removed.
1 parent 5a1d11a commit 1916e3c

File tree

4 files changed

+290
-5
lines changed

4 files changed

+290
-5
lines changed

src/liballoc/slice.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1533,7 +1533,7 @@ impl<T> [T] {
15331533
}
15341534
}
15351535

1536-
// TODO(LukasKalbertodt): the `not(stage0)` constraint can be removed in the
1536+
// FIXME(LukasKalbertodt): the `not(stage0)` constraint can be removed in the
15371537
// future once the stage0 compiler is new enough to know about the `slice_u8`
15381538
// lang item.
15391539
#[lang = "slice_u8"]

src/liballoc/str.rs

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2070,6 +2070,282 @@ impl str {
20702070
s.extend((0..n).map(|_| self));
20712071
s
20722072
}
2073+
2074+
/// Checks if all characters in this string are within the ASCII range.
2075+
///
2076+
/// # Examples
2077+
///
2078+
/// ```
2079+
/// let ascii = "hello!\n";
2080+
/// let non_ascii = "Grüße, Jürgen ❤";
2081+
///
2082+
/// assert!(ascii.is_ascii());
2083+
/// assert!(!non_ascii.is_ascii());
2084+
/// ```
2085+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2086+
#[inline]
2087+
pub fn is_ascii(&self) -> bool {
2088+
// We can treat each byte as a character here: all multibyte characters
2089+
// start with a byte that is not in the ascii range, so we will stop
2090+
// there already.
2091+
self.bytes().all(|b| b.is_ascii())
2092+
}
2093+
2094+
/// Returns a copy of this string where each character is mapped to its
2095+
/// ASCII upper case equivalent.
2096+
///
2097+
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
2098+
/// but non-ASCII letters are unchanged.
2099+
///
2100+
/// To uppercase the value in-place, use [`make_ascii_uppercase`].
2101+
///
2102+
/// To uppercase ASCII characters in addition to non-ASCII characters, use
2103+
/// [`to_uppercase`].
2104+
///
2105+
/// # Examples
2106+
///
2107+
/// ```
2108+
/// let s = "Grüße, Jürgen ❤";
2109+
///
2110+
/// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase());
2111+
/// ```
2112+
///
2113+
/// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
2114+
/// [`to_uppercase`]: #method.to_uppercase
2115+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2116+
#[inline]
2117+
#[cfg(not(stage0))]
2118+
pub fn to_ascii_uppercase(&self) -> String {
2119+
let mut bytes = self.as_bytes().to_vec();
2120+
bytes.make_ascii_uppercase();
2121+
// make_ascii_uppercase() preserves the UTF-8 invariant.
2122+
unsafe { String::from_utf8_unchecked(bytes) }
2123+
}
2124+
2125+
/// Returns a copy of this string where each character is mapped to its
2126+
/// ASCII lower case equivalent.
2127+
///
2128+
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
2129+
/// but non-ASCII letters are unchanged.
2130+
///
2131+
/// To lowercase the value in-place, use [`make_ascii_lowercase`].
2132+
///
2133+
/// To lowercase ASCII characters in addition to non-ASCII characters, use
2134+
/// [`to_lowercase`].
2135+
///
2136+
/// # Examples
2137+
///
2138+
/// ```
2139+
/// let s = "Grüße, Jürgen ❤";
2140+
///
2141+
/// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase());
2142+
/// ```
2143+
///
2144+
/// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
2145+
/// [`to_lowercase`]: #method.to_lowercase
2146+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2147+
#[inline]
2148+
#[cfg(not(stage0))]
2149+
pub fn to_ascii_lowercase(&self) -> String {
2150+
let mut bytes = self.as_bytes().to_vec();
2151+
bytes.make_ascii_lowercase();
2152+
// make_ascii_lowercase() preserves the UTF-8 invariant.
2153+
unsafe { String::from_utf8_unchecked(bytes) }
2154+
}
2155+
2156+
/// Checks that two strings are an ASCII case-insensitive match.
2157+
///
2158+
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
2159+
/// but without allocating and copying temporaries.
2160+
///
2161+
/// # Examples
2162+
///
2163+
/// ```
2164+
/// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
2165+
/// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
2166+
/// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
2167+
/// ```
2168+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2169+
#[inline]
2170+
#[cfg(not(stage0))]
2171+
pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
2172+
self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
2173+
}
2174+
2175+
/// Converts this string to its ASCII upper case equivalent in-place.
2176+
///
2177+
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
2178+
/// but non-ASCII letters are unchanged.
2179+
///
2180+
/// To return a new uppercased value without modifying the existing one, use
2181+
/// [`to_ascii_uppercase`].
2182+
///
2183+
/// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
2184+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2185+
#[cfg(not(stage0))]
2186+
pub fn make_ascii_uppercase(&mut self) {
2187+
let me = unsafe { self.as_bytes_mut() };
2188+
me.make_ascii_uppercase()
2189+
}
2190+
2191+
/// Converts this string to its ASCII lower case equivalent in-place.
2192+
///
2193+
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
2194+
/// but non-ASCII letters are unchanged.
2195+
///
2196+
/// To return a new lowercased value without modifying the existing one, use
2197+
/// [`to_ascii_lowercase`].
2198+
///
2199+
/// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
2200+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2201+
#[cfg(not(stage0))]
2202+
pub fn make_ascii_lowercase(&mut self) {
2203+
let me = unsafe { self.as_bytes_mut() };
2204+
me.make_ascii_lowercase()
2205+
}
2206+
2207+
/// Checks if all characters of this string are ASCII alphabetic
2208+
/// characters:
2209+
///
2210+
/// - U+0041 'A' ... U+005A 'Z', or
2211+
/// - U+0061 'a' ... U+007A 'z'.
2212+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2213+
#[inline]
2214+
pub fn is_ascii_alphabetic(&self) -> bool {
2215+
self.bytes().all(|b| b.is_ascii_alphabetic())
2216+
}
2217+
2218+
/// Checks if all characters of this string are ASCII uppercase characters:
2219+
/// U+0041 'A' ... U+005A 'Z'.
2220+
///
2221+
/// # Examples
2222+
///
2223+
/// ```
2224+
/// // Only ascii uppercase characters
2225+
/// assert!("HELLO".is_ascii_uppercase());
2226+
///
2227+
/// // While all characters are ascii, 'y' and 'e' are not uppercase
2228+
/// assert!(!"Bye".is_ascii_uppercase());
2229+
///
2230+
/// // While all characters are uppercase, 'Ü' is not ascii
2231+
/// assert!(!"TSCHÜSS".is_ascii_uppercase());
2232+
/// ```
2233+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2234+
#[inline]
2235+
pub fn is_ascii_uppercase(&self) -> bool {
2236+
self.bytes().all(|b| b.is_ascii_uppercase())
2237+
}
2238+
2239+
/// Checks if all characters of this string are ASCII lowercase characters:
2240+
/// U+0061 'a' ... U+007A 'z'.
2241+
///
2242+
/// # Examples
2243+
///
2244+
/// ```
2245+
/// // Only ascii lowercase characters
2246+
/// assert!("hello".is_ascii_lowercase());
2247+
///
2248+
/// // While all characters are ascii, 'B' is not lowercase
2249+
/// assert!(!"Bye".is_ascii_lowercase());
2250+
///
2251+
/// // While all characters are lowercase, 'Ü' is not ascii
2252+
/// assert!(!"tschüss".is_ascii_lowercase());
2253+
/// ```
2254+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2255+
#[inline]
2256+
pub fn is_ascii_lowercase(&self) -> bool {
2257+
self.bytes().all(|b| b.is_ascii_lowercase())
2258+
}
2259+
2260+
/// Checks if all characters of this string are ASCII alphanumeric
2261+
/// characters:
2262+
///
2263+
/// - U+0041 'A' ... U+005A 'Z', or
2264+
/// - U+0061 'a' ... U+007A 'z', or
2265+
/// - U+0030 '0' ... U+0039 '9'.
2266+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2267+
#[inline]
2268+
pub fn is_ascii_alphanumeric(&self) -> bool {
2269+
self.bytes().all(|b| b.is_ascii_alphanumeric())
2270+
}
2271+
2272+
/// Checks if all characters of this string are ASCII decimal digits:
2273+
/// U+0030 '0' ... U+0039 '9'.
2274+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2275+
#[inline]
2276+
pub fn is_ascii_digit(&self) -> bool {
2277+
self.bytes().all(|b| b.is_ascii_digit())
2278+
}
2279+
2280+
/// Checks if all characters of this string are ASCII hexadecimal digits:
2281+
///
2282+
/// - U+0030 '0' ... U+0039 '9', or
2283+
/// - U+0041 'A' ... U+0046 'F', or
2284+
/// - U+0061 'a' ... U+0066 'f'.
2285+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2286+
#[inline]
2287+
pub fn is_ascii_hexdigit(&self) -> bool {
2288+
self.bytes().all(|b| b.is_ascii_hexdigit())
2289+
}
2290+
2291+
/// Checks if all characters of this string are ASCII punctuation
2292+
/// characters:
2293+
///
2294+
/// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
2295+
/// - U+003A ... U+0040 `: ; < = > ? @`, or
2296+
/// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
2297+
/// - U+007B ... U+007E `{ | } ~`
2298+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2299+
#[inline]
2300+
pub fn is_ascii_punctuation(&self) -> bool {
2301+
self.bytes().all(|b| b.is_ascii_punctuation())
2302+
}
2303+
2304+
/// Checks if all characters of this string are ASCII graphic characters:
2305+
/// U+0021 '!' ... U+007E '~'.
2306+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2307+
#[inline]
2308+
pub fn is_ascii_graphic(&self) -> bool {
2309+
self.bytes().all(|b| b.is_ascii_graphic())
2310+
}
2311+
2312+
/// Checks if all characters of this string are ASCII whitespace characters:
2313+
/// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
2314+
/// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
2315+
///
2316+
/// Rust uses the WhatWG Infra Standard's [definition of ASCII
2317+
/// whitespace][infra-aw]. There are several other definitions in
2318+
/// wide use. For instance, [the POSIX locale][pct] includes
2319+
/// U+000B VERTICAL TAB as well as all the above characters,
2320+
/// but—from the very same specification—[the default rule for
2321+
/// "field splitting" in the Bourne shell][bfs] considers *only*
2322+
/// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
2323+
///
2324+
/// If you are writing a program that will process an existing
2325+
/// file format, check what that format's definition of whitespace is
2326+
/// before using this function.
2327+
///
2328+
/// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
2329+
/// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
2330+
/// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
2331+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2332+
#[inline]
2333+
pub fn is_ascii_whitespace(&self) -> bool {
2334+
self.bytes().all(|b| b.is_ascii_whitespace())
2335+
}
2336+
2337+
/// Checks if all characters of this string are ASCII control characters:
2338+
///
2339+
/// - U+0000 NUL ... U+001F UNIT SEPARATOR, or
2340+
/// - U+007F DELETE.
2341+
///
2342+
/// Note that most ASCII whitespace characters are control
2343+
/// characters, but SPACE is not.
2344+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
2345+
#[inline]
2346+
pub fn is_ascii_control(&self) -> bool {
2347+
self.bytes().all(|b| b.is_ascii_control())
2348+
}
20732349
}
20742350

20752351
/// Converts a boxed slice of bytes to a boxed string slice without checking

src/libstd/ascii.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,10 @@ pub trait AsciiExt {
298298
fn is_ascii_control(&self) -> bool { unimplemented!(); }
299299
}
300300

301+
// FIXME(LukasKalbertodt): this impl block can be removed in the future. This is
302+
// possible once the stage0 compiler is new enough to contain the inherent
303+
// ascii methods for `str`. See FIXME comment further down.
304+
#[cfg(stage0)]
301305
#[stable(feature = "rust1", since = "1.0.0")]
302306
impl AsciiExt for str {
303307
type Owned = String;
@@ -389,9 +393,9 @@ impl AsciiExt for str {
389393
}
390394
}
391395

392-
// TODO(LukasKalbertodt): this impl block can be removed in the future. This is
396+
// FIXME(LukasKalbertodt): this impl block can be removed in the future. This is
393397
// possible once the stage0 compiler is new enough to contain the inherent
394-
// ascii methods for `[u8]`. See TODO comment further down.
398+
// ascii methods for `[u8]`. See FIXME comment further down.
395399
#[cfg(stage0)]
396400
#[stable(feature = "rust1", since = "1.0.0")]
397401
impl AsciiExt for [u8] {
@@ -546,12 +550,18 @@ macro_rules! impl_by_delegating {
546550
impl_by_delegating!(u8, u8);
547551
impl_by_delegating!(char, char);
548552

549-
// TODO(LukasKalbertodt): the macro invocation should replace the impl block
553+
// FIXME(LukasKalbertodt): the macro invocation should replace the impl block
550554
// for `[u8]` above. But this is not possible until the stage0 compiler is new
551555
// enough to contain the inherent ascii methods for `[u8]`.
552556
#[cfg(not(stage0))]
553557
impl_by_delegating!([u8], Vec<u8>);
554558

559+
// FIXME(LukasKalbertodt): the macro invocation should replace the impl block
560+
// for `str` above. But this is not possible until the stage0 compiler is new
561+
// enough to contain the inherent ascii methods for `str`.
562+
#[cfg(not(stage0))]
563+
impl_by_delegating!(str, String);
564+
555565
/// An iterator over the escaped version of a byte.
556566
///
557567
/// This `struct` is created by the [`escape_default`] function. See its

src/libsyntax/feature_gate.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ use visit::{self, FnKind, Visitor};
3535
use parse::ParseSess;
3636
use symbol::Symbol;
3737

38-
use std::ascii::AsciiExt;
3938
use std::env;
4039

4140
macro_rules! set {

0 commit comments

Comments
 (0)