Skip to content

Commit 5a1d11a

Browse files
Copy AsciiExt methods to [u8] directly
This is done in order to deprecate AsciiExt eventually. Note that this commit contains a bunch of `cfg(stage0)` statements. This is due to a new compiler feature I am using: the `slice_u8` lang item. Once this lang item is available in the stage0 compiler, all those cfg flags (and more) can be removed.
1 parent 9e441c7 commit 5a1d11a

File tree

2 files changed

+219
-0
lines changed

2 files changed

+219
-0
lines changed

src/liballoc/slice.rs

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,215 @@ impl<T> [T] {
15331533
}
15341534
}
15351535

1536+
// TODO(LukasKalbertodt): the `not(stage0)` constraint can be removed in the
1537+
// future once the stage0 compiler is new enough to know about the `slice_u8`
1538+
// lang item.
1539+
#[lang = "slice_u8"]
1540+
#[cfg(all(not(stage0), not(test)))]
1541+
impl [u8] {
1542+
/// Checks if all bytes in this slice are within the ASCII range.
1543+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1544+
#[inline]
1545+
pub fn is_ascii(&self) -> bool {
1546+
self.iter().all(|b| b.is_ascii())
1547+
}
1548+
1549+
/// Returns a vector containing a copy of this slice where each byte
1550+
/// is mapped to its ASCII upper case equivalent.
1551+
///
1552+
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1553+
/// but non-ASCII letters are unchanged.
1554+
///
1555+
/// To uppercase the value in-place, use [`make_ascii_uppercase`].
1556+
///
1557+
/// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
1558+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1559+
#[inline]
1560+
pub fn to_ascii_uppercase(&self) -> Vec<u8> {
1561+
let mut me = self.to_vec();
1562+
me.make_ascii_uppercase();
1563+
me
1564+
}
1565+
1566+
/// Returns a vector containing a copy of this slice where each byte
1567+
/// is mapped to its ASCII lower case equivalent.
1568+
///
1569+
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1570+
/// but non-ASCII letters are unchanged.
1571+
///
1572+
/// To lowercase the value in-place, use [`make_ascii_lowercase`].
1573+
///
1574+
/// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
1575+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1576+
#[inline]
1577+
pub fn to_ascii_lowercase(&self) -> Vec<u8> {
1578+
let mut me = self.to_vec();
1579+
me.make_ascii_lowercase();
1580+
me
1581+
}
1582+
1583+
/// Checks that two slices are an ASCII case-insensitive match.
1584+
///
1585+
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
1586+
/// but without allocating and copying temporaries.
1587+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1588+
#[inline]
1589+
pub fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
1590+
self.len() == other.len() &&
1591+
self.iter().zip(other).all(|(a, b)| {
1592+
a.eq_ignore_ascii_case(b)
1593+
})
1594+
}
1595+
1596+
/// Converts this slice to its ASCII upper case equivalent in-place.
1597+
///
1598+
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1599+
/// but non-ASCII letters are unchanged.
1600+
///
1601+
/// To return a new uppercased value without modifying the existing one, use
1602+
/// [`to_ascii_uppercase`].
1603+
///
1604+
/// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
1605+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1606+
#[inline]
1607+
pub fn make_ascii_uppercase(&mut self) {
1608+
for byte in self {
1609+
byte.make_ascii_uppercase();
1610+
}
1611+
}
1612+
1613+
/// Converts this slice to its ASCII lower case equivalent in-place.
1614+
///
1615+
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1616+
/// but non-ASCII letters are unchanged.
1617+
///
1618+
/// To return a new lowercased value without modifying the existing one, use
1619+
/// [`to_ascii_lowercase`].
1620+
///
1621+
/// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
1622+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1623+
#[inline]
1624+
pub fn make_ascii_lowercase(&mut self) {
1625+
for byte in self {
1626+
byte.make_ascii_lowercase();
1627+
}
1628+
}
1629+
1630+
/// Checks if all bytes of this slice are ASCII alphabetic characters:
1631+
///
1632+
/// - U+0041 'A' ... U+005A 'Z', or
1633+
/// - U+0061 'a' ... U+007A 'z'.
1634+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1635+
#[inline]
1636+
pub fn is_ascii_alphabetic(&self) -> bool {
1637+
self.iter().all(|b| b.is_ascii_alphabetic())
1638+
}
1639+
1640+
/// Checks if all bytes of this slice are ASCII uppercase characters:
1641+
/// U+0041 'A' ... U+005A 'Z'.
1642+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1643+
#[inline]
1644+
pub fn is_ascii_uppercase(&self) -> bool {
1645+
self.iter().all(|b| b.is_ascii_uppercase())
1646+
}
1647+
1648+
/// Checks if all bytes of this slice are ASCII lowercase characters:
1649+
/// U+0061 'a' ... U+007A 'z'.
1650+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1651+
#[inline]
1652+
pub fn is_ascii_lowercase(&self) -> bool {
1653+
self.iter().all(|b| b.is_ascii_lowercase())
1654+
}
1655+
1656+
/// Checks if all bytes of this slice are ASCII alphanumeric characters:
1657+
///
1658+
/// - U+0041 'A' ... U+005A 'Z', or
1659+
/// - U+0061 'a' ... U+007A 'z', or
1660+
/// - U+0030 '0' ... U+0039 '9'.
1661+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1662+
#[inline]
1663+
pub fn is_ascii_alphanumeric(&self) -> bool {
1664+
self.iter().all(|b| b.is_ascii_alphanumeric())
1665+
}
1666+
1667+
/// Checks if all bytes of this slice are ASCII decimal digits:
1668+
/// U+0030 '0' ... U+0039 '9'.
1669+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1670+
#[inline]
1671+
pub fn is_ascii_digit(&self) -> bool {
1672+
self.iter().all(|b| b.is_ascii_digit())
1673+
}
1674+
1675+
/// Checks if all bytes of this slice are ASCII hexadecimal digits:
1676+
///
1677+
/// - U+0030 '0' ... U+0039 '9', or
1678+
/// - U+0041 'A' ... U+0046 'F', or
1679+
/// - U+0061 'a' ... U+0066 'f'.
1680+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1681+
#[inline]
1682+
pub fn is_ascii_hexdigit(&self) -> bool {
1683+
self.iter().all(|b| b.is_ascii_hexdigit())
1684+
}
1685+
1686+
/// Checks if all bytes of this slice are ASCII punctuation characters:
1687+
///
1688+
/// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
1689+
/// - U+003A ... U+0040 `: ; < = > ? @`, or
1690+
/// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
1691+
/// - U+007B ... U+007E `{ | } ~`
1692+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1693+
#[inline]
1694+
pub fn is_ascii_punctuation(&self) -> bool {
1695+
self.iter().all(|b| b.is_ascii_punctuation())
1696+
}
1697+
1698+
/// Checks if all bytes of this slice are ASCII graphic characters:
1699+
/// U+0021 '!' ... U+007E '~'.
1700+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1701+
#[inline]
1702+
pub fn is_ascii_graphic(&self) -> bool {
1703+
self.iter().all(|b| b.is_ascii_graphic())
1704+
}
1705+
1706+
/// Checks if all bytes of this slice are ASCII whitespace characters:
1707+
/// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1708+
/// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1709+
///
1710+
/// Rust uses the WHATWG Infra Standard's [definition of ASCII
1711+
/// whitespace][infra-aw]. There are several other definitions in
1712+
/// wide use. For instance, [the POSIX locale][pct] includes
1713+
/// U+000B VERTICAL TAB as well as all the above characters,
1714+
/// but—from the very same specification—[the default rule for
1715+
/// "field splitting" in the Bourne shell][bfs] considers *only*
1716+
/// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1717+
///
1718+
/// If you are writing a program that will process an existing
1719+
/// file format, check what that format's definition of whitespace is
1720+
/// before using this function.
1721+
///
1722+
/// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1723+
/// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1724+
/// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1725+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1726+
#[inline]
1727+
pub fn is_ascii_whitespace(&self) -> bool {
1728+
self.iter().all(|b| b.is_ascii_whitespace())
1729+
}
1730+
1731+
/// Checks if all bytes of this slice are ASCII control characters:
1732+
///
1733+
/// - U+0000 NUL ... U+001F UNIT SEPARATOR, or
1734+
/// - U+007F DELETE.
1735+
///
1736+
/// Note that most ASCII whitespace characters are control
1737+
/// characters, but SPACE is not.
1738+
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1739+
#[inline]
1740+
pub fn is_ascii_control(&self) -> bool {
1741+
self.iter().all(|b| b.is_ascii_control())
1742+
}
1743+
}
1744+
15361745
////////////////////////////////////////////////////////////////////////////////
15371746
// Extension traits for slices over specific kinds of data
15381747
////////////////////////////////////////////////////////////////////////////////

src/libstd/ascii.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,10 @@ impl AsciiExt for str {
389389
}
390390
}
391391

392+
// TODO(LukasKalbertodt): this impl block can be removed in the future. This is
393+
// possible once the stage0 compiler is new enough to contain the inherent
394+
// ascii methods for `[u8]`. See TODO comment further down.
395+
#[cfg(stage0)]
392396
#[stable(feature = "rust1", since = "1.0.0")]
393397
impl AsciiExt for [u8] {
394398
type Owned = Vec<u8>;
@@ -542,6 +546,12 @@ macro_rules! impl_by_delegating {
542546
impl_by_delegating!(u8, u8);
543547
impl_by_delegating!(char, char);
544548

549+
// TODO(LukasKalbertodt): the macro invocation should replace the impl block
550+
// for `[u8]` above. But this is not possible until the stage0 compiler is new
551+
// enough to contain the inherent ascii methods for `[u8]`.
552+
#[cfg(not(stage0))]
553+
impl_by_delegating!([u8], Vec<u8>);
554+
545555
/// An iterator over the escaped version of a byte.
546556
///
547557
/// This `struct` is created by the [`escape_default`] function. See its

0 commit comments

Comments
 (0)