Skip to content

Commit a8df928

Browse files
committed
Add feature(str_from_utf16_endian)
1 parent b8f4cb6 commit a8df928

File tree

1 file changed

+150
-0
lines changed

1 file changed

+150
-0
lines changed

library/alloc/src/string.rs

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,156 @@ impl String {
724724
decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect()
725725
}
726726

727+
/// Decode a UTF-16LE–encoded vector `v` into a `String`, returning [`Err`]
728+
/// if `v` contains any invalid data.
729+
///
730+
/// # Examples
731+
///
732+
/// Basic usage:
733+
///
734+
/// ```
735+
/// #![feature(str_from_utf16_endian)]
736+
/// // 𝄞music
737+
/// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
738+
/// 0x73, 0x00, 0x69, 0x00, 0x63, 0x00];
739+
/// assert_eq!(String::from("𝄞music"),
740+
/// String::from_utf16le(v).unwrap());
741+
///
742+
/// // 𝄞mu<invalid>ic
743+
/// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
744+
/// 0x00, 0xD8, 0x69, 0x00, 0x63, 0x00];
745+
/// assert!(String::from_utf16le(v).is_err());
746+
/// ```
747+
#[cfg(not(no_global_oom_handling))]
748+
#[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
749+
pub fn from_utf16le(v: &[u8]) -> Result<String, FromUtf16Error> {
750+
if v.len() % 2 != 0 {
751+
return Err(FromUtf16Error(()));
752+
}
753+
match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
754+
(true, (&[], v, &[])) => Self::from_utf16(v),
755+
_ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_le_bytes))
756+
.collect::<Result<_, _>>()
757+
.map_err(|_| FromUtf16Error(())),
758+
}
759+
}
760+
761+
/// Decode a UTF-16LE–encoded slice `v` into a `String`, replacing
762+
/// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
763+
///
764+
/// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
765+
/// `from_utf16le_lossy` returns a `String` since the UTF-16 to UTF-8
766+
/// conversion requires a memory allocation.
767+
///
768+
/// [`from_utf8_lossy`]: String::from_utf8_lossy
769+
/// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
770+
/// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
771+
///
772+
/// # Examples
773+
///
774+
/// Basic usage:
775+
///
776+
/// ```
777+
/// #![feature(str_from_utf16_endian)]
778+
/// // 𝄞mus<invalid>ic<invalid>
779+
/// let v = &[0x34, 0xD8, 0x1E, 0xDD, 0x6d, 0x00, 0x75, 0x00,
780+
/// 0x73, 0x00, 0x1E, 0xDD, 0x69, 0x00, 0x63, 0x00,
781+
/// 0x34, 0xD8];
782+
///
783+
/// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
784+
/// String::from_utf16le_lossy(v));
785+
/// ```
786+
#[cfg(not(no_global_oom_handling))]
787+
#[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
788+
pub fn from_utf16le_lossy(v: &[u8]) -> String {
789+
match (cfg!(target_endian = "little"), unsafe { v.align_to::<u16>() }) {
790+
(true, (&[], v, &[])) => Self::from_utf16_lossy(v),
791+
(true, (&[], v, &[_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
792+
_ => {
793+
let mut iter = v.array_chunks::<2>();
794+
let string = decode_utf16(iter.by_ref().copied().map(u16::from_le_bytes))
795+
.map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
796+
.collect();
797+
if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" }
798+
}
799+
}
800+
}
801+
802+
/// Decode a UTF-16BE–encoded vector `v` into a `String`, returning [`Err`]
803+
/// if `v` contains any invalid data.
804+
///
805+
/// # Examples
806+
///
807+
/// Basic usage:
808+
///
809+
/// ```
810+
/// #![feature(str_from_utf16_endian)]
811+
/// // 𝄞music
812+
/// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
813+
/// 0x00, 0x73, 0x00, 0x69, 0x00, 0x63];
814+
/// assert_eq!(String::from("𝄞music"),
815+
/// String::from_utf16be(v).unwrap());
816+
///
817+
/// // 𝄞mu<invalid>ic
818+
/// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
819+
/// 0xD8, 0x00, 0x00, 0x69, 0x00, 0x63];
820+
/// assert!(String::from_utf16be(v).is_err());
821+
/// ```
822+
#[cfg(not(no_global_oom_handling))]
823+
#[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
824+
pub fn from_utf16be(v: &[u8]) -> Result<String, FromUtf16Error> {
825+
if v.len() % 2 != 0 {
826+
return Err(FromUtf16Error(()));
827+
}
828+
match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
829+
(true, (&[], v, &[])) => Self::from_utf16(v),
830+
_ => decode_utf16(v.array_chunks::<2>().copied().map(u16::from_be_bytes))
831+
.collect::<Result<_, _>>()
832+
.map_err(|_| FromUtf16Error(())),
833+
}
834+
}
835+
836+
/// Decode a UTF-16BE–encoded slice `v` into a `String`, replacing
837+
/// invalid data with [the replacement character (`U+FFFD`)][U+FFFD].
838+
///
839+
/// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`],
840+
/// `from_utf16le_lossy` returns a `String` since the UTF-16 to UTF-8
841+
/// conversion requires a memory allocation.
842+
///
843+
/// [`from_utf8_lossy`]: String::from_utf8_lossy
844+
/// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow"
845+
/// [U+FFFD]: core::char::REPLACEMENT_CHARACTER
846+
///
847+
/// # Examples
848+
///
849+
/// Basic usage:
850+
///
851+
/// ```
852+
/// #![feature(str_from_utf16_endian)]
853+
/// // 𝄞mus<invalid>ic<invalid>
854+
/// let v = &[0xD8, 0x34, 0xDD, 0x1E, 0x00, 0x6d, 0x00, 0x75,
855+
/// 0x00, 0x73, 0xDD, 0x1E, 0x00, 0x69, 0x00, 0x63,
856+
/// 0xD8, 0x34];
857+
///
858+
/// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"),
859+
/// String::from_utf16be_lossy(v));
860+
/// ```
861+
#[cfg(not(no_global_oom_handling))]
862+
#[unstable(feature = "str_from_utf16_endian", issue = "none", reason = "recently added")]
863+
pub fn from_utf16be_lossy(v: &[u8]) -> String {
864+
match (cfg!(target_endian = "big"), unsafe { v.align_to::<u16>() }) {
865+
(true, (&[], v, &[])) => Self::from_utf16_lossy(v),
866+
(true, (&[], v, &[_remainder])) => Self::from_utf16_lossy(v) + "\u{FFFD}",
867+
_ => {
868+
let mut iter = v.array_chunks::<2>();
869+
let string = decode_utf16(iter.by_ref().copied().map(u16::from_be_bytes))
870+
.map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
871+
.collect();
872+
if iter.remainder().is_empty() { string } else { string + "\u{FFFD}" }
873+
}
874+
}
875+
}
876+
727877
/// Decomposes a `String` into its raw components.
728878
///
729879
/// Returns the raw pointer to the underlying data, the length of

0 commit comments

Comments
 (0)