Mark and implement 'char::encode_utf8' as const.

bjoernager · bjoernager · commit 28e97566e5b9 · 2024-09-17T18:22:12.000+02:00
diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs
@@ -1,6 +1,7 @@
 //! impl char {}
 
 use super::*;
+use crate::hint::unreachable_unchecked;
 use crate::slice;
 use crate::str::from_utf8_unchecked_mut;
 use crate::unicode::printable::is_printable;
@@ -673,7 +674,7 @@ impl char {
     /// ```
     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
     #[inline]
-    pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
+    pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
         // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
         unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
     }
@@ -1762,35 +1763,35 @@ const fn len_utf8(code: u32) -> usize {
 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
 #[doc(hidden)]
 #[inline]
-pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
+pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
     let len = len_utf8(code);
-    match (len, &mut dst[..]) {
-        (1, [a, ..]) => {
-            *a = code as u8;
+    // The previous panic message interpolated `len`, `code` and `dst.len()`; that formatting is not const-compatible, so a fixed message is used.
+    assert!(
+        len <= dst.len(),
+        "encode_utf8: buffer does not have enough bytes to encode code point",
+    );
+    match len {
+        1 => {
+            dst[0x0] = code as u8;
         }
-        (2, [a, b, ..]) => {
-            *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
-            *b = (code & 0x3F) as u8 | TAG_CONT;
+        2 => {
+            dst[0x0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+            dst[0x1] = (code & 0x3F) as u8 | TAG_CONT;
         }
-        (3, [a, b, c, ..]) => {
-            *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-            *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-            *c = (code & 0x3F) as u8 | TAG_CONT;
+        3 => {
+            dst[0x0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+            dst[0x1] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+            dst[0x2] = (code & 0x3F) as u8 | TAG_CONT;
         }
-        (4, [a, b, c, d, ..]) => {
-            *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
-            *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-            *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
-            *d = (code & 0x3F) as u8 | TAG_CONT;
+        4 => {
+            dst[0x0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+            dst[0x1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+            dst[0x2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+            dst[0x3] = (code & 0x3F) as u8 | TAG_CONT;
         }
-        _ => panic!(
-            "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
-            len,
-            code,
-            dst.len(),
-        ),
+        _ => unsafe { unreachable_unchecked() }, // SAFETY: assumes `len_utf8` only returns 1..=4 (its body is not shown in this diff; confirm).
     };
-    &mut dst[..len]
+    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) } // SAFETY: `len <= dst.len()` was asserted above, so the first `len` bytes of `dst` are in bounds.
 }
 
 /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,