@@ -289,32 +289,52 @@ mod prim_never {}
289
289
///
290
290
/// A `char` is a '[Unicode scalar value]', which is any '[Unicode code point]'
291
291
/// other than a [surrogate code point]. This has a fixed numerical definition:
292
- /// code points are in the range `'\0'` to `char::MAX` (`'\u{10FFFF}'`), inclusive.
293
- /// Surrogate code points, used by UTF-16, are in the range U+D800 to U+DFFF.
292
+ /// code points are in the range 0 to 0x10FFFF, inclusive.
293
+ /// Surrogate code points, used by UTF-16, are in the range 0xD800 to 0xDFFF.
294
294
///
295
295
/// No `char` may be constructed, whether as a literal or at runtime, that is not a
296
296
/// Unicode scalar value:
297
297
///
298
298
/// ```text
299
- /// let forbidden_chars = [
300
- /// // Each of these is a compiler error
301
- /// '\u{D800}', '\u{DFFF}', '\u{110000}',
299
+ /// // Each of these is a compiler error
300
+ /// ['\u{D800}', '\u{DFFF}', '\u{110000}'];
301
+ /// ```
302
302
///
303
- /// // Panics; from_u32 returns None.
304
- /// char::from_u32(0xDE01).unwrap(),
303
+ /// ```should_panic
304
+ /// // Panics; from_u32 returns None.
305
+ /// char::from_u32(0xDE01).unwrap();
306
+ /// ```
305
307
///
306
- /// // Undefined behaviour
307
- /// unsafe { char::from_u32_unchecked(0x110000) },
308
- /// ];
308
+ /// ```no_run
309
+ /// // Undefined behaviour
310
+ /// unsafe { char::from_u32_unchecked(0x110000) };
309
311
/// ```
310
312
///
311
- /// Unicode is regularly updated. Many USVs are not currently assigned to a
312
- /// character, but may be in the future ("reserved"); some will never be a character
313
- /// ("noncharacters"); and some may be given different meanings by different users
314
- /// ("private use").
313
+ /// USVs are also the exact set of values that may be encoded in UTF-8. Because
314
+ /// `char` values are USVs and `str` values are valid UTF-8, it is safe to store
315
+ /// any `char` in a `str` or read any character from a `str` as a `char`.
316
+ ///
317
+ /// The gap in valid `char` values is understood by the compiler, so in the
318
+ /// example below the two ranges are understood to cover the whole range of
319
+ /// possible `char` values and there is no error for a [non-exhaustive match].
320
+ ///
321
+ /// ```
322
+ /// let c: char = 'a';
323
+ /// match c {
324
+ /// '\0' ..= '\u{D7FF}' => false,
325
+ /// '\u{E000}' ..= '\u{10FFFF}' => true,
326
+ /// };
327
+ /// ```
328
+ ///
329
+ /// All USVs are valid `char` values, but not all of them represent a real
330
+ /// character. Many USVs are not currently assigned to a character, but may be
331
+ /// in the future ("reserved"); some will never be a character
332
+ /// ("noncharacters"); and some may be given different meanings by different
333
+ /// users ("private use").
315
334
///
316
- /// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
317
335
/// [Unicode code point]: https://www.unicode.org/glossary/#code_point
336
+ /// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
337
+ /// [non-exhaustive match]: ../book/ch06-02-match.html#matches-are-exhaustive
318
338
/// [surrogate code point]: https://www.unicode.org/glossary/#surrogate_code_point
319
339
///
320
340
/// # Representation
0 commit comments