Skip to content

Commit 5357ec1

Browse files
committed
(#93493) Add items from code review
1 parent 4d4ec97 commit 5357ec1

File tree

2 files changed

+70
-30
lines changed

2 files changed

+70
-30
lines changed

library/core/src/primitive_docs.rs

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -289,32 +289,52 @@ mod prim_never {}
289289
///
290290
/// A `char` is a '[Unicode scalar value]', which is any '[Unicode code point]'
291291
/// other than a [surrogate code point]. This has a fixed numerical definition:
292-
/// code points are in the range `'\0'` to `char::MAX` (`'\u{10FFFF}'`), inclusive.
293-
/// Surrogate code points, used by UTF-16, are in the range U+D800 to U+DFFF.
292+
/// code points are in the range 0 to 0x10FFFF, inclusive.
293+
/// Surrogate code points, used by UTF-16, are in the range 0xD800 to 0xDFFF.
294294
///
295295
/// No `char` may be constructed, whether as a literal or at runtime, that is not a
296296
/// Unicode scalar value:
297297
///
298298
/// ```text
299-
/// let forbidden_chars = [
300-
/// // Each of these is a compiler error
301-
/// '\u{D800}', '\u{DFFF}', '\u{110000}',
299+
/// // Each of these is a compiler error
300+
/// ['\u{D800}', '\u{DFFF}', '\u{110000}'];
301+
/// ```
302302
///
303-
/// // Panics; from_u32 returns None.
304-
/// char::from_u32(0xDE01).unwrap(),
303+
/// ```should_panic
304+
/// // Panics; from_u32 returns None.
305+
/// char::from_u32(0xDE01).unwrap();
306+
/// ```
305307
///
306-
/// // Undefined behaviour
307-
/// unsafe { char::from_u32_unchecked(0x110000) },
308-
/// ];
308+
/// ```no_run
309+
/// // Undefined behaviour
310+
/// unsafe { char::from_u32_unchecked(0x110000) };
309311
/// ```
310312
///
311-
/// Unicode is regularly updated. Many USVs are not currently assigned to a
312-
/// character, but may be in the future ("reserved"); some will never be a character
313-
/// ("noncharacters"); and some may be given different meanings by different users
314-
/// ("private use").
313+
/// USVs are also the exact set of values that may be encoded in UTF-8. Because
314+
/// `char` values are USVs and `str` values are valid UTF-8, it is safe to store
315+
/// any `char` in a `str` or read any character from a `str` as a `char`.
316+
///
317+
/// The gap in valid `char` values is understood by the compiler, so in the
318+
/// example below the two ranges are understood to cover the whole range of
319+
/// possible `char` values and there is no error for a [non-exhaustive match].
320+
///
321+
/// ```
322+
/// let c: char = 'a';
323+
/// match c {
324+
/// '\0' ..= '\u{D7FF}' => false,
325+
/// '\u{E000}' ..= '\u{10FFFF}' => true,
326+
/// };
327+
/// ```
328+
///
329+
/// All USVs are valid `char` values, but not all of them represent a real
330+
/// character. Many USVs are not currently assigned to a character, but may be
331+
/// in the future ("reserved"); some will never be a character
332+
/// ("noncharacters"); and some may be given different meanings by different
333+
/// users ("private use").
315334
///
316-
/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
317335
/// [Unicode code point]: https://www.unicode.org/glossary/#code_point
336+
/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
337+
/// [non-exhaustive match]: ../book/ch06-02-match.html#matches-are-exhaustive
318338
/// [surrogate code point]: https://www.unicode.org/glossary/#surrogate_code_point
319339
///
320340
/// # Representation

library/std/src/primitive_docs.rs

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -289,32 +289,52 @@ mod prim_never {}
289289
///
290290
/// A `char` is a '[Unicode scalar value]', which is any '[Unicode code point]'
291291
/// other than a [surrogate code point]. This has a fixed numerical definition:
292-
/// code points are in the range `'\0'` to `char::MAX` (`'\u{10FFFF}'`), inclusive.
293-
/// Surrogate code points, used by UTF-16, are in the range U+D800 to U+DFFF.
292+
/// code points are in the range 0 to 0x10FFFF, inclusive.
293+
/// Surrogate code points, used by UTF-16, are in the range 0xD800 to 0xDFFF.
294294
///
295295
/// No `char` may be constructed, whether as a literal or at runtime, that is not a
296296
/// Unicode scalar value:
297297
///
298298
/// ```text
299-
/// let forbidden_chars = [
300-
/// // Each of these is a compiler error
301-
/// '\u{D800}', '\u{DFFF}', '\u{110000}',
299+
/// // Each of these is a compiler error
300+
/// ['\u{D800}', '\u{DFFF}', '\u{110000}'];
301+
/// ```
302302
///
303-
/// // Panics; from_u32 returns None.
304-
/// char::from_u32(0xDE01).unwrap(),
303+
/// ```should_panic
304+
/// // Panics; from_u32 returns None.
305+
/// char::from_u32(0xDE01).unwrap();
306+
/// ```
305307
///
306-
/// // Undefined behaviour
307-
/// unsafe { char::from_u32_unchecked(0x110000) },
308-
/// ];
308+
/// ```no_run
309+
/// // Undefined behaviour
310+
/// unsafe { char::from_u32_unchecked(0x110000) };
309311
/// ```
310312
///
311-
/// Unicode is regularly updated. Many USVs are not currently assigned to a
312-
/// character, but may be in the future ("reserved"); some will never be a character
313-
/// ("noncharacters"); and some may be given different meanings by different users
314-
/// ("private use").
313+
/// USVs are also the exact set of values that may be encoded in UTF-8. Because
314+
/// `char` values are USVs and `str` values are valid UTF-8, it is safe to store
315+
/// any `char` in a `str` or read any character from a `str` as a `char`.
316+
///
317+
/// The gap in valid `char` values is understood by the compiler, so in the
318+
/// example below the two ranges are understood to cover the whole range of
319+
/// possible `char` values and there is no error for a [non-exhaustive match].
320+
///
321+
/// ```
322+
/// let c: char = 'a';
323+
/// match c {
324+
/// '\0' ..= '\u{D7FF}' => false,
325+
/// '\u{E000}' ..= '\u{10FFFF}' => true,
326+
/// };
327+
/// ```
328+
///
329+
/// All USVs are valid `char` values, but not all of them represent a real
330+
/// character. Many USVs are not currently assigned to a character, but may be
331+
/// in the future ("reserved"); some will never be a character
332+
/// ("noncharacters"); and some may be given different meanings by different
333+
/// users ("private use").
315334
///
316-
/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
317335
/// [Unicode code point]: https://www.unicode.org/glossary/#code_point
336+
/// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
337+
/// [non-exhaustive match]: ../book/ch06-02-match.html#matches-are-exhaustive
318338
/// [surrogate code point]: https://www.unicode.org/glossary/#surrogate_code_point
319339
///
320340
/// # Representation

0 commit comments

Comments
 (0)