Skip to content

Commit 61eea6c

Browse files
committed
Improve WTF-8 comments
1 parent 27d76de commit 61eea6c

File tree

1 file changed

+23
-15
lines changed

1 file changed

+23
-15
lines changed

std/src/sys_common/wtf8.rs

+23-15
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,12 @@ impl ops::DerefMut for Wtf8Buf {
156156
}
157157
}
158158

159-
/// Format the string with double quotes,
160-
/// and surrogates as `\u` followed by four hexadecimal digits.
161-
/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800]
159+
/// Formats the string in double quotes, with characters escaped according to
160+
/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`,
161+
/// where each `x` is a hexadecimal digit.
162+
///
163+
/// For example, the code points [U+0061, U+D800, U+000A] are formatted as
164+
/// `"a\u{D800}\n"`.
162165
impl fmt::Debug for Wtf8Buf {
163166
#[inline]
164167
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -181,7 +184,7 @@ impl Wtf8Buf {
181184

182185
/// Creates a WTF-8 string from a WTF-8 byte vec.
183186
///
184-
/// Since the byte vec is not checked for valid WTF-8, this functions is
187+
/// Since the byte vec is not checked for valid WTF-8, this function is
185188
/// marked unsafe.
186189
#[inline]
187190
pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
@@ -237,8 +240,9 @@ impl Wtf8Buf {
237240
string
238241
}
239242

240-
/// Copied from String::push
243+
/// Appends the given code point to the end of this string.
241244
/// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
245+
/// Copied from String::push.
242246
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
243247
let mut bytes = [0; 4];
244248
let bytes = encode_utf8_raw(code_point.value, &mut bytes);
@@ -264,16 +268,16 @@ impl Wtf8Buf {
264268
///
265269
/// # Panics
266270
///
267-
/// Panics if the new capacity overflows `usize`.
271+
/// Panics if the new capacity exceeds `isize::MAX` bytes.
268272
#[inline]
269273
pub fn reserve(&mut self, additional: usize) {
270274
self.bytes.reserve(additional)
271275
}
272276

273-
/// Tries to reserve capacity for at least `additional` more length units
274-
/// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid
275-
/// frequent reallocations. After calling `try_reserve`, capacity will be
276-
/// greater than or equal to `self.len() + additional`. Does nothing if
277+
/// Tries to reserve capacity for at least `additional` more bytes to be
278+
/// inserted in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to
279+
/// avoid frequent reallocations. After calling `try_reserve`, capacity will
280+
/// be greater than or equal to `self.len() + additional`. Does nothing if
277281
/// capacity is already sufficient. This method preserves the contents even
278282
/// if an error occurs.
279283
///
@@ -291,8 +295,8 @@ impl Wtf8Buf {
291295
self.bytes.reserve_exact(additional)
292296
}
293297

294-
/// Tries to reserve the minimum capacity for exactly `additional`
295-
/// length units in the given `Wtf8Buf`. After calling
298+
/// Tries to reserve the minimum capacity for exactly `additional` more
299+
/// bytes to be inserted in the given `Wtf8Buf`. After calling
296300
/// `try_reserve_exact`, capacity will be greater than or equal to
297301
/// `self.len() + additional` if it returns `Ok(())`.
298302
/// Does nothing if the capacity is already sufficient.
@@ -450,6 +454,8 @@ impl Wtf8Buf {
450454
match self.next_surrogate(pos) {
451455
Some((surrogate_pos, _)) => {
452456
pos = surrogate_pos + 3;
457+
// Surrogates and the replacement character are all 3 bytes,
458+
// so they can be substituted in-place.
453459
self.bytes[surrogate_pos..pos]
454460
.copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
455461
}
@@ -535,9 +541,9 @@ impl AsInner<[u8]> for Wtf8 {
535541
}
536542
}
537543

538-
/// Format the slice with double quotes,
539-
/// and surrogates as `\u` followed by four hexadecimal digits.
540-
/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800]
544+
/// Formats the string in double quotes, with characters escaped according to
545+
/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`,
546+
/// where each `x` is a hexadecimal digit.
541547
impl fmt::Debug for Wtf8 {
542548
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
543549
fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
@@ -562,6 +568,8 @@ impl fmt::Debug for Wtf8 {
562568
}
563569
}
564570

571+
/// Formats the string with unpaired surrogates substituted with the replacement
572+
/// character, U+FFFD.
565573
impl fmt::Display for Wtf8 {
566574
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
567575
let wtf8_bytes = &self.bytes;

0 commit comments

Comments
 (0)