Skip to content

Commit 42d870e

Browse files
committed
Introduce printable-ASCII fast-path for impl Debug for str
Instead of having a single loop that works on utf-8 `char`s, this splits the implementation into a loop that quickly skips over printable ASCII, falling back to per-char iteration for other chunks.
1 parent 3fda931 commit 42d870e

File tree

1 file changed

+42
-17
lines changed

1 file changed

+42
-17
lines changed

Diff for: library/core/src/fmt/mod.rs

+42-17
Original file line numberDiff line numberDiff line change
@@ -2399,26 +2399,51 @@ impl Display for bool {
23992399
impl Debug for str {
24002400
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
24012401
f.write_char('"')?;
2402-
let mut from = 0;
2403-
for (i, c) in self.char_indices() {
2404-
// a fast path for ASCII chars that do not need escapes:
2405-
if matches!(c, ' '..='~') && !matches!(c, '\\' | '\"') {
2406-
continue;
2407-
}
24082402

2409-
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
2410-
escape_grapheme_extended: true,
2411-
escape_single_quote: false,
2412-
escape_double_quote: true,
2413-
});
2414-
// If char needs escaping, flush backlog so far and write, else skip
2415-
if esc.len() != 1 {
2416-
f.write_str(&self[from..i])?;
2417-
Display::fmt(&esc, f)?;
2418-
from = i + c.len_utf8();
2403+
// substring we know is printable
2404+
let mut printable_range = 0..0;
2405+
2406+
fn needs_escape(b: u8) -> bool {
2407+
b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
2408+
}
2409+
2410+
// the outer loop here splits the string into chunks of printable ASCII, which are just skipped over,
2411+
// and chunks of other chars (Unicode, or ASCII that needs escaping), which are handled per-`char`.
2412+
let mut rest = self.as_bytes();
2413+
while rest.len() > 0 {
2414+
let Some(non_printable_start) = rest.iter().position(|&b| needs_escape(b)) else {
2415+
printable_range.end += rest.len();
2416+
break;
2417+
};
2418+
2419+
printable_range.end += non_printable_start;
2420+
// SAFETY: the position was derived from an iterator, so is known to be within bounds; every byte before it is printable ASCII, so it is at a char boundary
2421+
rest = unsafe { rest.get_unchecked(non_printable_start..) };
2422+
2423+
let printable_start = rest.iter().position(|&b| !needs_escape(b)).unwrap_or(rest.len());
2424+
let prefix;
2425+
// SAFETY: the position was derived from an iterator (or is `rest.len()`), so is known to be within bounds; it is either the index of an ASCII byte or the end of the slice, so it is at a char boundary
2426+
(prefix, rest) = unsafe { rest.split_at_unchecked(printable_start) };
2427+
// SAFETY: prefix is a valid utf8 sequence, and at a char boundary
2428+
let prefix = unsafe { crate::str::from_utf8_unchecked(prefix) };
2429+
2430+
for c in prefix.chars() {
2431+
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
2432+
escape_grapheme_extended: true,
2433+
escape_single_quote: false,
2434+
escape_double_quote: true,
2435+
});
2436+
if esc.len() != 1 {
2437+
f.write_str(&self[printable_range.clone()])?;
2438+
Display::fmt(&esc, f)?;
2439+
printable_range.start = printable_range.end + c.len_utf8();
2440+
}
2441+
printable_range.end += c.len_utf8();
24192442
}
24202443
}
2421-
f.write_str(&self[from..])?;
2444+
2445+
f.write_str(&self[printable_range])?;
2446+
24222447
f.write_char('"')
24232448
}
24242449
}

0 commit comments

Comments
 (0)