Skip to content

Commit 3d87b25

Browse files
Add unicode/caret replacements for all control characters (#2712)
1 parent f2f6902 commit 3d87b25

File tree

4 files changed

+66
-51
lines changed

4 files changed

+66
-51
lines changed

src/preprocessor.rs

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -91,31 +91,27 @@ pub fn replace_nonprintable(
9191
});
9292
line_idx = 0;
9393
}
94-
// carriage return
95-
'\x0D' => output.push_str(match nonprintable_notation {
96-
NonprintableNotation::Caret => "^M",
97-
NonprintableNotation::Unicode => "␍",
98-
}),
99-
// null
100-
'\x00' => output.push_str(match nonprintable_notation {
101-
NonprintableNotation::Caret => "^@",
102-
NonprintableNotation::Unicode => "␀",
103-
}),
104-
// bell
105-
'\x07' => output.push_str(match nonprintable_notation {
106-
NonprintableNotation::Caret => "^G",
107-
NonprintableNotation::Unicode => "␇",
108-
}),
109-
// backspace
110-
'\x08' => output.push_str(match nonprintable_notation {
111-
NonprintableNotation::Caret => "^H",
112-
NonprintableNotation::Unicode => "␈",
113-
}),
114-
// escape
115-
'\x1B' => output.push_str(match nonprintable_notation {
116-
NonprintableNotation::Caret => "^[",
117-
NonprintableNotation::Unicode => "␛",
118-
}),
94+
// ASCII control characters
95+
'\x00'..='\x1F' => {
96+
let c = u32::from(chr);
97+
98+
match nonprintable_notation {
99+
NonprintableNotation::Caret => {
100+
let caret_character = char::from_u32(0x40 + c).unwrap();
101+
write!(output, "^{caret_character}").ok();
102+
}
103+
104+
NonprintableNotation::Unicode => {
105+
let replacement_symbol = char::from_u32(0x2400 + c).unwrap();
106+
output.push(replacement_symbol)
107+
}
108+
}
109+
}
110+
// delete
111+
'\x7F' => match nonprintable_notation {
112+
NonprintableNotation::Caret => output.push_str("^?"),
113+
NonprintableNotation::Unicode => output.push('\u{2421}'),
114+
},
119115
// printable ASCII
120116
c if c.is_ascii_alphanumeric()
121117
|| c.is_ascii_punctuation()

tests/examples/control_characters.txt

33 Bytes
Binary file not shown.

tests/integration_tests.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,6 +1728,25 @@ fn show_all_with_caret_notation() {
17281728
.assert()
17291729
.stdout("hello·world^J\n├──┤^M^@^G^H^[")
17301730
.stderr("");
1731+
1732+
bat()
1733+
.arg("--show-all")
1734+
.arg("--nonprintable-notation=caret")
1735+
.arg("control_characters.txt")
1736+
.assert()
1737+
.stdout("^@^A^B^C^D^E^F^G^H├─┤^J\n^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\\^]^^^_^?")
1738+
.stderr("");
1739+
}
1740+
1741+
#[test]
1742+
fn show_all_with_unicode() {
1743+
bat()
1744+
.arg("--show-all")
1745+
.arg("--nonprintable-notation=unicode")
1746+
.arg("control_characters.txt")
1747+
.assert()
1748+
.stdout("␀␁␂␃␄␅␆␇␈├─┤␊\n␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␡")
1749+
.stderr("");
17311750
}
17321751

17331752
#[test]

tests/syntax-tests/highlighted/Plaintext/plaintext.txt

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,36 @@
11
␀␊
2-
\u{1}␊
3-
\u{2}␊
4-
\u{3}␊
5-
\u{4}␊
6-
\u{5}␊
7-
\u{6}␊
2+
␁␊
3+
␂␊
4+
␃␊
5+
␄␊
6+
␅␊
7+
␆␊
88
␇␊
99
␈␊
1010
├──┤␊
1111
␊
1212
␊
13-
\u{b}␊
14-
\u{c}␊
13+
␋␊
14+
␌␊
1515
␊
16-
\u{e}␊
17-
\u{f}␊
18-
\u{10}␊
19-
\u{11}␊
20-
\u{12}␊
21-
\u{13}␊
22-
\u{14}␊
23-
\u{15}␊
24-
\u{16}␊
25-
\u{17}␊
26-
\u{18}␊
27-
\u{19}␊
28-
\u{1a}␊
16+
␎␊
17+
␏␊
18+
␐␊
19+
␑␊
20+
␒␊
21+
␓␊
22+
␔␊
23+
␕␊
24+
␖␊
25+
␗␊
26+
␘␊
27+
␙␊
28+
␚␊
2929
␛␊
30-
\u{1c}␊
31-
\u{1d}␊
32-
\u{1e}␊
33-
\u{1f}␊
30+
␜␊
31+
␝␊
32+
␞␊
33+
␟␊
3434
·␊
3535
!␊
3636
"␊
@@ -126,7 +126,7 @@
126126
|␊
127127
}␊
128128
~␊
129-
\u{7f}␊
129+
␡␊
130130
\u{80}␊
131131
\u{81}␊
132132
\u{82}␊

0 commit comments

Comments
 (0)