Skip to content

Commit 54e33bb

Browse files
Account for C string literals in HiddenUnicodeCodepoints lint
1 parent 4e5fec2 commit 54e33bb

File tree

3 files changed

+76
-23
lines changed

compiler/rustc_lint/src/hidden_unicode_codepoints.rs

+19 -5
Original file line number | Diff line number | Diff line change
@@ -101,14 +101,28 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
101101
if !contains_text_flow_control_chars(text.as_str()) {
102102
return;
103103
}
104-
let padding = match token_lit.kind {
104+
let (padding, point_at_inner_spans) = match token_lit.kind {
105105
// account for `"` or `'`
106-
ast::token::LitKind::Str | ast::token::LitKind::Char => 1,
106+
ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
107+
// account for `c"`
108+
ast::token::LitKind::CStr => (2, true),
107109
// account for `r###"`
108-
ast::token::LitKind::StrRaw(n) => n as u32 + 2,
109-
_ => return,
110+
ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
111+
// account for `cr###"`
112+
ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
113+
// suppress bad literals.
114+
ast::token::LitKind::Err(_) => return,
115+
// Be conservative just in case new literals do support these.
116+
_ => (0, false),
110117
};
111-
self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal");
118+
self.lint_text_direction_codepoint(
119+
cx,
120+
text,
121+
expr.span,
122+
padding,
123+
point_at_inner_spans,
124+
"literal",
125+
);
112126
}
113127
_ => {}
114128
};

tests/ui/parser/unicode-control-codepoints.rs

+7
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
//@ edition: 2021
2+
13
fn main() {
24
// if access_level != "us‫e‪r" { // Check if admin
35
//~^ ERROR unicode codepoint changing visible direction of text present in comment
@@ -25,6 +27,11 @@ fn main() {
2527
//~| ERROR non-ASCII character in raw byte string literal
2628
println!("{:?}", '‮');
2729
//~^ ERROR unicode codepoint changing visible direction of text present in literal
30+
31+
let _ = c"‮";
32+
//~^ ERROR unicode codepoint changing visible direction of text present in literal
33+
let _ = cr#"‮"#;
34+
//~^ ERROR unicode codepoint changing visible direction of text present in literal
2835
}
2936

3037
//"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"

tests/ui/parser/unicode-control-codepoints.stderr

+50 -18
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,21 @@
11
error: unicode escape in byte string
2-
--> $DIR/unicode-control-codepoints.rs:6:26
2+
--> $DIR/unicode-control-codepoints.rs:8:26
33
|
44
LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
55
| ^^^^^^^^ unicode escape in byte string
66
|
77
= help: unicode escape sequences cannot be used as a byte or in a byte string
88

99
error: unicode escape in byte string
10-
--> $DIR/unicode-control-codepoints.rs:6:35
10+
--> $DIR/unicode-control-codepoints.rs:8:35
1111
|
1212
LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
1313
| ^^^^^^^^ unicode escape in byte string
1414
|
1515
= help: unicode escape sequences cannot be used as a byte or in a byte string
1616

1717
error: non-ASCII character in byte string literal
18-
--> $DIR/unicode-control-codepoints.rs:16:26
18+
--> $DIR/unicode-control-codepoints.rs:18:26
1919
|
2020
LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
2121
| ^ must be ASCII but is '\u{202e}'
@@ -26,7 +26,7 @@ LL | println!("{:?}", b"/*\xE2\x80\xAE } �if isAdmin� � begin admins o
2626
| ~~~~~~~~~~~~
2727

2828
error: non-ASCII character in byte string literal
29-
--> $DIR/unicode-control-codepoints.rs:16:30
29+
--> $DIR/unicode-control-codepoints.rs:18:30
3030
|
3131
LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
3232
| ^ must be ASCII but is '\u{2066}'
@@ -37,7 +37,7 @@ LL | println!("{:?}", b"/*� } \xE2\x81\xA6if isAdmin� � begin admins o
3737
| ~~~~~~~~~~~~
3838

3939
error: non-ASCII character in byte string literal
40-
--> $DIR/unicode-control-codepoints.rs:16:41
40+
--> $DIR/unicode-control-codepoints.rs:18:41
4141
|
4242
LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
4343
| ^ must be ASCII but is '\u{2069}'
@@ -48,7 +48,7 @@ LL | println!("{:?}", b"/*� } �if isAdmin\xE2\x81\xA9 � begin admins o
4848
| ~~~~~~~~~~~~
4949

5050
error: non-ASCII character in byte string literal
51-
--> $DIR/unicode-control-codepoints.rs:16:43
51+
--> $DIR/unicode-control-codepoints.rs:18:43
5252
|
5353
LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only ");
5454
| ^ must be ASCII but is '\u{2066}'
@@ -59,31 +59,31 @@ LL | println!("{:?}", b"/*� } �if isAdmin� \xE2\x81\xA6 begin admins o
5959
| ~~~~~~~~~~~~
6060

6161
error: non-ASCII character in raw byte string literal
62-
--> $DIR/unicode-control-codepoints.rs:21:29
62+
--> $DIR/unicode-control-codepoints.rs:23:29
6363
|
6464
LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
6565
| ^ must be ASCII but is '\u{202e}'
6666

6767
error: non-ASCII character in raw byte string literal
68-
--> $DIR/unicode-control-codepoints.rs:21:33
68+
--> $DIR/unicode-control-codepoints.rs:23:33
6969
|
7070
LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
7171
| ^ must be ASCII but is '\u{2066}'
7272

7373
error: non-ASCII character in raw byte string literal
74-
--> $DIR/unicode-control-codepoints.rs:21:44
74+
--> $DIR/unicode-control-codepoints.rs:23:44
7575
|
7676
LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
7777
| ^ must be ASCII but is '\u{2069}'
7878

7979
error: non-ASCII character in raw byte string literal
80-
--> $DIR/unicode-control-codepoints.rs:21:46
80+
--> $DIR/unicode-control-codepoints.rs:23:46
8181
|
8282
LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##);
8383
| ^ must be ASCII but is '\u{2066}'
8484

8585
error: unicode codepoint changing visible direction of text present in comment
86-
--> $DIR/unicode-control-codepoints.rs:2:5
86+
--> $DIR/unicode-control-codepoints.rs:4:5
8787
|
8888
LL | // if access_level != "us�e�r" { // Check if admin
8989
| ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -97,7 +97,7 @@ LL | // if access_level != "us�e�r" { // Check if admin
9797
= help: if their presence wasn't intentional, you can remove them
9898

9999
error: unicode codepoint changing visible direction of text present in comment
100-
--> $DIR/unicode-control-codepoints.rs:30:1
100+
--> $DIR/unicode-control-codepoints.rs:37:1
101101
|
102102
LL | //"/*� } �if isAdmin� � begin admins only */"
103103
| ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -112,7 +112,7 @@ LL | //"/*� } �if isAdmin� � begin admins only */"
112112
= help: if their presence wasn't intentional, you can remove them
113113

114114
error: unicode codepoint changing visible direction of text present in literal
115-
--> $DIR/unicode-control-codepoints.rs:11:22
115+
--> $DIR/unicode-control-codepoints.rs:13:22
116116
|
117117
LL | println!("{:?}", "/*� } �if isAdmin� � begin admins only ");
118118
| ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^
@@ -132,7 +132,7 @@ LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi
132132
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
133133

134134
error: unicode codepoint changing visible direction of text present in literal
135-
--> $DIR/unicode-control-codepoints.rs:14:22
135+
--> $DIR/unicode-control-codepoints.rs:16:22
136136
|
137137
LL | println!("{:?}", r##"/*� } �if isAdmin� � begin admins only "##);
138138
| ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -151,7 +151,7 @@ LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b
151151
| ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~
152152

153153
error: unicode codepoint changing visible direction of text present in literal
154-
--> $DIR/unicode-control-codepoints.rs:26:22
154+
--> $DIR/unicode-control-codepoints.rs:28:22
155155
|
156156
LL | println!("{:?}", '�');
157157
| ^-^
@@ -166,8 +166,40 @@ help: if you want to keep them but make them visible in your source code, you ca
166166
LL | println!("{:?}", '\u{202e}');
167167
| ~~~~~~~~
168168

169+
error: unicode codepoint changing visible direction of text present in literal
170+
--> $DIR/unicode-control-codepoints.rs:31:13
171+
|
172+
LL | let _ = c"�";
173+
| ^^-^
174+
| | |
175+
| | '\u{202e}'
176+
| this literal contains an invisible unicode text flow control codepoint
177+
|
178+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
179+
= help: if their presence wasn't intentional, you can remove them
180+
help: if you want to keep them but make them visible in your source code, you can escape them
181+
|
182+
LL | let _ = c"\u{202e}";
183+
| ~~~~~~~~
184+
185+
error: unicode codepoint changing visible direction of text present in literal
186+
--> $DIR/unicode-control-codepoints.rs:33:13
187+
|
188+
LL | let _ = cr#"�"#;
189+
| ^^^^-^^
190+
| | |
191+
| | '\u{202e}'
192+
| this literal contains an invisible unicode text flow control codepoint
193+
|
194+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
195+
= help: if their presence wasn't intentional, you can remove them
196+
help: if you want to keep them but make them visible in your source code, you can escape them
197+
|
198+
LL | let _ = cr#"\u{202e}"#;
199+
| ~~~~~~~~
200+
169201
error: unicode codepoint changing visible direction of text present in doc comment
170-
--> $DIR/unicode-control-codepoints.rs:33:1
202+
--> $DIR/unicode-control-codepoints.rs:40:1
171203
|
172204
LL | /** '�'); */fn foo() {}
173205
| ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
@@ -177,7 +209,7 @@ LL | /** '�'); */fn foo() {}
177209
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
178210

179211
error: unicode codepoint changing visible direction of text present in doc comment
180-
--> $DIR/unicode-control-codepoints.rs:36:1
212+
--> $DIR/unicode-control-codepoints.rs:43:1
181213
|
182214
LL | / /**
183215
LL | | *
@@ -188,5 +220,5 @@ LL | | * '�'); */fn bar() {}
188220
= note: if their presence wasn't intentional, you can remove them
189221
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
190222

191-
error: aborting due to 17 previous errors
223+
error: aborting due to 19 previous errors
192224

0 commit comments

Comments
 (0)