Skip to content

Commit 0c94f63

Browse files
authored
Rollup merge of #134956 - compiler-errors:format-args-hidden-chars, r=jieyouxu
Account for C string literals and `format_args` in `HiddenUnicodeCodepoints` lint. This is stacked on #134955; either that can land first, or both of them can land together here. I split this out because its implementation is a bit more involved. Fixes #94945.
2 parents 852440b + ea291e5 commit 0c94f63

File tree

9 files changed

+157
-42
lines changed

9 files changed

+157
-42
lines changed

compiler/rustc_ast/src/format.rs

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use rustc_span::{Ident, Span, Symbol};
44

55
use crate::Expr;
66
use crate::ptr::P;
7+
use crate::token::LitKind;
78

89
// Definitions:
910
//
@@ -45,6 +46,10 @@ pub struct FormatArgs {
4546
pub span: Span,
4647
pub template: Vec<FormatArgsPiece>,
4748
pub arguments: FormatArguments,
49+
/// The raw, un-split format string literal, with no escaping or processing.
50+
///
51+
/// Generally only useful for lints that care about the raw bytes the user wrote.
52+
pub uncooked_fmt_str: (LitKind, Symbol),
4853
}
4954

5055
/// A piece of a format template string.

compiler/rustc_ast/src/mut_visit.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1596,7 +1596,7 @@ fn walk_inline_asm_sym<T: MutVisitor>(
15961596

15971597
fn walk_format_args<T: MutVisitor>(vis: &mut T, fmt: &mut FormatArgs) {
15981598
// FIXME: visit the template exhaustively.
1599-
let FormatArgs { span, template: _, arguments } = fmt;
1599+
let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt;
16001600
for FormatArgument { kind, expr } in arguments.all_args_mut() {
16011601
match kind {
16021602
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {

compiler/rustc_ast/src/visit.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>(
10611061
}
10621062

10631063
pub fn walk_format_args<'a, V: Visitor<'a>>(visitor: &mut V, fmt: &'a FormatArgs) -> V::Result {
1064-
let FormatArgs { span: _, template: _, arguments } = fmt;
1064+
let FormatArgs { span: _, template: _, arguments, uncooked_fmt_str: _ } = fmt;
10651065
for FormatArgument { kind, expr } in arguments.all_args() {
10661066
match kind {
10671067
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {

compiler/rustc_builtin_macros/src/asm.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use smallvec::smallvec;
1616
use {rustc_ast as ast, rustc_parse_format as parse};
1717

1818
use crate::errors;
19-
use crate::util::expr_to_spanned_string;
19+
use crate::util::{ExprToSpannedString, expr_to_spanned_string};
2020

2121
pub struct AsmArgs {
2222
pub templates: Vec<P<ast::Expr>>,
@@ -527,7 +527,12 @@ fn expand_preparsed_asm(
527527
let msg = "asm template must be a string literal";
528528
let template_sp = template_expr.span;
529529
let template_is_mac_call = matches!(template_expr.kind, ast::ExprKind::MacCall(_));
530-
let (template_str, template_style, template_span) = {
530+
let ExprToSpannedString {
531+
symbol: template_str,
532+
style: template_style,
533+
span: template_span,
534+
..
535+
} = {
531536
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else {
532537
return ExpandResult::Retry(());
533538
};

compiler/rustc_builtin_macros/src/format.rs

+14-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use rustc_parse_format as parse;
1717
use rustc_span::{BytePos, ErrorGuaranteed, Ident, InnerSpan, Span, Symbol};
1818

1919
use crate::errors;
20-
use crate::util::expr_to_spanned_string;
20+
use crate::util::{ExprToSpannedString, expr_to_spanned_string};
2121

2222
// The format_args!() macro is expanded in three steps:
2323
// 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax,
@@ -166,13 +166,18 @@ fn make_format_args(
166166

167167
let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input;
168168

169-
let (fmt_str, fmt_style, fmt_span) = {
169+
let ExprToSpannedString {
170+
symbol: fmt_str,
171+
span: fmt_span,
172+
style: fmt_style,
173+
uncooked_symbol: uncooked_fmt_str,
174+
} = {
170175
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else {
171176
return ExpandResult::Retry(());
172177
};
173178
match mac {
174179
Ok(mut fmt) if append_newline => {
175-
fmt.0 = Symbol::intern(&format!("{}\n", fmt.0));
180+
fmt.symbol = Symbol::intern(&format!("{}\n", fmt.symbol));
176181
fmt
177182
}
178183
Ok(fmt) => fmt,
@@ -584,7 +589,12 @@ fn make_format_args(
584589
}
585590
}
586591

587-
ExpandResult::Ready(Ok(FormatArgs { span: fmt_span, template, arguments: args }))
592+
ExpandResult::Ready(Ok(FormatArgs {
593+
span: fmt_span,
594+
template,
595+
arguments: args,
596+
uncooked_fmt_str,
597+
}))
588598
}
589599

590600
fn invalid_placeholder_type_error(

compiler/rustc_builtin_macros/src/util.rs

+19-4
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,17 @@ pub(crate) fn warn_on_duplicate_attribute(ecx: &ExtCtxt<'_>, item: &Annotatable,
5757

5858
/// `Ok` represents successfully retrieving the string literal at the correct
5959
/// position, e.g., `println("abc")`.
60-
type ExprToSpannedStringResult<'a> = Result<(Symbol, ast::StrStyle, Span), UnexpectedExprKind<'a>>;
60+
pub(crate) type ExprToSpannedStringResult<'a> = Result<ExprToSpannedString, UnexpectedExprKind<'a>>;
61+
62+
pub(crate) struct ExprToSpannedString {
63+
pub symbol: Symbol,
64+
pub style: ast::StrStyle,
65+
pub span: Span,
66+
/// The raw string literal, with no escaping or processing.
67+
///
68+
/// Generally only useful for lints that care about the raw bytes the user wrote.
69+
pub uncooked_symbol: (ast::token::LitKind, Symbol),
70+
}
6171

6272
/// - `Ok` is returned when the conversion to a string literal is unsuccessful,
6373
/// but another type of expression is obtained instead.
@@ -90,7 +100,12 @@ pub(crate) fn expr_to_spanned_string<'a>(
90100
ExpandResult::Ready(Err(match expr.kind {
91101
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
92102
Ok(ast::LitKind::Str(s, style)) => {
93-
return ExpandResult::Ready(Ok((s, style, expr.span)));
103+
return ExpandResult::Ready(Ok(ExprToSpannedString {
104+
symbol: s,
105+
style,
106+
span: expr.span,
107+
uncooked_symbol: (token_lit.kind, token_lit.symbol),
108+
}));
94109
}
95110
Ok(ast::LitKind::ByteStr(..)) => {
96111
let mut err = cx.dcx().struct_span_err(expr.span, err_msg);
@@ -128,7 +143,7 @@ pub(crate) fn expr_to_string(
128143
Ok((err, _)) => err.emit(),
129144
Err(guar) => guar,
130145
})
131-
.map(|(symbol, style, _)| (symbol, style))
146+
.map(|ExprToSpannedString { symbol, style, .. }| (symbol, style))
132147
})
133148
}
134149

@@ -183,7 +198,7 @@ pub(crate) fn get_single_str_spanned_from_tts(
183198
Ok((err, _)) => err.emit(),
184199
Err(guar) => guar,
185200
})
186-
.map(|(symbol, _style, span)| (symbol, span))
201+
.map(|ExprToSpannedString { symbol, span, .. }| (symbol, span))
187202
})
188203
}
189204

compiler/rustc_lint/src/hidden_unicode_codepoints.rs

+34-12
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,36 @@ impl HiddenUnicodeCodepoints {
8282
sub,
8383
});
8484
}
85+
86+
fn check_literal(
87+
&mut self,
88+
cx: &EarlyContext<'_>,
89+
text: Symbol,
90+
lit_kind: ast::token::LitKind,
91+
span: Span,
92+
label: &'static str,
93+
) {
94+
if !contains_text_flow_control_chars(text.as_str()) {
95+
return;
96+
}
97+
let (padding, point_at_inner_spans) = match lit_kind {
98+
// account for `"` or `'`
99+
ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
100+
// account for `c"`
101+
ast::token::LitKind::CStr => (2, true),
102+
// account for `r###"`
103+
ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
104+
// account for `cr###"`
105+
ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
106+
// suppress bad literals.
107+
ast::token::LitKind::Err(_) => return,
108+
// Be conservative just in case new literals do support these.
109+
_ => (0, false),
110+
};
111+
self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label);
112+
}
85113
}
114+
86115
impl EarlyLintPass for HiddenUnicodeCodepoints {
87116
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
88117
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
@@ -97,18 +126,11 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
97126
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
98127
match &expr.kind {
99128
ast::ExprKind::Lit(token_lit) => {
100-
let text = token_lit.symbol;
101-
if !contains_text_flow_control_chars(text.as_str()) {
102-
return;
103-
}
104-
let padding = match token_lit.kind {
105-
// account for `"` or `'`
106-
ast::token::LitKind::Str | ast::token::LitKind::Char => 1,
107-
// account for `r###"`
108-
ast::token::LitKind::StrRaw(n) => n as u32 + 2,
109-
_ => return,
110-
};
111-
self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal");
129+
self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal");
130+
}
131+
ast::ExprKind::FormatArgs(args) => {
132+
let (lit_kind, text) = args.uncooked_fmt_str;
133+
self.check_literal(cx, text, lit_kind, args.span, "format string");
112134
}
113135
_ => {}
114136
};

tests/ui/parser/unicode-control-codepoints.rs

+10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
//@ edition: 2021
2+
13
fn main() {
24
// if access_level != "us‫e‪r" { // Check if admin
35
//~^ ERROR unicode codepoint changing visible direction of text present in comment
@@ -25,6 +27,14 @@ fn main() {
2527
//~| ERROR non-ASCII character in raw byte string literal
2628
println!("{:?}", '‮');
2729
//~^ ERROR unicode codepoint changing visible direction of text present in literal
30+
31+
let _ = c"‮";
32+
//~^ ERROR unicode codepoint changing visible direction of text present in literal
33+
let _ = cr#"‮"#;
34+
//~^ ERROR unicode codepoint changing visible direction of text present in literal
35+
36+
println!("{{‮}}");
37+
//~^ ERROR unicode codepoint changing visible direction of text present in format string
2838
}
2939

3040
//"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"

0 commit comments

Comments
 (0)