Skip to content

Commit d6734be

Browse files
committed
Auto merge of #100996 - m-ou-se:format-args-2, r=estebank
Rewrite and refactor format_args!() builtin macro. This is a near-complete rewrite of `compiler/rustc_builtin_macros/src/format.rs`. This gets rid of the massive, unmaintainable [`Context` struct](https://github.com/rust-lang/rust/blob/76531befc4b0352247ada67bd225e8cf71ee5686/compiler/rustc_builtin_macros/src/format.rs#L176-L263), and splits the macro expansion into three parts: 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax, but doesn't parse the template (the literal) itself. 2. Second, `make_format_args` will parse the template, the format options, resolve argument references, produce diagnostics, and turn the whole thing into a `FormatArgs` structure. 3. Finally, `expand_parsed_format_args` will turn that `FormatArgs` structure into the expression that the macro expands to. In other words, the `format_args` builtin macro used to be a hard-to-maintain 'single-pass compiler', which I've split into a three-phase compiler with a parser/tokenizer (step 1), semantic analysis (step 2), and backend (step 3). (It's compilers all the way down. ^^) This can serve as a great starting point for #99012, which will only need to change the implementation of step 3, while leaving steps 1 and 2 unchanged. It also makes rust-lang/compiler-team#541 easier, which could then upgrade the new `FormatArgs` struct to an `ast` node and remove step 3, moving that step to later in the compilation process. It also fixes a few diagnostics bugs. This also [significantly reduces](https://gist.github.com/m-ou-se/b67b2d54172c4837a5ab1b26fa3e5284) the amount of generated code for cases with arguments in non-default order without formatting options, like `"{1} {0}"` or `"{a} {}"`, etc.
2 parents 90c34fa + 20bb600 commit d6734be

File tree

16 files changed

+1449
-1540
lines changed

16 files changed

+1449
-1540
lines changed

Diff for: compiler/rustc_builtin_macros/src/format.rs

+638-1,311
Large diffs are not rendered by default.

Diff for: compiler/rustc_builtin_macros/src/format/ast.rs

+240
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
use rustc_ast::ptr::P;
2+
use rustc_ast::Expr;
3+
use rustc_data_structures::fx::FxHashMap;
4+
use rustc_span::symbol::{Ident, Symbol};
5+
use rustc_span::Span;
6+
7+
// Definitions:
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
// └──────────────────────────────────────────────┘
//                    FormatArgs
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//                                     └─────────┘
//                                      argument
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//              └───────────────────┘
//                    template
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//               └────┘└─────────┘└┘
//                     pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//               └────┘           └┘
//                 literal pieces
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//                     └─────────┘
//                     placeholder
//
// format_args!("hello {abc:.xyz$}!!", abc="world");
//                      └─┘  └─┘
//                      positions (could be names, numbers, empty, or `*`)
36+
37+
/// (Parsed) format args.
38+
///
39+
/// Basically the "AST" for a complete `format_args!()`.
40+
///
41+
/// E.g., `format_args!("hello {name}");`.
42+
#[derive(Clone, Debug)]
43+
pub struct FormatArgs {
44+
pub span: Span,
45+
pub template: Vec<FormatArgsPiece>,
46+
pub arguments: FormatArguments,
47+
}
48+
49+
/// A piece of a format template string.
50+
///
51+
/// E.g. "hello" or "{name}".
52+
#[derive(Clone, Debug)]
53+
pub enum FormatArgsPiece {
54+
Literal(Symbol),
55+
Placeholder(FormatPlaceholder),
56+
}
57+
58+
/// The arguments to format_args!().
59+
///
60+
/// E.g. `1, 2, name="ferris", n=3`,
61+
/// but also implicit captured arguments like `x` in `format_args!("{x}")`.
62+
#[derive(Clone, Debug)]
63+
pub struct FormatArguments {
64+
arguments: Vec<FormatArgument>,
65+
num_unnamed_args: usize,
66+
num_explicit_args: usize,
67+
names: FxHashMap<Symbol, usize>,
68+
}
69+
70+
impl FormatArguments {
71+
pub fn new() -> Self {
72+
Self {
73+
arguments: Vec::new(),
74+
names: FxHashMap::default(),
75+
num_unnamed_args: 0,
76+
num_explicit_args: 0,
77+
}
78+
}
79+
80+
pub fn add(&mut self, arg: FormatArgument) -> usize {
81+
let index = self.arguments.len();
82+
if let Some(name) = arg.kind.ident() {
83+
self.names.insert(name.name, index);
84+
} else if self.names.is_empty() {
85+
// Only count the unnamed args before the first named arg.
86+
// (Any later ones are errors.)
87+
self.num_unnamed_args += 1;
88+
}
89+
if !matches!(arg.kind, FormatArgumentKind::Captured(..)) {
90+
// This is an explicit argument.
91+
// Make sure that all arguments so far are explcit.
92+
assert_eq!(
93+
self.num_explicit_args,
94+
self.arguments.len(),
95+
"captured arguments must be added last"
96+
);
97+
self.num_explicit_args += 1;
98+
}
99+
self.arguments.push(arg);
100+
index
101+
}
102+
103+
pub fn by_name(&self, name: Symbol) -> Option<(usize, &FormatArgument)> {
104+
let i = *self.names.get(&name)?;
105+
Some((i, &self.arguments[i]))
106+
}
107+
108+
pub fn by_index(&self, i: usize) -> Option<&FormatArgument> {
109+
(i < self.num_explicit_args).then(|| &self.arguments[i])
110+
}
111+
112+
pub fn unnamed_args(&self) -> &[FormatArgument] {
113+
&self.arguments[..self.num_unnamed_args]
114+
}
115+
116+
pub fn named_args(&self) -> &[FormatArgument] {
117+
&self.arguments[self.num_unnamed_args..self.num_explicit_args]
118+
}
119+
120+
pub fn explicit_args(&self) -> &[FormatArgument] {
121+
&self.arguments[..self.num_explicit_args]
122+
}
123+
124+
pub fn into_vec(self) -> Vec<FormatArgument> {
125+
self.arguments
126+
}
127+
}
128+
129+
#[derive(Clone, Debug)]
130+
pub struct FormatArgument {
131+
pub kind: FormatArgumentKind,
132+
pub expr: P<Expr>,
133+
}
134+
135+
#[derive(Clone, Debug)]
136+
pub enum FormatArgumentKind {
137+
/// `format_args(…, arg)`
138+
Normal,
139+
/// `format_args(…, arg = 1)`
140+
Named(Ident),
141+
/// `format_args("… {arg} …")`
142+
Captured(Ident),
143+
}
144+
145+
impl FormatArgumentKind {
146+
pub fn ident(&self) -> Option<Ident> {
147+
match self {
148+
&Self::Normal => None,
149+
&Self::Named(id) => Some(id),
150+
&Self::Captured(id) => Some(id),
151+
}
152+
}
153+
}
154+
155+
#[derive(Clone, Debug, PartialEq, Eq)]
156+
pub struct FormatPlaceholder {
157+
/// Index into [`FormatArgs::arguments`].
158+
pub argument: FormatArgPosition,
159+
/// The span inside the format string for the full `{…}` placeholder.
160+
pub span: Option<Span>,
161+
/// `{}`, `{:?}`, or `{:x}`, etc.
162+
pub format_trait: FormatTrait,
163+
/// `{}` or `{:.5}` or `{:-^20}`, etc.
164+
pub format_options: FormatOptions,
165+
}
166+
167+
#[derive(Clone, Debug, PartialEq, Eq)]
168+
pub struct FormatArgPosition {
169+
/// Which argument this position refers to (Ok),
170+
/// or would've referred to if it existed (Err).
171+
pub index: Result<usize, usize>,
172+
/// What kind of position this is. See [`FormatArgPositionKind`].
173+
pub kind: FormatArgPositionKind,
174+
/// The span of the name or number.
175+
pub span: Option<Span>,
176+
}
177+
178+
/// How a position in the template was written.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FormatArgPositionKind {
    /// `{}` or `{:.*}`
    Implicit,
    /// `{1}` or `{:1$}` or `{:.1$}`
    Number,
    /// `{a}` or `{:a$}` or `{:.a$}`
    Named,
}
187+
188+
/// The formatting trait selected by a placeholder.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum FormatTrait {
    /// `{}`
    Display,
    /// `{:?}`
    Debug,
    /// `{:e}`
    LowerExp,
    /// `{:E}`
    UpperExp,
    /// `{:o}`
    Octal,
    /// `{:p}`
    Pointer,
    /// `{:b}`
    Binary,
    /// `{:x}`
    LowerHex,
    /// `{:X}`
    UpperHex,
}
209+
210+
#[derive(Clone, Debug, Default, PartialEq, Eq)]
211+
pub struct FormatOptions {
212+
/// The width. E.g. `{:5}` or `{:width$}`.
213+
pub width: Option<FormatCount>,
214+
/// The precision. E.g. `{:.5}` or `{:.precision$}`.
215+
pub precision: Option<FormatCount>,
216+
/// The alignment. E.g. `{:>}` or `{:<}` or `{:^}`.
217+
pub alignment: Option<FormatAlignment>,
218+
/// The fill character. E.g. the `.` in `{:.>10}`.
219+
pub fill: Option<char>,
220+
/// The `+`, `-`, `0`, `#`, `x?` and `X?` flags.
221+
pub flags: u32,
222+
}
223+
224+
/// The alignment requested by a placeholder.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum FormatAlignment {
    /// `{:<}`
    Left,
    /// `{:>}`
    Right,
    /// `{:^}`
    Center,
}
233+
234+
#[derive(Clone, Debug, PartialEq, Eq)]
235+
pub enum FormatCount {
236+
/// `{:5}` or `{:.5}`
237+
Literal(usize),
238+
/// `{:.*}`, `{:.5$}`, or `{:a$}`, etc.
239+
Argument(FormatArgPosition),
240+
}

0 commit comments

Comments
 (0)