Commit 9d31f86

Overhaul token collection.
This commit does the following.

- Renames `collect_tokens_trailing_token` as `collect_tokens`, because (a) it's
  annoyingly long, and (b) the `_trailing_token` bit is less accurate now that
  its types have changed.

- In `collect_tokens`, adds an `Option<CollectPos>` argument and a
  `UsePreAttrPos` in the return type of `f`. These are used in
  `parse_expr_force_collect` (for vanilla expressions) and in
  `parse_stmt_without_recovery` (for two different cases of expression
  statements). Together these are enough to fix all the problems with token
  collection and assoc expressions. The changes to the `stringify.rs` test
  demonstrate some of these fixes.

- Adds a new test. The code in this test was causing an assertion failure prior
  to this commit, due to an invalid `NodeRange`.

The extra complexity is annoying, but necessary to fix the existing problems.
1 parent fe460ac commit 9d31f86
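
The mechanism the commit message describes can be illustrated with a small, self-contained toy model. This is illustrative Rust only, not compiler code: the real `CollectPos`, `UsePreAttrPos`, and `Parser::collect_tokens` appear in the diffs below, and everything else here (the `pos` counter, the simplified callback shape) is an assumption made for the sketch.

// Toy model (not rustc code): a collect position is captured *before* the
// outer attributes, and the parsing callback decides, via `UsePreAttrPos`,
// whether token collection should start from that earlier position instead
// of the usual post-attribute one.

#[derive(Clone, Debug)]
struct CollectPos {
    start_pos: u32, // stand-in for the real token-cursor snapshot
}

enum UsePreAttrPos {
    No,
    Yes,
}

struct Parser {
    pos: u32, // number of tokens consumed so far
}

impl Parser {
    fn collect_pos(&self) -> CollectPos {
        CollectPos { start_pos: self.pos }
    }

    // Simplified analogue of `collect_tokens`: run `f`, then pick which start
    // position the collected token range should use.
    fn collect_tokens<R>(
        &mut self,
        pre_attr_pos: Option<CollectPos>,
        f: impl FnOnce(&mut Self) -> (R, UsePreAttrPos),
    ) -> (R, std::ops::Range<u32>) {
        let mut collect_pos = self.collect_pos();
        let (res, use_pre_attr_pos) = f(self);
        if matches!(use_pre_attr_pos, UsePreAttrPos::Yes) {
            // The caller must supply `pre_attr_pos` if `f` can return `Yes`.
            collect_pos = pre_attr_pos.unwrap();
        }
        (res, collect_pos.start_pos..self.pos)
    }
}

fn main() {
    let mut p = Parser { pos: 0 };
    let pre_attr_pos = p.collect_pos(); // position before the outer attributes
    p.pos += 2; // pretend two attribute tokens were consumed
    let (_node, range) = p.collect_tokens(Some(pre_attr_pos), |p| {
        p.pos += 3; // pretend the expression itself was parsed
        // e.g. an expression statement whose tokens must include the attributes
        ((), UsePreAttrPos::Yes)
    });
    assert_eq!(range, 0..5); // collection starts at the pre-attribute position
    println!("collected token range: {range:?}");
}

In the real parser the interesting callers are `parse_expr_force_collect` and `parse_stmt_without_recovery` (their diffs are not part of this excerpt); the call sites visible below, such as `parse_attr_item`, simply pass `None` and return `UsePreAttrPos::No`.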

12 files changed (+413, −291 lines)

Diff for: compiler/rustc_parse/src/parser/attr.rs

+11 −7

@@ -10,6 +10,7 @@ use tracing::debug;
 
 use super::{
     AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing,
+    UsePreAttrPos,
 };
 use crate::{errors, fluent_generated as fluent, maybe_whole};
 
@@ -259,7 +260,8 @@ impl<'a> Parser<'a> {
     pub fn parse_attr_item(&mut self, force_collect: ForceCollect) -> PResult<'a, ast::AttrItem> {
         maybe_whole!(self, NtMeta, |attr| attr.into_inner());
 
-        let do_parse = |this: &mut Self, _empty_attrs| {
+        // Attr items don't have attributes.
+        self.collect_tokens(None, AttrWrapper::empty(), force_collect, |this, _empty_attrs| {
             let is_unsafe = this.eat_keyword(kw::Unsafe);
             let unsafety = if is_unsafe {
                 let unsafe_span = this.prev_token.span;
@@ -275,10 +277,12 @@ impl<'a> Parser<'a> {
             if is_unsafe {
                 this.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
             }
-            Ok((ast::AttrItem { unsafety, path, args, tokens: None }, Trailing::No))
-        };
-        // Attr items don't have attributes.
-        self.collect_tokens_trailing_token(AttrWrapper::empty(), force_collect, do_parse)
+            Ok((
+                ast::AttrItem { unsafety, path, args, tokens: None },
+                Trailing::No,
+                UsePreAttrPos::No,
+            ))
+        })
     }
 
     /// Parses attributes that appear after the opening of an item. These should
@@ -311,8 +315,8 @@ impl<'a> Parser<'a> {
             };
             if let Some(attr) = attr {
                 // If we are currently capturing tokens (i.e. we are within a call to
-                // `Parser::collect_tokens_trailing_tokens`) record the token positions of this
-                // inner attribute, for possible later processing in a `LazyAttrTokenStream`.
+                // `Parser::collect_tokens`) record the token positions of this inner attribute,
+                // for possible later processing in a `LazyAttrTokenStream`.
                 if let Capturing::Yes = self.capture_state.capturing {
                     let end_pos = self.num_bump_calls;
                     let parser_range = ParserRange(start_pos..end_pos);

Diff for: compiler/rustc_parse/src/parser/attr_wrapper.rs

+67 −28

@@ -15,14 +15,28 @@ use super::{
     TokenCursor, Trailing,
 };
 
+// When collecting tokens, this fully captures the start point. Usually it's
+// just after outer attributes, but occasionally it's before.
+#[derive(Clone, Debug)]
+pub(super) struct CollectPos {
+    start_token: (Token, Spacing),
+    cursor_snapshot: TokenCursor,
+    start_pos: u32,
+}
+
+pub(super) enum UsePreAttrPos {
+    No,
+    Yes,
+}
+
 /// A wrapper type to ensure that the parser handles outer attributes correctly.
 /// When we parse outer attributes, we need to ensure that we capture tokens
 /// for the attribute target. This allows us to perform cfg-expansion on
 /// a token stream before we invoke a derive proc-macro.
 ///
 /// This wrapper prevents direct access to the underlying `ast::AttrVec`.
 /// Parsing code can only get access to the underlying attributes
-/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
+/// by passing an `AttrWrapper` to `collect_tokens`.
 /// This makes it difficult to accidentally construct an AST node
 /// (which stores an `ast::AttrVec`) without first collecting tokens.
 ///
@@ -33,16 +47,18 @@ pub(super) struct AttrWrapper {
     attrs: AttrVec,
     // The start of the outer attributes in the parser's token stream.
     // This lets us create a `NodeReplacement` for the entire attribute
-    // target, including outer attributes.
-    start_pos: u32,
+    // target, including outer attributes. `None` if there are no outer
+    // attributes.
+    start_pos: Option<u32>,
 }
 
 impl AttrWrapper {
     pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
-        AttrWrapper { attrs, start_pos }
+        AttrWrapper { attrs, start_pos: Some(start_pos) }
     }
+
     pub(super) fn empty() -> AttrWrapper {
-        AttrWrapper { attrs: AttrVec::new(), start_pos: u32::MAX }
+        AttrWrapper { attrs: AttrVec::new(), start_pos: None }
     }
 
     pub(super) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
@@ -77,7 +93,7 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
 }
 
 // From a value of this type we can reconstruct the `TokenStream` seen by the
-// `f` callback passed to a call to `Parser::collect_tokens_trailing_token`, by
+// `f` callback passed to a call to `Parser::collect_tokens`, by
 // replaying the getting of the tokens. This saves us producing a `TokenStream`
 // if it is never needed, e.g. a captured `macro_rules!` argument that is never
 // passed to a proc macro. In practice, token stream creation happens rarely
@@ -166,16 +182,30 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
 }
 
 impl<'a> Parser<'a> {
+    pub(super) fn collect_pos(&self) -> CollectPos {
+        CollectPos {
+            start_token: (self.token.clone(), self.token_spacing),
+            cursor_snapshot: self.token_cursor.clone(),
+            start_pos: self.num_bump_calls,
+        }
+    }
+
     /// Parses code with `f`. If appropriate, it records the tokens (in
     /// `LazyAttrTokenStream` form) that were parsed in the result, accessible
     /// via the `HasTokens` trait. The `Trailing` part of the callback's
     /// result indicates if an extra token should be captured, e.g. a comma or
-    /// semicolon.
+    /// semicolon. The `UsePreAttrPos` part of the callback's result indicates
+    /// if we should use `pre_attr_pos` as the collection start position (only
+    /// required in a few cases).
     ///
     /// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The
     /// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for
     /// details.
     ///
+    /// `pre_attr_pos` is the position before the outer attributes (or the node
+    /// itself, if no outer attributes are present). It is only needed if `f`
+    /// can return `UsePreAttrPos::Yes`.
+    ///
     /// Note: If your callback consumes an opening delimiter (including the
     /// case where `self.token` is an opening delimiter on entry to this
     /// function), you must also consume the corresponding closing delimiter.
@@ -197,11 +227,12 @@ impl<'a> Parser<'a> {
     /// } // 32..33
     /// } // 33..34
     /// ```
-    pub(super) fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
+    pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
         &mut self,
+        pre_attr_pos: Option<CollectPos>,
         attrs: AttrWrapper,
         force_collect: ForceCollect,
-        f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, Trailing)>,
+        f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
     ) -> PResult<'a, R> {
         // We must collect if anything could observe the collected tokens, i.e.
         // if any of the following conditions hold.
@@ -220,23 +251,20 @@ impl<'a> Parser<'a> {
             return Ok(f(self, attrs.attrs)?.0);
         }
 
-        let start_token = (self.token.clone(), self.token_spacing);
-        let cursor_snapshot = self.token_cursor.clone();
-        let start_pos = self.num_bump_calls;
+        let mut collect_pos = self.collect_pos();
         let has_outer_attrs = !attrs.attrs.is_empty();
         let parser_replacements_start = self.capture_state.parser_replacements.len();
 
         // We set and restore `Capturing::Yes` on either side of the call to
-        // `f`, so we can distinguish the outermost call to
-        // `collect_tokens_trailing_token` (e.g. parsing `m` in the example
-        // above) from any inner (indirectly recursive) calls (e.g. parsing `g`
-        // in the example above). This distinction is used below and in
-        // `Parser::parse_inner_attributes`.
-        let (mut ret, capture_trailing) = {
+        // `f`, so we can distinguish the outermost call to `collect_tokens`
+        // (e.g. parsing `m` in the example above) from any inner (indirectly
+        // recursive) calls (e.g. parsing `g` in the example above). This
+        // distinction is used below and in `Parser::parse_inner_attributes`.
+        let (mut ret, capture_trailing, use_pre_attr_pos) = {
             let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
-            let f_res = f(self, attrs.attrs);
+            let res = f(self, attrs.attrs);
            self.capture_state.capturing = prev_capturing;
-            f_res?
+            res?
         };
 
         // When we're not in `capture_cfg` mode, then skip collecting and
@@ -279,6 +307,14 @@ impl<'a> Parser<'a> {
             return Ok(ret);
         }
 
+        // Replace the post-attribute collection start position with the
+        // pre-attribute position supplied, if `f` indicated it is necessary.
+        // (The caller is responsible for providing a non-`None` `pre_attr_pos`
+        // if this is a possibility.)
+        if matches!(use_pre_attr_pos, UsePreAttrPos::Yes) {
+            collect_pos = pre_attr_pos.unwrap();
+        }
+
         let parser_replacements_end = self.capture_state.parser_replacements.len();
 
         assert!(
@@ -294,7 +330,7 @@ impl<'a> Parser<'a> {
            // `AttrTokenStream`, we will create the proper token.
            + self.break_last_token as u32;
 
-        let num_calls = end_pos - start_pos;
+        let num_calls = end_pos - collect_pos.start_pos;
 
        // Take the captured `ParserRange`s for any inner attributes that we parsed in
        // `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
@@ -328,7 +364,9 @@ impl<'a> Parser<'a> {
                .iter()
                .cloned()
                .chain(inner_attr_parser_replacements.iter().cloned())
-                .map(|(parser_range, data)| (NodeRange::new(parser_range, start_pos), data))
+                .map(|(parser_range, data)| {
+                    (NodeRange::new(parser_range, collect_pos.start_pos), data)
+                })
                .collect()
        };
 
@@ -355,9 +393,9 @@ impl<'a> Parser<'a> {
        // - `tokens`: lazy tokens for `g` (with its inner attr deleted).
 
        let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
-            start_token,
+            start_token: collect_pos.start_token,
+            cursor_snapshot: collect_pos.cursor_snapshot,
            num_calls,
-            cursor_snapshot,
            break_last_token: self.break_last_token,
            node_replacements,
        });
@@ -368,9 +406,9 @@ impl<'a> Parser<'a> {
        }
 
        // If `capture_cfg` is set and we're inside a recursive call to
-        // `collect_tokens_trailing_token`, then we need to register a replace range
-        // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
-        // on the captured token stream.
+        // `collect_tokens`, then we need to register a replace range if we
+        // have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
+        // cfg-expansion on the captured token stream.
        if self.capture_cfg
            && matches!(self.capture_state.capturing, Capturing::Yes)
            && has_cfg_or_cfg_attr(ret.attrs())
@@ -389,7 +427,8 @@ impl<'a> Parser<'a> {
            // Set things up so that the entire AST node that we just parsed, including attributes,
            // will be replaced with `target` in the lazy token stream. This will allow us to
            // cfg-expand this AST node.
-            let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
+            let start_pos =
+                if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
            let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
            self.capture_state
                .parser_replacements

Diff for: compiler/rustc_parse/src/parser/diagnostics.rs

+10 −9

@@ -2487,13 +2487,14 @@ impl<'a> Parser<'a> {
     pub(super) fn handle_unambiguous_unbraced_const_arg(&mut self) -> PResult<'a, P<Expr>> {
         let start = self.token.span;
         let attrs = self.parse_outer_attributes()?;
-        let expr = self.parse_expr_res(Restrictions::CONST_EXPR, attrs).map_err(|mut err| {
-            err.span_label(
-                start.shrink_to_lo(),
-                "while parsing a const generic argument starting here",
-            );
-            err
-        })?;
+        let (expr, _) =
+            self.parse_expr_res(Restrictions::CONST_EXPR, attrs).map_err(|mut err| {
+                err.span_label(
+                    start.shrink_to_lo(),
+                    "while parsing a const generic argument starting here",
+                );
+                err
+            })?;
         if !self.expr_is_valid_const_arg(&expr) {
             self.dcx().emit_err(ConstGenericWithoutBraces {
                 span: expr.span,
@@ -2613,7 +2614,7 @@ impl<'a> Parser<'a> {
             let attrs = self.parse_outer_attributes()?;
             self.parse_expr_res(Restrictions::CONST_EXPR, attrs)
         })() {
-            Ok(expr) => {
+            Ok((expr, _)) => {
                 // Find a mistake like `MyTrait<Assoc == S::Assoc>`.
                 if snapshot.token == token::EqEq {
                     err.span_suggestion(
@@ -2671,7 +2672,7 @@ impl<'a> Parser<'a> {
         })() {
             // Since we don't know the exact reason why we failed to parse the type or the
             // expression, employ a simple heuristic to weed out some pathological cases.
-            Ok(expr) if let token::Comma | token::Gt = snapshot.token.kind => {
+            Ok((expr, _)) if let token::Comma | token::Gt = snapshot.token.kind => {
                 self.restore_snapshot(snapshot);
                 Some(expr)
             }
