Commit 5fa22fe
Auto merge of #81286 - Aaron1011:collect-tokens-attrs, r=petrochenkov
Require passing an `AttrWrapper` to `collect_tokens_trailing_token`

This is a pure refactoring split out from #80689. It represents the most invasive part of that PR, requiring changes in every caller of `parse_outer_attributes`.

In order to eagerly expand `#[cfg]` attributes while preserving the original `TokenStream`, we need to know the range of tokens that corresponds to every attribute target. This is accomplished by making `parse_outer_attributes` return an opaque `AttrWrapper` struct. An `AttrWrapper` must be converted to a plain `AttrVec` by passing it to `collect_tokens_trailing_token`. This makes it difficult to accidentally construct an AST node with attributes without calling `collect_tokens_trailing_token`, since AST nodes store an `AttrVec`, not an `AttrWrapper`.

As a result, we now call `collect_tokens_trailing_token` for attribute targets which only support inert attributes, such as generic arguments and struct fields. Currently, the constructed `LazyTokenStream` is simply discarded. Future PRs will record the token range corresponding to the attribute target, allowing those tokens to be removed from an enclosing `collect_tokens_trailing_token` call if necessary.
2 parents: b86674e + 3321d70

10 files changed: +777 -541 lines changed
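Before the diffs, here is a minimal, runnable sketch of the opaque-wrapper pattern the commit message describes. Nothing in it is rustc's real API: `RawAttrs` and `with_collected` are invented stand-ins for `AttrWrapper` and `collect_tokens_trailing_token`, and `Vec<String>` stands in for `AttrVec`.

```rust
// Minimal sketch of the opaque-wrapper idea, independent of rustc's real
// parser types. `RawAttrs` hides its field inside its own module, so the
// only way to turn it back into usable attributes is `with_collected`,
// which is where token capture would be hooked in.
mod wrapper {
    pub struct RawAttrs {
        attrs: Vec<String>, // stand-in for rustc's `AttrVec`
    }

    impl RawAttrs {
        pub fn new(attrs: Vec<String>) -> RawAttrs {
            RawAttrs { attrs }
        }

        /// The single choke point: callers only see the attributes inside
        /// the closure, after "collection" has had a chance to run.
        pub fn with_collected<R>(self, f: impl FnOnce(Vec<String>) -> R) -> R {
            // (the real `collect_tokens_trailing_token` would snapshot the
            // token cursor here and attach a `LazyTokenStream` to the result)
            f(self.attrs)
        }
    }
}

fn main() {
    let raw = wrapper::RawAttrs::new(vec!["#[derive(Debug)]".to_string()]);
    // `raw.attrs` would be a compile error here: the field is private.
    let node = raw.with_collected(|attrs| format!("node with {} attr(s)", attrs.len()));
    println!("{node}");
}
```

Because the field is private to the `wrapper` module, every path from parsed attributes to an AST node is forced through the single conversion function, which is exactly where token collection can be hooked in.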

Diff for: compiler/rustc_ast/src/ast.rs (+15)

```diff
@@ -2975,3 +2975,18 @@ macro_rules! derive_has_tokens {
 derive_has_tokens! {
     Item, Expr, Ty, AttrItem, Visibility, Path, Block, Pat
 }
+
+macro_rules! derive_has_attrs_no_tokens {
+    ($($ty:path),*) => { $(
+        impl HasTokens for $ty {
+            fn finalize_tokens(&mut self, _tokens: LazyTokenStream) {}
+        }
+    )* }
+}
+
+// These ast nodes only support inert attributes, so they don't
+// store tokens (since nothing can observe them)
+derive_has_attrs_no_tokens! {
+    StructField, Arm,
+    Field, FieldPat, Variant, Param, GenericParam
+}
```
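For any one of the listed types, the macro generates a `HasTokens` impl whose `finalize_tokens` is a no-op. A self-contained sketch of that expansion follows; the `StructField`, `LazyTokenStream`, and `HasTokens` definitions below are simplified stubs, not the real rustc ones.

```rust
// Self-contained sketch of what the macro generates for one listed type;
// `StructField`, `LazyTokenStream`, and `HasTokens` are stubbed out here.
struct LazyTokenStream;
struct StructField;

trait HasTokens {
    fn finalize_tokens(&mut self, tokens: LazyTokenStream);
}

macro_rules! derive_has_attrs_no_tokens {
    ($($ty:path),*) => { $(
        impl HasTokens for $ty {
            // Inert-attribute-only nodes simply drop the collected tokens.
            fn finalize_tokens(&mut self, _tokens: LazyTokenStream) {}
        }
    )* }
}

derive_has_attrs_no_tokens! { StructField }

fn main() {
    let mut field = StructField;
    field.finalize_tokens(LazyTokenStream); // a no-op, by design
}
```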

Diff for: compiler/rustc_parse/src/parser/attr.rs (+7 -5)

```diff
@@ -1,4 +1,4 @@
-use super::{Parser, PathStyle};
+use super::{AttrWrapper, Parser, PathStyle};
 use rustc_ast as ast;
 use rustc_ast::attr;
 use rustc_ast::token::{self, Nonterminal};
@@ -26,7 +26,7 @@ pub(super) const DEFAULT_INNER_ATTR_FORBIDDEN: InnerAttrPolicy<'_> = InnerAttrPo
 
 impl<'a> Parser<'a> {
     /// Parses attributes that appear before an item.
-    pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> {
+    pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, AttrWrapper> {
         let mut attrs: Vec<ast::Attribute> = Vec::new();
         let mut just_parsed_doc_comment = false;
         loop {
@@ -74,7 +74,7 @@ impl<'a> Parser<'a> {
                 break;
             }
         }
-        Ok(attrs)
+        Ok(AttrWrapper::new(attrs))
     }
 
     /// Matches `attribute = # ! [ meta_item ]`.
@@ -89,7 +89,8 @@ impl<'a> Parser<'a> {
             inner_parse_policy, self.token
         );
         let lo = self.token.span;
-        self.collect_tokens(|this| {
+        // Attributes can't have attributes of their own
+        self.collect_tokens_no_attrs(|this| {
             if this.eat(&token::Pound) {
                 let style = if this.eat(&token::Not) {
                     ast::AttrStyle::Inner
@@ -163,7 +164,8 @@ impl<'a> Parser<'a> {
             let args = this.parse_attr_args()?;
             Ok(ast::AttrItem { path, args, tokens: None })
         };
-        if capture_tokens { self.collect_tokens(do_parse) } else { do_parse(self) }?
+        // Attr items don't have attributes
+        if capture_tokens { self.collect_tokens_no_attrs(do_parse) } else { do_parse(self) }?
         })
     }
 
```
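Taken together, the two signature changes above impose a new caller-side convention: parse the attributes into a wrapper, then hand the wrapper to `collect_tokens_trailing_token` to build the node. The sketch below mirrors the names from this PR (`AttrWrapper`, `ForceCollect`, `TrailingToken`, `collect_tokens_trailing_token`, `collect_tokens_no_attrs`) but stubs out everything else; in particular, the way `collect_tokens_no_attrs` is layered over `collect_tokens_trailing_token` with `AttrWrapper::empty()` is an assumption for illustration, not an excerpt from rustc.

```rust
// Caller-side sketch with stub types; not rustc's real definitions.
struct Attribute;

struct AttrWrapper {
    attrs: Vec<Attribute>,
}

impl AttrWrapper {
    fn empty() -> AttrWrapper {
        AttrWrapper { attrs: vec![] }
    }
}

#[allow(dead_code)]
enum ForceCollect {
    Yes,
    No,
}

#[allow(dead_code)]
enum TrailingToken {
    None,
    Semi,
    MaybeComma,
}

struct Expr {
    attrs: Vec<Attribute>,
}

struct Parser;

impl Parser {
    // After this PR: returns an opaque wrapper, not Vec<Attribute>.
    fn parse_outer_attributes(&mut self) -> AttrWrapper {
        AttrWrapper { attrs: vec![Attribute] }
    }

    // The only way to get the attributes back out of the wrapper.
    fn collect_tokens_trailing_token<R>(
        &mut self,
        attrs: AttrWrapper,
        _force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, Vec<Attribute>) -> (R, TrailingToken),
    ) -> R {
        // (the real method snapshots the token cursor and attaches a
        // LazyTokenStream to the result; elided in this sketch)
        f(self, attrs.attrs).0
    }

    // Assumption: a no-attrs variant can be layered on top by feeding an
    // empty wrapper through the same path, for targets (like attributes
    // themselves) that cannot carry attributes of their own.
    fn collect_tokens_no_attrs<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
        self.collect_tokens_trailing_token(AttrWrapper::empty(), ForceCollect::Yes, |this, _| {
            (f(this), TrailingToken::None)
        })
    }

    // A typical caller after the refactor: parse, then collect.
    fn parse_expr_like(&mut self) -> Expr {
        let attrs = self.parse_outer_attributes();
        self.collect_tokens_trailing_token(attrs, ForceCollect::No, |_this, attrs| {
            (Expr { attrs }, TrailingToken::None)
        })
    }
}

fn main() {
    let mut parser = Parser;
    let expr = parser.parse_expr_like();
    let unit = parser.collect_tokens_no_attrs(|_this| ());
    println!("expr has {} attr(s); unit = {:?}", expr.attrs.len(), unit);
}
```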
Diff for: compiler/rustc_parse/src/parser/attr_wrapper.rs (+185, new file)

```rust
use super::attr;
use super::{ForceCollect, Parser, TokenCursor, TrailingToken};
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::tokenstream::{DelimSpan, LazyTokenStream, Spacing};
use rustc_ast::HasTokens;
use rustc_ast::{self as ast};
use rustc_errors::PResult;
use rustc_span::{Span, DUMMY_SP};

/// A wrapper type to ensure that the parser handles outer attributes correctly.
/// When we parse outer attributes, we need to ensure that we capture tokens
/// for the attribute target. This allows us to perform cfg-expansion on
/// a token stream before we invoke a derive proc-macro.
///
/// This wrapper prevents direct access to the underlying `Vec<ast::Attribute>`.
/// Parsing code can only get access to the underlying attributes
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
/// This makes it difficult to accidentally construct an AST node
/// (which stores a `Vec<ast::Attribute>`) without first collecting tokens.
///
/// This struct has its own module, to ensure that the parser code
/// cannot directly access the `attrs` field.
#[derive(Debug, Clone)]
pub struct AttrWrapper {
    attrs: Vec<ast::Attribute>,
}

impl AttrWrapper {
    pub fn empty() -> AttrWrapper {
        AttrWrapper { attrs: vec![] }
    }
    pub fn new(attrs: Vec<ast::Attribute>) -> AttrWrapper {
        AttrWrapper { attrs }
    }
    // FIXME: Delay span bug here?
    pub(crate) fn take_for_recovery(self) -> Vec<ast::Attribute> {
        self.attrs
    }
    pub fn is_empty(&self) -> bool {
        self.attrs.is_empty()
    }
}

impl<'a> Parser<'a> {
    /// Records all tokens consumed by the provided callback,
    /// including the current token. These tokens are collected
    /// into a `LazyTokenStream`, and returned along with the result
    /// of the callback.
    ///
    /// Note: If your callback consumes an opening delimiter
    /// (including the case where you call `collect_tokens`
    /// when the current token is an opening delimiter),
    /// you must also consume the corresponding closing delimiter.
    ///
    /// That is, you can consume
    /// `something ([{ }])` or `([{}])`, but not `([{}]`
    ///
    /// This restriction shouldn't be an issue in practice,
    /// since this function is used to record the tokens for
    /// a parsed AST item, which always has matching delimiters.
    pub fn collect_tokens_trailing_token<R: HasTokens>(
        &mut self,
        attrs: AttrWrapper,
        force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, Vec<ast::Attribute>) -> PResult<'a, (R, TrailingToken)>,
    ) -> PResult<'a, R> {
        if matches!(force_collect, ForceCollect::No) && !attr::maybe_needs_tokens(&attrs.attrs) {
            return Ok(f(self, attrs.attrs)?.0);
        }
        let start_token = (self.token.clone(), self.token_spacing);
        let cursor_snapshot = self.token_cursor.clone();

        let (mut ret, trailing_token) = f(self, attrs.attrs)?;

        // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
        // and `num_calls`, we can reconstruct the `TokenStream` seen
        // by the callback. This allows us to avoid producing a `TokenStream`
        // if it is never needed - for example, a captured `macro_rules!`
        // argument that is never passed to a proc macro.
        // In practice, token stream creation happens rarely compared to
        // calls to `collect_tokens` (see some statistics in #78736),
        // so we are doing as little up-front work as possible.
        //
        // This also makes `Parser` very cheap to clone, since
        // there is no intermediate collection buffer to clone.
        #[derive(Clone)]
        struct LazyTokenStreamImpl {
            start_token: (Token, Spacing),
            cursor_snapshot: TokenCursor,
            num_calls: usize,
            desugar_doc_comments: bool,
            append_unglued_token: Option<TreeAndSpacing>,
        }
        impl CreateTokenStream for LazyTokenStreamImpl {
            fn create_token_stream(&self) -> TokenStream {
                // The token produced by the final call to `next` or `next_desugared`
                // was not actually consumed by the callback. The combination
                // of chaining the initial token and using `take` produces the desired
                // result - we produce an empty `TokenStream` if no calls were made,
                // and omit the final token otherwise.
                let mut cursor_snapshot = self.cursor_snapshot.clone();
                let tokens = std::iter::once(self.start_token.clone())
                    .chain((0..self.num_calls).map(|_| {
                        if self.desugar_doc_comments {
                            cursor_snapshot.next_desugared()
                        } else {
                            cursor_snapshot.next()
                        }
                    }))
                    .take(self.num_calls);

                make_token_stream(tokens, self.append_unglued_token.clone())
            }
        }

        let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls;
        match trailing_token {
            TrailingToken::None => {}
            TrailingToken::Semi => {
                assert_eq!(self.token.kind, token::Semi);
                num_calls += 1;
            }
            TrailingToken::MaybeComma => {
                if self.token.kind == token::Comma {
                    num_calls += 1;
                }
            }
        }

        let lazy_impl = LazyTokenStreamImpl {
            start_token,
            num_calls,
            cursor_snapshot,
            desugar_doc_comments: self.desugar_doc_comments,
            append_unglued_token: self.token_cursor.append_unglued_token.clone(),
        };
        ret.finalize_tokens(LazyTokenStream::new(lazy_impl));
        Ok(ret)
    }
}

/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
/// of open and close delims.
fn make_token_stream(
    tokens: impl Iterator<Item = (Token, Spacing)>,
    append_unglued_token: Option<TreeAndSpacing>,
) -> TokenStream {
    #[derive(Debug)]
    struct FrameData {
        open: Span,
        inner: Vec<(TokenTree, Spacing)>,
    }
    let mut stack = vec![FrameData { open: DUMMY_SP, inner: vec![] }];
    for (token, spacing) in tokens {
        match token {
            Token { kind: TokenKind::OpenDelim(_), span } => {
                stack.push(FrameData { open: span, inner: vec![] });
            }
            Token { kind: TokenKind::CloseDelim(delim), span } => {
                let frame_data = stack.pop().expect("Token stack was empty!");
                let dspan = DelimSpan::from_pair(frame_data.open, span);
                let stream = TokenStream::new(frame_data.inner);
                let delimited = TokenTree::Delimited(dspan, delim, stream);
                stack
                    .last_mut()
                    .unwrap_or_else(|| panic!("Bottom token frame is missing for tokens!"))
                    .inner
                    .push((delimited, Spacing::Alone));
            }
            token => {
                stack
                    .last_mut()
                    .expect("Bottom token frame is missing!")
                    .inner
                    .push((TokenTree::Token(token), spacing));
            }
        }
    }
    let mut final_buf = stack.pop().expect("Missing final buf!");
    final_buf.inner.extend(append_unglued_token);
    assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
    TokenStream::new(final_buf.inner)
}
```
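The delimiter-matching loop in `make_token_stream` is easier to see on a toy token type. Below is a runnable mini version of the same stack algorithm; `Tok` and `Tree` are invented stand-ins for rustc's `Token` and `TokenTree`.

```rust
// Runnable mini version of the make_token_stream algorithm: push a frame
// at each open delimiter, pop it at the matching close delimiter, and nest
// the popped frame's contents into its parent frame.
#[derive(Debug)]
enum Tok {
    Open,
    Close,
    Ident(&'static str),
}

#[derive(Debug)]
enum Tree {
    Leaf(&'static str),
    Delimited(Vec<Tree>),
}

fn make_tree(tokens: impl IntoIterator<Item = Tok>) -> Vec<Tree> {
    // The bottom frame collects the top-level trees, playing the same role
    // as the DUMMY_SP frame in make_token_stream.
    let mut stack: Vec<Vec<Tree>> = vec![vec![]];
    for tok in tokens {
        match tok {
            Tok::Open => stack.push(vec![]),
            Tok::Close => {
                let inner = stack.pop().expect("token stack was empty");
                stack
                    .last_mut()
                    .expect("bottom token frame is missing")
                    .push(Tree::Delimited(inner));
            }
            Tok::Ident(name) => {
                stack.last_mut().expect("bottom token frame is missing").push(Tree::Leaf(name));
            }
        }
    }
    let final_buf = stack.pop().expect("missing final buf");
    assert!(stack.is_empty(), "unbalanced delimiters");
    final_buf
}

fn main() {
    // `foo ( bar baz )` nests into [Leaf("foo"), Delimited([Leaf("bar"), Leaf("baz")])]
    let trees =
        make_tree([Tok::Ident("foo"), Tok::Open, Tok::Ident("bar"), Tok::Ident("baz"), Tok::Close]);
    println!("{trees:?}");
}
```

The `once`/`chain`/`take` combination in `create_token_stream` is a counting trick along similar lines: with `num_calls == 0` the `take` yields nothing, and otherwise the iterator yields the saved start token plus the first `num_calls - 1` re-read tokens, omitting the final token that the callback never actually consumed.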
