Skip to content

Commit 31df680

Browse files
committed
Eliminate TokenTreeOrTokenTreeSlice.
As its name suggests, `TokenTreeOrTokenTreeSlice` is either a single `TokenTree` or a slice of them. It has methods `len` and `get_tt` that let it be treated much like an ordinary slice. The reason it isn't an ordinary slice is that for `TokenTree::Delimited` the open and close delimiters are represented implicitly, and when they are needed they are constructed on the fly with `Delimited::{open,close}_tt`, rather than being present in memory. This commit changes `Delimited` so the open and close delimiters are represented explicitly. As a result, `TokenTreeOrTokenTreeSlice` is no longer needed and `MatcherPos` and `MatcherTtFrame` can just use an ordinary slice. `TokenTree::{len,get_tt}` are also removed, because they were only needed to support `TokenTreeOrTokenTreeSlice`. The change makes the code shorter and a little bit faster on benchmarks that use macro expansion heavily, partly because `MatcherPos` is a lot smaller (less data to `memcpy`) and partly because ordinary slice operations are faster than `TokenTreeOrTokenTreeSlice::{len,get_tt}`.
1 parent 754dc8e commit 31df680

File tree

6 files changed

+146
-173
lines changed

6 files changed

+146
-173
lines changed

compiler/rustc_expand/src/mbe.rs

+41-47
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,48 @@ use rustc_data_structures::sync::Lrc;
1717
use rustc_span::symbol::Ident;
1818
use rustc_span::Span;
1919

20-
/// Contains the sub-token-trees of a "delimited" token tree, such as the contents of `(`. Note
21-
/// that the delimiter itself might be `NoDelim`.
20+
/// Contains the sub-token-trees of a "delimited" token tree such as `(a b c)`. The delimiter itself
21+
/// might be `NoDelim`.
2222
#[derive(Clone, PartialEq, Encodable, Decodable, Debug)]
2323
struct Delimited {
2424
delim: token::DelimToken,
25-
tts: Vec<TokenTree>,
25+
/// Note: This contains the opening and closing delimiter tokens (e.g. `(` and `)`). Note that
26+
/// these could be `NoDelim`. These token kinds must match `delim`, and the methods below
27+
/// debug_assert this.
28+
all_tts: Vec<TokenTree>,
2629
}
2730

2831
impl Delimited {
29-
/// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter.
30-
fn open_tt(&self, span: DelimSpan) -> TokenTree {
31-
TokenTree::token(token::OpenDelim(self.delim), span.open)
32+
/// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter. Panics if
33+
/// the delimiter is `NoDelim`.
34+
fn open_tt(&self) -> &TokenTree {
35+
let tt = self.all_tts.first().unwrap();
36+
debug_assert!(matches!(
37+
tt,
38+
&TokenTree::Token(token::Token { kind: token::OpenDelim(d), .. }) if d == self.delim
39+
));
40+
tt
41+
}
42+
43+
/// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter. Panics if
44+
/// the delimiter is `NoDelim`.
45+
fn close_tt(&self) -> &TokenTree {
46+
let tt = self.all_tts.last().unwrap();
47+
debug_assert!(matches!(
48+
tt,
49+
&TokenTree::Token(token::Token { kind: token::CloseDelim(d), .. }) if d == self.delim
50+
));
51+
tt
3252
}
3353

34-
/// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter.
35-
fn close_tt(&self, span: DelimSpan) -> TokenTree {
36-
TokenTree::token(token::CloseDelim(self.delim), span.close)
54+
/// Returns the tts excluding the outer delimiters.
55+
///
56+
/// FIXME: #67062 has details about why this is sub-optimal.
57+
fn inner_tts(&self) -> &[TokenTree] {
58+
// These functions are called for the assertions within them.
59+
let _open_tt = self.open_tt();
60+
let _close_tt = self.close_tt();
61+
&self.all_tts[1..self.all_tts.len() - 1]
3762
}
3863
}
3964

@@ -73,35 +98,24 @@ enum KleeneOp {
7398
ZeroOrOne,
7499
}
75100

76-
/// Similar to `tokenstream::TokenTree`, except that `$i`, `$i:ident`, `$(...)`,
77-
/// and `${...}` are "first-class" token trees. Useful for parsing macros.
101+
/// Similar to `tokenstream::TokenTree`, except that `Sequence`, `MetaVar`, `MetaVarDecl`, and
102+
/// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros.
78103
#[derive(Debug, Clone, PartialEq, Encodable, Decodable)]
79104
enum TokenTree {
80105
Token(Token),
106+
/// A delimited sequence, e.g. `($e:expr)` (LHS) or `{ $e }` (RHS).
81107
Delimited(DelimSpan, Lrc<Delimited>),
82-
/// A kleene-style repetition sequence
108+
/// A kleene-style repetition sequence, e.g. `$($e:expr)*` (LHS) or `$($e),*` (RHS).
83109
Sequence(DelimSpan, Lrc<SequenceRepetition>),
84-
/// e.g., `$var`
110+
/// e.g., `$var`.
85111
MetaVar(Span, Ident),
86-
/// e.g., `$var:expr`. This is only used in the left hand side of MBE macros.
112+
/// e.g., `$var:expr`. Only appears on the LHS.
87113
MetaVarDecl(Span, Ident /* name to bind */, Option<NonterminalKind>),
88-
/// A meta-variable expression inside `${...}`
114+
/// A meta-variable expression inside `${...}`.
89115
MetaVarExpr(DelimSpan, MetaVarExpr),
90116
}
91117

92118
impl TokenTree {
93-
/// Return the number of tokens in the tree.
94-
fn len(&self) -> usize {
95-
match *self {
96-
TokenTree::Delimited(_, ref delimed) => match delimed.delim {
97-
token::NoDelim => delimed.tts.len(),
98-
_ => delimed.tts.len() + 2,
99-
},
100-
TokenTree::Sequence(_, ref seq) => seq.tts.len(),
101-
_ => 0,
102-
}
103-
}
104-
105119
/// Returns `true` if the given token tree is delimited.
106120
fn is_delimited(&self) -> bool {
107121
matches!(*self, TokenTree::Delimited(..))
@@ -115,26 +129,6 @@ impl TokenTree {
115129
}
116130
}
117131

118-
/// Gets the `index`-th sub-token-tree. This only makes sense for delimited trees and sequences.
119-
fn get_tt(&self, index: usize) -> TokenTree {
120-
match (self, index) {
121-
(&TokenTree::Delimited(_, ref delimed), _) if delimed.delim == token::NoDelim => {
122-
delimed.tts[index].clone()
123-
}
124-
(&TokenTree::Delimited(span, ref delimed), _) => {
125-
if index == 0 {
126-
return delimed.open_tt(span);
127-
}
128-
if index == delimed.tts.len() + 1 {
129-
return delimed.close_tt(span);
130-
}
131-
delimed.tts[index - 1].clone()
132-
}
133-
(&TokenTree::Sequence(_, ref seq), _) => seq.tts[index].clone(),
134-
_ => panic!("Cannot expand a token tree"),
135-
}
136-
}
137-
138132
/// Retrieves the `TokenTree`'s span.
139133
fn span(&self) -> Span {
140134
match *self {

compiler/rustc_expand/src/mbe/macro_check.rs

+11-5
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ fn check_binders(
281281
// `MetaVarExpr` can not appear in the LHS of a macro arm
282282
TokenTree::MetaVarExpr(..) => {}
283283
TokenTree::Delimited(_, ref del) => {
284-
for tt in &del.tts {
284+
for tt in del.inner_tts() {
285285
check_binders(sess, node_id, tt, macros, binders, ops, valid);
286286
}
287287
}
@@ -344,7 +344,7 @@ fn check_occurrences(
344344
check_ops_is_prefix(sess, node_id, macros, binders, ops, dl.entire(), name);
345345
}
346346
TokenTree::Delimited(_, ref del) => {
347-
check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid);
347+
check_nested_occurrences(sess, node_id, del.inner_tts(), macros, binders, ops, valid);
348348
}
349349
TokenTree::Sequence(_, ref seq) => {
350350
let ops = ops.push(seq.kleene);
@@ -431,14 +431,20 @@ fn check_nested_occurrences(
431431
{
432432
let macro_rules = state == NestedMacroState::MacroRulesNotName;
433433
state = NestedMacroState::Empty;
434-
let rest =
435-
check_nested_macro(sess, node_id, macro_rules, &del.tts, &nested_macros, valid);
434+
let rest = check_nested_macro(
435+
sess,
436+
node_id,
437+
macro_rules,
438+
del.inner_tts(),
439+
&nested_macros,
440+
valid,
441+
);
436442
// If we did not check the whole macro definition, then check the rest as if outside
437443
// the macro definition.
438444
check_nested_occurrences(
439445
sess,
440446
node_id,
441-
&del.tts[rest..],
447+
&del.inner_tts()[rest..],
442448
macros,
443449
binders,
444450
ops,

compiler/rustc_expand/src/mbe/macro_parser.rs

+25-57
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,8 @@
7272
7373
crate use NamedMatch::*;
7474
crate use ParseResult::*;
75-
use TokenTreeOrTokenTreeSlice::*;
7675

77-
use crate::mbe::{self, DelimSpan, SequenceRepetition, TokenTree};
76+
use crate::mbe::{self, SequenceRepetition, TokenTree};
7877

7978
use rustc_ast::token::{self, DocComment, Nonterminal, Token};
8079
use rustc_parse::parser::Parser;
@@ -90,43 +89,14 @@ use std::borrow::Cow;
9089
use std::collections::hash_map::Entry::{Occupied, Vacant};
9190
use std::mem;
9291

93-
// To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body.
94-
95-
/// Either a slice of token trees or a single one. This is used as the representation of the
96-
/// token trees that make up a matcher.
97-
#[derive(Clone)]
98-
enum TokenTreeOrTokenTreeSlice<'tt> {
99-
Tt(TokenTree),
100-
TtSlice(&'tt [TokenTree]),
101-
}
102-
103-
impl<'tt> TokenTreeOrTokenTreeSlice<'tt> {
104-
/// Returns the number of constituent top-level token trees of `self` (top-level in that it
105-
/// will not recursively descend into subtrees).
106-
fn len(&self) -> usize {
107-
match *self {
108-
TtSlice(ref v) => v.len(),
109-
Tt(ref tt) => tt.len(),
110-
}
111-
}
112-
113-
/// The `index`-th token tree of `self`.
114-
fn get_tt(&self, index: usize) -> TokenTree {
115-
match *self {
116-
TtSlice(ref v) => v[index].clone(),
117-
Tt(ref tt) => tt.get_tt(index),
118-
}
119-
}
120-
}
121-
12292
/// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`.
12393
///
12494
/// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
12595
/// descended into.
12696
#[derive(Clone)]
12797
struct MatcherTtFrame<'tt> {
12898
/// The "parent" matcher that we are descending into.
129-
elts: TokenTreeOrTokenTreeSlice<'tt>,
99+
elts: &'tt [TokenTree],
130100
/// The position of the "dot" in `elts` at the time we descended.
131101
idx: usize,
132102
}
@@ -138,7 +108,7 @@ type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
138108
#[derive(Clone)]
139109
struct MatcherPos<'tt> {
140110
/// The slice of token trees that makes up the matcher. `elts` is short for "elements".
141-
top_elts: TokenTreeOrTokenTreeSlice<'tt>,
111+
top_elts: &'tt [TokenTree],
142112

143113
/// The position of the "dot" in this matcher
144114
idx: usize,
@@ -183,7 +153,7 @@ struct MatcherPos<'tt> {
183153

184154
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
185155
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
186-
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 232);
156+
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 136);
187157

188158
impl<'tt> MatcherPos<'tt> {
189159
/// `len` `Vec`s (initially shared and empty) that will store matches of metavars.
@@ -203,7 +173,7 @@ impl<'tt> MatcherPos<'tt> {
203173
let match_idx_hi = count_names(ms);
204174
MatcherPos {
205175
// Start with the top level matcher given to us.
206-
top_elts: TtSlice(ms),
176+
top_elts: ms,
207177

208178
// The "dot" is before the first token of the matcher.
209179
idx: 0,
@@ -224,9 +194,9 @@ impl<'tt> MatcherPos<'tt> {
224194
}
225195
}
226196

227-
fn repetition(up: Box<MatcherPos<'tt>>, sp: DelimSpan, seq: Lrc<SequenceRepetition>) -> Self {
197+
fn repetition(up: Box<MatcherPos<'tt>>, seq: &'tt SequenceRepetition) -> Self {
228198
MatcherPos {
229-
stack: smallvec![],
199+
top_elts: &seq.tts,
230200
idx: 0,
231201
matches: Self::create_matches(up.matches.len()),
232202
match_lo: up.match_cur,
@@ -237,7 +207,7 @@ impl<'tt> MatcherPos<'tt> {
237207
sep: seq.separator.clone(),
238208
seq_op: seq.kleene.op,
239209
}),
240-
top_elts: Tt(TokenTree::Sequence(sp, seq)),
210+
stack: smallvec![],
241211
}
242212
}
243213

@@ -288,8 +258,8 @@ crate type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, N
288258
pub(super) fn count_names(ms: &[TokenTree]) -> usize {
289259
ms.iter().fold(0, |count, elt| {
290260
count
291-
+ match *elt {
292-
TokenTree::Delimited(_, ref delim) => count_names(&delim.tts),
261+
+ match elt {
262+
TokenTree::Delimited(_, delim) => count_names(delim.inner_tts()),
293263
TokenTree::MetaVar(..) => 0,
294264
TokenTree::MetaVarDecl(..) => 1,
295265
// Panicking here would abort execution because `parse_tree` makes use of this
@@ -298,7 +268,7 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
298268
// `0` is still returned to inform that no meta-variable was found. `Meta-variables
299269
// != Meta-variable expressions`
300270
TokenTree::MetaVarExpr(..) => 0,
301-
TokenTree::Sequence(_, ref seq) => seq.num_captures,
271+
TokenTree::Sequence(_, seq) => seq.num_captures,
302272
TokenTree::Token(..) => 0,
303273
}
304274
})
@@ -382,7 +352,7 @@ fn nameize<I: Iterator<Item = NamedMatch>>(
382352
}
383353
}
384354
TokenTree::Delimited(_, ref delim) => {
385-
for next_m in &delim.tts {
355+
for next_m in delim.inner_tts() {
386356
n_rec(sess, next_m, res.by_ref(), ret_val)?;
387357
}
388358
}
@@ -446,8 +416,8 @@ pub struct TtParser<'tt> {
446416
}
447417

448418
impl<'tt> TtParser<'tt> {
449-
pub(super) fn new(macro_name: Ident) -> Self {
450-
Self { macro_name, cur_items: vec![], next_items: vec![], bb_items: vec![] }
419+
pub(super) fn new(macro_name: Ident) -> TtParser<'tt> {
420+
TtParser { macro_name, cur_items: vec![], next_items: vec![], bb_items: vec![] }
451421
}
452422

453423
/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
@@ -492,8 +462,8 @@ impl<'tt> TtParser<'tt> {
492462
if idx < len {
493463
// We are in the middle of a matcher. Compare the matcher's current tt against
494464
// `token`.
495-
match item.top_elts.get_tt(idx) {
496-
TokenTree::Sequence(sp, seq) => {
465+
match &item.top_elts[idx] {
466+
TokenTree::Sequence(_sp, seq) => {
497467
let op = seq.kleene.op;
498468
if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
499469
// Allow for the possibility of zero matches of this sequence.
@@ -507,17 +477,17 @@ impl<'tt> TtParser<'tt> {
507477
}
508478

509479
// Allow for the possibility of one or more matches of this sequence.
510-
self.cur_items.push(box MatcherPos::repetition(item, sp, seq));
480+
self.cur_items.push(box MatcherPos::repetition(item, &seq));
511481
}
512482

513-
TokenTree::MetaVarDecl(span, _, None) => {
483+
&TokenTree::MetaVarDecl(span, _, None) => {
514484
// E.g. `$e` instead of `$e:expr`.
515485
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
516486
return Some(Error(span, "missing fragment specifier".to_string()));
517487
}
518488
}
519489

520-
TokenTree::MetaVarDecl(_, _, Some(kind)) => {
490+
&TokenTree::MetaVarDecl(_, _, Some(kind)) => {
521491
// Built-in nonterminals never start with these tokens, so we can eliminate
522492
// them from consideration.
523493
//
@@ -528,13 +498,14 @@ impl<'tt> TtParser<'tt> {
528498
}
529499
}
530500

531-
seq @ TokenTree::Delimited(..) => {
501+
TokenTree::Delimited(_, delimited) => {
532502
// To descend into a delimited submatcher, we push the current matcher onto
533503
// a stack and push a new item containing the submatcher onto `cur_items`.
534504
//
535505
// At the beginning of the loop, if we reach the end of the delimited
536-
// submatcher, we pop the stack to backtrack out of the descent.
537-
let lower_elts = mem::replace(&mut item.top_elts, Tt(seq));
506+
// submatcher, we pop the stack to backtrack out of the descent. Note that
507+
// we use `all_tts` to include the open and close delimiter tokens.
508+
let lower_elts = mem::replace(&mut item.top_elts, &delimited.all_tts);
538509
let idx = item.idx;
539510
item.stack.push(MatcherTtFrame { elts: lower_elts, idx });
540511
item.idx = 0;
@@ -560,7 +531,6 @@ impl<'tt> TtParser<'tt> {
560531
} else if let Some(repetition) = &item.repetition {
561532
// We are past the end of a repetition.
562533
debug_assert!(idx <= len + 1);
563-
debug_assert!(matches!(item.top_elts, Tt(TokenTree::Sequence(..))));
564534

565535
if idx == len {
566536
// Add all matches from the sequence to `up`, and move the "dot" past the
@@ -678,9 +648,7 @@ impl<'tt> TtParser<'tt> {
678648
(0, 1) => {
679649
// We need to call the black-box parser to get some nonterminal.
680650
let mut item = self.bb_items.pop().unwrap();
681-
if let TokenTree::MetaVarDecl(span, _, Some(kind)) =
682-
item.top_elts.get_tt(item.idx)
683-
{
651+
if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts[item.idx] {
684652
let match_cur = item.match_cur;
685653
// We use the span of the metavariable declaration to determine any
686654
// edition-specific matching behavior for non-terminals.
@@ -720,7 +688,7 @@ impl<'tt> TtParser<'tt> {
720688
let nts = self
721689
.bb_items
722690
.iter()
723-
.map(|item| match item.top_elts.get_tt(item.idx) {
691+
.map(|item| match item.top_elts[item.idx] {
724692
TokenTree::MetaVarDecl(_, bind, Some(kind)) => {
725693
format!("{} ('{}')", kind, bind)
726694
}

0 commit comments

Comments
 (0)