
Commit 1e848a5

Remove TokenStreamBuilder.
`TokenStreamBuilder` exists to concatenate multiple `TokenStream`s together. This commit removes it, and moves the concatenation functionality directly into `TokenStream`, via two new methods `push_tree` and `push_stream`. This makes things both simpler and faster.

`push_tree` is particularly important. `TokenStreamBuilder` only had a single `push` method, which pushed a stream. But in practice most of the time we push a single token tree rather than a stream, and `push_tree` avoids the need to build a token stream with a single entry (which requires two allocations, one for the `Lrc` and one for the `Vec`).

The main `push_tree` use arises from a change to one of the `ToInternal` impls in `proc_macro_server.rs`. It now returns a `SmallVec` instead of a `TokenStream`. This return value is then iterated over by `concat_trees`, which does `push_tree` on each element. Furthermore, the use of `SmallVec` avoids more allocations, because there are only ever one or two token trees.

Note: the removed `TokenStreamBuilder::push` method had some code to deal with a quadratic blowup case from rust-lang#57735. This commit removes that code. I tried and failed to reproduce the blowup from that PR, both before and after this change. Various other changes have happened to `TokenStreamBuilder` in the meantime, so I suspect the original problem is no longer relevant, though I don't have proof of this. Generally speaking, repeatedly extending a `Vec` without pre-determining its capacity is *not* quadratic. It's also incredibly common, within rustc and many other Rust programs, so if there were performance problems there you'd think they would show up in other places, too.
1 parent 1e8dc45 commit 1e848a5
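To make the gluing behaviour concrete, here is a minimal, self-contained sketch. `Tok`, `Spacing`, `Stream`, `glue`, and `push_tok` below are simplified stand-ins, not the real `rustc_ast` types or the actual `TokenStream::push_tree`; the only point is that appending a single token tree needs no intermediate one-entry stream.

// Simplified model of the gluing step that `push_tree` performs.
// All types here are illustrative stand-ins for the rustc_ast ones.
#[derive(Clone, Debug, PartialEq)]
enum Spacing {
    Alone,
    Joint,
}

#[derive(Clone, Debug, PartialEq)]
struct Tok {
    text: String,
    spacing: Spacing,
}

// Try to glue two punctuation tokens, e.g. ":" followed by ":" becomes "::".
fn glue(a: &Tok, b: &Tok) -> Option<Tok> {
    if a.text == ":" && b.text == ":" {
        Some(Tok { text: "::".into(), spacing: b.spacing.clone() })
    } else {
        None
    }
}

#[derive(Default, Debug)]
struct Stream(Vec<Tok>);

impl Stream {
    // Analogue of `push_tree`: glue onto a trailing `Joint` token when
    // possible, otherwise just push the token.
    fn push_tok(&mut self, tok: Tok) {
        if let Some(last) = self.0.last_mut() {
            if last.spacing == Spacing::Joint {
                if let Some(glued) = glue(last, &tok) {
                    *last = glued;
                    return;
                }
            }
        }
        self.0.push(tok);
    }
}

fn main() {
    let mut s = Stream::default();
    s.push_tok(Tok { text: ":".into(), spacing: Spacing::Joint });
    s.push_tok(Tok { text: ":".into(), spacing: Spacing::Alone });
    // The two `:` tokens were glued into a single `::` token.
    assert_eq!(s.0.len(), 1);
    assert_eq!(s.0[0].text, "::");
}

In the real code the same check is factored into `try_glue_to_last`, which both `push_tree` and `push_stream` call (see the first diff below).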

3 files changed: +87 −115 lines


Diff for: compiler/rustc_ast/src/tokenstream.rs

+45 −72

@@ -245,12 +245,12 @@ impl AttrTokenStream
             // properly implemented - we always synthesize fake tokens,
             // so we never reach this code.
 
-            let mut builder = TokenStreamBuilder::new();
+            let mut stream = TokenStream::default();
             for inner_attr in inner_attrs {
-                builder.push(inner_attr.tokens());
+                stream.push_stream(inner_attr.tokens());
             }
-            builder.push(delim_tokens.clone());
-            *tree = TokenTree::Delimited(*span, *delim, builder.build());
+            stream.push_stream(delim_tokens.clone());
+            *tree = TokenTree::Delimited(*span, *delim, stream);
             found = true;
             break;
         }
@@ -505,76 +505,49 @@ impl TokenStream
 
         self.trees().map(|tree| TokenStream::flatten_token_tree(tree)).collect()
    }
-}
 
-// 99.5%+ of the time we have 1 or 2 elements in this vector.
-#[derive(Clone)]
-pub struct TokenStreamBuilder(SmallVec<[TokenStream; 2]>);
-
-impl TokenStreamBuilder {
-    pub fn new() -> TokenStreamBuilder {
-        TokenStreamBuilder(SmallVec::new())
-    }
-
-    pub fn push(&mut self, stream: TokenStream) {
-        self.0.push(stream);
-    }
-
-    pub fn build(self) -> TokenStream {
-        let mut streams = self.0;
-        match streams.len() {
-            0 => TokenStream::default(),
-            1 => streams.pop().unwrap(),
-            _ => {
-                // We will extend the first stream in `streams` with the
-                // elements from the subsequent streams. This requires using
-                // `make_mut()` on the first stream, and in practice this
-                // doesn't cause cloning 99.9% of the time.
-                //
-                // One very common use case is when `streams` has two elements,
-                // where the first stream has any number of elements within
-                // (often 1, but sometimes many more) and the second stream has
-                // a single element within.
-
-                // Determine how much the first stream will be extended.
-                // Needed to avoid quadratic blow up from on-the-fly
-                // reallocations (#57735).
-                let num_appends = streams.iter().skip(1).map(|ts| ts.len()).sum();
-
-                // Get the first stream, which will become the result stream.
-                // If it's `None`, create an empty stream.
-                let mut iter = streams.into_iter();
-                let mut res_stream_lrc = iter.next().unwrap().0;
-
-                // Append the subsequent elements to the result stream, after
-                // reserving space for them.
-                let res_vec_mut = Lrc::make_mut(&mut res_stream_lrc);
-                res_vec_mut.reserve(num_appends);
-                for stream in iter {
-                    let stream_iter = stream.0.iter().cloned();
-
-                    // If (a) `res_mut_vec` is not empty and the last tree
-                    // within it is a token tree marked with `Joint`, and (b)
-                    // `stream` is not empty and the first tree within it is a
-                    // token tree, and (c) the two tokens can be glued
-                    // together...
-                    if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = res_vec_mut.last()
-                        && let Some(TokenTree::Token(tok, spacing)) = stream.0.first()
-                        && let Some(glued_tok) = last_tok.glue(&tok)
-                    {
-                        // ...then overwrite the last token tree in
-                        // `res_vec_mut` with the glued token, and skip the
-                        // first token tree from `stream`.
-                        *res_vec_mut.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
-                        res_vec_mut.extend(stream_iter.skip(1));
-                    } else {
-                        // Append all of `stream`.
-                        res_vec_mut.extend(stream_iter);
-                    }
-                }
+    // If `vec` is not empty, try to glue `tt` onto its last token. The return
+    // value indicates if gluing took place.
+    fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
+        if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = vec.last()
+            && let TokenTree::Token(tok, spacing) = tt
+            && let Some(glued_tok) = last_tok.glue(&tok)
+        {
+            // ...then overwrite the last token tree in `vec` with the
+            // glued token, and skip the first token tree from `stream`.
+            *vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
+            true
+        } else {
+            false
+        }
+    }
 
-                TokenStream(res_stream_lrc)
-            }
+    // Push `tt` onto the end of the stream, possibly gluing it to the last
+    // token. Uses `make_mut` to maximize efficiency.
+    pub fn push_tree(&mut self, tt: TokenTree) {
+        let vec_mut = Lrc::make_mut(&mut self.0);
+
+        if Self::try_glue_to_last(vec_mut, &tt) {
+            // nothing else to do
+        } else {
+            vec_mut.push(tt);
+        }
+    }
+
+    // Push `stream` onto the end of the stream, possibly gluing the first
+    // token tree to the last token. (No other token trees will be glued.)
+    // Uses `make_mut` to maximize efficiency.
+    pub fn push_stream(&mut self, stream: TokenStream) {
+        let vec_mut = Lrc::make_mut(&mut self.0);
+
+        let stream_iter = stream.0.iter().cloned();
+
+        if let Some(first) = stream.0.first() && Self::try_glue_to_last(vec_mut, first) {
+            // Now skip the first token tree from `stream`.
+            vec_mut.extend(stream_iter.skip(1));
+        } else {
+            // Append all of `stream`.
+            vec_mut.extend(stream_iter);
         }
     }
 }
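The `reserve(num_appends)` call removed above was the guard against the quadratic blow-up from rust-lang#57735 that the commit message discusses. As a sanity check of the claim that plain `Vec` growth without pre-reserving is amortized linear, the standalone snippet below (not part of the commit) counts how often a `Vec` actually reallocates.

// Standalone sanity check, not part of the commit: push one element at a
// time with no `reserve` and count capacity changes.
fn main() {
    let n = 1_000_000u32;
    let mut v: Vec<u32> = Vec::new();
    let mut reallocations = 0;
    let mut last_capacity = v.capacity();
    for i in 0..n {
        v.push(i);
        if v.capacity() != last_capacity {
            reallocations += 1;
            last_capacity = v.capacity();
        }
    }
    // Geometric capacity growth means only O(log n) reallocations for n
    // pushes, so the copying work on growth is O(n) overall, i.e. amortized
    // constant per push rather than quadratic.
    println!("{} pushes caused {} reallocations", n, reallocations);
}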

Diff for: compiler/rustc_expand/src/proc_macro_server.rs

+34 −33

@@ -1,5 +1,8 @@
 use crate::base::ExtCtxt;
-
+use pm::bridge::{
+    server, DelimSpan, Diagnostic, ExpnGlobals, Group, Ident, LitKind, Literal, Punct, TokenTree,
+};
+use pm::{Delimiter, Level, LineColumn};
 use rustc_ast as ast;
 use rustc_ast::token;
 use rustc_ast::tokenstream::{self, Spacing::*, TokenStream};
@@ -13,11 +16,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::def_id::CrateNum;
 use rustc_span::symbol::{self, sym, Symbol};
 use rustc_span::{BytePos, FileName, Pos, SourceFile, Span};
-
-use pm::bridge::{
-    server, DelimSpan, Diagnostic, ExpnGlobals, Group, Ident, LitKind, Literal, Punct, TokenTree,
-};
-use pm::{Delimiter, Level, LineColumn};
+use smallvec::{smallvec, SmallVec};
 use std::ops::Bound;
 
 trait FromInternal<T> {
@@ -241,8 +240,11 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec<TokenTree<TokenStre
     }
 }
 
-impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rustc<'_, '_>) {
-    fn to_internal(self) -> TokenStream {
+// We use a `SmallVec` because the output size is always one or two `TokenTree`s.
+impl ToInternal<SmallVec<[tokenstream::TokenTree; 2]>>
+    for (TokenTree<TokenStream, Span, Symbol>, &mut Rustc<'_, '_>)
+{
+    fn to_internal(self) -> SmallVec<[tokenstream::TokenTree; 2]> {
         use rustc_ast::token::*;
 
         let (tree, rustc) = self;
@@ -273,22 +275,22 @@ impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rus
                     b'\'' => SingleQuote,
                     _ => unreachable!(),
                 };
-                if joint {
-                    tokenstream::TokenStream::token_joint(kind, span)
+                smallvec![if joint {
+                    tokenstream::TokenTree::token_joint(kind, span)
                 } else {
-                    tokenstream::TokenStream::token_alone(kind, span)
-                }
+                    tokenstream::TokenTree::token_alone(kind, span)
+                }]
             }
             TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => {
-                tokenstream::TokenStream::delimited(
+                smallvec![tokenstream::TokenTree::Delimited(
                     tokenstream::DelimSpan { open, close },
                     delimiter.to_internal(),
                     stream.unwrap_or_default(),
-                )
+                )]
             }
             TokenTree::Ident(self::Ident { sym, is_raw, span }) => {
                 rustc.sess().symbol_gallery.insert(sym, span);
-                tokenstream::TokenStream::token_alone(Ident(sym, is_raw), span)
+                smallvec![tokenstream::TokenTree::token_alone(Ident(sym, is_raw), span)]
             }
             TokenTree::Literal(self::Literal {
                 kind: self::LitKind::Integer,
@@ -301,7 +303,7 @@ impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rus
                 let integer = TokenKind::lit(token::Integer, symbol, suffix);
                 let a = tokenstream::TokenTree::token_alone(minus, span);
                 let b = tokenstream::TokenTree::token_alone(integer, span);
-                [a, b].into_iter().collect()
+                smallvec![a, b]
             }
             TokenTree::Literal(self::Literal {
                 kind: self::LitKind::Float,
@@ -314,13 +316,13 @@ impl ToInternal<TokenStream> for (TokenTree<TokenStream, Span, Symbol>, &mut Rus
                 let float = TokenKind::lit(token::Float, symbol, suffix);
                 let a = tokenstream::TokenTree::token_alone(minus, span);
                 let b = tokenstream::TokenTree::token_alone(float, span);
-                [a, b].into_iter().collect()
+                smallvec![a, b]
             }
             TokenTree::Literal(self::Literal { kind, symbol, suffix, span }) => {
-                tokenstream::TokenStream::token_alone(
+                smallvec![tokenstream::TokenTree::token_alone(
                     TokenKind::lit(kind.to_internal(), symbol, suffix),
                     span,
-                )
+                )]
             }
         }
     }
@@ -536,37 +538,35 @@ impl server::TokenStream for Rustc<'_, '_> {
         &mut self,
         tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol>,
     ) -> Self::TokenStream {
-        (tree, &mut *self).to_internal()
+        Self::TokenStream::new((tree, &mut *self).to_internal().into_iter().collect::<Vec<_>>())
     }
 
     fn concat_trees(
         &mut self,
         base: Option<Self::TokenStream>,
         trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol>>,
     ) -> Self::TokenStream {
-        let mut builder = tokenstream::TokenStreamBuilder::new();
-        if let Some(base) = base {
-            builder.push(base);
-        }
+        let mut stream =
+            if let Some(base) = base { base } else { tokenstream::TokenStream::default() };
         for tree in trees {
-            builder.push((tree, &mut *self).to_internal());
+            for tt in (tree, &mut *self).to_internal() {
+                stream.push_tree(tt);
+            }
         }
-        builder.build()
+        stream
     }
 
     fn concat_streams(
        &mut self,
         base: Option<Self::TokenStream>,
         streams: Vec<Self::TokenStream>,
     ) -> Self::TokenStream {
-        let mut builder = tokenstream::TokenStreamBuilder::new();
-        if let Some(base) = base {
-            builder.push(base);
-        }
-        for stream in streams {
-            builder.push(stream);
+        let mut stream =
+            if let Some(base) = base { base } else { tokenstream::TokenStream::default() };
+        for s in streams {
+            stream.push_stream(s);
         }
-        builder.build()
+        stream
     }
 
     fn into_trees(
@@ -692,6 +692,7 @@ impl server::Span for Rustc<'_, '_> {
     fn source_text(&mut self, span: Self::Span) -> Option<String> {
         self.sess().source_map().span_to_snippet(span).ok()
     }
+
     /// Saves the provided span into the metadata of
     /// *the crate we are currently compiling*, which must
     /// be a proc-macro crate. This id can be passed to
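Because the `ToInternal` impl above now returns a `SmallVec<[tokenstream::TokenTree; 2]>`, the common one- and two-tree results stay in the inline buffer. The standalone snippet below (it assumes only the `smallvec` crate and is not part of the commit) illustrates the inline-capacity behaviour this relies on.

// Illustration of SmallVec inline storage, independent of the commit:
// with inline capacity 2, one- and two-element results never allocate.
use smallvec::{smallvec, SmallVec};

fn main() {
    let one: SmallVec<[u32; 2]> = smallvec![1];
    let two: SmallVec<[u32; 2]> = smallvec![1, 2];
    let three: SmallVec<[u32; 2]> = smallvec![1, 2, 3];

    assert!(!one.spilled()); // stored inline, no heap allocation
    assert!(!two.spilled()); // still inline
    assert!(three.spilled()); // exceeds inline capacity, heap-allocated
}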

Diff for: compiler/rustc_expand/src/tokenstream/tests.rs

+8 −10

@@ -1,7 +1,7 @@
 use crate::tests::string_to_stream;
 
 use rustc_ast::token;
-use rustc_ast::tokenstream::{TokenStream, TokenStreamBuilder};
+use rustc_ast::tokenstream::{TokenStream, TokenTree};
 use rustc_span::create_default_session_globals_then;
 use rustc_span::{BytePos, Span, Symbol};
 
@@ -19,10 +19,9 @@ fn test_concat() {
         let test_res = string_to_ts("foo::bar::baz");
         let test_fst = string_to_ts("foo::bar");
         let test_snd = string_to_ts("::baz");
-        let mut builder = TokenStreamBuilder::new();
-        builder.push(test_fst);
-        builder.push(test_snd);
-        let eq_res = builder.build();
+        let mut eq_res = TokenStream::default();
+        eq_res.push_stream(test_fst);
+        eq_res.push_stream(test_snd);
         assert_eq!(test_res.trees().count(), 5);
         assert_eq!(eq_res.trees().count(), 5);
         assert_eq!(test_res.eq_unspanned(&eq_res), true);
@@ -99,11 +98,10 @@ fn test_is_empty() {
 #[test]
 fn test_dotdotdot() {
     create_default_session_globals_then(|| {
-        let mut builder = TokenStreamBuilder::new();
-        builder.push(TokenStream::token_joint(token::Dot, sp(0, 1)));
-        builder.push(TokenStream::token_joint(token::Dot, sp(1, 2)));
-        builder.push(TokenStream::token_alone(token::Dot, sp(2, 3)));
-        let stream = builder.build();
+        let mut stream = TokenStream::default();
+        stream.push_tree(TokenTree::token_joint(token::Dot, sp(0, 1)));
+        stream.push_tree(TokenTree::token_joint(token::Dot, sp(1, 2)));
+        stream.push_tree(TokenTree::token_alone(token::Dot, sp(2, 3)));
         assert!(stream.eq_unspanned(&string_to_ts("...")));
         assert_eq!(stream.trees().count(), 1);
     })
