
Commit d12c6e9

Auto merge of rust-lang#114273 - nnethercote:move-doc-comment-desugaring, r=petrochenkov

Move doc comment desugaring out of `TokenCursor`. It's awkward that `TokenCursor` sometimes desugars doc comments on the fly, but usually doesn't.

r? `@petrochenkov`

2 parents abd3637 + 2e6ce68
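For context: a doc comment is sugar for a `#[doc = ...]` attribute, and this desugaring matters to declarative macros that match on `doc` attributes. A minimal illustration of the equivalence (example code with hypothetical names, not part of the commit):

// These two functions carry the same doc attribute once `/// ...` is
// desugared; note the preserved leading space inside the raw string.
/// Adds one.
fn add_one_a(x: i32) -> i32 { x + 1 }

#[doc = r" Adds one."]
fn add_one_b(x: i32) -> i32 { x + 1 }

// Inner doc comments (`//! ...`) desugar to inner attributes (`#![doc = r" ..."]`).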

File tree: 4 files changed, +120 -104 lines


compiler/rustc_ast/src/tokenstream.rs (+89 -12)
@@ -13,7 +13,7 @@
 //! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
 //! ownership of the original.
 
-use crate::ast::StmtKind;
+use crate::ast::{AttrStyle, StmtKind};
 use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
 use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use crate::AttrVec;
@@ -22,11 +22,11 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
-use rustc_span::{Span, DUMMY_SP};
+use rustc_span::{sym, Span, Symbol, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 
 use std::borrow::Cow;
-use std::{fmt, iter, mem};
+use std::{cmp, fmt, iter, mem};
 
 /// When the main Rust parser encounters a syntax-extension invocation, it
 /// parses the arguments to the invocation as a token tree. This is a very
@@ -566,6 +566,92 @@ impl TokenStream {
     pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
         self.0.chunks(chunk_size)
     }
+
+    /// Desugar doc comments like `/// foo` in the stream into `#[doc =
+    /// r"foo"]`. Modifies the `TokenStream` via `Lrc::make_mut`, but as little
+    /// as possible.
+    pub fn desugar_doc_comments(&mut self) {
+        if let Some(desugared_stream) = desugar_inner(self.clone()) {
+            *self = desugared_stream;
+        }
+
+        // The return value is `None` if nothing in `stream` changed.
+        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
+            let mut i = 0;
+            let mut modified = false;
+            while let Some(tt) = stream.0.get(i) {
+                match tt {
+                    &TokenTree::Token(
+                        Token { kind: token::DocComment(_, attr_style, data), span },
+                        _spacing,
+                    ) => {
+                        let desugared = desugared_tts(attr_style, data, span);
+                        let desugared_len = desugared.len();
+                        Lrc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
+                        modified = true;
+                        i += desugared_len;
+                    }
+
+                    &TokenTree::Token(..) => i += 1,
+
+                    &TokenTree::Delimited(sp, delim, ref delim_stream) => {
+                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
+                            let new_tt = TokenTree::Delimited(sp, delim, desugared_delim_stream);
+                            Lrc::make_mut(&mut stream.0)[i] = new_tt;
+                            modified = true;
+                        }
+                        i += 1;
+                    }
+                }
+            }
+            if modified { Some(stream) } else { None }
+        }
+
+        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
+            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+            // required to wrap the text. E.g.
+            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
+            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
+            // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
+            let mut num_of_hashes = 0;
+            let mut count = 0;
+            for ch in data.as_str().chars() {
+                count = match ch {
+                    '"' => 1,
+                    '#' if count > 0 => count + 1,
+                    _ => 0,
+                };
+                num_of_hashes = cmp::max(num_of_hashes, count);
+            }
+
+            // `/// foo` becomes `doc = r"foo"`.
+            let delim_span = DelimSpan::from_single(span);
+            let body = TokenTree::Delimited(
+                delim_span,
+                Delimiter::Bracket,
+                [
+                    TokenTree::token_alone(token::Ident(sym::doc, false), span),
+                    TokenTree::token_alone(token::Eq, span),
+                    TokenTree::token_alone(
+                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
+                        span,
+                    ),
+                ]
+                .into_iter()
+                .collect::<TokenStream>(),
+            );
+
+            if attr_style == AttrStyle::Inner {
+                vec![
+                    TokenTree::token_alone(token::Pound, span),
+                    TokenTree::token_alone(token::Not, span),
+                    body,
+                ]
+            } else {
+                vec![TokenTree::token_alone(token::Pound, span), body]
+            }
+        }
+    }
 }
 
 /// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
@@ -628,15 +714,6 @@ impl TokenTreeCursor {
     pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
         self.stream.0.get(self.index + n)
     }
-
-    // Replace the previously obtained token tree with `tts`, and rewind to
-    // just before them.
-    pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
-        assert!(self.index > 0);
-        self.index -= 1;
-        let stream = Lrc::make_mut(&mut self.stream.0);
-        stream.splice(self.index..self.index + 1, tts);
-    }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
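The hash-counting loop in `desugared_tts` above picks the smallest number of `#`s such that the comment text cannot prematurely close the raw string literal wrapping it. A self-contained sketch of the same scan (hypothetical free function for illustration; in the commit this logic lives inside `desugared_tts`):

// Minimum number of `#`s needed so `text` can be wrapped in a raw string:
// track the longest run of `#`s that immediately follows a `"`.
fn num_of_hashes(text: &str) -> u32 {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in text.chars() {
        count = match ch {
            '"' => 1,                      // a quote could start the closing delimiter
            '#' if count > 0 => count + 1, // extend a run that follows a quote
            _ => 0,                        // anything else breaks the run
        };
        num_of_hashes = num_of_hashes.max(count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(num_of_hashes(r#"abc d"#), 0);             // fits in r"abc d"
    assert_eq!(num_of_hashes(r##"abc "d""##), 1);         // needs r#"abc "d""#
    assert_eq!(num_of_hashes(r####"abc "##d##""####), 3); // needs r###"abc "##d##""###
}

Only `#` runs directly after a `"` matter, which is why text with no quotes at all needs zero hashes.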

compiler/rustc_expand/src/mbe/macro_rules.rs (+16 -12)
@@ -249,15 +249,15 @@ fn expand_macro<'cx>(
                 trace_macros_note(&mut cx.expansions, sp, msg);
             }
 
-            let p = Parser::new(sess, tts, false, None);
+            let p = Parser::new(sess, tts, None);
 
             if is_local {
                 cx.resolver.record_macro_rule_usage(node_id, i);
             }
 
             // Let the context choose how to interpret the result.
             // Weird, but useful for X-macros.
-            return Box::new(ParserAnyMacro {
+            Box::new(ParserAnyMacro {
                 parser: p,
 
                 // Pass along the original expansion site and the name of the macro
@@ -269,18 +269,17 @@ fn expand_macro<'cx>(
                 is_trailing_mac: cx.current_expansion.is_trailing_mac,
                 arm_span,
                 is_local,
-            });
+            })
         }
         Err(CanRetry::No(_)) => {
             debug!("Will not retry matching as an error was emitted already");
-            return DummyResult::any(sp);
+            DummyResult::any(sp)
         }
         Err(CanRetry::Yes) => {
-            // Retry and emit a better error below.
+            // Retry and emit a better error.
+            diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
         }
     }
-
-    diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
 }
 
 pub(super) enum CanRetry {
@@ -447,7 +446,7 @@ pub fn compile_declarative_macro(
 
     let create_parser = || {
         let body = macro_def.body.tokens.clone();
-        Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS)
+        Parser::new(&sess.parse_sess, body, rustc_parse::MACRO_ARGUMENTS)
     };
 
     let parser = create_parser();
@@ -457,8 +456,8 @@ pub fn compile_declarative_macro(
     match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
         Success(m) => m,
         Failure(()) => {
-            // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it with another one
-            // that gives us the information we need.
+            // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
+            // with another one that gives us the information we need.
             // For this we need to reclone the macro body as the previous parser consumed it.
             let retry_parser = create_parser();
 
@@ -1417,6 +1416,11 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
     }
 }
 
-pub(super) fn parser_from_cx(sess: &ParseSess, tts: TokenStream, recovery: Recovery) -> Parser<'_> {
-    Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
+pub(super) fn parser_from_cx(
+    sess: &ParseSess,
+    mut tts: TokenStream,
+    recovery: Recovery,
+) -> Parser<'_> {
+    tts.desugar_doc_comments();
+    Parser::new(sess, tts, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
 }
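Why `parser_from_cx` now desugars eagerly: declarative macro matchers only see doc comments as `#[doc = ...]` attributes, the rationale given in the `TokenCursor` comment deleted in `compiler/rustc_parse/src/parser/mod.rs` below. A small illustration of a macro that relies on this behavior (hypothetical macro and names, not from the commit):

// `#[$attr:meta]` matches attribute syntax only, so doc comments in the
// macro input must reach `parse_tt` already desugared to `#[doc = ...]`.
macro_rules! with_attrs {
    ($(#[$attr:meta])* struct $name:ident;) => {
        $(#[$attr])* // re-emit every attribute, including desugared doc comments
        struct $name;
    };
}

with_attrs! {
    /// Matched by `#[$attr:meta]` as a `#[doc = r" ..."]` attribute.
    struct Demo;
}

fn main() {
    let _demo = Demo; // the struct, doc attribute included, was emitted
}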

compiler/rustc_parse/src/lib.rs (+2 -2)
@@ -205,7 +205,7 @@ pub fn stream_to_parser<'a>(
     stream: TokenStream,
     subparser_name: Option<&'static str>,
 ) -> Parser<'a> {
-    Parser::new(sess, stream, false, subparser_name)
+    Parser::new(sess, stream, subparser_name)
 }
 
 /// Runs the given subparser `f` on the tokens of the given `attr`'s item.
@@ -215,7 +215,7 @@ pub fn parse_in<'a, T>(
     name: &'static str,
     mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
 ) -> PResult<'a, T> {
-    let mut parser = Parser::new(sess, tts, false, Some(name));
+    let mut parser = Parser::new(sess, tts, Some(name));
     let result = f(&mut parser)?;
     if parser.token != token::Eof {
         parser.unexpected()?;

compiler/rustc_parse/src/parser/mod.rs (+13 -78)
@@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
 use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
@@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
 
@@ -224,11 +224,6 @@ struct TokenCursor {
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
 
-    // We need to desugar doc comments from `/// foo` form into `#[doc =
-    // r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
-    // because some declarative macros look for `doc` attributes.
-    desugar_doc_comments: bool,
-
     // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
 
@@ -265,29 +260,17 @@ impl TokenCursor {
     #[inline(always)]
     fn inlined_next(&mut self) -> (Token, Spacing) {
         loop {
-            // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
-            // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
-            // removed.
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
             if let Some(tree) = self.tree_cursor.next_ref() {
                 match tree {
                     &TokenTree::Token(ref token, spacing) => {
-                        match (self.desugar_doc_comments, token) {
-                            (
-                                true,
-                                &Token { kind: token::DocComment(_, attr_style, data), span },
-                            ) => {
-                                let desugared = self.desugar(attr_style, data, span);
-                                self.tree_cursor.replace_prev_and_rewind(desugared);
-                                // Continue to get the first token of the desugared doc comment.
-                            }
-                            _ => {
-                                debug_assert!(!matches!(
-                                    token.kind,
-                                    token::OpenDelim(_) | token::CloseDelim(_)
-                                ));
-                                return (token.clone(), spacing);
-                            }
-                        }
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
                     }
                     &TokenTree::Delimited(sp, delim, ref tts) => {
                         let trees = tts.clone().into_trees();
@@ -311,52 +294,6 @@ impl TokenCursor {
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -451,8 +388,7 @@ pub(super) fn token_descr(token: &Token) -> String {
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
-        desugar_doc_comments: bool,
+        stream: TokenStream,
         subparser_name: Option<&'static str>,
     ) -> Self {
         let mut parser = Parser {
@@ -464,10 +400,9 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
+                tree_cursor: stream.into_trees(),
                 stack: Vec::new(),
                 num_next_calls: 0,
-                desugar_doc_comments,
                 break_last_token: false,
            },
             unmatched_angle_bracket_count: 0,
@@ -1172,7 +1107,7 @@ impl<'a> Parser<'a> {
             }
             i += 1;
         }
-        return looker(&token);
+        looker(&token)
     }
 
     /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
