Skip to content

Commit 18c6d39

Browse files
authored
Rollup merge of #69006 - petrochenkov:prevspan2, r=Centril
parser: Keep current and previous tokens precisely, including their unnormalized forms. Add more documentation for them. Hopefully, this will help to eliminate footguns like #68728 (comment). I'll try to address the FIXMEs in separate PRs during the next week. r? @Centril
2 parents a8d4ccf + cd7a428 commit 18c6d39

File tree

3 files changed

+54
-26
lines changed

3 files changed

+54
-26
lines changed

src/librustc_parse/parser/mod.rs

+51-23
Original file line numberDiff line numberDiff line change
@@ -95,23 +95,32 @@ enum PrevTokenKind {
9595
Other,
9696
}
9797

98-
// NOTE: `Ident`s are handled by `common.rs`.
99-
10098
#[derive(Clone)]
10199
pub struct Parser<'a> {
102100
pub sess: &'a ParseSess,
103101
/// The current normalized token.
104102
/// "Normalized" means that some interpolated tokens
105103
/// (`$i: ident` and `$l: lifetime` meta-variables) are replaced
106104
/// with non-interpolated identifier and lifetime tokens they refer to.
107-
/// Perhaps the normalized / non-normalized setup can be simplified somehow.
105+
/// Use span from this token if you need an isolated span.
108106
pub token: Token,
109-
/// The span of the current non-normalized token.
110-
meta_var_span: Option<Span>,
111-
/// The span of the previous non-normalized token.
112-
pub prev_span: Span,
113-
/// The kind of the previous normalized token (in simplified form).
107+
/// The current non-normalized token if it's different from `token`.
108+
/// Preferable use is through the `unnormalized_token()` getter.
109+
/// Use span from this token if you need to concatenate it with some neighbouring spans.
110+
unnormalized_token: Option<Token>,
111+
/// The previous normalized token.
112+
/// Use span from this token if you need an isolated span.
113+
prev_token: Token,
114+
/// The previous non-normalized token if it's different from `prev_token`.
115+
/// Preferable use is through the `unnormalized_prev_token()` getter.
116+
/// Use span from this token if you need to concatenate it with some neighbouring spans.
117+
unnormalized_prev_token: Option<Token>,
118+
/// Equivalent to `prev_token.kind` in simplified form.
119+
/// FIXME: Remove in favor of `(unnormalized_)prev_token().kind`.
114120
prev_token_kind: PrevTokenKind,
121+
/// Equivalent to `unnormalized_prev_token().span`.
122+
/// FIXME: Remove in favor of `(unnormalized_)prev_token().span`.
123+
pub prev_span: Span,
115124
restrictions: Restrictions,
116125
/// Used to determine the path to externally loaded source files.
117126
pub(super) directory: Directory<'a>,
@@ -384,9 +393,11 @@ impl<'a> Parser<'a> {
384393
let mut parser = Parser {
385394
sess,
386395
token: Token::dummy(),
387-
prev_span: DUMMY_SP,
388-
meta_var_span: None,
396+
unnormalized_token: None,
397+
prev_token: Token::dummy(),
398+
unnormalized_prev_token: None,
389399
prev_token_kind: PrevTokenKind::Other,
400+
prev_span: DUMMY_SP,
390401
restrictions: Restrictions::empty(),
391402
recurse_into_file_modules,
392403
directory: Directory {
@@ -427,6 +438,14 @@ impl<'a> Parser<'a> {
427438
parser
428439
}
429440

441+
fn unnormalized_token(&self) -> &Token {
442+
self.unnormalized_token.as_ref().unwrap_or(&self.token)
443+
}
444+
445+
fn unnormalized_prev_token(&self) -> &Token {
446+
self.unnormalized_prev_token.as_ref().unwrap_or(&self.prev_token)
447+
}
448+
430449
fn next_tok(&mut self) -> Token {
431450
let mut next = if self.desugar_doc_comments {
432451
self.token_cursor.next_desugared()
@@ -435,7 +454,7 @@ impl<'a> Parser<'a> {
435454
};
436455
if next.span.is_dummy() {
437456
// Tweak the location for better diagnostics, but keep syntactic context intact.
438-
next.span = self.prev_span.with_ctxt(next.span.ctxt());
457+
next.span = self.unnormalized_token().span.with_ctxt(next.span.ctxt());
439458
}
440459
next
441460
}
@@ -895,10 +914,13 @@ impl<'a> Parser<'a> {
895914
self.span_bug(self.token.span, msg);
896915
}
897916

898-
self.prev_span = self.meta_var_span.take().unwrap_or(self.token.span);
917+
// Update the current and previous tokens.
918+
let next_token = self.next_tok();
919+
self.prev_token = mem::replace(&mut self.token, next_token);
920+
self.unnormalized_prev_token = self.unnormalized_token.take();
899921

900-
// Record last token kind for possible error recovery.
901-
self.prev_token_kind = match self.token.kind {
922+
// Update fields derived from the previous token.
923+
self.prev_token_kind = match self.prev_token.kind {
902924
token::DocComment(..) => PrevTokenKind::DocComment,
903925
token::Comma => PrevTokenKind::Comma,
904926
token::BinOp(token::Plus) => PrevTokenKind::Plus,
@@ -908,22 +930,28 @@ impl<'a> Parser<'a> {
908930
token::Ident(..) => PrevTokenKind::Ident,
909931
_ => PrevTokenKind::Other,
910932
};
933+
self.prev_span = self.unnormalized_prev_token().span;
911934

912-
self.token = self.next_tok();
913935
self.expected_tokens.clear();
914936
// Check after each token.
915937
self.process_potential_macro_variable();
916938
}
917939

918940
/// Advances the parser using provided token as a next one. Use this when
919941
/// consuming a part of a token. For example a single `<` from `<<`.
942+
/// FIXME: this function sets the previous token data to some semi-nonsensical values
943+
/// which kind of work because they are currently used in very limited ways in practice.
944+
/// Correct token kinds and spans need to be calculated instead.
920945
fn bump_with(&mut self, next: TokenKind, span: Span) {
921-
self.prev_span = self.token.span.with_hi(span.lo());
922-
// It would be incorrect to record the kind of the current token, but
923-
// fortunately for tokens currently using `bump_with`, the
924-
// `prev_token_kind` will be of no use anyway.
946+
// Update the current and previous tokens.
947+
let next_token = Token::new(next, span);
948+
self.prev_token = mem::replace(&mut self.token, next_token);
949+
self.unnormalized_prev_token = self.unnormalized_token.take();
950+
951+
// Update fields derived from the previous token.
925952
self.prev_token_kind = PrevTokenKind::Other;
926-
self.token = Token::new(next, span);
953+
self.prev_span = self.unnormalized_prev_token().span.with_hi(span.lo());
954+
927955
self.expected_tokens.clear();
928956
}
929957

@@ -1054,7 +1082,7 @@ impl<'a> Parser<'a> {
10541082
}
10551083

10561084
pub fn process_potential_macro_variable(&mut self) {
1057-
self.token = match self.token.kind {
1085+
let normalized_token = match self.token.kind {
10581086
token::Dollar
10591087
if self.token.span.from_expansion() && self.look_ahead(1, |t| t.is_ident()) =>
10601088
{
@@ -1071,7 +1099,6 @@ impl<'a> Parser<'a> {
10711099
return;
10721100
}
10731101
token::Interpolated(ref nt) => {
1074-
self.meta_var_span = Some(self.token.span);
10751102
// Interpolated identifier and lifetime tokens are replaced with usual identifier
10761103
// and lifetime tokens, so the former are never encountered during normal parsing.
10771104
match **nt {
@@ -1084,6 +1111,7 @@ impl<'a> Parser<'a> {
10841111
}
10851112
_ => return,
10861113
};
1114+
self.unnormalized_token = Some(mem::replace(&mut self.token, normalized_token));
10871115
}
10881116

10891117
/// Parses a single token tree from the input.
@@ -1100,7 +1128,7 @@ impl<'a> Parser<'a> {
11001128
}
11011129
token::CloseDelim(_) | token::Eof => unreachable!(),
11021130
_ => {
1103-
let token = self.token.take();
1131+
let token = self.token.clone();
11041132
self.bump();
11051133
TokenTree::Token(token)
11061134
}

src/librustc_parse/parser/path.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ impl<'a> Parser<'a> {
134134
path
135135
});
136136

137-
let lo = self.meta_var_span.unwrap_or(self.token.span);
137+
let lo = self.unnormalized_token().span;
138138
let mut segments = Vec::new();
139139
let mod_sep_ctxt = self.token.span.ctxt();
140140
if self.eat(&token::ModSep) {

src/test/ui/parser/mbe_missing_right_paren.stderr

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ LL | macro_rules! abc(ؼ;
2222
| ^
2323

2424
error: unexpected end of macro invocation
25-
--> $DIR/mbe_missing_right_paren.rs:3:1
25+
--> $DIR/mbe_missing_right_paren.rs:3:19
2626
|
2727
LL | macro_rules! abc(ؼ
28-
| ^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
28+
| ^ missing tokens in macro arguments
2929

3030
error: aborting due to 3 previous errors
3131

0 commit comments

Comments
 (0)