Skip to content

Commit 98777b4

Browse files
committed
Merge TokenTreesReader into StringReader.
There is a not-very-useful layering in the lexer, where `TokenTreesReader` contains a `StringReader`. This commit combines them and names the result `Lexer`, which is a more obvious name for it. The methods of `Lexer` are now split across `mod.rs` and `tokentrees.rs` which isn't ideal, but it doesn't seem worth moving a bunch of code to avoid it.
1 parent 481b5fa commit 98777b4

File tree

3 files changed

+31
-49
lines changed

3 files changed

+31
-49
lines changed

compiler/rustc_parse/src/lexer/mod.rs

+15-6
Original file line number · Diff line number · Diff line change
@@ -18,6 +18,7 @@ use rustc_span::symbol::Symbol;
1818
use rustc_span::{BytePos, Pos, Span};
1919
use tracing::debug;
2020

21+
use crate::lexer::diagnostics::TokenTreeDiagInfo;
2122
use crate::lexer::unicode_chars::UNICODE_ARRAY;
2223
use crate::{errors, make_unclosed_delims_error};
2324

@@ -56,7 +57,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
5657
}
5758

5859
let cursor = Cursor::new(src);
59-
let string_reader = StringReader {
60+
let mut lexer = Lexer {
6061
psess,
6162
start_pos,
6263
pos: start_pos,
@@ -65,9 +66,12 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
6566
override_span,
6667
nbsp_is_whitespace: false,
6768
last_lifetime: None,
69+
token: Token::dummy(),
70+
diag_info: TokenTreeDiagInfo::default(),
6871
};
69-
let (stream, res, unmatched_delims) =
70-
tokentrees::TokenTreesReader::lex_all_token_trees(string_reader);
72+
let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false);
73+
let unmatched_delims = lexer.diag_info.unmatched_delims;
74+
7175
match res {
7276
Ok(()) if unmatched_delims.is_empty() => Ok(stream),
7377
_ => {
@@ -92,7 +96,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
9296
}
9397
}
9498

95-
struct StringReader<'psess, 'src> {
99+
struct Lexer<'psess, 'src> {
96100
psess: &'psess ParseSess,
97101
/// Initial position, read-only.
98102
start_pos: BytePos,
@@ -111,9 +115,14 @@ struct StringReader<'psess, 'src> {
111115
/// Track the `Span` for the leading `'` of the last lifetime. Used for
112116
/// diagnostics to detect possible typo where `"` was meant.
113117
last_lifetime: Option<Span>,
118+
119+
/// The current token.
120+
token: Token,
121+
122+
diag_info: TokenTreeDiagInfo,
114123
}
115124

116-
impl<'psess, 'src> StringReader<'psess, 'src> {
125+
impl<'psess, 'src> Lexer<'psess, 'src> {
117126
fn dcx(&self) -> DiagCtxtHandle<'psess> {
118127
self.psess.dcx()
119128
}
@@ -124,7 +133,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
124133

125134
/// Returns the next token, paired with a bool indicating if the token was
126135
/// preceded by whitespace.
127-
fn next_token(&mut self) -> (Token, bool) {
136+
fn next_token_from_cursor(&mut self) -> (Token, bool) {
128137
let mut preceded_by_whitespace = false;
129138
let mut swallow_next_invalid = 0;
130139
// Skip trivial (whitespace & comments) tokens

compiler/rustc_parse/src/lexer/tokentrees.rs

+12-39
Original file line number · Diff line number · Diff line change
@@ -4,41 +4,19 @@ use rustc_ast_pretty::pprust::token_to_string;
44
use rustc_errors::{Applicability, PErr};
55
use rustc_span::symbol::kw;
66

7-
use super::diagnostics::{
8-
TokenTreeDiagInfo, report_suspicious_mismatch_block, same_indentation_level,
9-
};
10-
use super::{StringReader, UnmatchedDelim};
7+
use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level};
8+
use super::{Lexer, UnmatchedDelim};
119
use crate::Parser;
1210

13-
pub(super) struct TokenTreesReader<'psess, 'src> {
14-
string_reader: StringReader<'psess, 'src>,
15-
/// The "next" token, which has been obtained from the `StringReader` but
16-
/// not yet handled by the `TokenTreesReader`.
17-
token: Token,
18-
diag_info: TokenTreeDiagInfo,
19-
}
20-
21-
impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
22-
pub(super) fn lex_all_token_trees(
23-
string_reader: StringReader<'psess, 'src>,
24-
) -> (TokenStream, Result<(), Vec<PErr<'psess>>>, Vec<UnmatchedDelim>) {
25-
let mut tt_reader = TokenTreesReader {
26-
string_reader,
27-
token: Token::dummy(),
28-
diag_info: TokenTreeDiagInfo::default(),
29-
};
30-
let (_open_spacing, stream, res) = tt_reader.lex_token_trees(/* is_delimited */ false);
31-
(stream, res, tt_reader.diag_info.unmatched_delims)
32-
}
33-
11+
impl<'psess, 'src> Lexer<'psess, 'src> {
3412
// Lex into a token stream. The `Spacing` in the result is that of the
3513
// opening delimiter.
36-
fn lex_token_trees(
14+
pub(super) fn lex_token_trees(
3715
&mut self,
3816
is_delimited: bool,
3917
) -> (Spacing, TokenStream, Result<(), Vec<PErr<'psess>>>) {
4018
// Move past the opening delimiter.
41-
let (_, open_spacing) = self.bump(false);
19+
let open_spacing = self.bump(false).1;
4220

4321
let mut buf = Vec::new();
4422
loop {
@@ -80,7 +58,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
8058

8159
fn eof_err(&mut self) -> PErr<'psess> {
8260
let msg = "this file contains an unclosed delimiter";
83-
let mut err = self.string_reader.dcx().struct_span_err(self.token.span, msg);
61+
let mut err = self.dcx().struct_span_err(self.token.span, msg);
8462

8563
let unclosed_delimiter_show_limit = 5;
8664
let len = usize::min(unclosed_delimiter_show_limit, self.diag_info.open_braces.len());
@@ -110,7 +88,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
11088
report_suspicious_mismatch_block(
11189
&mut err,
11290
&self.diag_info,
113-
self.string_reader.psess.source_map(),
91+
self.psess.source_map(),
11492
*delim,
11593
)
11694
}
@@ -136,7 +114,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
136114

137115
// Expand to cover the entire delimited token tree.
138116
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
139-
let sm = self.string_reader.psess.source_map();
117+
let sm = self.psess.source_map();
140118

141119
let close_spacing = match self.token.kind {
142120
// Correct delimiter.
@@ -228,7 +206,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
228206
// Will glue adjacent single-char tokens together if `glue` is set.
229207
fn bump(&mut self, glue: bool) -> (Token, Spacing) {
230208
let (this_spacing, next_tok) = loop {
231-
let (next_tok, is_next_tok_preceded_by_whitespace) = self.string_reader.next_token();
209+
let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
232210

233211
if is_next_tok_preceded_by_whitespace {
234212
break (Spacing::Alone, next_tok);
@@ -256,7 +234,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
256234
) -> Vec<PErr<'psess>> {
257235
// If there are unclosed delims, see if there are diff markers and if so, point them
258236
// out instead of complaining about the unclosed delims.
259-
let mut parser = Parser::new(self.string_reader.psess, tts, None);
237+
let mut parser = Parser::new(self.psess, tts, None);
260238
let mut diff_errs = vec![];
261239
// Suggest removing a `{` we think appears in an `if`/`while` condition.
262240
// We want to suggest removing a `{` only if we think we're in an `if`/`while` condition,
@@ -314,14 +292,9 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
314292
// An unexpected closing delimiter (i.e., there is no matching opening delimiter).
315293
let token_str = token_to_string(&self.token);
316294
let msg = format!("unexpected closing delimiter: `{token_str}`");
317-
let mut err = self.string_reader.dcx().struct_span_err(self.token.span, msg);
295+
let mut err = self.dcx().struct_span_err(self.token.span, msg);
318296

319-
report_suspicious_mismatch_block(
320-
&mut err,
321-
&self.diag_info,
322-
self.string_reader.psess.source_map(),
323-
delim,
324-
);
297+
report_suspicious_mismatch_block(&mut err, &self.diag_info, self.psess.source_map(), delim);
325298
err.span_label(self.token.span, "unexpected closing delimiter");
326299
err
327300
}

compiler/rustc_parse/src/lexer/unicode_chars.rs

+4-4
Original file line number · Diff line number · Diff line change
@@ -4,7 +4,7 @@
44
use rustc_span::symbol::kw;
55
use rustc_span::{BytePos, Pos, Span};
66

7-
use super::StringReader;
7+
use super::Lexer;
88
use crate::errors::TokenSubstitution;
99
use crate::token::{self, Delimiter};
1010

@@ -338,7 +338,7 @@ const ASCII_ARRAY: &[(&str, &str, Option<token::TokenKind>)] = &[
338338
];
339339

340340
pub(super) fn check_for_substitution(
341-
reader: &StringReader<'_, '_>,
341+
lexer: &Lexer<'_, '_>,
342342
pos: BytePos,
343343
ch: char,
344344
count: usize,
@@ -351,11 +351,11 @@ pub(super) fn check_for_substitution(
351351

352352
let Some((_, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(s, _, _)| s == ascii_str) else {
353353
let msg = format!("substitution character not found for '{ch}'");
354-
reader.dcx().span_bug(span, msg);
354+
lexer.dcx().span_bug(span, msg);
355355
};
356356

357357
// special help suggestion for "directed" double quotes
358-
let sugg = if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
358+
let sugg = if let Some(s) = peek_delimited(&lexer.src[lexer.src_index(pos)..], '“', '”') {
359359
let span = Span::with_root_ctxt(
360360
pos,
361361
pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),

0 commit comments

Comments (0)