|
3 | 3 |
|
4 | 4 | use super::StringReader;
|
5 | 5 | use errors::{Applicability, DiagnosticBuilder};
|
6 |
| -use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION}; |
| 6 | +use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw}; |
| 7 | +use crate::parse::token; |
7 | 8 |
|
8 | 9 | #[rustfmt::skip] // for line breaks
|
9 | 10 | const UNICODE_ARRAY: &[(char, &str, char)] = &[
|
@@ -297,53 +298,59 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
|
297 | 298 | ('>', "Fullwidth Greater-Than Sign", '>'),
|
298 | 299 | ];
|
299 | 300 |
|
300 |
| -const ASCII_ARRAY: &[(char, &str)] = &[ |
301 |
| - (' ', "Space"), |
302 |
| - ('_', "Underscore"), |
303 |
| - ('-', "Minus/Hyphen"), |
304 |
| - (',', "Comma"), |
305 |
| - (';', "Semicolon"), |
306 |
| - (':', "Colon"), |
307 |
| - ('!', "Exclamation Mark"), |
308 |
| - ('?', "Question Mark"), |
309 |
| - ('.', "Period"), |
310 |
| - ('\'', "Single Quote"), |
311 |
| - ('"', "Quotation Mark"), |
312 |
| - ('(', "Left Parenthesis"), |
313 |
| - (')', "Right Parenthesis"), |
314 |
| - ('[', "Left Square Bracket"), |
315 |
| - (']', "Right Square Bracket"), |
316 |
| - ('{', "Left Curly Brace"), |
317 |
| - ('}', "Right Curly Brace"), |
318 |
| - ('*', "Asterisk"), |
319 |
| - ('/', "Slash"), |
320 |
| - ('\\', "Backslash"), |
321 |
| - ('&', "Ampersand"), |
322 |
| - ('+', "Plus Sign"), |
323 |
| - ('<', "Less-Than Sign"), |
324 |
| - ('=', "Equals Sign"), |
325 |
| - ('>', "Greater-Than Sign"), |
| 301 | +// FIXME: the lexer could be used to turn the ASCII version of Unicode homoglyphs into tokens, |
| 302 | +// instead of keeping the substitution token in this table. Ideally, this should be inside |
| 303 | +// `rustc_lexer`. However, we should first remove compound tokens like `<<` from `rustc_lexer`, |
| 304 | +// and then add fancier error recovery to it, as there will be less overall work to do this way. |
| 305 | +const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ |
| 306 | + (' ', "Space", Some(token::Whitespace)), |
| 307 | + ('_', "Underscore", Some(token::Ident(kw::Underscore, false))), |
| 308 | + ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), |
| 309 | + (',', "Comma", Some(token::Comma)), |
| 310 | + (';', "Semicolon", Some(token::Semi)), |
| 311 | + (':', "Colon", Some(token::Colon)), |
| 312 | + ('!', "Exclamation Mark", Some(token::Not)), |
| 313 | + ('?', "Question Mark", Some(token::Question)), |
| 314 | + ('.', "Period", Some(token::Dot)), |
| 315 | + ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))), |
| 316 | + (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))), |
| 317 | + ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))), |
| 318 | + (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))), |
| 319 | + ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))), |
| 320 | + ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))), |
| 321 | + ('*', "Asterisk", Some(token::BinOp(token::Star))), |
| 322 | + ('/', "Slash", Some(token::BinOp(token::Slash))), |
| 323 | + ('\\', "Backslash", None), |
| 324 | + ('&', "Ampersand", Some(token::BinOp(token::And))), |
| 325 | + ('+', "Plus Sign", Some(token::BinOp(token::Plus))), |
| 326 | + ('<', "Less-Than Sign", Some(token::Lt)), |
| 327 | + ('=', "Equals Sign", Some(token::Eq)), |
| 328 | + ('>', "Greater-Than Sign", Some(token::Gt)), |
| 329 | + // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by |
| 330 | + // spitting the correct token out. |
| 331 | + ('\'', "Single Quote", None), |
| 332 | + ('"', "Quotation Mark", None), |
326 | 333 | ];
|
327 | 334 |
|
328 | 335 | crate fn check_for_substitution<'a>(
|
329 | 336 | reader: &StringReader<'a>,
|
330 | 337 | pos: BytePos,
|
331 | 338 | ch: char,
|
332 | 339 | err: &mut DiagnosticBuilder<'a>,
|
333 |
| -) -> bool { |
| 340 | +) -> Option<token::TokenKind> { |
334 | 341 | let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) {
|
335 | 342 | Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char),
|
336 |
| - None => return false, |
| 343 | + None => return None, |
337 | 344 | };
|
338 | 345 |
|
339 | 346 | let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION);
|
340 | 347 |
|
341 |
| - let ascii_name = match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) { |
342 |
| - Some((_ascii_char, ascii_name)) => ascii_name, |
| 348 | + let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) { |
| 349 | + Some((_ascii_char, ascii_name, token)) => (ascii_name, token), |
343 | 350 | None => {
|
344 | 351 | let msg = format!("substitution character not found for '{}'", ch);
|
345 | 352 | reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
|
346 |
| - return false; |
| 353 | + return None; |
347 | 354 | }
|
348 | 355 | };
|
349 | 356 |
|
@@ -371,7 +378,7 @@ crate fn check_for_substitution<'a>(
|
371 | 378 | );
|
372 | 379 | err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
|
373 | 380 | }
|
374 |
| - true |
| 381 | + token.clone() |
375 | 382 | }
|
376 | 383 |
|
377 | 384 | /// Extract string if found at current position with given delimiters
|
|
0 commit comments