Skip to content

Commit 0a2d708

Browse files
authored
Rollup merge of rust-lang#134253 - nnethercote:overhaul-keywords, r=petrochenkov
Overhaul keyword handling

The compiler's list of keywords has some problems.
- It contains several items that aren't keywords.
- The order isn't quite right in a couple of places.
- Some of the names of predicates relating to keywords are confusing.
- rustdoc and rustfmt have their own (incorrect) versions of the keyword list.
- `AllKeywords` is unnecessarily complex.

r? ```@jieyouxu```
2 parents 477f222 + 6de550c commit 0a2d708

File tree

4 files changed

+67
-121
lines changed

4 files changed

+67
-121
lines changed

compiler/rustc_ast/src/token.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,8 @@ impl Token {
903903
self.is_non_raw_ident_where(|id| id.name == kw)
904904
}
905905

906-
/// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case.
906+
/// Returns `true` if the token is the given keyword `kw`, or if `case` is `Insensitive` and
907+
/// this token is an identifier equal to `kw`, ignoring case.
907908
pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
908909
self.is_keyword(kw)
909910
|| (case == Case::Insensitive
@@ -916,6 +917,11 @@ impl Token {
916917
self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
917918
}
918919

920+
/// Don't use this unless you're doing something very loose and heuristic-y.
921+
pub fn is_any_keyword(&self) -> bool {
922+
self.is_non_raw_ident_where(Ident::is_any_keyword)
923+
}
924+
919925
/// Returns true for reserved identifiers used internally for elided lifetimes,
920926
/// unnamed method parameters, crate root module, error recovery etc.
921927
pub fn is_special_ident(&self) -> bool {

compiler/rustc_parse/src/parser/diagnostics.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use rustc_errors::{
2222
use rustc_session::errors::ExprParenthesesNeeded;
2323
use rustc_span::edit_distance::find_best_match_for_name;
2424
use rustc_span::source_map::Spanned;
25-
use rustc_span::symbol::AllKeywords;
25+
use rustc_span::symbol::used_keywords;
2626
use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym};
2727
use thin_vec::{ThinVec, thin_vec};
2828
use tracing::{debug, trace};
@@ -811,12 +811,12 @@ impl<'a> Parser<'a> {
811811
// so that it gets generated only when the diagnostic needs it.
812812
// Also, it is unlikely that this list is generated multiple times because the
813813
// parser halts after execution hits this path.
814-
let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());
814+
let all_keywords = used_keywords(|| prev_ident.span.edition());
815815

816816
// Otherwise, check the previous token with all the keywords as possible candidates.
817817
// This handles code like `Struct Human;` and `While a < b {}`.
818-
// We check the previous token only when the current token is an identifier to avoid false
819-
// positives like suggesting keyword `for` for `extern crate foo {}`.
818+
// We check the previous token only when the current token is an identifier to avoid
819+
// false positives like suggesting keyword `for` for `extern crate foo {}`.
820820
if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
821821
err.subdiagnostic(misspelled_kw);
822822
// We don't want other suggestions to be added as they are most likely meaningless

compiler/rustc_span/src/symbol.rs

+44-41
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,26 @@ mod tests;
2020

2121
// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
2222
symbols! {
23-
// If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
24-
// and `AllKeywords`.
23+
// This list includes things that are definitely keywords (e.g. `if`),
24+
// a few things that are definitely not keywords (e.g. the empty symbol,
25+
// `{{root}}`) and things where there is disagreement between people and/or
26+
// documents (such as the Rust Reference) about whether it is a keyword
27+
// (e.g. `_`).
28+
//
29+
// If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` predicates and
30+
// `used_keywords`.
2531
// But this should rarely be necessary if the keywords are kept in alphabetic order.
2632
Keywords {
2733
// Special reserved identifiers used internally for elided lifetimes,
2834
// unnamed method parameters, crate root module, error recovery etc.
35+
// Matching predicates: `is_special`/`is_reserved`
2936
Empty: "",
3037
PathRoot: "{{root}}",
3138
DollarCrate: "$crate",
3239
Underscore: "_",
3340

3441
// Keywords that are used in stable Rust.
42+
// Matching predicates: `is_any_keyword`, `is_used_keyword_always`/`is_reserved`
3543
As: "as",
3644
Break: "break",
3745
Const: "const",
@@ -69,6 +77,7 @@ symbols! {
6977
While: "while",
7078

7179
// Keywords that are used in unstable Rust or reserved for future use.
80+
// Matching predicates: `is_any_keyword`, `is_unused_keyword_always`/`is_reserved`
7281
Abstract: "abstract",
7382
Become: "become",
7483
Box: "box",
@@ -83,23 +92,29 @@ symbols! {
8392
Yield: "yield",
8493

8594
// Edition-specific keywords that are used in stable Rust.
95+
// Matching predicates: `is_any_keyword`, `is_used_keyword_conditional`/`is_reserved` (if
96+
// the edition suffices)
8697
Async: "async", // >= 2018 Edition only
8798
Await: "await", // >= 2018 Edition only
8899
Dyn: "dyn", // >= 2018 Edition only
89100

90101
// Edition-specific keywords that are used in unstable Rust or reserved for future use.
102+
// Matching predicates: `is_any_keyword`, `is_unused_keyword_conditional`/`is_reserved` (if
103+
// the edition suffices)
104+
Gen: "gen", // >= 2024 Edition only
91105
Try: "try", // >= 2018 Edition only
92106

93-
// Special lifetime names
107+
// "Lifetime keywords": regular keywords with a leading `'`.
108+
// Matching predicates: `is_any_keyword`
94109
UnderscoreLifetime: "'_",
95110
StaticLifetime: "'static",
96111

97112
// Weak keywords, have special meaning only in specific contexts.
113+
// Matching predicates: `is_any_keyword`
98114
Auto: "auto",
99115
Builtin: "builtin",
100116
Catch: "catch",
101117
Default: "default",
102-
Gen: "gen",
103118
MacroRules: "macro_rules",
104119
Raw: "raw",
105120
Reuse: "reuse",
@@ -2589,6 +2604,11 @@ pub mod sym {
25892604
}
25902605

25912606
impl Symbol {
2607+
/// Don't use this unless you're doing something very loose and heuristic-y.
2608+
pub fn is_any_keyword(self) -> bool {
2609+
self >= kw::As && self <= kw::Yeet
2610+
}
2611+
25922612
fn is_special(self) -> bool {
25932613
self <= kw::Underscore
25942614
}
@@ -2606,8 +2626,8 @@ impl Symbol {
26062626
}
26072627

26082628
fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
2609-
self == kw::Try && edition().at_least_rust_2018()
2610-
|| self == kw::Gen && edition().at_least_rust_2024()
2629+
self == kw::Gen && edition().at_least_rust_2024()
2630+
|| self == kw::Try && edition().at_least_rust_2018()
26112631
}
26122632

26132633
pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
@@ -2645,6 +2665,11 @@ impl Symbol {
26452665
}
26462666

26472667
impl Ident {
2668+
/// Don't use this unless you're doing something very loose and heuristic-y.
2669+
pub fn is_any_keyword(self) -> bool {
2670+
self.name.is_any_keyword()
2671+
}
2672+
26482673
/// Returns `true` for reserved identifiers used internally for elided lifetimes,
26492674
/// unnamed method parameters, crate root module, error recovery etc.
26502675
pub fn is_special(self) -> bool {
@@ -2683,41 +2708,19 @@ impl Ident {
26832708
}
26842709
}
26852710

2686-
/// An iterator over all the keywords in Rust.
2687-
#[derive(Copy, Clone)]
2688-
pub struct AllKeywords {
2689-
curr_idx: u32,
2690-
end_idx: u32,
2691-
}
2692-
2693-
impl AllKeywords {
2694-
/// Initialize a new iterator over all the keywords.
2695-
///
2696-
/// *Note:* Please update this if a new keyword is added beyond the current
2697-
/// range.
2698-
pub fn new() -> Self {
2699-
AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() }
2700-
}
2701-
2702-
/// Collect all the keywords in a given edition into a vector.
2703-
pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
2704-
self.filter(|&keyword| {
2705-
keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition)
2711+
/// Collect all the keywords in a given edition into a vector.
2712+
///
2713+
/// *Note:* Please update this if a new keyword is added beyond the current
2714+
/// range.
2715+
pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
2716+
(kw::Empty.as_u32()..kw::Yeet.as_u32())
2717+
.filter_map(|kw| {
2718+
let kw = Symbol::new(kw);
2719+
if kw.is_used_keyword_always() || kw.is_used_keyword_conditional(edition) {
2720+
Some(kw)
2721+
} else {
2722+
None
2723+
}
27062724
})
27072725
.collect()
2708-
}
2709-
}
2710-
2711-
impl Iterator for AllKeywords {
2712-
type Item = Symbol;
2713-
2714-
fn next(&mut self) -> Option<Self::Item> {
2715-
if self.curr_idx <= self.end_idx {
2716-
let keyword = Symbol::new(self.curr_idx);
2717-
self.curr_idx += 1;
2718-
Some(keyword)
2719-
} else {
2720-
None
2721-
}
2722-
}
27232726
}

src/tools/rustfmt/src/parse/macros/mod.rs

+12-75
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ use rustc_ast::{ast, ptr};
44
use rustc_parse::MACRO_ARGUMENTS;
55
use rustc_parse::parser::{ForceCollect, Parser, Recovery};
66
use rustc_session::parse::ParseSess;
7-
use rustc_span::Symbol;
8-
use rustc_span::symbol::{self, kw};
7+
use rustc_span::symbol;
98

109
use crate::macros::MacroArg;
1110
use crate::rewrite::RewriteContext;
@@ -82,18 +81,18 @@ pub(crate) struct ParsedMacroArgs {
8281
}
8382

8483
fn check_keyword<'a, 'b: 'a>(parser: &'a mut Parser<'b>) -> Option<MacroArg> {
85-
for &keyword in RUST_KW.iter() {
86-
if parser.token.is_keyword(keyword)
87-
&& parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma)
88-
{
89-
parser.bump();
90-
return Some(MacroArg::Keyword(
91-
symbol::Ident::with_dummy_span(keyword),
92-
parser.prev_token.span,
93-
));
94-
}
84+
if parser.token.is_any_keyword()
85+
&& parser.look_ahead(1, |t| *t == TokenKind::Eof || *t == TokenKind::Comma)
86+
{
87+
let keyword = parser.token.ident().unwrap().0.name;
88+
parser.bump();
89+
Some(MacroArg::Keyword(
90+
symbol::Ident::with_dummy_span(keyword),
91+
parser.prev_token.span,
92+
))
93+
} else {
94+
None
9595
}
96-
None
9796
}
9897

9998
pub(crate) fn parse_macro_args(
@@ -169,65 +168,3 @@ pub(crate) fn parse_expr(
169168
let mut parser = build_parser(context, tokens);
170169
parser.parse_expr().ok()
171170
}
172-
173-
const RUST_KW: [Symbol; 59] = [
174-
kw::PathRoot,
175-
kw::DollarCrate,
176-
kw::Underscore,
177-
kw::As,
178-
kw::Box,
179-
kw::Break,
180-
kw::Const,
181-
kw::Continue,
182-
kw::Crate,
183-
kw::Else,
184-
kw::Enum,
185-
kw::Extern,
186-
kw::False,
187-
kw::Fn,
188-
kw::For,
189-
kw::If,
190-
kw::Impl,
191-
kw::In,
192-
kw::Let,
193-
kw::Loop,
194-
kw::Match,
195-
kw::Mod,
196-
kw::Move,
197-
kw::Mut,
198-
kw::Pub,
199-
kw::Ref,
200-
kw::Return,
201-
kw::SelfLower,
202-
kw::SelfUpper,
203-
kw::Static,
204-
kw::Struct,
205-
kw::Super,
206-
kw::Trait,
207-
kw::True,
208-
kw::Type,
209-
kw::Unsafe,
210-
kw::Use,
211-
kw::Where,
212-
kw::While,
213-
kw::Abstract,
214-
kw::Become,
215-
kw::Do,
216-
kw::Final,
217-
kw::Macro,
218-
kw::Override,
219-
kw::Priv,
220-
kw::Typeof,
221-
kw::Unsized,
222-
kw::Virtual,
223-
kw::Yield,
224-
kw::Dyn,
225-
kw::Async,
226-
kw::Try,
227-
kw::UnderscoreLifetime,
228-
kw::StaticLifetime,
229-
kw::Auto,
230-
kw::Catch,
231-
kw::Default,
232-
kw::Union,
233-
];

0 commit comments

Comments
 (0)