Skip to content

Commit f9112c6

Browse files
committed
Implement RFC 3503: frontmatters
Supersedes rust-lang#137193
1 parent 65fa0ab commit f9112c6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+787
-19
lines changed

Diff for: compiler/rustc_ast_passes/src/feature_gate.rs

+1
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {
514514
gate_all!(contracts_internals, "contract internal machinery is for internal use only");
515515
gate_all!(where_clause_attrs, "attributes in `where` clause are unstable");
516516
gate_all!(super_let, "`super let` is experimental");
517+
gate_all!(frontmatter, "frontmatters are experimental");
517518

518519
if !visitor.features.never_patterns() {
519520
if let Some(spans) = spans.get(&sym::never_patterns) {

Diff for: compiler/rustc_feature/src/unstable.rs

+2
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,8 @@ declare_features! (
502502
(incomplete, fn_delegation, "1.76.0", Some(118212)),
503503
/// Allows impls for the Freeze trait.
504504
(internal, freeze_impls, "1.78.0", Some(121675)),
505+
/// Frontmatter `---` blocks for use by external tools.
506+
(unstable, frontmatter, "CURRENT_RUSTC_VERSION", Some(136889)),
505507
/// Allows defining gen blocks and `gen fn`.
506508
(unstable, gen_blocks, "1.75.0", Some(117078)),
507509
/// Infer generic args for both consts and types.

Diff for: compiler/rustc_lexer/src/cursor.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,19 @@ pub struct Cursor<'a> {
88
len_remaining: usize,
99
/// Iterator over chars. Slightly faster than a &str.
1010
chars: Chars<'a>,
11+
pub(crate) frontmatter_allowed: bool,
1112
#[cfg(debug_assertions)]
1213
prev: char,
1314
}
1415

1516
pub(crate) const EOF_CHAR: char = '\0';
1617

1718
impl<'a> Cursor<'a> {
18-
pub fn new(input: &'a str) -> Cursor<'a> {
19+
pub fn new(input: &'a str, frontmatter_allowed: bool) -> Cursor<'a> {
1920
Cursor {
2021
len_remaining: input.len(),
2122
chars: input.chars(),
23+
frontmatter_allowed,
2224
#[cfg(debug_assertions)]
2325
prev: EOF_CHAR,
2426
}
@@ -95,6 +97,11 @@ impl<'a> Cursor<'a> {
9597
Some(c)
9698
}
9799

100+
/// Moves to a substring by a number of bytes.
101+
pub(crate) fn bump_bytes(&mut self, n: usize) {
102+
self.chars = self.as_str()[n..].chars();
103+
}
104+
98105
/// Eats symbols while predicate returns true or until the end of file is reached.
99106
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
100107
// It was tried making optimized version of this for eg. line comments, but

Diff for: compiler/rustc_lexer/src/lib.rs

+115-6
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,27 @@ impl Token {
5757
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
5858
pub enum TokenKind {
5959
/// A line comment, e.g. `// comment`.
60-
LineComment { doc_style: Option<DocStyle> },
60+
LineComment {
61+
doc_style: Option<DocStyle>,
62+
},
6163

6264
/// A block comment, e.g. `/* block comment */`.
6365
///
6466
/// Block comments can be recursive, so a sequence like `/* /* */`
6567
/// will not be considered terminated and will result in a parsing error.
66-
BlockComment { doc_style: Option<DocStyle>, terminated: bool },
68+
BlockComment {
69+
doc_style: Option<DocStyle>,
70+
terminated: bool,
71+
},
6772

6873
/// Any whitespace character sequence.
6974
Whitespace,
7075

76+
Frontmatter {
77+
has_invalid_preceding_whitespace: bool,
78+
invalid_infostring: bool,
79+
},
80+
7181
/// An identifier or keyword, e.g. `ident` or `continue`.
7282
Ident,
7383

@@ -109,10 +119,15 @@ pub enum TokenKind {
109119
/// this type will need to check for and reject that case.
110120
///
111121
/// See [LiteralKind] for more details.
112-
Literal { kind: LiteralKind, suffix_start: u32 },
122+
Literal {
123+
kind: LiteralKind,
124+
suffix_start: u32,
125+
},
113126

114127
/// A lifetime, e.g. `'a`.
115-
Lifetime { starts_with_number: bool },
128+
Lifetime {
129+
starts_with_number: bool,
130+
},
116131

117132
/// `;`
118133
Semi,
@@ -280,7 +295,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
280295
#[inline]
281296
pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
282297
debug_assert!(!input.is_empty());
283-
let mut cursor = Cursor::new(input);
298+
let mut cursor = Cursor::new(input, false);
284299
// Move past the leading `r` or `br`.
285300
for _ in 0..prefix_len {
286301
cursor.bump().unwrap();
@@ -290,7 +305,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
290305

291306
/// Creates an iterator that produces tokens from the input string.
292307
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
293-
let mut cursor = Cursor::new(input);
308+
let mut cursor = Cursor::new(input, false);
294309
std::iter::from_fn(move || {
295310
let token = cursor.advance_token();
296311
if token.kind != TokenKind::Eof { Some(token) } else { None }
@@ -361,7 +376,30 @@ impl Cursor<'_> {
361376
Some(c) => c,
362377
None => return Token::new(TokenKind::Eof, 0),
363378
};
379+
364380
let token_kind = match first_char {
381+
c if self.frontmatter_allowed && is_whitespace(c) => {
382+
let mut last = first_char;
383+
while is_whitespace(self.first()) {
384+
let Some(c) = self.bump() else {
385+
break;
386+
};
387+
last = c;
388+
}
389+
// invalid frontmatter opening as whitespace preceding it isn't newline.
390+
// combine the whitespace and the frontmatter to a single token as we shall
391+
// error later.
392+
if last != '\n' && self.as_str().starts_with("---") {
393+
self.bump();
394+
self.frontmatter(true)
395+
} else {
396+
Whitespace
397+
}
398+
}
399+
'-' if self.frontmatter_allowed && self.as_str().starts_with("--") => {
400+
// happy path
401+
self.frontmatter(false)
402+
}
365403
// Slash, comment or block comment.
366404
'/' => match self.first() {
367405
'/' => self.line_comment(),
@@ -464,11 +502,82 @@ impl Cursor<'_> {
464502
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
465503
_ => Unknown,
466504
};
505+
if self.frontmatter_allowed {
506+
self.frontmatter_allowed = matches!(token_kind, Whitespace);
507+
}
467508
let res = Token::new(token_kind, self.pos_within_token());
468509
self.reset_pos_within_token();
469510
res
470511
}
471512

513+
/// Given that one `-` was eaten, eat the rest of the frontmatter.
514+
fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
515+
debug_assert_eq!('-', self.prev());
516+
517+
let pos = self.pos_within_token();
518+
self.eat_while(|c| c == '-');
519+
520+
// one `-` is eaten by the caller.
521+
let length_opening = self.pos_within_token() - pos + 1;
522+
523+
// must be ensured by the caller
524+
debug_assert!(length_opening >= 3);
525+
526+
self.eat_identifier();
527+
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
528+
let invalid_infostring = self.first() != '\n';
529+
530+
let mut s = self.as_str();
531+
let mut found = false;
532+
while let Some(closing) = s.find(&"-".repeat(length_opening as usize)) {
533+
let preceding_chars_start = s[..closing].rfind("\n").map_or(0, |i| i + 1);
534+
if s[preceding_chars_start..closing].chars().all(is_whitespace) {
535+
// candidate found
536+
self.bump_bytes(closing);
537+
// in case like
538+
// ---cargo
539+
// --- blahblah
540+
// or
541+
// ---cargo
542+
// ----
543+
// combine all of that into this frontmatter token so that it gets detected later.
544+
self.eat_until(b'\n');
545+
found = true;
546+
break;
547+
} else {
548+
s = &s[closing + length_opening as usize..];
549+
}
550+
}
551+
552+
if !found {
553+
// recovery strategy: a closing statement might have preceding whitespace/newline
554+
// but not have enough dashes to properly close. In this case, we eat until there,
555+
// and report a mismatch in the parser.
556+
let mut potential_closing = None;
557+
let mut rest = self.as_str();
558+
while let Some(closing) = rest.find("---") {
559+
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
560+
if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
561+
// candidate found
562+
potential_closing = Some(closing);
563+
break;
564+
} else {
565+
rest = &rest[closing + 3..];
566+
}
567+
}
568+
if let Some(potential_closing) = potential_closing {
569+
// bump to the potential closing, and eat everything on that line.
570+
self.bump_bytes(potential_closing);
571+
self.eat_until(b'\n');
572+
} else {
573+
// eat everything. this will get reported as an unclosed frontmatter.
574+
self.eat_while(|_| true);
575+
}
576+
}
577+
578+
Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
579+
}
580+
472581
fn line_comment(&mut self) -> TokenKind {
473582
debug_assert!(self.prev() == '/' && self.first() == '/');
474583
self.bump();

Diff for: compiler/rustc_lexer/src/tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use super::*;
44

55
fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
66
let s = &format!("r{}", s);
7-
let mut cursor = Cursor::new(s);
7+
let mut cursor = Cursor::new(s, false);
88
cursor.bump();
99
let res = cursor.raw_double_quoted_string(0);
1010
assert_eq!(res, expected);

Diff for: compiler/rustc_parse/messages.ftl

+13
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,19 @@ parse_forgot_paren = perhaps you forgot parentheses?
297297
parse_found_expr_would_be_stmt = expected expression, found `{$token}`
298298
.label = expected expression
299299
300+
parse_frontmatter_extra_characters_after_close = extra characters after frontmatter close are not allowed
301+
parse_frontmatter_invalid_close_preceding_whitespace = invalid preceding whitespace for frontmatter close
302+
.note = frontmatter close should not be preceded by whitespace
303+
parse_frontmatter_invalid_infostring = invalid infostring for frontmatter
304+
.note = frontmatter infostrings must be a single identifier immediately following the opening
305+
parse_frontmatter_invalid_opening_preceding_whitespace = invalid preceding whitespace for frontmatter opening
306+
.note = frontmatter opening should not be preceded by whitespace
307+
parse_frontmatter_length_mismatch = frontmatter close does not match the opening
308+
.label_opening = the opening here has {$len_opening} dashes...
309+
.label_close = ...while the close has {$len_close} dashes
310+
parse_frontmatter_unclosed = unclosed frontmatter
311+
.note = frontmatter opening here was not closed
312+
300313
parse_function_body_equals_expr = function body cannot be `= expression;`
301314
.suggestion = surround the expression with `{"{"}` and `{"}"}` instead of `=` and `;`
302315

Diff for: compiler/rustc_parse/src/errors.rs

+55
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,61 @@ pub(crate) struct FoundExprWouldBeStmt {
730730
pub suggestion: ExprParenthesesNeeded,
731731
}
732732

733+
#[derive(Diagnostic)]
734+
#[diag(parse_frontmatter_extra_characters_after_close)]
735+
pub(crate) struct FrontmatterExtraCharactersAfterClose {
736+
#[primary_span]
737+
pub span: Span,
738+
}
739+
740+
#[derive(Diagnostic)]
741+
#[diag(parse_frontmatter_invalid_infostring)]
742+
#[note]
743+
pub(crate) struct FrontmatterInvalidInfostring {
744+
#[primary_span]
745+
pub span: Span,
746+
}
747+
748+
#[derive(Diagnostic)]
749+
#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
750+
pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
751+
#[primary_span]
752+
pub span: Span,
753+
#[note]
754+
pub note_span: Span,
755+
}
756+
757+
#[derive(Diagnostic)]
758+
#[diag(parse_frontmatter_unclosed)]
759+
pub(crate) struct FrontmatterUnclosed {
760+
#[primary_span]
761+
pub span: Span,
762+
#[note]
763+
pub note_span: Span,
764+
}
765+
766+
#[derive(Diagnostic)]
767+
#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
768+
pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
769+
#[primary_span]
770+
pub span: Span,
771+
#[note]
772+
pub note_span: Span,
773+
}
774+
775+
#[derive(Diagnostic)]
776+
#[diag(parse_frontmatter_length_mismatch)]
777+
pub(crate) struct FrontmatterLengthMismatch {
778+
#[primary_span]
779+
pub span: Span,
780+
#[label(parse_label_opening)]
781+
pub opening: Span,
782+
#[label(parse_label_close)]
783+
pub close: Span,
784+
pub len_opening: usize,
785+
pub len_close: usize,
786+
}
787+
733788
#[derive(Diagnostic)]
734789
#[diag(parse_leading_plus_not_supported)]
735790
pub(crate) struct LeadingPlusNotSupported {

0 commit comments

Comments
 (0)