Skip to content

Commit 23405bb

Browse files
committed
Auto merge of #113476 - fee1-dead-contrib:c-str-lit, r=petrochenkov
Reimplement C-str literals. This reverts #113334, cc `@fmease`. While converting lexer tokens to AST tokens in `rustc_parse`, we check the edition of the token's span. If the edition is earlier than 2021, we split the token into two: one being the identifier (the `c` or `cr` prefix) and the other being the string literal.
2 parents ff8fe76 + a0376e9 commit 23405bb

File tree

12 files changed

+85
-97
lines changed

12 files changed

+85
-97
lines changed

compiler/rustc_lexer/src/cursor.rs

+4
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ impl<'a> Cursor<'a> {
2424
}
2525
}
2626

27+
pub fn as_str(&self) -> &'a str {
28+
self.chars.as_str()
29+
}
30+
2731
/// Returns the last eaten symbol (or `'\0'` in release builds).
2832
/// (For debug assertions only.)
2933
pub(crate) fn prev(&self) -> char {

compiler/rustc_lexer/src/lib.rs

+7
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,13 @@ impl Cursor<'_> {
367367
Some(|terminated| Byte { terminated }),
368368
),
369369

370+
// c-string literal, raw c-string literal or identifier.
371+
'c' => self.c_or_byte_string(
372+
|terminated| CStr { terminated },
373+
|n_hashes| RawCStr { n_hashes },
374+
None,
375+
),
376+
370377
// Identifier (this should be checked after other variant that can
371378
// start as identifier).
372379
c if is_id_start(c) => self.ident_or_unknown_prefix(),

compiler/rustc_parse/src/lexer/mod.rs

+32-9
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream;
99
use rustc_ast::util::unicode::contains_text_flow_control_chars;
1010
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
1111
use rustc_lexer::unescape::{self, EscapeError, Mode};
12-
use rustc_lexer::Cursor;
1312
use rustc_lexer::{Base, DocStyle, RawStrError};
13+
use rustc_lexer::{Cursor, LiteralKind};
1414
use rustc_session::lint::builtin::{
1515
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
1616
};
@@ -118,6 +118,7 @@ impl<'a> StringReader<'a> {
118118
let mut swallow_next_invalid = 0;
119119
// Skip trivial (whitespace & comments) tokens
120120
loop {
121+
let str_before = self.cursor.as_str();
121122
let token = self.cursor.advance_token();
122123
let start = self.pos;
123124
self.pos = self.pos + BytePos(token.len);
@@ -165,10 +166,7 @@ impl<'a> StringReader<'a> {
165166
continue;
166167
}
167168
rustc_lexer::TokenKind::Ident => {
168-
let sym = nfc_normalize(self.str_from(start));
169-
let span = self.mk_sp(start, self.pos);
170-
self.sess.symbol_gallery.insert(sym, span);
171-
token::Ident(sym, false)
169+
self.ident(start)
172170
}
173171
rustc_lexer::TokenKind::RawIdent => {
174172
let sym = nfc_normalize(self.str_from(start + BytePos(2)));
@@ -182,10 +180,7 @@ impl<'a> StringReader<'a> {
182180
}
183181
rustc_lexer::TokenKind::UnknownPrefix => {
184182
self.report_unknown_prefix(start);
185-
let sym = nfc_normalize(self.str_from(start));
186-
let span = self.mk_sp(start, self.pos);
187-
self.sess.symbol_gallery.insert(sym, span);
188-
token::Ident(sym, false)
183+
self.ident(start)
189184
}
190185
rustc_lexer::TokenKind::InvalidIdent
191186
// Do not recover an identifier with emoji if the codepoint is a confusable
@@ -203,6 +198,27 @@ impl<'a> StringReader<'a> {
203198
.push(span);
204199
token::Ident(sym, false)
205200
}
201+
// split up (raw) c string literals to an ident and a string literal when edition < 2021.
202+
rustc_lexer::TokenKind::Literal {
203+
kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
204+
suffix_start: _,
205+
} if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
206+
let prefix_len = match kind {
207+
LiteralKind::CStr { .. } => 1,
208+
LiteralKind::RawCStr { .. } => 2,
209+
_ => unreachable!(),
210+
};
211+
212+
// reset the state so that only the prefix ("c" or "cr")
213+
// was consumed.
214+
let lit_start = start + BytePos(prefix_len);
215+
self.pos = lit_start;
216+
self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
217+
218+
self.report_unknown_prefix(start);
219+
let prefix_span = self.mk_sp(start, lit_start);
220+
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
221+
}
206222
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
207223
let suffix_start = start + BytePos(suffix_start);
208224
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
@@ -317,6 +333,13 @@ impl<'a> StringReader<'a> {
317333
}
318334
}
319335

336+
fn ident(&self, start: BytePos) -> TokenKind {
337+
let sym = nfc_normalize(self.str_from(start));
338+
let span = self.mk_sp(start, self.pos);
339+
self.sess.symbol_gallery.insert(sym, span);
340+
token::Ident(sym, false)
341+
}
342+
320343
fn struct_fatal_span_char(
321344
&self,
322345
from_pos: BytePos,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// force-host
2+
// edition: 2018
3+
// no-prefer-dynamic
4+
#![crate_type = "proc-macro"]
5+
6+
extern crate proc_macro;
7+
8+
use proc_macro::TokenStream;
9+
use std::str::FromStr;
10+
11+
#[proc_macro]
12+
pub fn number_of_tokens(_: TokenStream) -> TokenStream {
13+
TokenStream::from_str("c\"\"").unwrap().into_iter().count().to_string().parse().unwrap()
14+
}

tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// FIXME(c_str_literals): This should be `run-pass`
2-
// known-bug: #113333
1+
// run-pass
32
// edition: 2021
43

54
#![feature(c_str_literals)]

tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr

-25
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// even if this crate is edition 2021, proc macros compiled using older
2+
// editions should still be able to observe the pre-2021 token behavior
3+
//
4+
// adapted from tests/ui/rust-2021/reserved-prefixes-via-macro.rs
5+
6+
// edition: 2021
7+
// check-pass
8+
9+
// aux-build: count.rs
10+
extern crate count;
11+
12+
const _: () = {
13+
assert!(count::number_of_tokens!() == 2);
14+
};
15+
16+
fn main() {}
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,21 @@
1-
error: prefix `c` is unknown
1+
error[E0658]: `c".."` literals are experimental
22
--> $DIR/gate.rs:10:5
33
|
44
LL | c"foo";
5-
| ^ unknown prefix
5+
| ^^^^^^
66
|
7-
= note: prefixed identifiers and literals are reserved since Rust 2021
8-
help: consider inserting whitespace here
9-
|
10-
LL | c "foo";
11-
| +
7+
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
8+
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
129

13-
error: prefix `c` is unknown
10+
error[E0658]: `c".."` literals are experimental
1411
--> $DIR/gate.rs:13:8
1512
|
1613
LL | m!(c"test");
17-
| ^ unknown prefix
18-
|
19-
= note: prefixed identifiers and literals are reserved since Rust 2021
20-
help: consider inserting whitespace here
14+
| ^^^^^^^
2115
|
22-
LL | m!(c "test");
23-
| +
24-
25-
error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `"foo"`
26-
--> $DIR/gate.rs:10:6
27-
|
28-
LL | c"foo";
29-
| ^^^^^ expected one of 8 possible tokens
16+
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
17+
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
3018

31-
error: aborting due to 3 previous errors
19+
error: aborting due to 2 previous errors
3220

21+
For more information about this error, try `rustc --explain E0658`.
-137 Bytes
Binary file not shown.
Binary file not shown.

tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// FIXME(c_str_literals): This should be `run-pass`
2-
// known-bug: #113333
1+
// run-pass
32
// edition: 2021
43

54
#![feature(c_str_literals)]

tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr

-38
This file was deleted.

0 commit comments

Comments
 (0)