Skip to content

Commit 477de79

Browse files
bors[bot]aochagavia
andcommitted
Merge #222
222: Validate string literals r=aochagavia a=aochagavia Related: #6 (some validators are still missing), fixes #27 Co-authored-by: Adolfo Ochagavía <[email protected]>
2 parents 5a9150d + 3b4c02c commit 477de79

File tree

9 files changed

+621
-274
lines changed

9 files changed

+621
-274
lines changed

crates/ra_syntax/src/ast/generated.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3236,6 +3236,43 @@ impl<'a> AstNode<'a> for Stmt<'a> {
32363236

32373237
impl<'a> Stmt<'a> {}
32383238

3239+
// String
3240+
#[derive(Debug, Clone, Copy,)]
3241+
pub struct StringNode<R: TreeRoot<RaTypes> = OwnedRoot> {
3242+
pub(crate) syntax: SyntaxNode<R>,
3243+
}
3244+
pub type String<'a> = StringNode<RefRoot<'a>>;
3245+
3246+
impl<R1: TreeRoot<RaTypes>, R2: TreeRoot<RaTypes>> PartialEq<StringNode<R1>> for StringNode<R2> {
3247+
fn eq(&self, other: &StringNode<R1>) -> bool { self.syntax == other.syntax }
3248+
}
3249+
impl<R: TreeRoot<RaTypes>> Eq for StringNode<R> {}
3250+
impl<R: TreeRoot<RaTypes>> Hash for StringNode<R> {
3251+
fn hash<H: Hasher>(&self, state: &mut H) { self.syntax.hash(state) }
3252+
}
3253+
3254+
impl<'a> AstNode<'a> for String<'a> {
3255+
fn cast(syntax: SyntaxNodeRef<'a>) -> Option<Self> {
3256+
match syntax.kind() {
3257+
STRING => Some(String { syntax }),
3258+
_ => None,
3259+
}
3260+
}
3261+
fn syntax(self) -> SyntaxNodeRef<'a> { self.syntax }
3262+
}
3263+
3264+
impl<R: TreeRoot<RaTypes>> StringNode<R> {
3265+
pub fn borrowed(&self) -> String {
3266+
StringNode { syntax: self.syntax.borrowed() }
3267+
}
3268+
pub fn owned(&self) -> StringNode {
3269+
StringNode { syntax: self.syntax.owned() }
3270+
}
3271+
}
3272+
3273+
3274+
impl<'a> String<'a> {}
3275+
32393276
// StructDef
32403277
#[derive(Debug, Clone, Copy,)]
32413278
pub struct StructDefNode<R: TreeRoot<RaTypes> = OwnedRoot> {

crates/ra_syntax/src/ast/mod.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
mod generated;
22

33
use std::marker::PhantomData;
4+
use std::string::String as RustString;
45

56
use itertools::Itertools;
67

@@ -76,7 +77,7 @@ pub trait DocCommentsOwner<'a>: AstNode<'a> {
7677

7778
/// Returns the textual content of a doc comment block as a single string.
7879
/// That is, strips leading `///` and joins lines
79-
fn doc_comment_text(self) -> String {
80+
fn doc_comment_text(self) -> RustString {
8081
self.doc_comments()
8182
.map(|comment| {
8283
let prefix = comment.prefix();
@@ -133,6 +134,12 @@ impl<'a> Char<'a> {
133134
}
134135
}
135136

137+
impl<'a> String<'a> {
138+
pub fn text(&self) -> &SmolStr {
139+
&self.syntax().leaf_text().unwrap()
140+
}
141+
}
142+
136143
impl<'a> Comment<'a> {
137144
pub fn text(&self) -> &SmolStr {
138145
self.syntax().leaf_text().unwrap()

crates/ra_syntax/src/grammar.ron

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,7 @@ Grammar(
411411
"PrefixExpr": (),
412412
"RangeExpr": (),
413413
"BinExpr": (),
414+
"String": (),
414415
"Char": (),
415416
"Literal": (),
416417

crates/ra_syntax/src/string_lexing/mod.rs renamed to crates/ra_syntax/src/string_lexing.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,68 @@
11
use self::CharComponentKind::*;
22
use rowan::{TextRange, TextUnit};
33

4+
pub fn parse_string_literal(src: &str) -> StringComponentIterator {
5+
StringComponentIterator {
6+
parser: Parser::new(src),
7+
has_closing_quote: false,
8+
}
9+
}
10+
11+
#[derive(Debug, Eq, PartialEq, Clone)]
12+
pub struct StringComponent {
13+
pub range: TextRange,
14+
pub kind: StringComponentKind,
15+
}
16+
17+
impl StringComponent {
18+
fn new(range: TextRange, kind: StringComponentKind) -> StringComponent {
19+
StringComponent { range, kind }
20+
}
21+
}
22+
23+
#[derive(Debug, Eq, PartialEq, Clone)]
24+
pub enum StringComponentKind {
25+
IgnoreNewline,
26+
Char(CharComponentKind),
27+
}
28+
29+
pub struct StringComponentIterator<'a> {
30+
parser: Parser<'a>,
31+
pub has_closing_quote: bool,
32+
}
33+
34+
impl<'a> Iterator for StringComponentIterator<'a> {
35+
type Item = StringComponent;
36+
fn next(&mut self) -> Option<StringComponent> {
37+
if self.parser.pos == 0 {
38+
assert!(
39+
self.parser.advance() == '"',
40+
"string literal should start with double quotes"
41+
);
42+
}
43+
44+
if let Some(component) = self.parser.parse_string_component() {
45+
return Some(component);
46+
}
47+
48+
// We get here when there are no char components left to parse
49+
if self.parser.peek() == Some('"') {
50+
self.parser.advance();
51+
self.has_closing_quote = true;
52+
}
53+
54+
assert!(
55+
self.parser.peek() == None,
56+
"string literal should leave no unparsed input: src = {}, pos = {}, length = {}",
57+
self.parser.src,
58+
self.parser.pos,
59+
self.parser.src.len()
60+
);
61+
62+
None
63+
}
64+
}
65+
466
pub fn parse_char_literal(src: &str) -> CharComponentIterator {
567
CharComponentIterator {
668
parser: Parser::new(src),
@@ -93,6 +155,12 @@ impl<'a> Parser<'a> {
93155
next
94156
}
95157

158+
pub fn skip_whitespace(&mut self) {
159+
while self.peek().map(|c| c.is_whitespace()) == Some(true) {
160+
self.advance();
161+
}
162+
}
163+
96164
pub fn get_pos(&self) -> TextUnit {
97165
(self.pos as u32).into()
98166
}
@@ -172,6 +240,51 @@ impl<'a> Parser<'a> {
172240
))
173241
}
174242
}
243+
244+
pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> {
245+
// In string literals, when a `\` occurs immediately before the newline, the `\`,
246+
// the newline, and all whitespace at the beginning of the next line are ignored
247+
match self.peek() {
248+
Some('\n') | Some('\r') => {
249+
self.skip_whitespace();
250+
Some(StringComponent::new(
251+
TextRange::from_to(start, self.get_pos()),
252+
StringComponentKind::IgnoreNewline,
253+
))
254+
}
255+
_ => None,
256+
}
257+
}
258+
259+
pub fn parse_string_component(&mut self) -> Option<StringComponent> {
260+
let next = self.peek()?;
261+
262+
// Ignore string close
263+
if next == '"' {
264+
return None;
265+
}
266+
267+
let start = self.get_pos();
268+
self.advance();
269+
270+
if next == '\\' {
271+
// Strings can use `\` to ignore newlines, so we first try to parse one of those
272+
// before falling back to parsing char escapes
273+
self.parse_ignore_newline(start).or_else(|| {
274+
let char_component = self.parse_escape(start);
275+
Some(StringComponent::new(
276+
char_component.range,
277+
StringComponentKind::Char(char_component.kind),
278+
))
279+
})
280+
} else {
281+
let end = self.get_pos();
282+
Some(StringComponent::new(
283+
TextRange::from_to(start, end),
284+
StringComponentKind::Char(CodePoint),
285+
))
286+
}
287+
}
175288
}
176289

177290
#[cfg(test)]

0 commit comments

Comments
 (0)