Skip to content

Commit 327e6cd

Browse files
committed
Report an error for unterminated string literals
...updated the TODOs regarding single-quoted literals parsing while at it.
1 parent 40853fe commit 327e6cd

File tree

3 files changed

+48
-11
lines changed

3 files changed

+48
-11
lines changed

src/parser.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,6 +1324,16 @@ impl Parser {
13241324
}
13251325
// MSSQL supports single-quoted strings as aliases for columns
13261326
// We accept them as table aliases too, although MSSQL does not.
1327+
//
1328+
// Note that this conflicts with an obscure rule from the SQL
1329+
// standard, which we don't implement:
1330+
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
1331+
// "[Obscure Rule] SQL allows you to break a long <character
1332+
// string literal> up into two or more smaller <character string
1333+
// literal>s, split by a <separator> that includes a newline
1334+
// character. When it sees such a <literal>, your DBMS will
1335+
// ignore the <separator> and treat the multiple strings as
1336+
// a single <literal>."
13271337
Some(Token::SingleQuotedString(ref s)) => Ok(Some(Ident::with_quote('\'', s.clone()))),
13281338
not_an_ident => {
13291339
if after_as {

src/tokenizer.rs

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ impl<'a> Tokenizer<'a> {
278278
match chars.peek() {
279279
Some('\'') => {
280280
// N'...' - a <national character string literal>
281-
let s = self.tokenize_single_quoted_string(chars);
281+
let s = self.tokenize_single_quoted_string(chars)?;
282282
Ok(Some(Token::NationalStringLiteral(s)))
283283
}
284284
_ => {
@@ -295,7 +295,7 @@ impl<'a> Tokenizer<'a> {
295295
match chars.peek() {
296296
Some('\'') => {
297297
// X'...' - a <binary string literal>
298-
let s = self.tokenize_single_quoted_string(chars);
298+
let s = self.tokenize_single_quoted_string(chars)?;
299299
Ok(Some(Token::HexStringLiteral(s)))
300300
}
301301
_ => {
@@ -313,7 +313,7 @@ impl<'a> Tokenizer<'a> {
313313
}
314314
// string
315315
'\'' => {
316-
let s = self.tokenize_single_quoted_string(chars);
316+
let s = self.tokenize_single_quoted_string(chars)?;
317317
Ok(Some(Token::SingleQuotedString(s)))
318318
}
319319
// delimited (quoted) identifier
@@ -431,11 +431,10 @@ impl<'a> Tokenizer<'a> {
431431
}
432432

433433
/// Read a single-quoted string, starting with the opening quote.
434-
fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars<'_>>) -> String {
435-
//TODO: handle escaped quotes in string
436-
//TODO: handle newlines in string
437-
//TODO: handle EOF before terminating quote
438-
//TODO: handle 'string' <white space> 'string continuation'
434+
fn tokenize_single_quoted_string(
435+
&self,
436+
chars: &mut Peekable<Chars<'_>>,
437+
) -> Result<String, TokenizerError> {
439438
let mut s = String::new();
440439
chars.next(); // consume the opening quote
441440
while let Some(&ch) = chars.peek() {
@@ -447,7 +446,7 @@ impl<'a> Tokenizer<'a> {
447446
s.push('\'');
448447
chars.next();
449448
} else {
450-
break;
449+
return Ok(s);
451450
}
452451
}
453452
_ => {
@@ -456,7 +455,10 @@ impl<'a> Tokenizer<'a> {
456455
}
457456
}
458457
}
459-
s
458+
Err(TokenizerError(format!(
459+
"Unterminated string literal at Line: {}, Col: {}",
460+
self.line, self.col
461+
)))
460462
}
461463

462464
fn tokenize_multiline_comment(
@@ -640,6 +642,31 @@ mod tests {
640642
compare(expected, tokens);
641643
}
642644

645+
#[test]
646+
fn tokenize_newline_in_string_literal() {
647+
let sql = String::from("'foo\r\nbar\nbaz'");
648+
649+
let dialect = GenericDialect {};
650+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
651+
let tokens = tokenizer.tokenize().unwrap();
652+
let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
653+
compare(expected, tokens);
654+
}
655+
656+
#[test]
657+
fn tokenize_unterminated_string_literal() {
658+
let sql = String::from("select 'foo");
659+
660+
let dialect = GenericDialect {};
661+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
662+
assert_eq!(
663+
tokenizer.tokenize(),
664+
Err(TokenizerError(
665+
"Unterminated string literal at Line: 1, Col: 8".to_string()
666+
))
667+
);
668+
}
669+
643670
#[test]
644671
fn tokenize_invalid_string_cols() {
645672
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");

tests/sqlparser_common.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1031,7 +1031,7 @@ fn parse_create_external_table() {
10311031
name VARCHAR(100) NOT NULL,\
10321032
lat DOUBLE NULL,\
10331033
lng DOUBLE)\
1034-
STORED AS TEXTFILE LOCATION '/tmp/example.csv";
1034+
STORED AS TEXTFILE LOCATION '/tmp/example.csv'";
10351035
let ast = one_statement_parses_to(
10361036
sql,
10371037
"CREATE EXTERNAL TABLE uk_cities (\

0 commit comments

Comments
 (0)