Skip to content

Commit f614481

Browse files
authored
Merge pull request apache#165 from nickolay/pr/unterminated-string-literal
Report an error on unterminated string literals (and more)
2 parents 5ad578e + 7d60bfd commit f614481

File tree

5 files changed

+61
-15
lines changed

5 files changed

+61
-15
lines changed

CHANGELOG.md

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
Given that the parser produces a typed AST, any changes to the AST will technically be breaking and thus will result in a `0.(N+1)` version. We document changes that break via addition as "Added".
77

88
## [Unreleased]
9-
Nothing here yet! Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes.
9+
Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented changes.
1010

11+
### Changed
12+
- Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit!
13+
- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem!
14+
15+
### Added
16+
- Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo!
17+
- Support MySQL `LIMIT row_count OFFSET offset` (not followed by `ROW` or `ROWS`) and remember which variant was parsed (#158) - thanks @mjibson!
18+
- Support PostgreSQL `CREATE TABLE IF NOT EXISTS table_name` (#163) - thanks @alex-dukhno!
19+
20+
### Fixed
21+
- Report an error for unterminated string literals (#165)
1122

12-
## [0.5.0] - 2019-10-10
23+
## [0.5.0] - 2019-10-10
1324

1425
### Changed
1526
- Replace the `Value::Long(u64)` and `Value::Double(f64)` variants with `Value::Number(String)` to avoid losing precision when parsing decimal literals (#130) - thanks @benesch!

examples/cli.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212

1313
#![warn(clippy::all)]
1414

15-
use simple_logger;
16-
1715
///! A small command-line app to run the parser.
1816
/// Run with `cargo run --example cli`
1917
use std::fs;

src/parser.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,6 +1324,16 @@ impl Parser {
13241324
}
13251325
// MSSQL supports single-quoted strings as aliases for columns
13261326
// We accept them as table aliases too, although MSSQL does not.
1327+
//
1328+
// Note that this conflicts with an obscure rule from the SQL
1329+
// standard, which we don't implement:
1330+
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
1331+
// "[Obscure Rule] SQL allows you to break a long <character
1332+
// string literal> up into two or more smaller <character string
1333+
// literal>s, split by a <separator> that includes a newline
1334+
// character. When it sees such a <literal>, your DBMS will
1335+
// ignore the <separator> and treat the multiple strings as
1336+
// a single <literal>."
13271337
Some(Token::SingleQuotedString(ref s)) => Ok(Some(Ident::with_quote('\'', s.clone()))),
13281338
not_an_ident => {
13291339
if after_as {

src/tokenizer.rs

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ impl<'a> Tokenizer<'a> {
278278
match chars.peek() {
279279
Some('\'') => {
280280
// N'...' - a <national character string literal>
281-
let s = self.tokenize_single_quoted_string(chars);
281+
let s = self.tokenize_single_quoted_string(chars)?;
282282
Ok(Some(Token::NationalStringLiteral(s)))
283283
}
284284
_ => {
@@ -295,7 +295,7 @@ impl<'a> Tokenizer<'a> {
295295
match chars.peek() {
296296
Some('\'') => {
297297
// X'...' - a <binary string literal>
298-
let s = self.tokenize_single_quoted_string(chars);
298+
let s = self.tokenize_single_quoted_string(chars)?;
299299
Ok(Some(Token::HexStringLiteral(s)))
300300
}
301301
_ => {
@@ -313,7 +313,7 @@ impl<'a> Tokenizer<'a> {
313313
}
314314
// string
315315
'\'' => {
316-
let s = self.tokenize_single_quoted_string(chars);
316+
let s = self.tokenize_single_quoted_string(chars)?;
317317
Ok(Some(Token::SingleQuotedString(s)))
318318
}
319319
// delimited (quoted) identifier
@@ -431,11 +431,10 @@ impl<'a> Tokenizer<'a> {
431431
}
432432

433433
/// Read a single quoted string, starting with the opening quote.
434-
fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars<'_>>) -> String {
435-
//TODO: handle escaped quotes in string
436-
//TODO: handle newlines in string
437-
//TODO: handle EOF before terminating quote
438-
//TODO: handle 'string' <white space> 'string continuation'
434+
fn tokenize_single_quoted_string(
435+
&self,
436+
chars: &mut Peekable<Chars<'_>>,
437+
) -> Result<String, TokenizerError> {
439438
let mut s = String::new();
440439
chars.next(); // consume the opening quote
441440
while let Some(&ch) = chars.peek() {
@@ -447,7 +446,7 @@ impl<'a> Tokenizer<'a> {
447446
s.push('\'');
448447
chars.next();
449448
} else {
450-
break;
449+
return Ok(s);
451450
}
452451
}
453452
_ => {
@@ -456,7 +455,10 @@ impl<'a> Tokenizer<'a> {
456455
}
457456
}
458457
}
459-
s
458+
Err(TokenizerError(format!(
459+
"Unterminated string literal at Line: {}, Col: {}",
460+
self.line, self.col
461+
)))
460462
}
461463

462464
fn tokenize_multiline_comment(
@@ -640,6 +642,31 @@ mod tests {
640642
compare(expected, tokens);
641643
}
642644

645+
#[test]
646+
fn tokenize_newline_in_string_literal() {
647+
let sql = String::from("'foo\r\nbar\nbaz'");
648+
649+
let dialect = GenericDialect {};
650+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
651+
let tokens = tokenizer.tokenize().unwrap();
652+
let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
653+
compare(expected, tokens);
654+
}
655+
656+
#[test]
657+
fn tokenize_unterminated_string_literal() {
658+
let sql = String::from("select 'foo");
659+
660+
let dialect = GenericDialect {};
661+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
662+
assert_eq!(
663+
tokenizer.tokenize(),
664+
Err(TokenizerError(
665+
"Unterminated string literal at Line: 1, Col: 8".to_string()
666+
))
667+
);
668+
}
669+
643670
#[test]
644671
fn tokenize_invalid_string_cols() {
645672
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");

tests/sqlparser_common.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1031,7 +1031,7 @@ fn parse_create_external_table() {
10311031
name VARCHAR(100) NOT NULL,\
10321032
lat DOUBLE NULL,\
10331033
lng DOUBLE)\
1034-
STORED AS TEXTFILE LOCATION '/tmp/example.csv";
1034+
STORED AS TEXTFILE LOCATION '/tmp/example.csv'";
10351035
let ast = one_statement_parses_to(
10361036
sql,
10371037
"CREATE EXTERNAL TABLE uk_cities (\

0 commit comments

Comments
 (0)