@@ -278,7 +278,7 @@ impl<'a> Tokenizer<'a> {
                 match chars.peek() {
                     Some('\'') => {
                         // N'...' - a <national character string literal>
-                        let s = self.tokenize_single_quoted_string(chars);
+                        let s = self.tokenize_single_quoted_string(chars)?;
                         Ok(Some(Token::NationalStringLiteral(s)))
                     }
                     _ => {
@@ -295,7 +295,7 @@ impl<'a> Tokenizer<'a> {
                 match chars.peek() {
                     Some('\'') => {
                         // X'...' - a <binary string literal>
-                        let s = self.tokenize_single_quoted_string(chars);
+                        let s = self.tokenize_single_quoted_string(chars)?;
                         Ok(Some(Token::HexStringLiteral(s)))
                     }
                     _ => {
@@ -313,7 +313,7 @@ impl<'a> Tokenizer<'a> {
             }
             // string
             '\'' => {
-                let s = self.tokenize_single_quoted_string(chars);
+                let s = self.tokenize_single_quoted_string(chars)?;
                 Ok(Some(Token::SingleQuotedString(s)))
             }
             // delimited (quoted) identifier
@@ -431,11 +431,10 @@ impl<'a> Tokenizer<'a> {
     }
 
     /// Read a single quoted string, starting with the opening quote.
-    fn tokenize_single_quoted_string(&self, chars: &mut Peekable<Chars<'_>>) -> String {
-        //TODO: handle escaped quotes in string
-        //TODO: handle newlines in string
-        //TODO: handle EOF before terminating quote
-        //TODO: handle 'string' <white space> 'string continuation'
+    fn tokenize_single_quoted_string(
+        &self,
+        chars: &mut Peekable<Chars<'_>>,
+    ) -> Result<String, TokenizerError> {
         let mut s = String::new();
         chars.next(); // consume the opening quote
         while let Some(&ch) = chars.peek() {
@@ -447,7 +446,7 @@ impl<'a> Tokenizer<'a> {
                         s.push('\'');
                         chars.next();
                     } else {
-                        break;
+                        return Ok(s);
                     }
                 }
                 _ => {
@@ -456,7 +455,10 @@ impl<'a> Tokenizer<'a> {
                 }
             }
         }
-        s
+        Err(TokenizerError(format!(
+            "Unterminated string literal at Line: {}, Col: {}",
+            self.line, self.col
+        )))
     }
 
     fn tokenize_multiline_comment(
@@ -640,6 +642,31 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_newline_in_string_literal() {
+        let sql = String::from("'foo\r\nbar\nbaz'");
+
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
+        compare(expected, tokens);
+    }
+
+    #[test]
+    fn tokenize_unterminated_string_literal() {
+        let sql = String::from("select 'foo");
+
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        assert_eq!(
+            tokenizer.tokenize(),
+            Err(TokenizerError(
+                "Unterminated string literal at Line: 1, Col: 8".to_string()
+            ))
+        );
+    }
+
     #[test]
     fn tokenize_invalid_string_cols() {
         let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");
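
As a quick illustration of the behavior change (not part of the diff above), here is a minimal sketch of how calling code might observe the new error. The sqlparser::dialect / sqlparser::tokenizer module paths and the main function are assumptions for the example; Tokenizer, GenericDialect, and tokenize() follow the usage shown in the tests above.

// Minimal usage sketch (assumption: standard sqlparser crate layout; not part of this commit).
use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let dialect = GenericDialect {};
    let sql = "select 'foo"; // note the missing closing quote
    let mut tokenizer = Tokenizer::new(&dialect, sql);
    match tokenizer.tokenize() {
        Ok(tokens) => println!("tokens: {:?}", tokens),
        // With this change, the unterminated literal surfaces as a TokenizerError
        // ("Unterminated string literal at Line: 1, Col: 8" in the test above)
        // rather than being silently returned as a truncated string token.
        Err(e) => eprintln!("tokenizer error: {:?}", e),
    }
}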