@@ -260,16 +260,15 @@ impl<'a> Tokenizer<'a> {
         //println!("next_token: {:?}", chars.peek());
         match chars.peek() {
             Some(&ch) => match ch {
-                ' ' => {
-                    chars.next();
-                    Ok(Some(Token::Whitespace(Whitespace::Space)))
-                }
-                '\t' => {
-                    chars.next();
-                    Ok(Some(Token::Whitespace(Whitespace::Tab)))
-                }
-                '\n' => {
+                ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
+                '\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
+                '\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
+                '\r' => {
+                    // Emit a single Whitespace::Newline token for \r and \r\n
                     chars.next();
+                    if let Some('\n') = chars.peek() {
+                        chars.next();
+                    }
                     Ok(Some(Token::Whitespace(Whitespace::Newline)))
                 }
                 'N' => {
@@ -749,6 +748,26 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_newlines() {
+        let sql = String::from("line1\nline2\rline3\r\nline4\r");
+
+        let dialect = GenericSqlDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::make_word("line1", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line2", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line3", None),
+            Token::Whitespace(Whitespace::Newline),
+            Token::make_word("line4", None),
+            Token::Whitespace(Whitespace::Newline),
+        ];
+        compare(expected, tokens);
+    }
+
     fn compare(expected: Vec<Token>, actual: Vec<Token>) {
         //println!("------------------------------");
         //println!("tokens = {:?}", actual);
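
Note: the rewritten whitespace arms delegate to the tokenizer's `consume_and_return` helper, which is defined elsewhere in the file and not shown in these hunks. A minimal sketch of what such a helper presumably looks like, inferred from the call sites and the `Result<Option<Token>, _>` shape the surrounding arms return (the `TokenizerError` name is an assumption here):

    // Hypothetical sketch; the actual helper lives outside this diff.
    // Advances the character iterator past the single character that
    // was just matched and wraps the given token in Ok(Some(..)).
    fn consume_and_return(
        &self,
        chars: &mut Peekable<Chars<'_>>,
        t: Token,
    ) -> Result<Option<Token>, TokenizerError> {
        chars.next();
        Ok(Some(t))
    }

This keeps each single-character whitespace arm to one line; only `'\r'` needs a block, since it has to peek ahead and fold a following `'\n'` into the same `Whitespace::Newline` token.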