@@ -21,6 +21,7 @@ use std::str::Chars;
21
21
22
22
use super :: dialect:: keywords:: { Keyword , ALL_KEYWORDS , ALL_KEYWORDS_INDEX } ;
23
23
use super :: dialect:: Dialect ;
24
+ use super :: dialect:: SnowflakeDialect ;
24
25
#[ cfg( feature = "serde" ) ]
25
26
use serde:: { Deserialize , Serialize } ;
26
27
use std:: fmt;
@@ -209,7 +210,7 @@ pub enum Whitespace {
209
210
Space ,
210
211
Newline ,
211
212
Tab ,
212
- SingleLineComment ( String ) ,
213
+ SingleLineComment { comment : String , prefix : String } ,
213
214
MultiLineComment ( String ) ,
214
215
}
215
216
@@ -219,7 +220,7 @@ impl fmt::Display for Whitespace {
219
220
Whitespace :: Space => f. write_str ( " " ) ,
220
221
Whitespace :: Newline => f. write_str ( "\n " ) ,
221
222
Whitespace :: Tab => f. write_str ( "\t " ) ,
222
- Whitespace :: SingleLineComment ( s ) => write ! ( f, "--{} " , s ) ,
223
+ Whitespace :: SingleLineComment { prefix , comment } => write ! ( f, "{}{} " , prefix , comment ) ,
223
224
Whitespace :: MultiLineComment ( s) => write ! ( f, "/*{}*/" , s) ,
224
225
}
225
226
}
@@ -370,12 +371,11 @@ impl<'a> Tokenizer<'a> {
370
371
match chars. peek ( ) {
371
372
Some ( '-' ) => {
372
373
chars. next ( ) ; // consume the second '-', starting a single-line comment
373
- let mut s = peeking_take_while ( chars, |ch| ch != '\n' ) ;
374
- if let Some ( ch) = chars. next ( ) {
375
- assert_eq ! ( ch, '\n' ) ;
376
- s. push ( ch) ;
377
- }
378
- Ok ( Some ( Token :: Whitespace ( Whitespace :: SingleLineComment ( s) ) ) )
374
+ let comment = self . tokenize_single_line_comment ( chars) ;
375
+ Ok ( Some ( Token :: Whitespace ( Whitespace :: SingleLineComment {
376
+ prefix : "--" . to_owned ( ) ,
377
+ comment,
378
+ } ) ) )
379
379
}
380
380
// a regular '-' operator
381
381
_ => Ok ( Some ( Token :: Minus ) ) ,
@@ -388,6 +388,14 @@ impl<'a> Tokenizer<'a> {
388
388
chars. next ( ) ; // consume the '*', starting a multi-line comment
389
389
self . tokenize_multiline_comment ( chars)
390
390
}
391
+ Some ( '/' ) if dialect_of ! ( self is SnowflakeDialect ) => {
392
+ chars. next ( ) ; // consume the second '/', starting a snowflake single-line comment
393
+ let comment = self . tokenize_single_line_comment ( chars) ;
394
+ Ok ( Some ( Token :: Whitespace ( Whitespace :: SingleLineComment {
395
+ prefix : "//" . to_owned ( ) ,
396
+ comment,
397
+ } ) ) )
398
+ }
391
399
// a regular '/' operator
392
400
_ => Ok ( Some ( Token :: Div ) ) ,
393
401
}
@@ -448,6 +456,14 @@ impl<'a> Tokenizer<'a> {
448
456
'^' => self . consume_and_return ( chars, Token :: Caret ) ,
449
457
'{' => self . consume_and_return ( chars, Token :: LBrace ) ,
450
458
'}' => self . consume_and_return ( chars, Token :: RBrace ) ,
459
+ '#' if dialect_of ! ( self is SnowflakeDialect ) => {
460
+ chars. next ( ) ; // consume the '#', starting a snowflake single-line comment
461
+ let comment = self . tokenize_single_line_comment ( chars) ;
462
+ Ok ( Some ( Token :: Whitespace ( Whitespace :: SingleLineComment {
463
+ prefix : "#" . to_owned ( ) ,
464
+ comment,
465
+ } ) ) )
466
+ }
451
467
other => self . consume_and_return ( chars, Token :: Char ( other) ) ,
452
468
} ,
453
469
None => Ok ( None ) ,
@@ -462,6 +478,16 @@ impl<'a> Tokenizer<'a> {
462
478
} )
463
479
}
464
480
481
+ // Consume characters until newline
482
+ fn tokenize_single_line_comment ( & self , chars : & mut Peekable < Chars < ' _ > > ) -> String {
483
+ let mut comment = peeking_take_while ( chars, |ch| ch != '\n' ) ;
484
+ if let Some ( ch) = chars. next ( ) {
485
+ assert_eq ! ( ch, '\n' ) ;
486
+ comment. push ( ch) ;
487
+ }
488
+ comment
489
+ }
490
+
465
491
/// Tokenize an identifier or keyword, after the first char is already consumed.
466
492
fn tokenize_word ( & self , first_char : char , chars : & mut Peekable < Chars < ' _ > > ) -> String {
467
493
let mut s = first_char. to_string ( ) ;
@@ -819,9 +845,10 @@ mod tests {
819
845
let tokens = tokenizer. tokenize ( ) . unwrap ( ) ;
820
846
let expected = vec ! [
821
847
Token :: Number ( "0" . to_string( ) ) ,
822
- Token :: Whitespace ( Whitespace :: SingleLineComment (
823
- "this is a comment\n " . to_string( ) ,
824
- ) ) ,
848
+ Token :: Whitespace ( Whitespace :: SingleLineComment {
849
+ prefix: "--" . to_string( ) ,
850
+ comment: "this is a comment\n " . to_string( ) ,
851
+ } ) ,
825
852
Token :: Number ( "1" . to_string( ) ) ,
826
853
] ;
827
854
compare ( expected, tokens) ;
@@ -834,9 +861,10 @@ mod tests {
834
861
let dialect = GenericDialect { } ;
835
862
let mut tokenizer = Tokenizer :: new ( & dialect, & sql) ;
836
863
let tokens = tokenizer. tokenize ( ) . unwrap ( ) ;
837
- let expected = vec ! [ Token :: Whitespace ( Whitespace :: SingleLineComment (
838
- "this is a comment" . to_string( ) ,
839
- ) ) ] ;
864
+ let expected = vec ! [ Token :: Whitespace ( Whitespace :: SingleLineComment {
865
+ prefix: "--" . to_string( ) ,
866
+ comment: "this is a comment" . to_string( ) ,
867
+ } ) ] ;
840
868
compare ( expected, tokens) ;
841
869
}
842
870
0 commit comments