
Commit 1c6077c

[snowflake] Support single line comments starting with '#' or '//' (apache#264)
Co-authored-by: Eyal Leshem <[email protected]>
1 parent e9aa87f commit 1c6077c
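
In Snowflake SQL, single-line comments may also begin with `#` or `//` in addition to the standard `--`; this commit teaches the tokenizer to recognize those prefixes under `SnowflakeDialect` and to remember which prefix opened each comment. A minimal sketch of the resulting behavior (not part of the commit; it assumes a sqlparser build that includes this change):

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::tokenizer::{Token, Tokenizer, Whitespace};

fn main() {
    // '//' starts a single-line comment only under SnowflakeDialect;
    // under GenericDialect it would still tokenize as two Div tokens.
    let sql = "SELECT 1 // trailing comment\n";
    let dialect = SnowflakeDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, sql);
    let tokens = tokenizer.tokenize().unwrap();

    // The comment is kept as whitespace, with its prefix recorded
    // so the original text can be reproduced verbatim.
    assert!(tokens.contains(&Token::Whitespace(Whitespace::SingleLineComment {
        prefix: "//".to_owned(),
        comment: " trailing comment\n".to_owned(),
    })));
}
```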

File tree: 2 files changed, +81 −14 lines

src/tokenizer.rs
tests/sqlparser_snowflake.rs

src/tokenizer.rs

Lines changed: 42 additions & 14 deletions
@@ -21,6 +21,7 @@ use std::str::Chars;
 
 use super::dialect::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
 use super::dialect::Dialect;
+use super::dialect::SnowflakeDialect;
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
 use std::fmt;
@@ -209,7 +210,7 @@ pub enum Whitespace {
     Space,
     Newline,
     Tab,
-    SingleLineComment(String),
+    SingleLineComment { comment: String, prefix: String },
     MultiLineComment(String),
 }
 
@@ -219,7 +220,7 @@ impl fmt::Display for Whitespace {
             Whitespace::Space => f.write_str(" "),
             Whitespace::Newline => f.write_str("\n"),
             Whitespace::Tab => f.write_str("\t"),
-            Whitespace::SingleLineComment(s) => write!(f, "--{}", s),
+            Whitespace::SingleLineComment { prefix, comment } => write!(f, "{}{}", prefix, comment),
             Whitespace::MultiLineComment(s) => write!(f, "/*{}*/", s),
         }
     }
@@ -370,12 +371,11 @@ impl<'a> Tokenizer<'a> {
                     match chars.peek() {
                         Some('-') => {
                             chars.next(); // consume the second '-', starting a single-line comment
-                            let mut s = peeking_take_while(chars, |ch| ch != '\n');
-                            if let Some(ch) = chars.next() {
-                                assert_eq!(ch, '\n');
-                                s.push(ch);
-                            }
-                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s))))
+                            let comment = self.tokenize_single_line_comment(chars);
+                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
+                                prefix: "--".to_owned(),
+                                comment,
+                            })))
                         }
                         // a regular '-' operator
                         _ => Ok(Some(Token::Minus)),
@@ -388,6 +388,14 @@ impl<'a> Tokenizer<'a> {
                             chars.next(); // consume the '*', starting a multi-line comment
                             self.tokenize_multiline_comment(chars)
                         }
+                        Some('/') if dialect_of!(self is SnowflakeDialect) => {
+                            chars.next(); // consume the second '/', starting a snowflake single-line comment
+                            let comment = self.tokenize_single_line_comment(chars);
+                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
+                                prefix: "//".to_owned(),
+                                comment,
+                            })))
+                        }
                         // a regular '/' operator
                         _ => Ok(Some(Token::Div)),
                     }
@@ -448,6 +456,14 @@
                 '^' => self.consume_and_return(chars, Token::Caret),
                 '{' => self.consume_and_return(chars, Token::LBrace),
                 '}' => self.consume_and_return(chars, Token::RBrace),
+                '#' if dialect_of!(self is SnowflakeDialect) => {
+                    chars.next(); // consume the '#', starting a snowflake single-line comment
+                    let comment = self.tokenize_single_line_comment(chars);
+                    Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "#".to_owned(),
+                        comment,
+                    })))
+                }
                 other => self.consume_and_return(chars, Token::Char(other)),
             },
             None => Ok(None),
@@ -462,6 +478,16 @@
         })
     }
 
+    // Consume characters until newline
+    fn tokenize_single_line_comment(&self, chars: &mut Peekable<Chars<'_>>) -> String {
+        let mut comment = peeking_take_while(chars, |ch| ch != '\n');
+        if let Some(ch) = chars.next() {
+            assert_eq!(ch, '\n');
+            comment.push(ch);
+        }
+        comment
+    }
+
     /// Tokenize an identifier or keyword, after the first char is already consumed.
     fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
         let mut s = first_char.to_string();
@@ -819,9 +845,10 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
             Token::Number("0".to_string()),
-            Token::Whitespace(Whitespace::SingleLineComment(
-                "this is a comment\n".to_string(),
-            )),
+            Token::Whitespace(Whitespace::SingleLineComment {
+                prefix: "--".to_string(),
+                comment: "this is a comment\n".to_string(),
+            }),
             Token::Number("1".to_string()),
         ];
         compare(expected, tokens);
@@ -834,9 +861,10 @@
         let dialect = GenericDialect {};
         let mut tokenizer = Tokenizer::new(&dialect, &sql);
         let tokens = tokenizer.tokenize().unwrap();
-        let expected = vec![Token::Whitespace(Whitespace::SingleLineComment(
-            "this is a comment".to_string(),
-        ))];
+        let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
+            prefix: "--".to_string(),
+            comment: "this is a comment".to_string(),
+        })];
         compare(expected, tokens);
     }
 
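Worth noting: because the `impl fmt::Display for Whitespace` hunk above now writes the stored prefix, a comment tokenized from `#` or `//` prints back with the same prefix instead of being rewritten as `--`. A small illustrative check, assuming the same crate build:

```rust
use sqlparser::tokenizer::Whitespace;

fn main() {
    let ws = Whitespace::SingleLineComment {
        prefix: "#".to_owned(),
        comment: " provision note\n".to_owned(),
    };
    // The old SingleLineComment(String) variant always printed "--";
    // the struct variant round-trips whichever prefix was consumed.
    assert_eq!(ws.to_string(), "# provision note\n");
}
```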
tests/sqlparser_snowflake.rs

Lines changed: 39 additions & 0 deletions
@@ -12,6 +12,7 @@
 use sqlparser::ast::*;
 use sqlparser::dialect::{GenericDialect, SnowflakeDialect};
 use sqlparser::test_utils::*;
+use sqlparser::tokenizer::*;
 
 #[test]
 fn test_snowflake_create_table() {
@@ -24,6 +25,44 @@ fn test_snowflake_create_table() {
     }
 }
 
+#[test]
+fn test_snowflake_single_line_tokenize() {
+    let sql = "CREATE TABLE# this is a comment \ntable_1";
+    let dialect = SnowflakeDialect {};
+    let mut tokenizer = Tokenizer::new(&dialect, &sql);
+    let tokens = tokenizer.tokenize().unwrap();
+
+    let expected = vec![
+        Token::make_keyword("CREATE"),
+        Token::Whitespace(Whitespace::Space),
+        Token::make_keyword("TABLE"),
+        Token::Whitespace(Whitespace::SingleLineComment {
+            prefix: "#".to_string(),
+            comment: " this is a comment \n".to_string(),
+        }),
+        Token::make_word("table_1", None),
+    ];
+
+    assert_eq!(expected, tokens);
+
+    let sql = "CREATE TABLE// this is a comment \ntable_1";
+    let mut tokenizer = Tokenizer::new(&dialect, &sql);
+    let tokens = tokenizer.tokenize().unwrap();
+
+    let expected = vec![
+        Token::make_keyword("CREATE"),
+        Token::Whitespace(Whitespace::Space),
+        Token::make_keyword("TABLE"),
+        Token::Whitespace(Whitespace::SingleLineComment {
+            prefix: "//".to_string(),
+            comment: " this is a comment \n".to_string(),
+        }),
+        Token::make_word("table_1", None),
+    ];
+
+    assert_eq!(expected, tokens);
+}
+
 fn snowflake_and_generic() -> TestedDialects {
     TestedDialects {
         dialects: vec![Box::new(SnowflakeDialect {}), Box::new(GenericDialect {})],
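
The tests above exercise only `SnowflakeDialect`. For contrast, a hedged sketch (not one of the commit's tests) of what the guarded `'#'` arm implies for other dialects: the `dialect_of!` check fails, so `#` falls through to the `other =>` arm and comes back as a plain character token:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    let sql = "1 # 2";
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, sql);
    let tokens = tokenizer.tokenize().unwrap();
    // No Snowflake guard matches, so '#' does not start a comment here.
    assert!(tokens.contains(&Token::Char('#')));
}
```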
