From 6e0588ca2a1e2e8eb9de247a3d1098db3f196494 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Thu, 19 Oct 2023 11:35:51 +0200 Subject: [PATCH 1/2] snowflake: Fix handling of @~% in the stage name --- src/dialect/snowflake.rs | 4 +--- src/tokenizer.rs | 25 +++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 30 ++++++++++++++++++++---------- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 33425e846..95e445a48 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -35,7 +35,7 @@ pub struct SnowflakeDialect; impl Dialect for SnowflakeDialect { // see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html fn is_identifier_start(&self, ch: char) -> bool { - ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' || ch == '@' || ch == '%' + ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } fn is_identifier_part(&self, ch: char) -> bool { @@ -44,8 +44,6 @@ impl Dialect for SnowflakeDialect { || ch.is_ascii_digit() || ch == '$' || ch == '_' - || ch == '/' - || ch == '~' } fn supports_within_after_array_aggregation(&self) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 067aa5a84..9ba662eac 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1004,6 +1004,18 @@ impl<'a> Tokenizer<'a> { } } Some(' ') => Ok(Some(Token::AtSign)), + // Snowflake stage identifier, this should be consumed as multiple dot separated word tokens + Some(_) if dialect_of!(self is SnowflakeDialect) => { + let mut s = "@".to_string(); + s.push_str(&peeking_take_while(chars, |ch| { + self.dialect.is_identifier_part(ch) + || ch == '/' + || ch == '~' + || ch == '%' + || ch == '.' + })); + Ok(Some(Token::make_word(&s, None))) + } Some(sch) if self.dialect.is_identifier_start('@') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } @@ -2001,6 +2013,19 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_snowflake_div() { + let sql = r#"field/1000"#; + let dialect = SnowflakeDialect {}; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + let expected = vec![ + Token::make_word(r#"field"#, None), + Token::Div, + Token::Number("1000".to_string(), false), + ]; + compare(expected, tokens); + } + #[test] fn tokenize_quoted_identifier_with_no_escape() { let sql = r#" "a "" b" "a """ "c """"" "#; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index bef96dfc0..c66d25674 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -26,6 +26,9 @@ use test_utils::*; #[macro_use] mod test_utils; +#[cfg(test)] +use pretty_assertions::assert_eq; + #[test] fn test_snowflake_create_table() { let sql = "CREATE TABLE _my_$table (am00unt number)"; @@ -917,7 +920,7 @@ fn test_copy_into_with_transformations() { } => { assert_eq!( from_stage, - ObjectName(vec![Ident::new("@schema"), Ident::new("general_finished")]) + ObjectName(vec![Ident::new("@schema.general_finished")]) ); assert_eq!( from_transformations.as_ref().unwrap()[0], @@ -1024,15 +1027,9 @@ fn test_snowflake_stage_object_names() { ]; let mut allowed_object_names = vec![ ObjectName(vec![Ident::new("my_company"), Ident::new("emp_basic")]), - ObjectName(vec![Ident::new("@namespace"), Ident::new("%table_name")]), - ObjectName(vec![ - Ident::new("@namespace"), - Ident::new("%table_name/path"), - ]), - ObjectName(vec![ - Ident::new("@namespace"), - Ident::new("stage_name/path"), - ]), + ObjectName(vec![Ident::new("@namespace.%table_name")]), + ObjectName(vec![Ident::new("@namespace.%table_name/path")]), + ObjectName(vec![Ident::new("@namespace.stage_name/path")]), ObjectName(vec![Ident::new("@~/path")]), ]; @@ -1118,3 +1115,16 @@ fn parse_subquery_function_argument() { // the function. snowflake().one_statement_parses_to("SELECT func(SELECT 1, 2)", "SELECT func((SELECT 1, 2))"); } + +#[test] +fn parse_division_correctly() { + snowflake_and_generic().one_statement_parses_to( + "SELECT field/1000 FROM tbl1", + "SELECT field / 1000 FROM tbl1", + ); + + snowflake_and_generic().one_statement_parses_to( + "SELECT tbl1.field/tbl2.field FROM tbl1 JOIN tbl2 ON tbl1.id = tbl2.entity_id", + "SELECT tbl1.field / tbl2.field FROM tbl1 JOIN tbl2 ON tbl1.id = tbl2.entity_id", + ); +} From 02fe12619a6e1c61f54a7d9bb3ddc102671f2e2b Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Thu, 26 Oct 2023 11:17:19 +0200 Subject: [PATCH 2/2] Move stage parsing from tokenizer to parser --- src/dialect/snowflake.rs | 44 ++++++++++++++++++++++++++++++++++-- src/tokenizer.rs | 12 ---------- tests/sqlparser_snowflake.rs | 14 ++++++++---- 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 95e445a48..ca318cad4 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -146,8 +146,48 @@ pub fn parse_create_stage( }) } +pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result { + let mut ident = String::new(); + while let Some(next_token) = parser.next_token_no_skip() { + match &next_token.token { + Token::Whitespace(_) => break, + Token::Period => { + parser.prev_token(); + break; + } + Token::AtSign => ident.push('@'), + Token::Tilde => ident.push('~'), + Token::Mod => ident.push('%'), + Token::Div => ident.push('/'), + Token::Word(w) => ident.push_str(&w.value), + _ => return parser.expected("stage name identifier", parser.peek_token()), + } + } + Ok(Ident::new(ident)) +} + +pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result { + match parser.next_token().token { + Token::AtSign => { + parser.prev_token(); + let mut idents = vec![]; + loop { + idents.push(parse_stage_name_identifier(parser)?); + if !parser.consume_token(&Token::Period) { + break; + } + } + Ok(ObjectName(idents)) + } + _ => { + parser.prev_token(); + Ok(parser.parse_object_name()?) + } + } +} + pub fn parse_copy_into(parser: &mut Parser) -> Result { - let into: ObjectName = parser.parse_object_name()?; + let into: ObjectName = parse_snowflake_stage_name(parser)?; let mut files: Vec = vec![]; let mut from_transformations: Option> = None; let from_stage_alias; @@ -163,7 +203,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { from_transformations = parse_select_items_for_data_load(parser)?; parser.expect_keyword(Keyword::FROM)?; - from_stage = parser.parse_object_name()?; + from_stage = parse_snowflake_stage_name(parser)?; stage_params = parse_stage_params(parser)?; // as diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9ba662eac..16e6bbec0 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1004,18 +1004,6 @@ impl<'a> Tokenizer<'a> { } } Some(' ') => Ok(Some(Token::AtSign)), - // Snowflake stage identifier, this should be consumed as multiple dot separated word tokens - Some(_) if dialect_of!(self is SnowflakeDialect) => { - let mut s = "@".to_string(); - s.push_str(&peeking_take_while(chars, |ch| { - self.dialect.is_identifier_part(ch) - || ch == '/' - || ch == '~' - || ch == '%' - || ch == '.' - })); - Ok(Some(Token::make_word(&s, None))) - } Some(sch) if self.dialect.is_identifier_start('@') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index c66d25674..f0a077973 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -920,7 +920,7 @@ fn test_copy_into_with_transformations() { } => { assert_eq!( from_stage, - ObjectName(vec![Ident::new("@schema.general_finished")]) + ObjectName(vec![Ident::new("@schema"), Ident::new("general_finished")]) ); assert_eq!( from_transformations.as_ref().unwrap()[0], @@ -1027,9 +1027,15 @@ fn test_snowflake_stage_object_names() { ]; let mut allowed_object_names = vec![ ObjectName(vec![Ident::new("my_company"), Ident::new("emp_basic")]), - ObjectName(vec![Ident::new("@namespace.%table_name")]), - ObjectName(vec![Ident::new("@namespace.%table_name/path")]), - ObjectName(vec![Ident::new("@namespace.stage_name/path")]), + ObjectName(vec![Ident::new("@namespace"), Ident::new("%table_name")]), + ObjectName(vec![ + Ident::new("@namespace"), + Ident::new("%table_name/path"), + ]), + ObjectName(vec![ + Ident::new("@namespace"), + Ident::new("stage_name/path"), + ]), ObjectName(vec![Ident::new("@~/path")]), ];