From 23ecc0694d1f2a6867712012a6424eec5e164af6 Mon Sep 17 00:00:00 2001 From: bar sela Date: Mon, 6 Jan 2025 14:10:35 +0200 Subject: [PATCH 1/7] add support for trailing commas in from clause --- src/dialect/mod.rs | 10 +++++++ src/dialect/snowflake.rs | 4 +++ src/keywords.rs | 7 +++++ src/parser/mod.rs | 52 ++++++++++++++++++++++++++++-------- tests/sqlparser_snowflake.rs | 31 +++++++++++++++++++++ 5 files changed, 93 insertions(+), 11 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1343efca6..b395003aa 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -399,6 +399,11 @@ pub trait Dialect: Debug + Any { self.supports_trailing_commas() } + /// Does the dialect support trailing commas in the FROM clause list? + fn supports_from_trailing_commas(&self) -> bool { + false + } + /// Returns true if the dialect supports double dot notation for object names /// /// Example @@ -758,6 +763,11 @@ pub trait Dialect: Debug + Any { keywords::RESERVED_FOR_IDENTIFIER.contains(&kw) } + // Returns list of keyword allowed after from + fn get_reserved_keyword_after_from(&self) -> &[Keyword] { + keywords::ALLOWED_KEYWORD_AFTER_FROM + } + /// Returns true if this dialect supports the `TABLESAMPLE` option /// before the table alias option. For example: /// diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 249241d73..3f4e0c419 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -54,6 +54,10 @@ impl Dialect for SnowflakeDialect { true } + fn supports_from_trailing_commas(&self) -> bool { + true + } + // Snowflake supports double-dot notation when the schema name is not specified // In this case the default PUBLIC schema is used // diff --git a/src/keywords.rs b/src/keywords.rs index 43abc2b03..b60a0c848 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -975,6 +975,13 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::END, ]; +pub const ALLOWED_KEYWORD_AFTER_FROM: &[Keyword] = &[ + Keyword::INTO, + Keyword::LIMIT, + Keyword::HAVING, + Keyword::WHERE, +]; + /// Global list of reserved keywords that cannot be parsed as identifiers /// without special handling like quoting. Parser should call `Dialect::is_reserved_for_identifier` /// to allow for each dialect to customize the list. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 47d4d6f0d..199b17267 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3921,7 +3921,11 @@ impl<'a> Parser<'a> { let trailing_commas = self.options.trailing_commas | self.dialect.supports_projection_trailing_commas(); - self.parse_comma_separated_with_trailing_commas(|p| p.parse_select_item(), trailing_commas) + self.parse_comma_separated_with_trailing_commas( + |p| p.parse_select_item(), + trailing_commas, + self.default_reserved_keywords(), + ) } pub fn parse_actions_list(&mut self) -> Result, ParserError> { @@ -3947,20 +3951,31 @@ impl<'a> Parser<'a> { Ok(values) } + //Parse a 'FROM' statment. support trailing comma + pub fn parse_from(&mut self) -> Result, ParserError> { + let trailing_commas = self.dialect.supports_from_trailing_commas(); + + self.parse_comma_separated_with_trailing_commas( + Parser::parse_table_and_joins, + trailing_commas, + self.dialect.get_reserved_keyword_after_from(), + ) + } + /// Parse the comma of a comma-separated syntax element. /// Allows for control over trailing commas /// Returns true if there is a next element - fn is_parse_comma_separated_end_with_trailing_commas(&mut self, trailing_commas: bool) -> bool { + fn is_parse_comma_separated_end_with_trailing_commas( + &mut self, + trailing_commas: bool, + reserved_keywords: &[Keyword], + ) -> bool { if !self.consume_token(&Token::Comma) { true } else if trailing_commas { let token = self.peek_token().token; match token { - Token::Word(ref kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => - { - true - } + Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) => true, Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { true } @@ -3974,7 +3989,14 @@ impl<'a> Parser<'a> { /// Parse the comma of a comma-separated syntax element. /// Returns true if there is a next element fn is_parse_comma_separated_end(&mut self) -> bool { - self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas) + self.is_parse_comma_separated_end_with_trailing_commas( + self.options.trailing_commas, + self.default_reserved_keywords(), + ) + } + + fn default_reserved_keywords(&self) -> &'static [Keyword] { + keywords::RESERVED_FOR_COLUMN_ALIAS } /// Parse a comma-separated list of 1+ items accepted by `F` @@ -3982,7 +4004,11 @@ impl<'a> Parser<'a> { where F: FnMut(&mut Parser<'a>) -> Result, { - self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas) + self.parse_comma_separated_with_trailing_commas( + f, + self.options.trailing_commas, + self.default_reserved_keywords(), + ) } /// Parse a comma-separated list of 1+ items accepted by `F` @@ -3991,6 +4017,7 @@ impl<'a> Parser<'a> { &mut self, mut f: F, trailing_commas: bool, + reserved_keywords: &[Keyword], ) -> Result, ParserError> where F: FnMut(&mut Parser<'a>) -> Result, @@ -3998,7 +4025,10 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { values.push(f(self)?); - if self.is_parse_comma_separated_end_with_trailing_commas(trailing_commas) { + if self.is_parse_comma_separated_end_with_trailing_commas( + trailing_commas, + reserved_keywords, + ) { break; } } @@ -9951,7 +9981,7 @@ impl<'a> Parser<'a> { // or `from`. let from = if self.parse_keyword(Keyword::FROM) { - self.parse_comma_separated(Parser::parse_table_and_joins)? + self.parse_from()? } else { vec![] }; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9fe14783c..5c0908ed9 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2483,6 +2483,37 @@ fn test_sf_trailing_commas() { snowflake().verified_only_select_with_canonical("SELECT 1, 2, FROM t", "SELECT 1, 2 FROM t"); } +#[test] +fn test_sf_trailing_commas_in_from() { + snowflake().verified_only_select_with_canonical("SELECT 1, 2 FROM t,", "SELECT 1, 2 FROM t"); +} + +#[test] +fn test_sf_trailing_commas_multiple_sources() { + snowflake() + .verified_only_select_with_canonical("SELECT 1, 2 FROM t1, t2,", "SELECT 1, 2 FROM t1, t2"); +} + +#[test] +fn test_from_trailing_commas_with_limit() { + let sql = "SELECT a, FROM b, LIMIT 1"; + let _ = snowflake().parse_sql_statements(sql).unwrap(); +} + +#[test] +fn test_sf_trailing_commas_multiple_subqueries() { + snowflake().verified_only_select_with_canonical( + "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2),", + "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2)", + ); +} + +#[test] +fn test_from_with_nested_trailing_commas() { + let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),"; + let _ = snowflake().parse_sql_statements(sql).unwrap(); +} + #[test] fn test_select_wildcard_with_ilike() { let select = snowflake_and_generic().verified_only_select(r#"SELECT * ILIKE '%id%' FROM tbl"#); From 5f3de315842963e6db47c9f30ddc914dc7c605c9 Mon Sep 17 00:00:00 2001 From: bar sela Date: Mon, 6 Jan 2025 14:19:52 +0200 Subject: [PATCH 2/7] add comment at keywords.rs --- src/keywords.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/keywords.rs b/src/keywords.rs index b60a0c848..852d9930c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -975,6 +975,9 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::END, ]; +// Global list of reserved keywords alloweed after FROM. +// Parser should call Dialect::get_reserved_keyword_after_from +// to allow for each dialect to customize the list. pub const ALLOWED_KEYWORD_AFTER_FROM: &[Keyword] = &[ Keyword::INTO, Keyword::LIMIT, From de8587fc36c20031b9175769e8fc4663397c5df1 Mon Sep 17 00:00:00 2001 From: bar sela Date: Sun, 12 Jan 2025 14:37:23 +0200 Subject: [PATCH 3/7] fix pr notes --- src/dialect/mod.rs | 10 ++++++---- src/keywords.rs | 2 +- src/parser/mod.rs | 30 ++++++++++-------------------- tests/sqlparser_common.rs | 6 ++++++ tests/sqlparser_snowflake.rs | 33 +++++++++++++++------------------ 5 files changed, 38 insertions(+), 43 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b395003aa..24eba75a2 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -399,7 +399,8 @@ pub trait Dialect: Debug + Any { self.supports_trailing_commas() } - /// Does the dialect support trailing commas in the FROM clause list? + /// Returns true if the dialect supports trailing commas in the `FROM` clause of a `SELECT` statement. + /// /// Example: `SELECT 1 FROM T, U, LIMIT 1` fn supports_from_trailing_commas(&self) -> bool { false } @@ -763,9 +764,10 @@ pub trait Dialect: Debug + Any { keywords::RESERVED_FOR_IDENTIFIER.contains(&kw) } - // Returns list of keyword allowed after from - fn get_reserved_keyword_after_from(&self) -> &[Keyword] { - keywords::ALLOWED_KEYWORD_AFTER_FROM + // Returns reserved keywords when looking to parse a [TableFactor]. + /// See [Self::supports_from_trailing_commas] + fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] { + keywords::RESERVED_FOR_TABLE_FACTOR } /// Returns true if this dialect supports the `TABLESAMPLE` option diff --git a/src/keywords.rs b/src/keywords.rs index 852d9930c..9ce8717a6 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -978,7 +978,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ // Global list of reserved keywords alloweed after FROM. // Parser should call Dialect::get_reserved_keyword_after_from // to allow for each dialect to customize the list. -pub const ALLOWED_KEYWORD_AFTER_FROM: &[Keyword] = &[ +pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[ Keyword::INTO, Keyword::LIMIT, Keyword::HAVING, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 199b17267..7c4f4e586 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3924,7 +3924,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated_with_trailing_commas( |p| p.parse_select_item(), trailing_commas, - self.default_reserved_keywords(), + None, ) } @@ -3951,14 +3951,14 @@ impl<'a> Parser<'a> { Ok(values) } - //Parse a 'FROM' statment. support trailing comma - pub fn parse_from(&mut self) -> Result, ParserError> { + //Parse a list of [TableWithJoins] + fn parse_table_with_joins(&mut self) -> Result, ParserError> { let trailing_commas = self.dialect.supports_from_trailing_commas(); self.parse_comma_separated_with_trailing_commas( Parser::parse_table_and_joins, trailing_commas, - self.dialect.get_reserved_keyword_after_from(), + Some(self.dialect.get_reserved_keywords_for_table_factor()), ) } @@ -3968,8 +3968,9 @@ impl<'a> Parser<'a> { fn is_parse_comma_separated_end_with_trailing_commas( &mut self, trailing_commas: bool, - reserved_keywords: &[Keyword], + reserved_keywords: Option<&[Keyword]>, ) -> bool { + let reserved_keywords = reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS); if !self.consume_token(&Token::Comma) { true } else if trailing_commas { @@ -3989,14 +3990,7 @@ impl<'a> Parser<'a> { /// Parse the comma of a comma-separated syntax element. /// Returns true if there is a next element fn is_parse_comma_separated_end(&mut self) -> bool { - self.is_parse_comma_separated_end_with_trailing_commas( - self.options.trailing_commas, - self.default_reserved_keywords(), - ) - } - - fn default_reserved_keywords(&self) -> &'static [Keyword] { - keywords::RESERVED_FOR_COLUMN_ALIAS + self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas, None) } /// Parse a comma-separated list of 1+ items accepted by `F` @@ -4004,11 +3998,7 @@ impl<'a> Parser<'a> { where F: FnMut(&mut Parser<'a>) -> Result, { - self.parse_comma_separated_with_trailing_commas( - f, - self.options.trailing_commas, - self.default_reserved_keywords(), - ) + self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas, None) } /// Parse a comma-separated list of 1+ items accepted by `F` @@ -4017,7 +4007,7 @@ impl<'a> Parser<'a> { &mut self, mut f: F, trailing_commas: bool, - reserved_keywords: &[Keyword], + reserved_keywords: Option<&[Keyword]>, ) -> Result, ParserError> where F: FnMut(&mut Parser<'a>) -> Result, @@ -9981,7 +9971,7 @@ impl<'a> Parser<'a> { // or `from`. let from = if self.parse_keyword(Keyword::FROM) { - self.parse_from()? + self.parse_table_with_joins()? } else { vec![] }; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3c2e0899f..ca9c7f20c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12796,3 +12796,9 @@ fn parse_update_from_before_select() { parse_sql_statements(query).unwrap_err() ); } + +#[test] +fn test_trailing_commas_in_from() { + let dialects = all_dialects_where(|d| d.supports_from_trailing_commas()); + dialects.verified_only_select_with_canonical("SELECT 1, 2 FROM t,", "SELECT 1, 2 FROM t"); +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 5c0908ed9..caafdda89 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2484,36 +2484,33 @@ fn test_sf_trailing_commas() { } #[test] -fn test_sf_trailing_commas_in_from() { - snowflake().verified_only_select_with_canonical("SELECT 1, 2 FROM t,", "SELECT 1, 2 FROM t"); -} - -#[test] -fn test_sf_trailing_commas_multiple_sources() { +fn test_sf_from_trailing_commas() { snowflake() .verified_only_select_with_canonical("SELECT 1, 2 FROM t1, t2,", "SELECT 1, 2 FROM t1, t2"); -} -#[test] -fn test_from_trailing_commas_with_limit() { let sql = "SELECT a, FROM b, LIMIT 1"; let _ = snowflake().parse_sql_statements(sql).unwrap(); -} -#[test] -fn test_sf_trailing_commas_multiple_subqueries() { + let sql = "INSERT INTO a SELECT b FROM c,"; + let _ = snowflake().parse_sql_statements(sql).unwrap(); + + let sql = "SELECT a FROM b, HAVING COUNT(*) > 1"; + let _ = snowflake().parse_sql_statements(sql).unwrap(); + + let sql = "SELECT a FROM b, WHERE c = 1"; + let _ = snowflake().parse_sql_statements(sql).unwrap(); + + // nasted + let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),"; + let _ = snowflake().parse_sql_statements(sql).unwrap(); + + // multiple_subqueries snowflake().verified_only_select_with_canonical( "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2),", "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2)", ); } -#[test] -fn test_from_with_nested_trailing_commas() { - let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),"; - let _ = snowflake().parse_sql_statements(sql).unwrap(); -} - #[test] fn test_select_wildcard_with_ilike() { let select = snowflake_and_generic().verified_only_select(r#"SELECT * ILIKE '%id%' FROM tbl"#); From a01805762398381ccd2dc0ab4c2e4ec1c1aed02d Mon Sep 17 00:00:00 2001 From: bar sela Date: Tue, 14 Jan 2025 10:28:16 +0200 Subject: [PATCH 4/7] Update src/parser/mod.rs Co-authored-by: Ifeanyi Ubah --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7c4f4e586..effc404c2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3951,7 +3951,7 @@ impl<'a> Parser<'a> { Ok(values) } - //Parse a list of [TableWithJoins] + /// Parse a list of [TableWithJoins] fn parse_table_with_joins(&mut self) -> Result, ParserError> { let trailing_commas = self.dialect.supports_from_trailing_commas(); From 5d4fbd94c5d605eacdf64f396f19af2f1cc23ceb Mon Sep 17 00:00:00 2001 From: bar sela Date: Tue, 14 Jan 2025 10:37:00 +0200 Subject: [PATCH 5/7] move tests from snowflake tests to sqlparser_common tests --- tests/sqlparser_common.rs | 25 +++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 28 ---------------------------- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ca9c7f20c..0d97f2d95 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12801,4 +12801,29 @@ fn parse_update_from_before_select() { fn test_trailing_commas_in_from() { let dialects = all_dialects_where(|d| d.supports_from_trailing_commas()); dialects.verified_only_select_with_canonical("SELECT 1, 2 FROM t,", "SELECT 1, 2 FROM t"); + + dialects + .verified_only_select_with_canonical("SELECT 1, 2 FROM t1, t2,", "SELECT 1, 2 FROM t1, t2"); + + let sql = "SELECT a, FROM b, LIMIT 1"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + let sql = "INSERT INTO a SELECT b FROM c,"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + let sql = "SELECT a FROM b, HAVING COUNT(*) > 1"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + let sql = "SELECT a FROM b, WHERE c = 1"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + // nasted + let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),"; + let _ = dialects.parse_sql_statements(sql).unwrap(); + + // multiple_subqueries + dialects.verified_only_select_with_canonical( + "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2),", + "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2)", + ); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index caafdda89..9fe14783c 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2483,34 +2483,6 @@ fn test_sf_trailing_commas() { snowflake().verified_only_select_with_canonical("SELECT 1, 2, FROM t", "SELECT 1, 2 FROM t"); } -#[test] -fn test_sf_from_trailing_commas() { - snowflake() - .verified_only_select_with_canonical("SELECT 1, 2 FROM t1, t2,", "SELECT 1, 2 FROM t1, t2"); - - let sql = "SELECT a, FROM b, LIMIT 1"; - let _ = snowflake().parse_sql_statements(sql).unwrap(); - - let sql = "INSERT INTO a SELECT b FROM c,"; - let _ = snowflake().parse_sql_statements(sql).unwrap(); - - let sql = "SELECT a FROM b, HAVING COUNT(*) > 1"; - let _ = snowflake().parse_sql_statements(sql).unwrap(); - - let sql = "SELECT a FROM b, WHERE c = 1"; - let _ = snowflake().parse_sql_statements(sql).unwrap(); - - // nasted - let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),"; - let _ = snowflake().parse_sql_statements(sql).unwrap(); - - // multiple_subqueries - snowflake().verified_only_select_with_canonical( - "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2),", - "SELECT 1, 2 FROM (SELECT * FROM t1), (SELECT * FROM t2)", - ); -} - #[test] fn test_select_wildcard_with_ilike() { let select = snowflake_and_generic().verified_only_select(r#"SELECT * ILIKE '%id%' FROM tbl"#); From e7c8ba190f6cf8d003df8140b357ea9cf6bdf3a6 Mon Sep 17 00:00:00 2001 From: bar sela Date: Tue, 14 Jan 2025 13:57:15 +0200 Subject: [PATCH 6/7] Update sqlparser_common.rs --- tests/sqlparser_common.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 698b276d9..ea47655e1 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12960,6 +12960,7 @@ fn parse_update_from_before_select() { #[test] fn parse_overlaps() { verified_stmt("SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')"); +} #[test] fn test_trailing_commas_in_from() { From b7ec154a48538b593418f71e09bb54a40c8ae0fc Mon Sep 17 00:00:00 2001 From: bar sela Date: Tue, 14 Jan 2025 14:05:48 +0200 Subject: [PATCH 7/7] fmt fix --- tests/sqlparser_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ea47655e1..07a30bc08 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12961,7 +12961,7 @@ fn parse_update_from_before_select() { fn parse_overlaps() { verified_stmt("SELECT (DATE '2016-01-10', DATE '2016-02-01') OVERLAPS (DATE '2016-01-20', DATE '2016-02-10')"); } - + #[test] fn test_trailing_commas_in_from() { let dialects = all_dialects_where(|d| d.supports_from_trailing_commas());