From 58efd2f7d1de64f34321f39a514e402d22f97f93 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 10 May 2023 14:45:40 +0000 Subject: [PATCH 1/7] feat: add DuckDB dialect --- examples/cli.rs | 1 + src/ast/operator.rs | 2 ++ src/dialect/duckdb.rs | 31 +++++++++++++++++++ src/dialect/mod.rs | 5 ++++ src/parser.rs | 18 ++++++----- src/tokenizer.rs | 8 ++++- tests/sqlparser_common.rs | 11 +++++-- tests/sqlparser_duckdb.rs | 63 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 129 insertions(+), 10 deletions(-) create mode 100644 src/dialect/duckdb.rs create mode 100644 tests/sqlparser_duckdb.rs diff --git a/examples/cli.rs b/examples/cli.rs index a320a00bc..8af6246a0 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -46,6 +46,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--hive" => Box::new(HiveDialect {}), "--redshift" => Box::new(RedshiftSqlDialect {}), "--clickhouse" => Box::new(ClickHouseDialect {}), + "--duckdb" => Box::new(DuckDbDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {s}"), }; diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 75877c949..8065845a4 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -85,6 +85,7 @@ pub enum BinaryOperator { BitwiseOr, BitwiseAnd, BitwiseXor, + DuckIntegerDivide, PGBitwiseXor, PGBitwiseShiftLeft, PGBitwiseShiftRight, @@ -122,6 +123,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::BitwiseOr => f.write_str("|"), BinaryOperator::BitwiseAnd => f.write_str("&"), BinaryOperator::BitwiseXor => f.write_str("^"), + BinaryOperator::DuckIntegerDivide => f.write_str("//"), BinaryOperator::PGBitwiseXor => f.write_str("#"), BinaryOperator::PGBitwiseShiftLeft => f.write_str("<<"), BinaryOperator::PGBitwiseShiftRight => f.write_str(">>"), diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs new file mode 100644 index 000000000..55f258e53 --- /dev/null +++ b/src/dialect/duckdb.rs @@ -0,0 +1,31 @@ +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::dialect::Dialect; + +#[derive(Debug, Default)] +pub struct DuckDbDialect; + +// In most cases the DuckDB dialect is identical to [`PostgreSqlDialect`]. +impl Dialect for DuckDbDialect { + fn is_identifier_start(&self, ch: char) -> bool { + ch.is_alphabetic() || ch == '_' + } + + fn is_identifier_part(&self, ch: char) -> bool { + ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' + } + + fn supports_filter_during_aggregation(&self) -> bool { + true + } +} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 5744ae65e..48357501c 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -13,6 +13,7 @@ mod ansi; mod bigquery; mod clickhouse; +mod duckdb; mod generic; mod hive; mod mssql; @@ -31,6 +32,7 @@ use core::str::Chars; pub use self::ansi::AnsiDialect; pub use self::bigquery::BigQueryDialect; pub use self::clickhouse::ClickHouseDialect; +pub use self::duckdb::DuckDbDialect; pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; @@ -163,6 +165,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect "clickhouse" => Some(Box::new(ClickHouseDialect {})), "bigquery" => Some(Box::new(BigQueryDialect)), "ansi" => Some(Box::new(AnsiDialect {})), + "duckdb" => Some(Box::new(DuckDbDialect {})), _ => None, } } @@ -214,6 +217,8 @@ mod tests { assert!(parse_dialect("BigQuery").is::<BigQueryDialect>()); assert!(parse_dialect("ansi").is::<AnsiDialect>()); 
assert!(parse_dialect("ANSI").is::<AnsiDialect>()); + assert!(parse_dialect("duckdb").is::<DuckDbDialect>()); + assert!(parse_dialect("DuckDb").is::<DuckDbDialect>()); // error cases assert!(dialect_from_str("Unknown").is_none()); diff --git a/src/parser.rs b/src/parser.rs index 7299a5c5d..51d187f18 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -991,7 +991,7 @@ impl<'a> Parser<'a> { /// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple /// expr. fn parse_group_by_expr(&mut self) -> Result<Expr, ParserError> { - if dialect_of!(self is PostgreSqlDialect | GenericDialect) { + if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { self.expect_token(&Token::LParen)?; let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; @@ -1659,10 +1659,13 @@ impl<'a> Parser<'a> { } Token::Ampersand => Some(BinaryOperator::BitwiseAnd), Token::Div => Some(BinaryOperator::Divide), - Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Token::DuckIntDiv if dialect_of!(self is DuckDbDialect) => { + Some(BinaryOperator::DuckIntegerDivide) + } + Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { Some(BinaryOperator::PGBitwiseShiftLeft) } - Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { Some(BinaryOperator::PGBitwiseShiftRight) } Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { @@ -2045,7 +2048,7 @@ impl<'a> Parser<'a> { Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), - Token::Mul | Token::Div | Token::Mod | Token::StringConcat => Ok(40), + Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => Ok(40), Token::DoubleColon => Ok(50), Token::Colon => 
Ok(50), Token::ExclamationMark => Ok(50), @@ -3836,7 +3839,7 @@ impl<'a> Parser<'a> { } else { let column_keyword = self.parse_keyword(Keyword::COLUMN); - let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | GenericDialect) + let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) { self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) || if_not_exists @@ -6223,7 +6226,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::SET)?; let assignments = self.parse_comma_separated(Parser::parse_assignment)?; let from = if self.parse_keyword(Keyword::FROM) - && dialect_of!(self is GenericDialect | PostgreSqlDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect) + && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect) { Some(self.parse_table_and_joins()?) } else { @@ -6323,7 +6326,8 @@ impl<'a> Parser<'a> { pub fn parse_wildcard_additional_options( &mut self, ) -> Result<WildcardAdditionalOptions, ParserError> { - let opt_exclude = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + let opt_exclude = if dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) + { self.parse_optional_select_item_exclude()? 
} else { None diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a550c4f5d..bc78afb3f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::DollarQuotedString; -use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect}; +use crate::dialect::{BigQueryDialect, DuckDbDialect, GenericDialect, SnowflakeDialect}; use crate::dialect::{Dialect, MySqlDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; @@ -98,6 +98,8 @@ pub enum Token { Mul, /// Division operator `/` Div, + /// Integer division operator `//` in DuckDB + DuckIntDiv, /// Modulo Operator `%` Mod, /// String concatenation `||` @@ -212,6 +214,7 @@ impl fmt::Display for Token { Token::Minus => f.write_str("-"), Token::Mul => f.write_str("*"), Token::Div => f.write_str("/"), + Token::DuckIntDiv => f.write_str("//"), Token::StringConcat => f.write_str("||"), Token::Mod => f.write_str("%"), Token::LParen => f.write_str("("), @@ -768,6 +771,9 @@ impl<'a> Tokenizer<'a> { comment, }))) } + Some('/') if dialect_of!(self is DuckDbDialect) => { + self.consume_and_return(chars, Token::DuckIntDiv) + } // a regular '/' operator _ => Ok(Some(Token::Div)), } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5d28118fb..382ffa5bf 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -24,8 +24,9 @@ use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Pivot; use sqlparser::ast::*; use sqlparser::dialect::{ - AnsiDialect, BigQueryDialect, ClickHouseDialect, GenericDialect, HiveDialect, MsSqlDialect, - MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect, + AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect, + MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, + SnowflakeDialect, }; use 
sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::{Parser, ParserError, ParserOptions}; @@ -195,6 +196,7 @@ fn parse_update_set_from() { let dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(BigQueryDialect {}), Box::new(SnowflakeDialect {}), @@ -941,6 +943,7 @@ fn parse_exponent_in_select() -> Result<(), ParserError> { Box::new(AnsiDialect {}), Box::new(BigQueryDialect {}), Box::new(ClickHouseDialect {}), + Box::new(DuckDbDialect {}), Box::new(GenericDialect {}), // Box::new(HiveDialect {}), Box::new(MsSqlDialect {}), @@ -2053,6 +2056,7 @@ fn parse_array_agg_func() { let supported_dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), @@ -2846,6 +2850,7 @@ fn parse_alter_table_add_column_if_not_exists() { Box::new(PostgreSqlDialect {}), Box::new(BigQueryDialect {}), Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), ], options: None, }; @@ -6137,6 +6142,7 @@ fn test_placeholder() { let dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), @@ -6871,6 +6877,7 @@ fn parse_non_latin_identifiers() { let supported_dialects = TestedDialects { dialects: vec![ Box::new(GenericDialect {}), + Box::new(DuckDbDialect {}), Box::new(PostgreSqlDialect {}), Box::new(MsSqlDialect {}), Box::new(RedshiftSqlDialect {}), diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs new file mode 100644 index 000000000..99317f137 --- /dev/null +++ b/tests/sqlparser_duckdb.rs @@ -0,0 +1,63 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[macro_use] +mod test_utils; + +use test_utils::*; + +use sqlparser::ast::*; +use sqlparser::dialect::DuckDbDialect; + +fn duckdb() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(DuckDbDialect {})], + options: None, + } +} + +#[test] +fn test_select_wildcard_with_exclude() { + let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); + + let select = duckdb() + .verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table"); + let expected = SelectItem::QualifiedWildcard( + ObjectName(vec![Ident::new("name")]), + WildcardAdditionalOptions { + opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), + ..Default::default() + }, + ); + assert_eq!(expected, select.projection[0]); + + let select = duckdb() + .verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table"); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ + Ident::new("department_id"), + Ident::new("employee_id"), + ])), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); +} + +#[test] +fn parse_div_infix() { + duckdb().verified_stmt(r#"SELECT 5 // 2"#); +} From c08b40fa0ec7042e0bc7602e8bc8dfe14580fe19 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Thu, 18 May 2023 12:56:36 
+0000 Subject: [PATCH 2/7] formatting --- src/parser.rs | 4 +++- tests/sqlparser_duckdb.rs | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 4ff0abf23..3381c35db 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2053,7 +2053,9 @@ impl<'a> Parser<'a> { Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), - Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => Ok(40), + Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => { + Ok(40) + } Token::DoubleColon => Ok(50), Token::Colon => Ok(50), Token::ExclamationMark => Ok(50), diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 99317f137..c99ab5486 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -34,8 +34,8 @@ fn test_select_wildcard_with_exclude() { }); assert_eq!(expected, select.projection[0]); - let select = duckdb() - .verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table"); + let select = + duckdb().verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table"); let expected = SelectItem::QualifiedWildcard( ObjectName(vec![Ident::new("name")]), WildcardAdditionalOptions { From fdf904b6747a951673f4e65592c44cc969aa30e7 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Thu, 18 May 2023 13:00:43 +0000 Subject: [PATCH 3/7] fix conflict --- src/ast/operator.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 1e5b648bf..733a25608 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -85,14 +85,11 @@ pub enum BinaryOperator { BitwiseOr, BitwiseAnd, BitwiseXor, -<<<<<<< HEAD DuckIntegerDivide, -======= /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, /// Support for custom operators (built by 
parsers outside this crate) Custom(String), ->>>>>>> upstream/main PGBitwiseXor, PGBitwiseShiftLeft, PGBitwiseShiftRight, From 6c903a94f2d6439e98c85da24a1023e3a8d48d16 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Thu, 18 May 2023 13:01:05 +0000 Subject: [PATCH 4/7] support // in GenericDialect --- src/tokenizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bc78afb3f..ffa1a96f2 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -771,7 +771,7 @@ impl<'a> Tokenizer<'a> { comment, }))) } - Some('/') if dialect_of!(self is DuckDbDialect) => { + Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => { self.consume_and_return(chars, Token::DuckIntDiv) } // a regular '/' operator From 4622d620ce2faa99fc6f33fa7c2ca94dbf286b09 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Thu, 18 May 2023 13:21:59 +0000 Subject: [PATCH 5/7] add DuckDbDialect to all_dialects --- src/test_utils.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test_utils.rs b/src/test_utils.rs index d01bbbab9..57b21e1c9 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -168,6 +168,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(MySqlDialect {}), Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), + Box::new(DuckDbDialect {}), ], options: None, } From f2909fae9992b6d89d0471065bf66e4e282ec35b Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Thu, 18 May 2023 22:22:36 +0900 Subject: [PATCH 6/7] add comment from suggestion Co-authored-by: Andrew Lamb --- src/ast/operator.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 733a25608..b988265ba 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -85,6 +85,7 @@ pub enum BinaryOperator { BitwiseOr, BitwiseAnd, BitwiseXor, + /// Integer division operator `//` in DuckDB DuckIntegerDivide, /// MySQL 
[`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, From c71b13e3b1d219730b5f39a9601ac4ceb2730a31 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Thu, 18 May 2023 13:27:17 +0000 Subject: [PATCH 7/7] fix: support // in GenericDialect --- src/parser.rs | 2 +- tests/sqlparser_duckdb.rs | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 3381c35db..734b86708 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1661,7 +1661,7 @@ impl<'a> Parser<'a> { } Token::Ampersand => Some(BinaryOperator::BitwiseAnd), Token::Div => Some(BinaryOperator::Divide), - Token::DuckIntDiv if dialect_of!(self is DuckDbDialect) => { + Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { Some(BinaryOperator::DuckIntegerDivide) } Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index c99ab5486..1a4f04c33 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -16,7 +16,7 @@ mod test_utils; use test_utils::*; use sqlparser::ast::*; -use sqlparser::dialect::DuckDbDialect; +use sqlparser::dialect::{DuckDbDialect, GenericDialect}; fn duckdb() -> TestedDialects { TestedDialects { @@ -25,6 +25,13 @@ fn duckdb() -> TestedDialects { } } +fn duckdb_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(DuckDbDialect {}), Box::new(GenericDialect {})], + options: None, + } +} + #[test] fn test_select_wildcard_with_exclude() { let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); @@ -59,5 +66,5 @@ fn test_select_wildcard_with_exclude() { #[test] fn parse_div_infix() { - duckdb().verified_stmt(r#"SELECT 5 // 2"#); + duckdb_and_generic().verified_stmt(r#"SELECT 5 // 2"#); }