Skip to content

Commit 33b12ac

Browse files
eitsupi and alamb authored
feat: add DuckDB dialect (#878)
* feat: add DuckDB dialect * formatting * fix conflict * support // in GenericDialect * add DuckDbDialect to all_dialects * add comment from suggestion Co-authored-by: Andrew Lamb <[email protected]> * fix: support // in GenericDialect --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 3be19c7 commit 33b12ac

File tree

9 files changed

+140
-10
lines changed

9 files changed

+140
-10
lines changed

examples/cli.rs

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
4646
"--hive" => Box::new(HiveDialect {}),
4747
"--redshift" => Box::new(RedshiftSqlDialect {}),
4848
"--clickhouse" => Box::new(ClickHouseDialect {}),
49+
"--duckdb" => Box::new(DuckDbDialect {}),
4950
"--generic" | "" => Box::new(GenericDialect {}),
5051
s => panic!("Unexpected parameter: {s}"),
5152
};

src/ast/operator.rs

+3
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ pub enum BinaryOperator {
8585
BitwiseOr,
8686
BitwiseAnd,
8787
BitwiseXor,
88+
/// Integer division operator `//` in DuckDB
89+
DuckIntegerDivide,
8890
/// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division
8991
MyIntegerDivide,
9092
/// Support for custom operators (built by parsers outside this crate)
@@ -126,6 +128,7 @@ impl fmt::Display for BinaryOperator {
126128
BinaryOperator::BitwiseOr => f.write_str("|"),
127129
BinaryOperator::BitwiseAnd => f.write_str("&"),
128130
BinaryOperator::BitwiseXor => f.write_str("^"),
131+
BinaryOperator::DuckIntegerDivide => f.write_str("//"),
129132
BinaryOperator::MyIntegerDivide => f.write_str("DIV"),
130133
BinaryOperator::Custom(s) => f.write_str(s),
131134
BinaryOperator::PGBitwiseXor => f.write_str("#"),

src/dialect/duckdb.rs

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Licensed under the Apache License, Version 2.0 (the "License");
2+
// you may not use this file except in compliance with the License.
3+
// You may obtain a copy of the License at
4+
//
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software
8+
// distributed under the License is distributed on an "AS IS" BASIS,
9+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
// See the License for the specific language governing permissions and
11+
// limitations under the License.
12+
13+
use crate::dialect::Dialect;
14+
15+
#[derive(Debug, Default)]
16+
pub struct DuckDbDialect;
17+
18+
// In most cases the DuckDB dialect is identical to [`PostgreSqlDialect`].
19+
impl Dialect for DuckDbDialect {
20+
fn is_identifier_start(&self, ch: char) -> bool {
21+
ch.is_alphabetic() || ch == '_'
22+
}
23+
24+
fn is_identifier_part(&self, ch: char) -> bool {
25+
ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_'
26+
}
27+
28+
fn supports_filter_during_aggregation(&self) -> bool {
29+
true
30+
}
31+
}

src/dialect/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
mod ansi;
1414
mod bigquery;
1515
mod clickhouse;
16+
mod duckdb;
1617
mod generic;
1718
mod hive;
1819
mod mssql;
@@ -31,6 +32,7 @@ use core::str::Chars;
3132
pub use self::ansi::AnsiDialect;
3233
pub use self::bigquery::BigQueryDialect;
3334
pub use self::clickhouse::ClickHouseDialect;
35+
pub use self::duckdb::DuckDbDialect;
3436
pub use self::generic::GenericDialect;
3537
pub use self::hive::HiveDialect;
3638
pub use self::mssql::MsSqlDialect;
@@ -163,6 +165,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect
163165
"clickhouse" => Some(Box::new(ClickHouseDialect {})),
164166
"bigquery" => Some(Box::new(BigQueryDialect)),
165167
"ansi" => Some(Box::new(AnsiDialect {})),
168+
"duckdb" => Some(Box::new(DuckDbDialect {})),
166169
_ => None,
167170
}
168171
}
@@ -214,6 +217,8 @@ mod tests {
214217
assert!(parse_dialect("BigQuery").is::<BigQueryDialect>());
215218
assert!(parse_dialect("ansi").is::<AnsiDialect>());
216219
assert!(parse_dialect("ANSI").is::<AnsiDialect>());
220+
assert!(parse_dialect("duckdb").is::<DuckDbDialect>());
221+
assert!(parse_dialect("DuckDb").is::<DuckDbDialect>());
217222

218223
// error cases
219224
assert!(dialect_from_str("Unknown").is_none());

src/parser.rs

+13-7
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@ impl<'a> Parser<'a> {
992992
/// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple
993993
/// expr.
994994
fn parse_group_by_expr(&mut self) -> Result<Expr, ParserError> {
995-
if dialect_of!(self is PostgreSqlDialect | GenericDialect) {
995+
if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
996996
if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
997997
self.expect_token(&Token::LParen)?;
998998
let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?;
@@ -1662,10 +1662,13 @@ impl<'a> Parser<'a> {
16621662
}
16631663
Token::Ampersand => Some(BinaryOperator::BitwiseAnd),
16641664
Token::Div => Some(BinaryOperator::Divide),
1665-
Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
1665+
Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => {
1666+
Some(BinaryOperator::DuckIntegerDivide)
1667+
}
1668+
Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => {
16661669
Some(BinaryOperator::PGBitwiseShiftLeft)
16671670
}
1668-
Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
1671+
Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => {
16691672
Some(BinaryOperator::PGBitwiseShiftRight)
16701673
}
16711674
Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
@@ -2051,7 +2054,9 @@ impl<'a> Parser<'a> {
20512054
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
20522055
Token::Ampersand => Ok(23),
20532056
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
2054-
Token::Mul | Token::Div | Token::Mod | Token::StringConcat => Ok(40),
2057+
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
2058+
Ok(40)
2059+
}
20552060
Token::DoubleColon => Ok(50),
20562061
Token::Colon => Ok(50),
20572062
Token::ExclamationMark => Ok(50),
@@ -3842,7 +3847,7 @@ impl<'a> Parser<'a> {
38423847
} else {
38433848
let column_keyword = self.parse_keyword(Keyword::COLUMN);
38443849

3845-
let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | GenericDialect)
3850+
let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect)
38463851
{
38473852
self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS])
38483853
|| if_not_exists
@@ -6315,7 +6320,7 @@ impl<'a> Parser<'a> {
63156320
self.expect_keyword(Keyword::SET)?;
63166321
let assignments = self.parse_comma_separated(Parser::parse_assignment)?;
63176322
let from = if self.parse_keyword(Keyword::FROM)
6318-
&& dialect_of!(self is GenericDialect | PostgreSqlDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect)
6323+
&& dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect)
63196324
{
63206325
Some(self.parse_table_and_joins()?)
63216326
} else {
@@ -6415,7 +6420,8 @@ impl<'a> Parser<'a> {
64156420
pub fn parse_wildcard_additional_options(
64166421
&mut self,
64176422
) -> Result<WildcardAdditionalOptions, ParserError> {
6418-
let opt_exclude = if dialect_of!(self is GenericDialect | SnowflakeDialect) {
6423+
let opt_exclude = if dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect)
6424+
{
64196425
self.parse_optional_select_item_exclude()?
64206426
} else {
64216427
None

src/test_utils.rs

+1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ pub fn all_dialects() -> TestedDialects {
168168
Box::new(MySqlDialect {}),
169169
Box::new(BigQueryDialect {}),
170170
Box::new(SQLiteDialect {}),
171+
Box::new(DuckDbDialect {}),
171172
],
172173
options: None,
173174
}

src/tokenizer.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use serde::{Deserialize, Serialize};
3535
use sqlparser_derive::{Visit, VisitMut};
3636

3737
use crate::ast::DollarQuotedString;
38-
use crate::dialect::{BigQueryDialect, GenericDialect, SnowflakeDialect};
38+
use crate::dialect::{BigQueryDialect, DuckDbDialect, GenericDialect, SnowflakeDialect};
3939
use crate::dialect::{Dialect, MySqlDialect};
4040
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
4141

@@ -98,6 +98,8 @@ pub enum Token {
9898
Mul,
9999
/// Division operator `/`
100100
Div,
101+
/// Integer division operator `//` in DuckDB
102+
DuckIntDiv,
101103
/// Modulo Operator `%`
102104
Mod,
103105
/// String concatenation `||`
@@ -212,6 +214,7 @@ impl fmt::Display for Token {
212214
Token::Minus => f.write_str("-"),
213215
Token::Mul => f.write_str("*"),
214216
Token::Div => f.write_str("/"),
217+
Token::DuckIntDiv => f.write_str("//"),
215218
Token::StringConcat => f.write_str("||"),
216219
Token::Mod => f.write_str("%"),
217220
Token::LParen => f.write_str("("),
@@ -768,6 +771,9 @@ impl<'a> Tokenizer<'a> {
768771
comment,
769772
})))
770773
}
774+
Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
775+
self.consume_and_return(chars, Token::DuckIntDiv)
776+
}
771777
// a regular '/' operator
772778
_ => Ok(Some(Token::Div)),
773779
}

tests/sqlparser_common.rs

+9-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ use sqlparser::ast::SelectItem::UnnamedExpr;
2424
use sqlparser::ast::TableFactor::Pivot;
2525
use sqlparser::ast::*;
2626
use sqlparser::dialect::{
27-
AnsiDialect, BigQueryDialect, ClickHouseDialect, GenericDialect, HiveDialect, MsSqlDialect,
28-
MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect,
27+
AnsiDialect, BigQueryDialect, ClickHouseDialect, DuckDbDialect, GenericDialect, HiveDialect,
28+
MsSqlDialect, MySqlDialect, PostgreSqlDialect, RedshiftSqlDialect, SQLiteDialect,
29+
SnowflakeDialect,
2930
};
3031
use sqlparser::keywords::ALL_KEYWORDS;
3132
use sqlparser::parser::{Parser, ParserError, ParserOptions};
@@ -195,6 +196,7 @@ fn parse_update_set_from() {
195196
let dialects = TestedDialects {
196197
dialects: vec![
197198
Box::new(GenericDialect {}),
199+
Box::new(DuckDbDialect {}),
198200
Box::new(PostgreSqlDialect {}),
199201
Box::new(BigQueryDialect {}),
200202
Box::new(SnowflakeDialect {}),
@@ -941,6 +943,7 @@ fn parse_exponent_in_select() -> Result<(), ParserError> {
941943
Box::new(AnsiDialect {}),
942944
Box::new(BigQueryDialect {}),
943945
Box::new(ClickHouseDialect {}),
946+
Box::new(DuckDbDialect {}),
944947
Box::new(GenericDialect {}),
945948
// Box::new(HiveDialect {}),
946949
Box::new(MsSqlDialect {}),
@@ -2053,6 +2056,7 @@ fn parse_array_agg_func() {
20532056
let supported_dialects = TestedDialects {
20542057
dialects: vec![
20552058
Box::new(GenericDialect {}),
2059+
Box::new(DuckDbDialect {}),
20562060
Box::new(PostgreSqlDialect {}),
20572061
Box::new(MsSqlDialect {}),
20582062
Box::new(AnsiDialect {}),
@@ -2848,6 +2852,7 @@ fn parse_alter_table_add_column_if_not_exists() {
28482852
Box::new(PostgreSqlDialect {}),
28492853
Box::new(BigQueryDialect {}),
28502854
Box::new(GenericDialect {}),
2855+
Box::new(DuckDbDialect {}),
28512856
],
28522857
options: None,
28532858
};
@@ -6139,6 +6144,7 @@ fn test_placeholder() {
61396144
let dialects = TestedDialects {
61406145
dialects: vec![
61416146
Box::new(GenericDialect {}),
6147+
Box::new(DuckDbDialect {}),
61426148
Box::new(PostgreSqlDialect {}),
61436149
Box::new(MsSqlDialect {}),
61446150
Box::new(AnsiDialect {}),
@@ -6873,6 +6879,7 @@ fn parse_non_latin_identifiers() {
68736879
let supported_dialects = TestedDialects {
68746880
dialects: vec![
68756881
Box::new(GenericDialect {}),
6882+
Box::new(DuckDbDialect {}),
68766883
Box::new(PostgreSqlDialect {}),
68776884
Box::new(MsSqlDialect {}),
68786885
Box::new(RedshiftSqlDialect {}),

tests/sqlparser_duckdb.rs

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Licensed under the Apache License, Version 2.0 (the "License");
2+
// you may not use this file except in compliance with the License.
3+
// You may obtain a copy of the License at
4+
//
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software
8+
// distributed under the License is distributed on an "AS IS" BASIS,
9+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
// See the License for the specific language governing permissions and
11+
// limitations under the License.
12+
13+
#[macro_use]
14+
mod test_utils;
15+
16+
use test_utils::*;
17+
18+
use sqlparser::ast::*;
19+
use sqlparser::dialect::{DuckDbDialect, GenericDialect};
20+
21+
fn duckdb() -> TestedDialects {
22+
TestedDialects {
23+
dialects: vec![Box::new(DuckDbDialect {})],
24+
options: None,
25+
}
26+
}
27+
28+
fn duckdb_and_generic() -> TestedDialects {
29+
TestedDialects {
30+
dialects: vec![Box::new(DuckDbDialect {}), Box::new(GenericDialect {})],
31+
options: None,
32+
}
33+
}
34+
35+
#[test]
36+
fn test_select_wildcard_with_exclude() {
37+
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
38+
let expected = SelectItem::Wildcard(WildcardAdditionalOptions {
39+
opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])),
40+
..Default::default()
41+
});
42+
assert_eq!(expected, select.projection[0]);
43+
44+
let select =
45+
duckdb().verified_only_select("SELECT name.* EXCLUDE department_id FROM employee_table");
46+
let expected = SelectItem::QualifiedWildcard(
47+
ObjectName(vec![Ident::new("name")]),
48+
WildcardAdditionalOptions {
49+
opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))),
50+
..Default::default()
51+
},
52+
);
53+
assert_eq!(expected, select.projection[0]);
54+
55+
let select = duckdb()
56+
.verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table");
57+
let expected = SelectItem::Wildcard(WildcardAdditionalOptions {
58+
opt_exclude: Some(ExcludeSelectItem::Multiple(vec![
59+
Ident::new("department_id"),
60+
Ident::new("employee_id"),
61+
])),
62+
..Default::default()
63+
});
64+
assert_eq!(expected, select.projection[0]);
65+
}
66+
67+
#[test]
68+
fn parse_div_infix() {
69+
duckdb_and_generic().verified_stmt(r#"SELECT 5 // 2"#);
70+
}

0 commit comments

Comments
 (0)