Skip to content

Commit 8c4d30b

Browse files
jayzhan211, alamb, and iffyio
authored
Support DuckDB struct syntax and support list of struct syntax (apache#1372)
Signed-off-by: jayzhan211 <[email protected]> Co-authored-by: Andrew Lamb <[email protected]> Co-authored-by: Ifeanyi Ubah <[email protected]>
1 parent fab834d commit 8c4d30b

File tree

6 files changed

+209
-33
lines changed

6 files changed

+209
-33
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ Cargo.lock
1616
.vscode
1717

1818
*.swp
19+
20+
.DS_Store

src/ast/data_type.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ pub enum DataType {
302302
///
303303
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
304304
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
305-
Struct(Vec<StructField>),
305+
Struct(Vec<StructField>, StructBracketKind),
306306
/// Union
307307
///
308308
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
@@ -517,9 +517,16 @@ impl fmt::Display for DataType {
517517
}
518518
write!(f, ")")
519519
}
520-
DataType::Struct(fields) => {
520+
DataType::Struct(fields, bracket) => {
521521
if !fields.is_empty() {
522-
write!(f, "STRUCT<{}>", display_comma_separated(fields))
522+
match bracket {
523+
StructBracketKind::Parentheses => {
524+
write!(f, "STRUCT({})", display_comma_separated(fields))
525+
}
526+
StructBracketKind::AngleBrackets => {
527+
write!(f, "STRUCT<{}>", display_comma_separated(fields))
528+
}
529+
}
523530
} else {
524531
write!(f, "STRUCT")
525532
}
@@ -618,6 +625,17 @@ fn format_clickhouse_datetime_precision_and_timezone(
618625
Ok(())
619626
}
620627

628+
/// Type of brackets used to enclose the field list of a `STRUCT` data type.
629+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
630+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
631+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
632+
pub enum StructBracketKind {
633+
/// Example: `STRUCT(a INT, b STRING)`
634+
Parentheses,
635+
/// Example: `STRUCT<a INT, b STRING>`
636+
AngleBrackets,
637+
}
638+
621639
/// Timestamp and Time data types information about TimeZone formatting.
622640
///
623641
/// This is more related to a display information than real differences between each variant. To

src/ast/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize};
2828
use sqlparser_derive::{Visit, VisitMut};
2929

3030
pub use self::data_type::{
31-
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
31+
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
32+
StructBracketKind, TimezoneInfo,
3233
};
3334
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
3435
pub use self::ddl::{

src/parser/mod.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,23 @@ impl<'a> Parser<'a> {
22662266
))
22672267
}
22682268

2269+
/// Parse a DuckDB struct data type written with parentheses, e.g. `STRUCT(v VARCHAR, i INTEGER)`; see <https://duckdb.org/docs/sql/data_types/struct.html#retrieving-from-structs>
2270+
fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
2271+
self.expect_keyword(Keyword::STRUCT)?;
2272+
self.expect_token(&Token::LParen)?;
2273+
let struct_body = self.parse_comma_separated(|parser| {
2274+
let field_name = parser.parse_identifier(false)?;
2275+
let field_type = parser.parse_data_type()?;
2276+
2277+
Ok(StructField {
2278+
field_name: Some(field_name),
2279+
field_type,
2280+
})
2281+
});
2282+
self.expect_token(&Token::RParen)?;
2283+
struct_body
2284+
}
2285+
22692286
/// Parse a field definition in a [struct] or [tuple].
22702287
/// Syntax:
22712288
///
@@ -7495,12 +7512,20 @@ impl<'a> Parser<'a> {
74957512
))))
74967513
}
74977514
}
7515+
Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
7516+
self.prev_token();
7517+
let field_defs = self.parse_duckdb_struct_type_def()?;
7518+
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
7519+
}
74987520
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
74997521
self.prev_token();
75007522
let (field_defs, _trailing_bracket) =
75017523
self.parse_struct_type_def(Self::parse_struct_field_def)?;
75027524
trailing_bracket = _trailing_bracket;
7503-
Ok(DataType::Struct(field_defs))
7525+
Ok(DataType::Struct(
7526+
field_defs,
7527+
StructBracketKind::AngleBrackets,
7528+
))
75047529
}
75057530
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
75067531
self.prev_token();

tests/sqlparser_bigquery.rs

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -489,28 +489,34 @@ fn parse_nested_data_types() {
489489
vec![
490490
ColumnDef {
491491
name: Ident::new("x"),
492-
data_type: DataType::Struct(vec![
493-
StructField {
494-
field_name: Some("a".into()),
495-
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
496-
Box::new(DataType::Int64,)
497-
))
498-
},
499-
StructField {
500-
field_name: Some("b".into()),
501-
field_type: DataType::Bytes(Some(42))
502-
},
503-
]),
492+
data_type: DataType::Struct(
493+
vec![
494+
StructField {
495+
field_name: Some("a".into()),
496+
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
497+
Box::new(DataType::Int64,)
498+
))
499+
},
500+
StructField {
501+
field_name: Some("b".into()),
502+
field_type: DataType::Bytes(Some(42))
503+
},
504+
],
505+
StructBracketKind::AngleBrackets
506+
),
504507
collation: None,
505508
options: vec![],
506509
},
507510
ColumnDef {
508511
name: Ident::new("y"),
509512
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
510-
DataType::Struct(vec![StructField {
511-
field_name: None,
512-
field_type: DataType::Int64,
513-
}]),
513+
DataType::Struct(
514+
vec![StructField {
515+
field_name: None,
516+
field_type: DataType::Int64,
517+
}],
518+
StructBracketKind::AngleBrackets
519+
),
514520
))),
515521
collation: None,
516522
options: vec![],
@@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() {
708714
},
709715
StructField {
710716
field_name: Some("str".into()),
711-
field_type: DataType::Struct(vec![StructField {
712-
field_name: None,
713-
field_type: DataType::Bool
714-
}])
717+
field_type: DataType::Struct(
718+
vec![StructField {
719+
field_name: None,
720+
field_type: DataType::Bool
721+
}],
722+
StructBracketKind::AngleBrackets
723+
)
715724
},
716725
]
717726
},
@@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() {
730739
fields: vec![
731740
StructField {
732741
field_name: Some("x".into()),
733-
field_type: DataType::Struct(Default::default())
742+
field_type: DataType::Struct(
743+
Default::default(),
744+
StructBracketKind::AngleBrackets
745+
)
734746
},
735747
StructField {
736748
field_name: Some("y".into()),
737749
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
738-
DataType::Struct(Default::default())
750+
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
739751
)))
740752
},
741753
]
@@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
10131025
},
10141026
StructField {
10151027
field_name: Some("str".into()),
1016-
field_type: DataType::Struct(vec![StructField {
1017-
field_name: None,
1018-
field_type: DataType::Bool
1019-
}])
1028+
field_type: DataType::Struct(
1029+
vec![StructField {
1030+
field_name: None,
1031+
field_type: DataType::Bool
1032+
}],
1033+
StructBracketKind::AngleBrackets
1034+
)
10201035
},
10211036
]
10221037
},
@@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
10351050
fields: vec![
10361051
StructField {
10371052
field_name: Some("x".into()),
1038-
field_type: DataType::Struct(Default::default())
1053+
field_type: DataType::Struct(
1054+
Default::default(),
1055+
StructBracketKind::AngleBrackets
1056+
)
10391057
},
10401058
StructField {
10411059
field_name: Some("y".into()),
10421060
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
1043-
DataType::Struct(Default::default())
1061+
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
10441062
)))
10451063
},
10461064
]

tests/sqlparser_duckdb.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,118 @@ fn duckdb_and_generic() -> TestedDialects {
3232
}
3333
}
3434

35+
#[test]
36+
fn test_struct() {
37+
// s STRUCT(v VARCHAR, i INTEGER)
38+
let struct_type1 = DataType::Struct(
39+
vec![
40+
StructField {
41+
field_name: Some(Ident::new("v")),
42+
field_type: DataType::Varchar(None),
43+
},
44+
StructField {
45+
field_name: Some(Ident::new("i")),
46+
field_type: DataType::Integer(None),
47+
},
48+
],
49+
StructBracketKind::Parentheses,
50+
);
51+
52+
// basic struct
53+
let statement = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#);
54+
assert_eq!(
55+
column_defs(statement),
56+
vec![ColumnDef {
57+
name: "s".into(),
58+
data_type: struct_type1.clone(),
59+
collation: None,
60+
options: vec![],
61+
}]
62+
);
63+
64+
// struct array
65+
let statement = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#);
66+
assert_eq!(
67+
column_defs(statement),
68+
vec![ColumnDef {
69+
name: "s".into(),
70+
data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
71+
Box::new(struct_type1),
72+
None
73+
)),
74+
collation: None,
75+
options: vec![],
76+
}]
77+
);
78+
79+
// s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))
80+
let struct_type2 = DataType::Struct(
81+
vec![
82+
StructField {
83+
field_name: Some(Ident::new("v")),
84+
field_type: DataType::Varchar(None),
85+
},
86+
StructField {
87+
field_name: Some(Ident::new("s")),
88+
field_type: DataType::Struct(
89+
vec![
90+
StructField {
91+
field_name: Some(Ident::new("a1")),
92+
field_type: DataType::Integer(None),
93+
},
94+
StructField {
95+
field_name: Some(Ident::new("a2")),
96+
field_type: DataType::Varchar(None),
97+
},
98+
],
99+
StructBracketKind::Parentheses,
100+
),
101+
},
102+
],
103+
StructBracketKind::Parentheses,
104+
);
105+
106+
// array of nested structs
107+
let statement = duckdb().verified_stmt(
108+
r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#,
109+
);
110+
111+
assert_eq!(
112+
column_defs(statement),
113+
vec![ColumnDef {
114+
name: "s".into(),
115+
data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
116+
Box::new(struct_type2),
117+
None
118+
)),
119+
collation: None,
120+
options: vec![],
121+
}]
122+
);
123+
124+
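// malformed struct definitions that must fail to parse (unbalanced or mismatched brackets, missing parentheses, missing field names; DuckDB also does not support the angle-bracket `STRUCT<...>` syntax)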
125+
let sql_list = vec![
126+
r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#,
127+
r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#,
128+
r#"CREATE TABLE t1 (s STRUCT<v VARCHAR, i INTEGER>)"#,
129+
r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#,
130+
r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#,
131+
r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#,
132+
];
133+
134+
for sql in sql_list {
135+
duckdb().parse_sql_statements(sql).unwrap_err();
136+
}
137+
}
138+
139+
/// Returns the column definitions (`ColumnDef`s) from a `CreateTable` statement; panics on any other statement
140+
fn column_defs(statement: Statement) -> Vec<ColumnDef> {
141+
match statement {
142+
Statement::CreateTable(CreateTable { columns, .. }) => columns,
143+
_ => panic!("Expected CreateTable"),
144+
}
145+
}
146+
35147
#[test]
36148
fn test_select_wildcard_with_exclude() {
37149
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");

0 commit comments

Comments
 (0)