Skip to content

Support DuckDB struct syntax and support list of struct syntax #1372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Aug 15, 2024
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ Cargo.lock
.vscode

*.swp

.DS_store
21 changes: 18 additions & 3 deletions src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ pub enum DataType {
///
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>),
Struct(Vec<StructField>, StructBracketKind),
/// Union
///
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
Expand Down Expand Up @@ -513,9 +513,16 @@ impl fmt::Display for DataType {
}
write!(f, ")")
}
DataType::Struct(fields) => {
DataType::Struct(fields, bracket) => {
if !fields.is_empty() {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
match bracket {
StructBracketKind::Parentheses => {
write!(f, "STRUCT({})", display_comma_separated(fields))
}
StructBracketKind::AngleBrackets => {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
}
}
} else {
write!(f, "STRUCT")
}
Expand Down Expand Up @@ -613,6 +620,14 @@ fn format_clickhouse_datetime_precision_and_timezone(
Ok(())
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum StructBracketKind {
Parentheses,
AngleBrackets,
}

/// Timestamp and Time data types information about TimeZone formatting.
///
/// This is more related to a display information than real differences between each variant. To
Expand Down
3 changes: 2 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};

pub use self::data_type::{
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
pub use self::ddl::{
Expand Down
27 changes: 26 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2231,6 +2231,23 @@ impl<'a> Parser<'a> {
))
}

/// Duckdb Struct Data Type <https://duckdb.org/docs/sql/data_types/struct.html#retrieving-from-structs>
fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
self.expect_keyword(Keyword::STRUCT)?;
self.expect_token(&Token::LParen)?;
let struct_body = self.parse_comma_separated(|parser| {
let field_name = parser.parse_identifier(false)?;
let field_type = parser.parse_data_type()?;

Ok(StructField {
field_name: Some(field_name),
field_type,
})
});
self.expect_token(&Token::RParen)?;
struct_body
}

/// Parse a field definition in a [struct] or [tuple].
/// Syntax:
///
Expand Down Expand Up @@ -7229,12 +7246,20 @@ impl<'a> Parser<'a> {
))))
}
}
Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
self.prev_token();
let field_defs = self.parse_duckdb_struct_type_def()?;
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
}
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?;
trailing_bracket = _trailing_bracket;
Ok(DataType::Struct(field_defs))
Ok(DataType::Struct(
field_defs,
StructBracketKind::AngleBrackets,
))
}
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.prev_token();
Expand Down
74 changes: 46 additions & 28 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,28 +489,34 @@ fn parse_nested_data_types() {
vec![
ColumnDef {
name: Ident::new("x"),
data_type: DataType::Struct(vec![
StructField {
field_name: Some("a".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
Box::new(DataType::Int64,)
))
},
StructField {
field_name: Some("b".into()),
field_type: DataType::Bytes(Some(42))
},
]),
data_type: DataType::Struct(
vec![
StructField {
field_name: Some("a".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
Box::new(DataType::Int64,)
))
},
StructField {
field_name: Some("b".into()),
field_type: DataType::Bytes(Some(42))
},
],
StructBracketKind::AngleBrackets
),
collation: None,
options: vec![],
},
ColumnDef {
name: Ident::new("y"),
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Int64,
}]),
DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Int64,
}],
StructBracketKind::AngleBrackets
),
))),
collation: None,
options: vec![],
Expand Down Expand Up @@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() {
},
StructField {
field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Bool
}])
field_type: DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Bool
}],
StructBracketKind::AngleBrackets
)
},
]
},
Expand All @@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() {
fields: vec![
StructField {
field_name: Some("x".into()),
field_type: DataType::Struct(Default::default())
field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
},
StructField {
field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default())
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
)))
},
]
Expand Down Expand Up @@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
},
StructField {
field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Bool
}])
field_type: DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Bool
}],
StructBracketKind::AngleBrackets
)
},
]
},
Expand All @@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
fields: vec![
StructField {
field_name: Some("x".into()),
field_type: DataType::Struct(Default::default())
field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
},
StructField {
field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default())
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
)))
},
]
Expand Down
32 changes: 32 additions & 0 deletions tests/sqlparser_duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,38 @@ fn duckdb_and_generic() -> TestedDialects {
}
}

#[test]
fn test_struct() {
// basic struct
let sql = r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#;
let statement = duckdb().verified_stmt(sql);
assert_eq!(statement.to_string(), sql);

// struct array
let sql = r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#;
let statement = duckdb().verified_stmt(sql);
assert_eq!(statement.to_string(), sql);

// nested struct
let sql = r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#;
let statement = duckdb().verified_stmt(sql);
assert_eq!(statement.to_string(), sql);

// failing test
let sql_list = vec![
r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#,
r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#,
r#"CREATE TABLE t1 (s STRUCT<v VARCHAR, i INTEGER>)"#,
r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#,
r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#,
r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#,
];

for sql in sql_list {
duckdb().parse_sql_statements(sql).unwrap_err();
}
}

#[test]
fn test_select_wildcard_with_exclude() {
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
Expand Down
Loading