From 82dff42f8d295d737f821d0a5d40c9c55f61da03 Mon Sep 17 00:00:00 2001 From: lovasoa Date: Sat, 19 Apr 2025 01:16:17 +0200 Subject: [PATCH 1/3] xmltable table-valued function adds support for xmltable(...) see https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING fixes https://github.com/apache/datafusion-sqlparser-rs/issues/1816 --- src/ast/mod.rs | 3 +- src/ast/query.rs | 153 ++++++++++++++++++++++++++++++++++++++ src/ast/spans.rs | 1 + src/keywords.rs | 2 + src/parser/mod.rs | 75 +++++++++++++++++++ tests/sqlparser_common.rs | 28 +++++++ 6 files changed, 261 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ab3be35c1..dbb58b923 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -81,7 +81,8 @@ pub use self::query::{ TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values, - WildcardAdditionalOptions, With, WithFill, + WildcardAdditionalOptions, With, WithFill, XmlPassingArgument, XmlPassingClause, + XmlTableColumn, XmlTableColumnOption, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index abc115a0d..0c603301e 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1271,6 +1271,36 @@ pub enum TableFactor { symbols: Vec, alias: Option, }, + /// The `XMLTABLE` table-valued function. + /// Part of the SQL standard, supported by PostgreSQL, Oracle, and DB2. + /// + /// + /// + /// ```sql + /// SELECT xmltable.* + /// FROM xmldata, + /// XMLTABLE('//ROWS/ROW' + /// PASSING data + /// COLUMNS id int PATH '@id', + /// ordinality FOR ORDINALITY, + /// "COUNTRY_NAME" text, + /// country_id text PATH 'COUNTRY_ID', + /// size_sq_km float PATH 'SIZE[@unit = "sq_km"]', + /// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)', + /// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified' + /// ); + /// ```` + XmlTable { + // TODO: Add XMLNAMESPACES clause support + /// The row-generating XPath expression. + row_expression: Expr, + /// The PASSING clause specifying the document expression. + passing: XmlPassingClause, + /// The columns to be extracted from each generated row. + columns: Vec, + /// The alias for the table. + alias: Option, + }, } /// The table sample modifier options @@ -1936,6 +1966,22 @@ impl fmt::Display for TableFactor { } Ok(()) } + TableFactor::XmlTable { + row_expression, + passing, + columns, + alias, + } => { + write!( + f, + "XMLTABLE({row_expression}{passing} COLUMNS {columns})", + columns = display_comma_separated(columns) + )?; + if let Some(alias) = alias { + write!(f, " AS {alias}")?; + } + Ok(()) + } } } } @@ -3082,3 +3128,110 @@ pub enum UpdateTableFromKind { /// For Example: `UPDATE SET t1.name='aaa' FROM t1` AfterSet(Vec), } + +/// Defines the options for an XmlTable column: Named or ForOrdinality +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum XmlTableColumnOption { + /// A named column with a type, optional path, and default value. + NamedInfo { + /// The type of the column to be extracted. + r#type: DataType, + /// The path to the column to be extracted. If None, defaults to the column name. + path: Option, + /// Default value if path does not match + default: Option, + // TODO: Add NULL ON EMPTY / ERROR handling if needed later + // TODO: Add NOT NULL / NULL constraints + }, + /// The FOR ORDINALITY marker + ForOrdinality, +} + +/// A single column definition in XMLTABLE +/// +/// ```sql +/// COLUMNS +/// id int PATH '@id', +/// ordinality FOR ORDINALITY, +/// "COUNTRY_NAME" text, +/// country_id text PATH 'COUNTRY_ID', +/// size_sq_km float PATH 'SIZE[@unit = "sq_km"]', +/// size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)', +/// premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified' +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlTableColumn { + /// The name of the column. + pub name: Ident, + /// Column options: type/path/default or FOR ORDINALITY + pub option: XmlTableColumnOption, +} + +impl fmt::Display for XmlTableColumn { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name)?; + match &self.option { + XmlTableColumnOption::NamedInfo { + r#type, + path, + default, + } => { + write!(f, " {}", r#type)?; + if let Some(p) = path { + write!(f, " PATH {}", p)?; + } + if let Some(d) = default { + write!(f, " DEFAULT {}", d)?; + } + Ok(()) + } + XmlTableColumnOption::ForOrdinality => { + write!(f, " FOR ORDINALITY") + } + } + } +} + +/// Argument passed in the XMLTABLE PASSING clause +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlPassingArgument { + pub expr: Expr, + pub alias: Option, + pub by_value: bool, // True if BY VALUE is specified +} + +impl fmt::Display for XmlPassingArgument { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.by_value { + write!(f, "BY VALUE ")?; + } + write!(f, "{}", self.expr)?; + if let Some(alias) = &self.alias { + write!(f, " AS {}", alias)?; + } + Ok(()) + } +} + +/// The PASSING clause for XMLTABLE +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlPassingClause { + pub arguments: Vec, +} + +impl fmt::Display for XmlPassingClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if !self.arguments.is_empty() { + write!(f, " PASSING {}", display_comma_separated(&self.arguments))?; + } + Ok(()) + } +} diff --git a/src/ast/spans.rs b/src/ast/spans.rs index a241fdf4d..27d52c26f 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1909,6 +1909,7 @@ impl Spanned for TableFactor { .chain(alias.as_ref().map(|alias| alias.span())), ), TableFactor::JsonTable { .. } => Span::empty(), + TableFactor::XmlTable { .. } => Span::empty(), TableFactor::Pivot { table, aggregate_functions, diff --git a/src/keywords.rs b/src/keywords.rs index a5400a5b0..540a42cc8 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -654,6 +654,7 @@ define_keywords!( PARTITION, PARTITIONED, PARTITIONS, + PASSING, PASSWORD, PAST, PATH, @@ -989,6 +990,7 @@ define_keywords!( WORK, WRITE, XML, + XMLTABLE, XOR, YEAR, YEARS, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a9ddd1837..08d2f401f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11992,6 +11992,7 @@ impl<'a> Parser<'a> { | TableFactor::Function { alias, .. } | TableFactor::UNNEST { alias, .. } | TableFactor::JsonTable { alias, .. } + | TableFactor::XmlTable { alias, .. } | TableFactor::OpenJsonTable { alias, .. } | TableFactor::TableFunction { alias, .. } | TableFactor::Pivot { alias, .. } @@ -12107,6 +12108,9 @@ impl<'a> Parser<'a> { } else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) { self.prev_token(); self.parse_open_json_table_factor() + } else if self.parse_keyword_with_tokens(Keyword::XMLTABLE, &[Token::LParen]) { + self.prev_token(); + self.parse_xml_table_factor() } else { let name = self.parse_object_name(true)?; @@ -12339,6 +12343,77 @@ impl<'a> Parser<'a> { }) } + fn parse_xml_table_factor(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let row_expression = self.parse_expr()?; + let passing = self.parse_xml_passing_clause()?; + self.expect_keyword_is(Keyword::COLUMNS)?; + let columns = self.parse_comma_separated(Parser::parse_xml_table_column)?; + self.expect_token(&Token::RParen)?; + let alias = self.maybe_parse_table_alias()?; + Ok(TableFactor::XmlTable { + row_expression, + passing, + columns, + alias, + }) + } + + fn parse_xml_table_column(&mut self) -> Result { + let name = self.parse_identifier()?; + + let option = if self.parse_keyword(Keyword::FOR) { + self.expect_keyword(Keyword::ORDINALITY)?; + XmlTableColumnOption::ForOrdinality + } else { + let r#type = self.parse_data_type()?; + let mut path = None; + let mut default = None; + + if self.parse_keyword(Keyword::PATH) { + path = Some(self.parse_expr()?); + } + + if self.parse_keyword(Keyword::DEFAULT) { + default = Some(self.parse_expr()?); + } + + // TODO: Parse NOT NULL/NULL constraints + + XmlTableColumnOption::NamedInfo { + r#type, + path, + default, + } + }; + Ok(XmlTableColumn { name, option }) + } + + fn parse_xml_passing_clause(&mut self) -> Result { + let mut arguments = vec![]; + if self.parse_keyword(Keyword::PASSING) { + loop { + let by_value = + self.parse_keyword(Keyword::BY) && self.expect_keyword(Keyword::VALUE).is_ok(); + let expr = self.parse_expr()?; + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) + } else { + None + }; + arguments.push(XmlPassingArgument { + expr, + alias, + by_value, + }); + if !self.consume_token(&Token::Comma) { + break; + } + } + } + Ok(XmlPassingClause { arguments }) + } + fn parse_match_recognize(&mut self, table: TableFactor) -> Result { self.expect_token(&Token::LParen)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index be848a603..5398af10f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11729,6 +11729,34 @@ fn test_group_by_grouping_sets() { ); } +#[test] +fn test_xmltable() { + all_dialects() + .verified_only_select("SELECT * FROM XMLTABLE('/root' PASSING data COLUMNS element TEXT)"); + + // Minimal meaningful working example: returns a single row with a single column named y containing the value z + all_dialects().verified_only_select( + "SELECT y FROM XMLTABLE('/X' PASSING 'z' COLUMNS y TEXT)", + ); + + // Test using subqueries + all_dialects().verified_only_select("SELECT y FROM XMLTABLE((SELECT '/X') PASSING (SELECT CAST('z' AS xml)) COLUMNS y TEXT PATH (SELECT 'y'))"); + + all_dialects().verified_only_select("SELECT * FROM XMLTABLE('/root/row' PASSING xmldata COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()', value FLOAT PATH 'value')"); + + all_dialects().verified_only_select("SELECT * FROM XMLTABLE('//ROWS/ROW' PASSING data COLUMNS row_num FOR ORDINALITY, id INT PATH '@id', name TEXT PATH 'NAME' DEFAULT 'unnamed')"); + + // Example from https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING + all_dialects().verified_only_select( + "SELECT xmltable.* FROM xmldata, XMLTABLE('//ROWS/ROW' PASSING data COLUMNS id INT PATH '@id', ordinality FOR ORDINALITY, \"COUNTRY_NAME\" TEXT, country_id TEXT PATH 'COUNTRY_ID', size_sq_km FLOAT PATH 'SIZE[@unit = \"sq_km\"]', size_other TEXT PATH 'concat(SIZE[@unit!=\"sq_km\"], \" \", SIZE[@unit!=\"sq_km\"]/@unit)', premier_name TEXT PATH 'PREMIER_NAME' DEFAULT 'not specified')" + ); + + // Example from DB2 docs without explicit PASSING clause: https://www.ibm.com/docs/en/db2/12.1.0?topic=xquery-simple-column-name-passing-xmlexists-xmlquery-xmltable + all_dialects().verified_only_select( + "SELECT X.* FROM T1, XMLTABLE('$CUSTLIST/customers/customerinfo' COLUMNS \"Cid\" BIGINT PATH '@Cid', \"Info\" XML PATH 'document{.}', \"History\" XML PATH 'NULL') AS X" + ); +} + #[test] fn test_match_recognize() { use MatchRecognizePattern::*; From 6c11bb978516f60ebbe320c4e013dd650148befb Mon Sep 17 00:00:00 2001 From: lovasoa Date: Sat, 19 Apr 2025 01:26:49 +0200 Subject: [PATCH 2/3] add support for [NOT] NULL in xml columns --- src/ast/query.rs | 8 ++++++-- src/parser/mod.rs | 7 ++++++- tests/sqlparser_common.rs | 5 +++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 0c603301e..2dcb77c79 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -3142,8 +3142,8 @@ pub enum XmlTableColumnOption { path: Option, /// Default value if path does not match default: Option, - // TODO: Add NULL ON EMPTY / ERROR handling if needed later - // TODO: Add NOT NULL / NULL constraints + /// Whether the column is nullable (NULL=true, NOT NULL=false) + nullable: bool, }, /// The FOR ORDINALITY marker ForOrdinality, @@ -3179,6 +3179,7 @@ impl fmt::Display for XmlTableColumn { r#type, path, default, + nullable, } => { write!(f, " {}", r#type)?; if let Some(p) = path { @@ -3187,6 +3188,9 @@ impl fmt::Display for XmlTableColumn { if let Some(d) = default { write!(f, " DEFAULT {}", d)?; } + if !*nullable { + write!(f, " NOT NULL")?; + } Ok(()) } XmlTableColumnOption::ForOrdinality => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 08d2f401f..764f2ac18 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12378,12 +12378,17 @@ impl<'a> Parser<'a> { default = Some(self.parse_expr()?); } - // TODO: Parse NOT NULL/NULL constraints + let not_null = self.parse_keywords(&[Keyword::NOT, Keyword::NULL]); + if !not_null { + // NULL is the default but can be specified explicitly + let _ = self.parse_keyword(Keyword::NULL); + } XmlTableColumnOption::NamedInfo { r#type, path, default, + nullable: !not_null, } }; Ok(XmlTableColumn { name, option }) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5398af10f..71ef3ba33 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11742,6 +11742,11 @@ fn test_xmltable() { // Test using subqueries all_dialects().verified_only_select("SELECT y FROM XMLTABLE((SELECT '/X') PASSING (SELECT CAST('z' AS xml)) COLUMNS y TEXT PATH (SELECT 'y'))"); + // NOT NULL + all_dialects().verified_only_select( + "SELECT y FROM XMLTABLE('/X' PASSING '' COLUMNS y TEXT NOT NULL)", + ); + all_dialects().verified_only_select("SELECT * FROM XMLTABLE('/root/row' PASSING xmldata COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()', value FLOAT PATH 'value')"); all_dialects().verified_only_select("SELECT * FROM XMLTABLE('//ROWS/ROW' PASSING data COLUMNS row_num FOR ORDINALITY, id INT PATH '@id', name TEXT PATH 'NAME' DEFAULT 'unnamed')"); From c1a62556b9c1aa77a05131db7668b4895dcc3887 Mon Sep 17 00:00:00 2001 From: lovasoa Date: Sat, 19 Apr 2025 01:45:45 +0200 Subject: [PATCH 3/3] add support for xml namespaces --- src/ast/mod.rs | 4 ++-- src/ast/query.rs | 33 +++++++++++++++++++++++++++++++-- src/keywords.rs | 1 + src/parser/mod.rs | 17 +++++++++++++++++ tests/sqlparser_common.rs | 5 +++++ 5 files changed, 56 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index dbb58b923..74e8cb55c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -81,8 +81,8 @@ pub use self::query::{ TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values, - WildcardAdditionalOptions, With, WithFill, XmlPassingArgument, XmlPassingClause, - XmlTableColumn, XmlTableColumnOption, + WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition, XmlPassingArgument, + XmlPassingClause, XmlTableColumn, XmlTableColumnOption, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index 2dcb77c79..982985ec3 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1291,7 +1291,8 @@ pub enum TableFactor { /// ); /// ```` XmlTable { - // TODO: Add XMLNAMESPACES clause support + /// Optional XMLNAMESPACES clause (empty if not present) + namespaces: Vec, /// The row-generating XPath expression. row_expression: Expr, /// The PASSING clause specifying the document expression. @@ -1971,10 +1972,19 @@ impl fmt::Display for TableFactor { passing, columns, alias, + namespaces, } => { + write!(f, "XMLTABLE(")?; + if !namespaces.is_empty() { + write!( + f, + "XMLNAMESPACES({}), ", + display_comma_separated(namespaces) + )?; + } write!( f, - "XMLTABLE({row_expression}{passing} COLUMNS {columns})", + "{row_expression}{passing} COLUMNS {columns})", columns = display_comma_separated(columns) )?; if let Some(alias) = alias { @@ -3239,3 +3249,22 @@ impl fmt::Display for XmlPassingClause { Ok(()) } } + +/// Represents a single XML namespace definition in the XMLNAMESPACES clause. +/// +/// `namespace_uri AS namespace_name` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct XmlNamespaceDefinition { + /// The namespace URI (a text expression). + pub uri: Expr, + /// The alias for the namespace (a simple identifier). + pub name: Ident, +} + +impl fmt::Display for XmlNamespaceDefinition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} AS {}", self.uri, self.name) + } +} diff --git a/src/keywords.rs b/src/keywords.rs index 540a42cc8..4eaad7ed2 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -990,6 +990,7 @@ define_keywords!( WORK, WRITE, XML, + XMLNAMESPACES, XMLTABLE, XOR, YEAR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 764f2ac18..77466b97e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12345,6 +12345,15 @@ impl<'a> Parser<'a> { fn parse_xml_table_factor(&mut self) -> Result { self.expect_token(&Token::LParen)?; + let namespaces = if self.parse_keyword(Keyword::XMLNAMESPACES) { + self.expect_token(&Token::LParen)?; + let namespaces = self.parse_comma_separated(Parser::parse_xml_namespace_definition)?; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::Comma)?; + namespaces + } else { + vec![] + }; let row_expression = self.parse_expr()?; let passing = self.parse_xml_passing_clause()?; self.expect_keyword_is(Keyword::COLUMNS)?; @@ -12352,6 +12361,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::XmlTable { + namespaces, row_expression, passing, columns, @@ -12359,6 +12369,13 @@ impl<'a> Parser<'a> { }) } + fn parse_xml_namespace_definition(&mut self) -> Result { + let uri = self.parse_expr()?; + self.expect_keyword_is(Keyword::AS)?; + let name = self.parse_identifier()?; + Ok(XmlNamespaceDefinition { uri, name }) + } + fn parse_xml_table_column(&mut self) -> Result { let name = self.parse_identifier()?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 71ef3ba33..15b9bef66 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11760,6 +11760,11 @@ fn test_xmltable() { all_dialects().verified_only_select( "SELECT X.* FROM T1, XMLTABLE('$CUSTLIST/customers/customerinfo' COLUMNS \"Cid\" BIGINT PATH '@Cid', \"Info\" XML PATH 'document{.}', \"History\" XML PATH 'NULL') AS X" ); + + // Example from PostgreSQL with XMLNAMESPACES + all_dialects().verified_only_select( + "SELECT xmltable.* FROM XMLTABLE(XMLNAMESPACES('http://example.com/myns' AS x, 'http://example.com/b' AS \"B\"), '/x:example/x:item' PASSING (SELECT data FROM xmldata) COLUMNS foo INT PATH '@foo', bar INT PATH '@B:bar')" + ); } #[test]