diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 271325a50..02121acc9 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -34,10 +34,10 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, JoinConstraint, - JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows, OrderByExpr, - Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, - SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor, + Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, + JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows, + OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, + SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor, TableWithJoins, Top, Values, WildcardAdditionalOptions, With, }; pub use self::value::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index 6762730c5..a85c62a25 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -193,7 +193,7 @@ impl fmt::Display for Table { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Select { - pub distinct: bool, + pub distinct: Option, /// MSSQL syntax: `TOP () [ PERCENT ] [ WITH TIES ]` pub top: Option, /// projection expressions @@ -222,7 +222,10 @@ pub struct Select { impl fmt::Display for Select { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "SELECT{}", if self.distinct { " DISTINCT" } else { "" })?; + write!(f, "SELECT")?; + if let Some(ref distinct) = self.distinct { + write!(f, " {distinct}")?; + } if let Some(ref top) = self.top { write!(f, " {top}")?; } @@ -1079,6 +1082,29 @@ impl fmt::Display for NonBlock { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Distinct { + /// DISTINCT + Distinct, + + /// DISTINCT ON({column names}) + On(Vec), +} + +impl fmt::Display for Distinct { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Distinct::Distinct => write!(f, "DISTINCT"), + Distinct::On(col_names) => { + let col_names = display_comma_separated(col_names); + write!(f, "DISTINCT ON ({col_names})") + } + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -1105,7 +1131,7 @@ impl fmt::Display for Top { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Values { - /// Was there an explict ROWs keyword (MySQL)? + /// Was there an explicit ROWs keyword (MySQL)? /// pub explicit_row: bool, pub rows: Vec>, diff --git a/src/parser.rs b/src/parser.rs index 003369de4..c468e9be8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -879,7 +879,7 @@ impl<'a> Parser<'a> { pub fn parse_function(&mut self, name: ObjectName) -> Result { self.expect_token(&Token::LParen)?; - let distinct = self.parse_all_or_distinct()?; + let distinct = self.parse_all_or_distinct()?.is_some(); let args = self.parse_optional_args()?; let over = if self.parse_keyword(Keyword::OVER) { // TBD: support window names (`OVER mywin`) in place of inline specification @@ -1302,7 +1302,7 @@ impl<'a> Parser<'a> { /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. pub fn parse_listagg_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let distinct = self.parse_all_or_distinct()?; + let distinct = self.parse_all_or_distinct()?.is_some(); let expr = Box::new(self.parse_expr()?); // While ANSI SQL would would require the separator, Redshift makes this optional. Here we // choose to make the separator optional as this provides the more general implementation. @@ -2300,16 +2300,31 @@ impl<'a> Parser<'a> { } } - /// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a - /// `ParserError` if both `ALL` and `DISTINCT` are fround. - pub fn parse_all_or_distinct(&mut self) -> Result { + /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed + /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found. + pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { let all = self.parse_keyword(Keyword::ALL); let distinct = self.parse_keyword(Keyword::DISTINCT); - if all && distinct { - parser_err!("Cannot specify both ALL and DISTINCT".to_string()) - } else { - Ok(distinct) + if !distinct { + return Ok(None); } + if all { + return parser_err!("Cannot specify both ALL and DISTINCT".to_string()); + } + let on = self.parse_keyword(Keyword::ON); + if !on { + return Ok(Some(Distinct::Distinct)); + } + + self.expect_token(&Token::LParen)?; + let col_names = if self.consume_token(&Token::RParen) { + self.prev_token(); + Vec::new() + } else { + self.parse_comma_separated(Parser::parse_expr)? + }; + self.expect_token(&Token::RParen)?; + Ok(Some(Distinct::On(col_names))) } /// Parse a SQL CREATE statement diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 3ee3fbc03..8af8dd839 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -32,7 +32,7 @@ fn parse_map_access_expr() { let select = clickhouse().verified_only_select(sql); assert_eq!( Select { - distinct: false, + distinct: None, top: None, projection: vec![UnnamedExpr(MapAccess { column: Box::new(Identifier(Ident { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index aee9efd70..27a642dbe 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -225,7 +225,7 @@ fn parse_update_set_from() { subquery: Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![ SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))), @@ -597,7 +597,7 @@ fn parse_top_level() { fn parse_simple_select() { let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5"; let select = verified_only_select(sql); - assert!(!select.distinct); + assert!(select.distinct.is_none()); assert_eq!(3, select.projection.len()); let select = verified_query(sql); assert_eq!(Some(Expr::Value(number("5"))), select.limit); @@ -622,7 +622,7 @@ fn parse_limit_is_not_an_alias() { fn parse_select_distinct() { let sql = "SELECT DISTINCT name FROM customer"; let select = verified_only_select(sql); - assert!(select.distinct); + assert!(select.distinct.is_some()); assert_eq!( &SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))), only(&select.projection) @@ -633,7 +633,7 @@ fn parse_select_distinct() { fn parse_select_distinct_two_fields() { let sql = "SELECT DISTINCT name, id FROM customer"; let select = verified_only_select(sql); - assert!(select.distinct); + assert!(select.distinct.is_some()); assert_eq!( &SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))), &select.projection[0] @@ -657,6 +657,30 @@ fn parse_select_distinct_tuple() { ); } +#[test] +fn parse_select_distinct_on() { + let sql = "SELECT DISTINCT ON (album_id) name FROM track ORDER BY album_id, milliseconds"; + let select = verified_only_select(sql); + assert_eq!( + &Some(Distinct::On(vec![Expr::Identifier(Ident::new("album_id"))])), + &select.distinct + ); + + let sql = "SELECT DISTINCT ON () name FROM track ORDER BY milliseconds"; + let select = verified_only_select(sql); + assert_eq!(&Some(Distinct::On(vec![])), &select.distinct); + + let sql = "SELECT DISTINCT ON (album_id, milliseconds) name FROM track"; + let select = verified_only_select(sql); + assert_eq!( + &Some(Distinct::On(vec![ + Expr::Identifier(Ident::new("album_id")), + Expr::Identifier(Ident::new("milliseconds")), + ])), + &select.distinct + ); +} + #[test] fn parse_select_distinct_missing_paren() { let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer"); @@ -3517,7 +3541,7 @@ fn parse_interval_and_or_xor() { let expected_ast = vec![Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![UnnamedExpr(Expr::Identifier(Ident { value: "col".to_string(), @@ -5834,7 +5858,7 @@ fn parse_merge() { subquery: Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![SelectItem::Wildcard( WildcardAdditionalOptions::default() diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 2b312dc5e..1bdce0009 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -445,7 +445,7 @@ fn parse_quote_identifiers_2() { Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { value: "quoted ` identifier".into(), @@ -479,7 +479,7 @@ fn parse_quote_identifiers_3() { Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { value: "`quoted identifier`".into(), @@ -857,7 +857,7 @@ fn parse_select_with_numeric_prefix_column_name() { assert_eq!( q.body, Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new( "123col_$@123abc" @@ -896,7 +896,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { assert_eq!( q.body, Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![ SelectItem::UnnamedExpr(Expr::Value(Value::Number( @@ -1075,7 +1075,7 @@ fn parse_substring_in_select() { Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { - distinct: true, + distinct: Some(Distinct::Distinct), top: None, projection: vec![SelectItem::UnnamedExpr(Expr::Substring { expr: Box::new(Expr::Identifier(Ident { @@ -1372,7 +1372,7 @@ fn parse_hex_string_introducer() { Statement::Query(Box::new(Query { with: None, body: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString { introducer: "_latin1".to_string(), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 471421215..af9783f35 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1694,7 +1694,7 @@ fn parse_array_subquery_expr() { op: SetOperator::Union, set_quantifier: SetQuantifier::None, left: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number( #[cfg(not(feature = "bigdecimal"))] @@ -1715,7 +1715,7 @@ fn parse_array_subquery_expr() { qualify: None, }))), right: Box::new(SetExpr::Select(Box::new(Select { - distinct: false, + distinct: None, top: None, projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number( #[cfg(not(feature = "bigdecimal"))]