Skip to content

Support DISTINCT ON (...) #852

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ pub use self::ddl::{
};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
Cte, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join, JoinConstraint,
JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows, OrderByExpr,
Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto,
SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, IdentWithAlias, Join,
JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NonBlock, Offset, OffsetRows,
OrderByExpr, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select,
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{
Expand Down
32 changes: 29 additions & 3 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ impl fmt::Display for Table {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Select {
pub distinct: bool,
pub distinct: Option<Distinct>,
/// MSSQL syntax: `TOP (<N>) [ PERCENT ] [ WITH TIES ]`
pub top: Option<Top>,
/// projection expressions
Expand Down Expand Up @@ -222,7 +222,10 @@ pub struct Select {

impl fmt::Display for Select {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "SELECT{}", if self.distinct { " DISTINCT" } else { "" })?;
write!(f, "SELECT")?;
if let Some(ref distinct) = self.distinct {
write!(f, " {distinct}")?;
}
if let Some(ref top) = self.top {
write!(f, " {top}")?;
}
Expand Down Expand Up @@ -1079,6 +1082,29 @@ impl fmt::Display for NonBlock {
}
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Distinct {
/// DISTINCT
Distinct,

/// DISTINCT ON({column names})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

On(Vec<Expr>),
}

/// Renders the clause back to SQL text: a bare `DISTINCT`, or
/// `DISTINCT ON (...)` with the stored expressions passed through
/// `display_comma_separated`.
impl fmt::Display for Distinct {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // Plain quantifier: nothing follows the keyword.
            Distinct::Distinct => f.write_str("DISTINCT"),
            // `ON` form: the expression list goes inside the parentheses.
            Distinct::On(exprs) => {
                write!(f, "DISTINCT ON ({})", display_comma_separated(exprs))
            }
        }
    }
}

#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
Expand All @@ -1105,7 +1131,7 @@ impl fmt::Display for Top {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Values {
/// Was there an explict ROWs keyword (MySQL)?
/// Was there an explicit ROWs keyword (MySQL)?
/// <https://dev.mysql.com/doc/refman/8.0/en/values.html>
pub explicit_row: bool,
pub rows: Vec<Vec<Expr>>,
Expand Down
33 changes: 24 additions & 9 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@ impl<'a> Parser<'a> {

pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
let distinct = self.parse_all_or_distinct()?;
let distinct = self.parse_all_or_distinct()?.is_some();
let args = self.parse_optional_args()?;
let over = if self.parse_keyword(Keyword::OVER) {
// TBD: support window names (`OVER mywin`) in place of inline specification
Expand Down Expand Up @@ -1302,7 +1302,7 @@ impl<'a> Parser<'a> {
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
let distinct = self.parse_all_or_distinct()?;
let distinct = self.parse_all_or_distinct()?.is_some();
let expr = Box::new(self.parse_expr()?);
// While ANSI SQL would require the separator, Redshift makes this optional. Here we
// choose to make the separator optional as this provides the more general implementation.
Expand Down Expand Up @@ -2300,16 +2300,31 @@ impl<'a> Parser<'a> {
}
}

/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
/// `ParserError` if both `ALL` and `DISTINCT` are fround.
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
/// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed or
/// neither keyword is present, and results in a `ParserError` if both `ALL` and `DISTINCT` are
/// found.
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
let all = self.parse_keyword(Keyword::ALL);
let distinct = self.parse_keyword(Keyword::DISTINCT);
if all && distinct {
parser_err!("Cannot specify both ALL and DISTINCT".to_string())
} else {
Ok(distinct)
if !distinct {
return Ok(None);
}
if all {
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
}
let on = self.parse_keyword(Keyword::ON);
if !on {
return Ok(Some(Distinct::Distinct));
}

self.expect_token(&Token::LParen)?;
let col_names = if self.consume_token(&Token::RParen) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like special case code to handle SELECT DISTINCT ON() - can you please add a test for this case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

self.prev_token();
Vec::new()
} else {
self.parse_comma_separated(Parser::parse_expr)?
};
self.expect_token(&Token::RParen)?;
Ok(Some(Distinct::On(col_names)))
}

/// Parse a SQL CREATE statement
Expand Down
2 changes: 1 addition & 1 deletion tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fn parse_map_access_expr() {
let select = clickhouse().verified_only_select(sql);
assert_eq!(
Select {
distinct: false,
distinct: None,
top: None,
projection: vec![UnnamedExpr(MapAccess {
column: Box::new(Identifier(Ident {
Expand Down
36 changes: 30 additions & 6 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ fn parse_update_set_from() {
subquery: Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
Expand Down Expand Up @@ -597,7 +597,7 @@ fn parse_top_level() {
fn parse_simple_select() {
let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5";
let select = verified_only_select(sql);
assert!(!select.distinct);
assert!(select.distinct.is_none());
assert_eq!(3, select.projection.len());
let select = verified_query(sql);
assert_eq!(Some(Expr::Value(number("5"))), select.limit);
Expand All @@ -622,7 +622,7 @@ fn parse_limit_is_not_an_alias() {
fn parse_select_distinct() {
let sql = "SELECT DISTINCT name FROM customer";
let select = verified_only_select(sql);
assert!(select.distinct);
assert!(select.distinct.is_some());
assert_eq!(
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
only(&select.projection)
Expand All @@ -633,7 +633,7 @@ fn parse_select_distinct() {
fn parse_select_distinct_two_fields() {
let sql = "SELECT DISTINCT name, id FROM customer";
let select = verified_only_select(sql);
assert!(select.distinct);
assert!(select.distinct.is_some());
assert_eq!(
&SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("name"))),
&select.projection[0]
Expand All @@ -657,6 +657,30 @@ fn parse_select_distinct_tuple() {
);
}

/// Checks that `SELECT DISTINCT ON (...)` round-trips and that the parsed
/// `distinct` field carries the listed expressions for one, zero, and two
/// entries.
#[test]
fn parse_select_distinct_on() {
    // Parse (and round-trip verify) a single SELECT, keeping only its DISTINCT clause.
    let distinct_of = |sql: &str| verified_only_select(sql).distinct;
    // Shorthand for an unquoted identifier expression.
    let ident = |name: &str| Expr::Identifier(Ident::new(name));

    // Single expression.
    assert_eq!(
        Some(Distinct::On(vec![ident("album_id")])),
        distinct_of(
            "SELECT DISTINCT ON (album_id) name FROM track ORDER BY album_id, milliseconds"
        )
    );

    // Empty parenthesised list.
    assert_eq!(
        Some(Distinct::On(vec![])),
        distinct_of("SELECT DISTINCT ON () name FROM track ORDER BY milliseconds")
    );

    // Several expressions, order preserved.
    assert_eq!(
        Some(Distinct::On(vec![ident("album_id"), ident("milliseconds")])),
        distinct_of("SELECT DISTINCT ON (album_id, milliseconds) name FROM track")
    );
}

#[test]
fn parse_select_distinct_missing_paren() {
let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer");
Expand Down Expand Up @@ -3517,7 +3541,7 @@ fn parse_interval_and_or_xor() {
let expected_ast = vec![Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![UnnamedExpr(Expr::Identifier(Ident {
value: "col".to_string(),
Expand Down Expand Up @@ -5834,7 +5858,7 @@ fn parse_merge() {
subquery: Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::Wildcard(
WildcardAdditionalOptions::default()
Expand Down
12 changes: 6 additions & 6 deletions tests/sqlparser_mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ fn parse_quote_identifiers_2() {
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
value: "quoted ` identifier".into(),
Expand Down Expand Up @@ -479,7 +479,7 @@ fn parse_quote_identifiers_3() {
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
value: "`quoted identifier`".into(),
Expand Down Expand Up @@ -857,7 +857,7 @@ fn parse_select_with_numeric_prefix_column_name() {
assert_eq!(
q.body,
Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(
"123col_$@123abc"
Expand Down Expand Up @@ -896,7 +896,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() {
assert_eq!(
q.body,
Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![
SelectItem::UnnamedExpr(Expr::Value(Value::Number(
Expand Down Expand Up @@ -1075,7 +1075,7 @@ fn parse_substring_in_select() {
Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: true,
distinct: Some(Distinct::Distinct),
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Substring {
expr: Box::new(Expr::Identifier(Ident {
Expand Down Expand Up @@ -1372,7 +1372,7 @@ fn parse_hex_string_introducer() {
Statement::Query(Box::new(Query {
with: None,
body: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::IntroducedString {
introducer: "_latin1".to_string(),
Expand Down
4 changes: 2 additions & 2 deletions tests/sqlparser_postgres.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1694,7 +1694,7 @@ fn parse_array_subquery_expr() {
op: SetOperator::Union,
set_quantifier: SetQuantifier::None,
left: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
#[cfg(not(feature = "bigdecimal"))]
Expand All @@ -1715,7 +1715,7 @@ fn parse_array_subquery_expr() {
qualify: None,
}))),
right: Box::new(SetExpr::Select(Box::new(Select {
distinct: false,
distinct: None,
top: None,
projection: vec![SelectItem::UnnamedExpr(Expr::Value(Value::Number(
#[cfg(not(feature = "bigdecimal"))]
Expand Down