Add support for first, last aggregate function parsing #880
@@ -741,6 +741,8 @@ impl<'a> Parser<'a> {
                    self.parse_array_subquery()
                }
                Keyword::ARRAY_AGG => self.parse_array_agg_expr(),
                Keyword::FIRST => self.parse_first_agg_expr(),
                Keyword::LAST => self.parse_last_agg_expr(),
                Keyword::NOT => self.parse_not(),
                Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
                    self.parse_match_against()
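For orientation (not part of the PR diff): a rough usage sketch of how these new match arms would be exercised through the public parser API, assuming this branch is built. Which of the two syntax shapes is accepted depends on `supports_within_after_array_aggregation()`, as shown in the next hunk.

```rust
use sqlparser::dialect::{GenericDialect, SnowflakeDialect};
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    // ANSI/BigQuery shape: ORDER BY inside the call.
    let ast = Parser::parse_sql(&GenericDialect {}, "SELECT FIRST(x ORDER BY x) AS a FROM T")?;
    println!("{ast:?}");

    // Snowflake shape: ORDER BY in a trailing WITHIN GROUP clause.
    let ast = Parser::parse_sql(
        &SnowflakeDialect {},
        "SELECT FIRST(x) WITHIN GROUP (ORDER BY x) AS a FROM T",
    )?;
    println!("{ast:?}");
    Ok(())
}
```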
@@ -1410,6 +1412,82 @@ impl<'a> Parser<'a> {
        }))
    }

    pub fn parse_first_agg_expr(&mut self) -> Result<Expr, ParserError> {
        self.expect_token(&Token::LParen)?;
        let expr = Box::new(self.parse_expr()?);
        // ANSI SQL and BigQuery define the ORDER BY inside the function call.
        if !self.dialect.supports_within_after_array_aggregation() {
Review comment (on the line above): I don't understand the call to `supports_within_after_array_aggregation()` here.
            let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
                let order_by_expr = self.parse_order_by_expr()?;
                Some(Box::new(order_by_expr))
            } else {
                None
            };
            self.expect_token(&Token::RParen)?;
            return Ok(Expr::FIRST(FirstAgg {
                expr,
                order_by,
                within_group: false,
            }));
        }
        // Snowflake defines the ORDER BY in a WITHIN GROUP clause instead of
        // inside the function call as ANSI SQL does.
        self.expect_token(&Token::RParen)?;
        let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) {
            self.expect_token(&Token::LParen)?;
            self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?;
            let order_by_expr = self.parse_order_by_expr()?;
            self.expect_token(&Token::RParen)?;
            Some(Box::new(order_by_expr))
        } else {
            None
        };

        Ok(Expr::FIRST(FirstAgg {
            expr,
            order_by: within_group,
            within_group: true,
        }))
    }

    pub fn parse_last_agg_expr(&mut self) -> Result<Expr, ParserError> {
        self.expect_token(&Token::LParen)?;
        let expr = Box::new(self.parse_expr()?);
        // ANSI SQL and BigQuery define the ORDER BY inside the function call.
        if !self.dialect.supports_within_after_array_aggregation() {
            let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
                let order_by_expr = self.parse_order_by_expr()?;
                Some(Box::new(order_by_expr))
            } else {
                None
            };
            self.expect_token(&Token::RParen)?;
            return Ok(Expr::LAST(LastAgg {
                expr,
                order_by,
                within_group: false,
            }));
        }
        // Snowflake defines the ORDER BY in a WITHIN GROUP clause instead of
        // inside the function call as ANSI SQL does.
        self.expect_token(&Token::RParen)?;
        let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) {
            self.expect_token(&Token::LParen)?;
            self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?;
            let order_by_expr = self.parse_order_by_expr()?;
            self.expect_token(&Token::RParen)?;
            Some(Box::new(order_by_expr))
        } else {
            None
        };

        Ok(Expr::LAST(LastAgg {
            expr,
            order_by: within_group,
            within_group: true,
        }))
    }

    // This function parses date/time fields for the EXTRACT function-like
    // operator, interval qualifiers, and the ceil/floor operations.
    // EXTRACT supports a wider set of date/time fields than interval qualifiers,
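The AST additions these functions construct (`FirstAgg`, `LastAgg`, and the `Expr::FIRST` / `Expr::LAST` variants) are not part of this hunk. The sketch below is deduced from the construction sites above; the field types are inferred and may not match the PR exactly.

```rust
use sqlparser::ast::{Expr, OrderByExpr};

/// Inferred shape of the new AST node; `LastAgg` would mirror it, and `Expr`
/// would gain `FIRST(FirstAgg)` and `LAST(LastAgg)` variants.
#[derive(Debug, Clone, PartialEq)]
pub struct FirstAgg {
    pub expr: Box<Expr>,
    /// ORDER BY written inside the call, or taken from a trailing WITHIN GROUP clause.
    pub order_by: Option<Box<OrderByExpr>>,
    /// True when the ordering was written as WITHIN GROUP (ORDER BY ...).
    pub within_group: bool,
}
```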
@@ -2070,6 +2070,27 @@ fn parse_array_agg_func() {
    }
}

#[test]
fn parse_first_last_func() {
    let supported_dialects = TestedDialects {
        dialects: vec![
            Box::new(GenericDialect {}),
            Box::new(PostgreSqlDialect {}),
            Box::new(MsSqlDialect {}),
            Box::new(AnsiDialect {}),
            Box::new(HiveDialect {}),
        ],
        options: None,
    };

    for sql in [
        "SELECT FIRST(x ORDER BY x) AS a FROM T",
        "SELECT LAST(x ORDER BY x) AS a FROM T",
Review comment: given there is code above to handle WITHIN GROUP during parsing, can you either: 1) remove that code, or 2) add test coverage for it?
    ] {
        supported_dialects.verified_stmt(sql);
    }
}

#[test]
fn parse_create_table() {
    let sql = "CREATE TABLE uk_cities (\
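A possible shape for the WITHIN GROUP coverage the reviewer asks for above is sketched here; it is hypothetical and not part of the PR. It assumes the same `TestedDialects` helper used in the test above, that `SnowflakeDialect` is imported in this test file, and that the Display impl for the new `Expr` variants (not shown in this diff) round-trips the WITHIN GROUP form.

```rust
#[test]
fn parse_first_last_within_group() {
    // Snowflake should route through the WITHIN GROUP branch because
    // supports_within_after_array_aggregation() returns true for it.
    let snowflake = TestedDialects {
        dialects: vec![Box::new(SnowflakeDialect {})],
        options: None,
    };

    for sql in [
        "SELECT FIRST(x) WITHIN GROUP (ORDER BY x) AS a FROM T",
        "SELECT LAST(x) WITHIN GROUP (ORDER BY x) AS a FROM T",
    ] {
        snowflake.verified_stmt(sql);
    }
}
```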
Review comment: I think these new structs follow the existing patterns in this file, but they seem very specific to me (as in, do we really need two new structs that are almost identical to each other as well as to `Function`?). Did you consider adding an `order_by` clause to `Function` and adjusting the `parse_function` function? So something like the sketch that follows.

I think this would both reduce the code required for this PR and make the codebase easier to maintain, and it would extend naturally to other aggregate functions (e.g. Databricks offers the `first_value` function in addition to `first`). Adding an `order_by` to `Function` would allow users to parse any function with an ORDER BY clause, which I think would be quite valuable.
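The reviewer's inline code example was not preserved in this capture; the following is only a guess at the kind of change being described. Everything other than the `order_by` field itself is an assumption about the surrounding `sqlparser` AST and parser code.

```rust
use sqlparser::ast::{ObjectName, OrderByExpr};

// In sqlparser::ast -- one struct for all function calls instead of
// per-function aggregate structs:
pub struct Function {
    pub name: ObjectName,
    // ...existing fields (args, over, distinct, ...)...
    /// ORDER BY written inside the call, e.g. `FIRST(x ORDER BY x)`.
    pub order_by: Vec<OrderByExpr>,
}

// In Parser::parse_function, after the argument list has been parsed and
// before the closing paren is consumed, something like:
//
//     let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
//         self.parse_comma_separated(Parser::parse_order_by_expr)?
//     } else {
//         vec![]
//     };
```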
Author reply: I didn't consider this; however, it seems like a better approach to me. I will experiment with it and then let you know the result. Thanks for the suggestion.