Skip to content

Commit e4eeee9

Browse files
committed
Support BigQuery window function null treatment
Syntax differs for BigQuery on positioning of the `IGNORE|RESPECT NULLS` clause within a window function. This extends the parser to cover that syntax. https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#first_value
1 parent deaa6d8 commit e4eeee9

File tree

6 files changed

+200
-37
lines changed

6 files changed

+200
-37
lines changed

src/ast/mod.rs

+48-9
Original file line numberDiff line numberDiff line change
@@ -1404,6 +1404,36 @@ impl fmt::Display for NullTreatment {
14041404
}
14051405
}
14061406

1407+
/// Specifies where the `IGNORE NULLS` / `RESPECT NULLS` clause appears in a window function call.
1408+
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1409+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1410+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1411+
pub enum NullTreatmentType {
1412+
/// The declaration is part of the function's arguments.
1413+
///
1414+
/// ```sql
1415+
/// FIRST_VALUE(x IGNORE NULLS) OVER ()
1416+
/// ```
1417+
FunctionArg(NullTreatment),
1418+
/// The declaration occurs after the function call.
1419+
///
1420+
/// ```sql
1421+
/// FIRST_VALUE(x) RESPECT NULLS OVER ()
1422+
/// FIRST_VALUE(x) IGNORE NULLS OVER ()
1423+
/// ```
1424+
AfterFunction(NullTreatment),
1425+
}
1426+
1427+
impl Display for NullTreatmentType {
1428+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1429+
let null_treatment = match self {
1430+
NullTreatmentType::FunctionArg(n) => n,
1431+
NullTreatmentType::AfterFunction(n) => n,
1432+
};
1433+
write!(f, "{null_treatment}")
1434+
}
1435+
}
1436+
14071437
/// Specifies [WindowFrame]'s `start_bound` and `end_bound`
14081438
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
14091439
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -4785,15 +4815,18 @@ pub struct Function {
47854815
pub args: Vec<FunctionArg>,
47864816
/// e.g. `x > 5` in `COUNT(x) FILTER (WHERE x > 5)`
47874817
pub filter: Option<Box<Expr>>,
4788-
// Snowflake/MSSQL supports different options for null treatment in rank functions
4789-
pub null_treatment: Option<NullTreatment>,
4818+
/// Specifies Ignore / Respect NULL within window functions.
4819+
///
4820+
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#first_value)
4821+
/// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions/first_value)
4822+
pub null_treatment: Option<NullTreatmentType>,
47904823
pub over: Option<WindowType>,
4791-
// aggregate functions may specify eg `COUNT(DISTINCT x)`
4824+
/// Aggregate functions may specify e.g. `COUNT(DISTINCT x)`
47924825
pub distinct: bool,
4793-
// Some functions must be called without trailing parentheses, for example Postgres
4794-
// do it for current_catalog, current_schema, etc. This flags is used for formatting.
4826+
/// Some functions must be called without trailing parentheses, for example Postgres
4827+
/// does it for current_catalog, current_schema, etc. This flag is used for formatting.
47954828
pub special: bool,
4796-
// Required ordering for the function (if empty, there is no requirement).
4829+
/// Required ordering for the function (if empty, there is no requirement).
47974830
pub order_by: Vec<OrderByExpr>,
47984831
}
47994832

@@ -4828,19 +4861,25 @@ impl fmt::Display for Function {
48284861
};
48294862
write!(
48304863
f,
4831-
"{}({}{}{order_by}{})",
4864+
"{}({}{}{order_by}{}{})",
48324865
self.name,
48334866
if self.distinct { "DISTINCT " } else { "" },
48344867
display_comma_separated(&self.args),
48354868
display_comma_separated(&self.order_by),
4869+
match self.null_treatment {
4870+
Some(NullTreatmentType::FunctionArg(null_treatment)) => {
4871+
format!(" {null_treatment}")
4872+
}
4873+
_ => "".to_string(),
4874+
}
48364875
)?;
48374876

48384877
if let Some(filter_cond) = &self.filter {
48394878
write!(f, " FILTER (WHERE {filter_cond})")?;
48404879
}
48414880

4842-
if let Some(o) = &self.null_treatment {
4843-
write!(f, " {o}")?;
4881+
if let Some(NullTreatmentType::AfterFunction(null_treatment)) = &self.null_treatment {
4882+
write!(f, " {null_treatment}")?;
48444883
}
48454884

48464885
if let Some(o) = &self.over {

src/dialect/bigquery.rs

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ impl Dialect for BigQueryDialect {
3030
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
3131
}
3232

33+
/// See [doc](https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#first_value)
34+
fn supports_window_function_null_treatment_arg(&self) -> bool {
35+
true
36+
}
37+
3338
// See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences
3439
fn supports_string_literal_backslash_escape(&self) -> bool {
3540
true

src/dialect/generic.rs

+4
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ impl Dialect for GenericDialect {
4747
true
4848
}
4949

50+
fn supports_window_function_null_treatment_arg(&self) -> bool {
51+
true
52+
}
53+
5054
fn supports_dictionary_syntax(&self) -> bool {
5155
true
5256
}

src/dialect/mod.rs

+14
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,20 @@ pub trait Dialect: Debug + Any {
170170
fn supports_named_fn_args_with_eq_operator(&self) -> bool {
171171
false
172172
}
173+
/// Returns true if the dialect supports specifying null treatment
174+
/// as part of a window function's parameter list, as opposed
175+
/// to after the parameter list.
176+
/// i.e. a dialect that accepts the following syntax returns true
177+
/// ```sql
178+
/// FIRST_VALUE(a IGNORE NULLS) OVER ()
179+
/// ```
180+
/// while a dialect that only accepts the following syntax returns false
181+
/// ```sql
182+
/// FIRST_VALUE(a) IGNORE NULLS OVER ()
183+
/// ```
184+
fn supports_window_function_null_treatment_arg(&self) -> bool {
185+
false
186+
}
173187
/// Returns true if the dialect supports defining structs or objects using a
174188
/// syntax like `{'x': 1, 'y': 2, 'z': 3}`.
175189
fn supports_dictionary_syntax(&self) -> bool {

src/parser/mod.rs

+77-28
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,13 @@ impl From<bool> for MatchedTrailingBracket {
208208
}
209209
}
210210

211+
/// Output of the [`Parser::parse_window_function_args`] function.
212+
struct ParseWindowFunctionArgsOutput {
213+
args: Vec<FunctionArg>,
214+
order_by: Vec<OrderByExpr>,
215+
null_treatment: Option<NullTreatment>,
216+
}
217+
211218
/// Options that control how the [`Parser`] parses SQL text
212219
#[derive(Debug, Clone, PartialEq, Eq)]
213220
pub struct ParserOptions {
@@ -1212,7 +1219,11 @@ impl<'a> Parser<'a> {
12121219
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
12131220
self.expect_token(&Token::LParen)?;
12141221
let distinct = self.parse_all_or_distinct()?.is_some();
1215-
let (args, order_by) = self.parse_optional_args_with_orderby()?;
1222+
let ParseWindowFunctionArgsOutput {
1223+
args,
1224+
order_by,
1225+
null_treatment,
1226+
} = self.parse_window_function_args()?;
12161227
let filter = if self.dialect.supports_filter_during_aggregation()
12171228
&& self.parse_keyword(Keyword::FILTER)
12181229
&& self.consume_token(&Token::LParen)
@@ -1224,19 +1235,15 @@ impl<'a> Parser<'a> {
12241235
} else {
12251236
None
12261237
};
1227-
let null_treatment = match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE])
1228-
{
1229-
Some(keyword) => {
1230-
self.expect_keyword(Keyword::NULLS)?;
12311238

1232-
match keyword {
1233-
Keyword::RESPECT => Some(NullTreatment::RespectNulls),
1234-
Keyword::IGNORE => Some(NullTreatment::IgnoreNulls),
1235-
_ => None,
1236-
}
1237-
}
1238-
None => None,
1239-
};
1239+
// Syntax for null treatment shows up either in the args list
1240+
// or after the function call, but not both.
1241+
let mut null_treatment = null_treatment.map(NullTreatmentType::FunctionArg);
1242+
if null_treatment.is_none() {
1243+
null_treatment = self
1244+
.parse_null_treatment()?
1245+
.map(NullTreatmentType::AfterFunction);
1246+
}
12401247
let over = if self.parse_keyword(Keyword::OVER) {
12411248
if self.consume_token(&Token::LParen) {
12421249
let window_spec = self.parse_window_spec()?;
@@ -1259,17 +1266,37 @@ impl<'a> Parser<'a> {
12591266
}))
12601267
}
12611268

1269+
/// Optionally parses a null treatment clause.
1270+
fn parse_null_treatment(&mut self) -> Result<Option<NullTreatment>, ParserError> {
1271+
match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) {
1272+
Some(keyword) => {
1273+
self.expect_keyword(Keyword::NULLS)?;
1274+
1275+
Ok(match keyword {
1276+
Keyword::RESPECT => Some(NullTreatment::RespectNulls),
1277+
Keyword::IGNORE => Some(NullTreatment::IgnoreNulls),
1278+
_ => None,
1279+
})
1280+
}
1281+
None => Ok(None),
1282+
}
1283+
}
1284+
12621285
pub fn parse_time_functions(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
1263-
let (args, order_by, special) = if self.consume_token(&Token::LParen) {
1264-
let (args, order_by) = self.parse_optional_args_with_orderby()?;
1265-
(args, order_by, false)
1286+
let (args, order_by, null_treatment, special) = if self.consume_token(&Token::LParen) {
1287+
let ParseWindowFunctionArgsOutput {
1288+
args,
1289+
order_by,
1290+
null_treatment,
1291+
} = self.parse_window_function_args()?;
1292+
(args, order_by, null_treatment, false)
12661293
} else {
1267-
(vec![], vec![], true)
1294+
(vec![], vec![], None, true)
12681295
};
12691296
Ok(Expr::Function(Function {
12701297
name,
12711298
args,
1272-
null_treatment: None,
1299+
null_treatment: null_treatment.map(NullTreatmentType::FunctionArg),
12731300
filter: None,
12741301
over: None,
12751302
distinct: false,
@@ -9203,11 +9230,21 @@ impl<'a> Parser<'a> {
92039230
}
92049231
}
92059232

9206-
pub fn parse_optional_args_with_orderby(
9207-
&mut self,
9208-
) -> Result<(Vec<FunctionArg>, Vec<OrderByExpr>), ParserError> {
9233+
/// Parses a potentially empty list of arguments to a window function
9234+
/// (including the closing parenthesis).
9235+
///
9236+
/// Examples:
9237+
/// ```sql
9238+
/// FIRST_VALUE(x ORDER BY 1,2,3);
9239+
/// FIRST_VALUE(x IGNORE NULLS);
9240+
/// ```
9241+
fn parse_window_function_args(&mut self) -> Result<ParseWindowFunctionArgsOutput, ParserError> {
92099242
if self.consume_token(&Token::RParen) {
9210-
Ok((vec![], vec![]))
9243+
Ok(ParseWindowFunctionArgsOutput {
9244+
args: vec![],
9245+
order_by: vec![],
9246+
null_treatment: None,
9247+
})
92119248
} else {
92129249
// Snowflake permits a subquery to be passed as an argument without
92139250
// an enclosing set of parens if it's the only argument.
@@ -9219,22 +9256,34 @@ impl<'a> Parser<'a> {
92199256
self.prev_token();
92209257
let subquery = self.parse_boxed_query()?;
92219258
self.expect_token(&Token::RParen)?;
9222-
return Ok((
9223-
vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery(
9259+
return Ok(ParseWindowFunctionArgsOutput {
9260+
args: vec![FunctionArg::Unnamed(FunctionArgExpr::from(Expr::Subquery(
92249261
subquery,
92259262
)))],
9226-
vec![],
9227-
));
9263+
order_by: vec![],
9264+
null_treatment: None,
9265+
});
92289266
}
92299267

92309268
let args = self.parse_comma_separated(Parser::parse_function_args)?;
92319269
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
92329270
self.parse_comma_separated(Parser::parse_order_by_expr)?
92339271
} else {
9234-
vec![]
9272+
Default::default()
9273+
};
9274+
9275+
let null_treatment = if self.dialect.supports_window_function_null_treatment_arg() {
9276+
self.parse_null_treatment()?
9277+
} else {
9278+
None
92359279
};
9280+
92369281
self.expect_token(&Token::RParen)?;
9237-
Ok((args, order_by))
9282+
Ok(ParseWindowFunctionArgsOutput {
9283+
args,
9284+
order_by,
9285+
null_treatment,
9286+
})
92389287
}
92399288
}
92409289

tests/sqlparser_common.rs

+52
Original file line numberDiff line numberDiff line change
@@ -2643,6 +2643,58 @@ fn parse_window_rank_function() {
26432643
}
26442644
}
26452645

2646+
#[test]
2647+
fn parse_window_function_null_treatment_arg() {
2648+
let dialects = all_dialects_where(|d| d.supports_window_function_null_treatment_arg());
2649+
let sql = "SELECT \
2650+
FIRST_VALUE(a IGNORE NULLS) OVER (), \
2651+
FIRST_VALUE(b RESPECT NULLS) OVER () \
2652+
FROM mytable";
2653+
let Select { projection, .. } = dialects.verified_only_select(sql);
2654+
for (i, (expected_expr, expected_null_treatment)) in [
2655+
("a", NullTreatment::IgnoreNulls),
2656+
("b", NullTreatment::RespectNulls),
2657+
]
2658+
.into_iter()
2659+
.enumerate()
2660+
{
2661+
let SelectItem::UnnamedExpr(Expr::Function(actual)) = &projection[i] else {
2662+
unreachable!()
2663+
};
2664+
assert_eq!(ObjectName(vec![Ident::new("FIRST_VALUE")]), actual.name);
2665+
assert!(actual.order_by.is_empty());
2666+
assert_eq!(1, actual.args.len());
2667+
let FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier(actual_expr))) =
2668+
&actual.args[0]
2669+
else {
2670+
unreachable!()
2671+
};
2672+
assert_eq!(&Ident::new(expected_expr), actual_expr);
2673+
let Some(NullTreatmentType::FunctionArg(actual_null_treatment)) = actual.null_treatment
2674+
else {
2675+
unreachable!()
2676+
};
2677+
assert_eq!(expected_null_treatment, actual_null_treatment);
2678+
}
2679+
2680+
let sql = "SELECT FIRST_VALUE(a ORDER BY b IGNORE NULLS) OVER () FROM t1";
2681+
dialects.verified_stmt(sql);
2682+
2683+
let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1";
2684+
assert_eq!(
2685+
dialects.parse_sql_statements(sql).unwrap_err(),
2686+
ParserError::ParserError("Expected end of statement, found: NULLS".to_string())
2687+
);
2688+
2689+
let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1";
2690+
assert_eq!(
2691+
all_dialects_where(|d| !d.supports_window_function_null_treatment_arg())
2692+
.parse_sql_statements(sql)
2693+
.unwrap_err(),
2694+
ParserError::ParserError("Expected ), found: IGNORE".to_string())
2695+
);
2696+
}
2697+
26462698
#[test]
26472699
fn parse_create_table() {
26482700
let sql = "CREATE TABLE uk_cities (\

0 commit comments

Comments
 (0)