Skip to content

Commit 4726c90

Browse files
committed
provide LISTAGG implementation
This patch provides an initial implemenation of LISTAGG[1]. Notably this implemenation deviates from ANSI SQL by allowing both WITHIN GROUP and the delimiter to be optional. We do so because Redshift SQL works this way and this approach is ultimately more flexible. Fixes #169. [1] https://modern-sql.com/feature/listagg
1 parent 172ba42 commit 4726c90

File tree

4 files changed

+191
-0
lines changed

4 files changed

+191
-0
lines changed

src/ast/mod.rs

+68
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@ pub enum Expr {
223223
/// A parenthesized subquery `(SELECT ...)`, used in expression like
224224
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
225225
Subquery(Box<Query>),
226+
/// The `lISTAGG` function, e.g. `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
227+
ListAgg(ListAgg),
226228
}
227229

228230
impl fmt::Display for Expr {
@@ -298,6 +300,7 @@ impl fmt::Display for Expr {
298300
}
299301
Expr::Exists(s) => write!(f, "EXISTS ({})", s),
300302
Expr::Subquery(s) => write!(f, "({})", s),
303+
Expr::ListAgg(listagg) => write!(f, "{}", listagg),
301304
}
302305
}
303306
}
@@ -812,6 +815,71 @@ impl FromStr for FileFormat {
812815
}
813816
}
814817

818+
/// A `LISTAGG` invocation
819+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
820+
pub struct ListAgg {
821+
pub distinct: bool,
822+
pub expr: Box<Expr>,
823+
pub separator: Option<Box<Expr>>,
824+
pub on_overflow: Option<ListAggOnOverflow>,
825+
pub within_group: Option<Vec<OrderByExpr>>,
826+
}
827+
828+
impl fmt::Display for ListAgg {
829+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
830+
let distinct = if self.distinct { "DISTINCT " } else { "ALL " };
831+
let args = if let Some(separator) = &self.separator {
832+
format!(
833+
"{}",
834+
display_comma_separated(&[self.expr.clone(), separator.clone()])
835+
)
836+
} else {
837+
format!("{}", self.expr)
838+
};
839+
if let Some(on_overflow) = &self.on_overflow {
840+
write!(f, "LISTAGG({}{}{})", distinct, args, on_overflow)
841+
} else {
842+
write!(f, "LISTAGG({}{})", distinct, args)
843+
}?;
844+
if let Some(within_group) = &self.within_group {
845+
write!(
846+
f,
847+
" WITHIN GROUP (ORDER BY {})",
848+
display_comma_separated(within_group)
849+
)
850+
} else {
851+
Ok(())
852+
}
853+
}
854+
}
855+
856+
/// The `ON OVERFLOW` clause of a LISTAGG invocation
857+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
858+
pub struct ListAggOnOverflow {
859+
pub error: bool,
860+
pub filler: Option<Box<Expr>>,
861+
pub with_count: bool,
862+
}
863+
864+
impl fmt::Display for ListAggOnOverflow {
865+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
866+
let overflow = if self.error {
867+
"ERROR".to_owned()
868+
} else {
869+
format!(
870+
"TRUNCATE {}{} COUNT",
871+
if let Some(filler) = &self.filler {
872+
format!("{} ", filler)
873+
} else {
874+
"".to_owned()
875+
},
876+
if self.with_count { "WITH" } else { "WITHOUT" }
877+
)
878+
};
879+
write!(f, " ON OVERFLOW {}", overflow)
880+
}
881+
}
882+
815883
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
816884
pub enum ObjectType {
817885
Table,

src/dialect/keywords.rs

+3
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ define_keywords!(
160160
END_FRAME,
161161
END_PARTITION,
162162
EQUALS,
163+
ERROR,
163164
ESCAPE,
164165
EVERY,
165166
EXCEPT,
@@ -227,6 +228,7 @@ define_keywords!(
227228
LIKE,
228229
LIKE_REGEX,
229230
LIMIT,
231+
LISTAGG,
230232
LN,
231233
LOCAL,
232234
LOCALTIME,
@@ -275,6 +277,7 @@ define_keywords!(
275277
OUT,
276278
OUTER,
277279
OVER,
280+
OVERFLOW,
278281
OVERLAPS,
279282
OVERLAY,
280283
PARAMETER,

src/parser.rs

+69
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ impl Parser {
191191
"EXISTS" => self.parse_exists_expr(),
192192
"EXTRACT" => self.parse_extract_expr(),
193193
"INTERVAL" => self.parse_literal_interval(),
194+
"LISTAGG" => self.parse_listagg_expr(),
194195
"NOT" => Ok(Expr::UnaryOp {
195196
op: UnaryOperator::Not,
196197
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
@@ -423,6 +424,74 @@ impl Parser {
423424
})
424425
}
425426

427+
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
428+
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
429+
self.expect_token(&Token::LParen)?;
430+
let all = self.parse_keyword("ALL");
431+
let distinct = self.parse_keyword("DISTINCT");
432+
if all && distinct {
433+
return parser_err!("Cannot specify both ALL and DISTINCT in LISTAGG".to_string());
434+
}
435+
let args = self.parse_comma_separated(Parser::parse_expr)?;
436+
// TODO: Is there a safer way of grabbing the expr and separator?
437+
let expr = Box::new(args[0].clone());
438+
// While ANSI SQL would require the separator, Redshift makes this optional. Here we also
439+
// make the separator optional as this provides a more general implementation.
440+
let separator = if let Some(separtor) = args.get(1) {
441+
Some(Box::new(separtor.clone()))
442+
} else {
443+
None
444+
};
445+
let on_overflow = if self.parse_keywords(vec!["ON", "OVERFLOW"]) {
446+
let error = self.parse_keyword("ERROR");
447+
let filler = if !error {
448+
self.expect_keyword("TRUNCATE")?;
449+
Some(Box::new(self.parse_expr()?))
450+
} else {
451+
None
452+
};
453+
let with_count = if !error {
454+
let with_count = self.parse_keywords(vec!["WITH", "COUNT"]);
455+
let without_count = self.parse_keywords(vec!["WITHOUT", "COUNT"]);
456+
if !with_count && !without_count {
457+
return parser_err!(
458+
"Expected either WITH COUNT or WITHOUT COUNT in LISTAGG".to_string()
459+
);
460+
};
461+
with_count
462+
} else {
463+
false
464+
};
465+
Some(ListAggOnOverflow {
466+
error,
467+
filler,
468+
with_count,
469+
})
470+
} else {
471+
None
472+
};
473+
self.expect_token(&Token::RParen)?;
474+
// Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the
475+
// more general implementation.
476+
let within_group = if self.parse_keywords(vec!["WITHIN", "GROUP"]) {
477+
self.expect_token(&Token::LParen)?;
478+
self.expect_keywords(&["ORDER", "BY"])?;
479+
Some(self.parse_comma_separated(Parser::parse_order_by_expr)?)
480+
} else {
481+
None
482+
};
483+
if within_group.is_some() {
484+
self.expect_token(&Token::RParen)?;
485+
}
486+
Ok(Expr::ListAgg(ListAgg {
487+
distinct,
488+
expr,
489+
separator,
490+
on_overflow,
491+
within_group,
492+
}))
493+
}
494+
426495
// This function parses date/time fields for both the EXTRACT function-like
427496
// operator and interval qualifiers. EXTRACT supports a wider set of
428497
// date/time fields than interval qualifiers, so this function may need to

tests/sqlparser_common.rs

+51
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,57 @@ fn parse_extract() {
886886
);
887887
}
888888

889+
#[test]
890+
fn parse_listagg() {
891+
let sql = "SELECT LISTAGG(DISTINCT dateid, ', ' ON OVERFLOW TRUNCATE '%' WITHOUT COUNT) \
892+
WITHIN GROUP (ORDER BY id, username)";
893+
let select = verified_only_select(sql);
894+
895+
one_statement_parses_to(
896+
"SELECT LISTAGG(sellerid) WITHIN GROUP (ORDER BY dateid)",
897+
"SELECT LISTAGG(ALL sellerid) WITHIN GROUP (ORDER BY dateid)",
898+
);
899+
one_statement_parses_to("SELECT LISTAGG(dateid)", "SELECT LISTAGG(ALL dateid)");
900+
verified_stmt("SELECT LISTAGG(DISTINCT dateid)");
901+
902+
let expr = Box::new(Expr::Identifier(Ident::new("dateid")));
903+
let on_overflow = Some(ListAggOnOverflow {
904+
error: false,
905+
filler: Some(Box::new(Expr::Value(Value::SingleQuotedString(
906+
"%".to_string(),
907+
)))),
908+
with_count: false,
909+
});
910+
let within_group = Some(vec![
911+
OrderByExpr {
912+
expr: Expr::Identifier(Ident {
913+
value: "id".to_string(),
914+
quote_style: None,
915+
}),
916+
asc: None,
917+
},
918+
OrderByExpr {
919+
expr: Expr::Identifier(Ident {
920+
value: "username".to_string(),
921+
quote_style: None,
922+
}),
923+
asc: None,
924+
},
925+
]);
926+
assert_eq!(
927+
&Expr::ListAgg(ListAgg {
928+
distinct: true,
929+
expr,
930+
separator: Some(Box::new(Expr::Value(Value::SingleQuotedString(
931+
", ".to_string()
932+
)))),
933+
on_overflow,
934+
within_group
935+
}),
936+
expr_from_projection(only(&select.projection))
937+
);
938+
}
939+
889940
#[test]
890941
fn parse_create_table() {
891942
let sql = "CREATE TABLE uk_cities (\

0 commit comments

Comments
 (0)