Skip to content

Commit 1c0e5d3

Browse files
wugeer and iffyio authored
Add support for Hive's SELECT ... GROUP BY ... GROUPING SETS syntax (#1653)
Co-authored-by: Ifeanyi Ubah <[email protected]>
1 parent a5bbb5e commit 1c0e5d3

File tree

8 files changed

+132
-60
lines changed

8 files changed

+132
-60
lines changed

src/ast/query.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -2547,13 +2547,18 @@ impl fmt::Display for SelectInto {
25472547
/// e.g. GROUP BY year WITH ROLLUP WITH TOTALS
25482548
///
25492549
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
2550-
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
2550+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
25512551
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
25522552
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
25532553
pub enum GroupByWithModifier {
25542554
Rollup,
25552555
Cube,
25562556
Totals,
2557+
/// [Hive] supports GROUP BY GROUPING SETS syntax.
2558+
/// e.g. GROUP BY year, month GROUPING SETS ((year, month), (year), (month))
2559+
///
2560+
/// [Hive]: <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-GROUPINGSETSclause>
2561+
GroupingSets(Expr),
25572562
}
25582563

25592564
impl fmt::Display for GroupByWithModifier {
@@ -2562,6 +2567,9 @@ impl fmt::Display for GroupByWithModifier {
25622567
GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"),
25632568
GroupByWithModifier::Cube => write!(f, "WITH CUBE"),
25642569
GroupByWithModifier::Totals => write!(f, "WITH TOTALS"),
2570+
GroupByWithModifier::GroupingSets(expr) => {
2571+
write!(f, "{expr}")
2572+
}
25652573
}
25662574
}
25672575
}

src/dialect/clickhouse.rs

+10
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,14 @@ impl Dialect for ClickHouseDialect {
7979
fn supports_from_first_select(&self) -> bool {
8080
true
8181
}
82+
83+
/// See <https://clickhouse.com/docs/en/sql-reference/aggregate-functions/grouping_function#grouping-sets>
84+
fn supports_group_by_expr(&self) -> bool {
85+
true
86+
}
87+
88+
/// See <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
89+
fn supports_group_by_with_modifier(&self) -> bool {
90+
true
91+
}
8292
}

src/dialect/generic.rs

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ impl Dialect for GenericDialect {
4848
true
4949
}
5050

51+
fn supports_group_by_with_modifier(&self) -> bool {
52+
true
53+
}
54+
5155
fn supports_connect_by(&self) -> bool {
5256
true
5357
}

src/dialect/hive.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -52,18 +52,23 @@ impl Dialect for HiveDialect {
5252
true
5353
}
5454

55-
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
55+
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
5656
fn supports_bang_not_operator(&self) -> bool {
5757
true
5858
}
5959

60-
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
60+
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
6161
fn supports_load_data(&self) -> bool {
6262
true
6363
}
6464

65-
/// See Hive <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
65+
/// See <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
6666
fn supports_table_sample_before_alias(&self) -> bool {
6767
true
6868
}
69+
70+
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-CubesandRollupsr>
71+
fn supports_group_by_with_modifier(&self) -> bool {
72+
true
73+
}
6974
}

src/dialect/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,12 @@ pub trait Dialect: Debug + Any {
245245
false
246246
}
247247

248+
/// Returns true if the dialect supports `GROUP BY` modifiers prefixed by a `WITH` keyword.
249+
/// Example: `GROUP BY value WITH ROLLUP`.
250+
fn supports_group_by_with_modifier(&self) -> bool {
251+
false
252+
}
253+
248254
/// Returns true if the dialect supports CONNECT BY.
249255
fn supports_connect_by(&self) -> bool {
250256
false

src/parser/mod.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -9148,7 +9148,7 @@ impl<'a> Parser<'a> {
91489148
};
91499149

91509150
let mut modifiers = vec![];
9151-
if dialect_of!(self is ClickHouseDialect | GenericDialect) {
9151+
if self.dialect.supports_group_by_with_modifier() {
91529152
loop {
91539153
if !self.parse_keyword(Keyword::WITH) {
91549154
break;
@@ -9171,6 +9171,14 @@ impl<'a> Parser<'a> {
91719171
});
91729172
}
91739173
}
9174+
if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
9175+
self.expect_token(&Token::LParen)?;
9176+
let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?;
9177+
self.expect_token(&Token::RParen)?;
9178+
modifiers.push(GroupByWithModifier::GroupingSets(Expr::GroupingSets(
9179+
result,
9180+
)));
9181+
};
91749182
let group_by = match expressions {
91759183
None => GroupByExpr::All(modifiers),
91769184
Some(exprs) => GroupByExpr::Expressions(exprs, modifiers),

tests/sqlparser_clickhouse.rs

-55
Original file line numberDiff line numberDiff line change
@@ -1069,61 +1069,6 @@ fn parse_create_materialized_view() {
10691069
clickhouse_and_generic().verified_stmt(sql);
10701070
}
10711071

1072-
#[test]
1073-
fn parse_group_by_with_modifier() {
1074-
let clauses = ["x", "a, b", "ALL"];
1075-
let modifiers = [
1076-
"WITH ROLLUP",
1077-
"WITH CUBE",
1078-
"WITH TOTALS",
1079-
"WITH ROLLUP WITH CUBE",
1080-
];
1081-
let expected_modifiers = [
1082-
vec![GroupByWithModifier::Rollup],
1083-
vec![GroupByWithModifier::Cube],
1084-
vec![GroupByWithModifier::Totals],
1085-
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
1086-
];
1087-
for clause in &clauses {
1088-
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
1089-
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
1090-
match clickhouse_and_generic().verified_stmt(&sql) {
1091-
Statement::Query(query) => {
1092-
let group_by = &query.body.as_select().unwrap().group_by;
1093-
if clause == &"ALL" {
1094-
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
1095-
} else {
1096-
assert_eq!(
1097-
group_by,
1098-
&GroupByExpr::Expressions(
1099-
clause
1100-
.split(", ")
1101-
.map(|c| Identifier(Ident::new(c)))
1102-
.collect(),
1103-
expected_modifier.to_vec()
1104-
)
1105-
);
1106-
}
1107-
}
1108-
_ => unreachable!(),
1109-
}
1110-
}
1111-
}
1112-
1113-
// invalid cases
1114-
let invalid_cases = [
1115-
"SELECT * FROM t GROUP BY x WITH",
1116-
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
1117-
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
1118-
"SELECT * FROM t GROUP BY WITH ROLLUP",
1119-
];
1120-
for sql in invalid_cases {
1121-
clickhouse_and_generic()
1122-
.parse_sql_statements(sql)
1123-
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
1124-
}
1125-
}
1126-
11271072
#[test]
11281073
fn parse_select_order_by_with_fill_interpolate() {
11291074
let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \

tests/sqlparser_common.rs

+86
Original file line numberDiff line numberDiff line change
@@ -2447,6 +2447,92 @@ fn parse_select_group_by_all() {
24472447
);
24482448
}
24492449

2450+
#[test]
2451+
fn parse_group_by_with_modifier() {
2452+
let clauses = ["x", "a, b", "ALL"];
2453+
let modifiers = [
2454+
"WITH ROLLUP",
2455+
"WITH CUBE",
2456+
"WITH TOTALS",
2457+
"WITH ROLLUP WITH CUBE",
2458+
];
2459+
let expected_modifiers = [
2460+
vec![GroupByWithModifier::Rollup],
2461+
vec![GroupByWithModifier::Cube],
2462+
vec![GroupByWithModifier::Totals],
2463+
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
2464+
];
2465+
let dialects = all_dialects_where(|d| d.supports_group_by_with_modifier());
2466+
2467+
for clause in &clauses {
2468+
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
2469+
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
2470+
match dialects.verified_stmt(&sql) {
2471+
Statement::Query(query) => {
2472+
let group_by = &query.body.as_select().unwrap().group_by;
2473+
if clause == &"ALL" {
2474+
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
2475+
} else {
2476+
assert_eq!(
2477+
group_by,
2478+
&GroupByExpr::Expressions(
2479+
clause
2480+
.split(", ")
2481+
.map(|c| Identifier(Ident::new(c)))
2482+
.collect(),
2483+
expected_modifier.to_vec()
2484+
)
2485+
);
2486+
}
2487+
}
2488+
_ => unreachable!(),
2489+
}
2490+
}
2491+
}
2492+
2493+
// invalid cases
2494+
let invalid_cases = [
2495+
"SELECT * FROM t GROUP BY x WITH",
2496+
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
2497+
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
2498+
"SELECT * FROM t GROUP BY WITH ROLLUP",
2499+
];
2500+
for sql in invalid_cases {
2501+
dialects
2502+
.parse_sql_statements(sql)
2503+
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
2504+
}
2505+
}
2506+
2507+
#[test]
2508+
fn parse_group_by_special_grouping_sets() {
2509+
let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ())";
2510+
match all_dialects().verified_stmt(sql) {
2511+
Statement::Query(query) => {
2512+
let group_by = &query.body.as_select().unwrap().group_by;
2513+
assert_eq!(
2514+
group_by,
2515+
&GroupByExpr::Expressions(
2516+
vec![
2517+
Expr::Identifier(Ident::new("a")),
2518+
Expr::Identifier(Ident::new("b"))
2519+
],
2520+
vec![GroupByWithModifier::GroupingSets(Expr::GroupingSets(vec![
2521+
vec![
2522+
Expr::Identifier(Ident::new("a")),
2523+
Expr::Identifier(Ident::new("b"))
2524+
],
2525+
vec![Expr::Identifier(Ident::new("a")),],
2526+
vec![Expr::Identifier(Ident::new("b"))],
2527+
vec![]
2528+
]))]
2529+
)
2530+
);
2531+
}
2532+
_ => unreachable!(),
2533+
}
2534+
}
2535+
24502536
#[test]
24512537
fn parse_select_having() {
24522538
let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1";

0 commit comments

Comments
 (0)