Skip to content

Commit f5f0747

Browse files
wugeeriffyio
authored and committed
Add support for Hive's SELECT ... GROUP BY ... GROUPING SETS syntax (apache#1653)
Co-authored-by: Ifeanyi Ubah <[email protected]>
1 parent c8d35a3 commit f5f0747

File tree

8 files changed

+132
-60
lines changed

8 files changed

+132
-60
lines changed

src/ast/query.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -2547,13 +2547,18 @@ impl fmt::Display for SelectInto {
25472547
/// e.g. GROUP BY year WITH ROLLUP WITH TOTALS
25482548
///
25492549
/// [ClickHouse]: <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
2550-
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
2550+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
25512551
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
25522552
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
25532553
pub enum GroupByWithModifier {
25542554
Rollup,
25552555
Cube,
25562556
Totals,
2557+
/// [Hive] supports GROUP BY GROUPING SETS syntax.
2558+
/// e.g. GROUP BY year, month GROUPING SETS((year, month), (year), (month))
2559+
///
2560+
/// [Hive]: <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-GROUPINGSETSclause>
2561+
GroupingSets(Expr),
25572562
}
25582563

25592564
impl fmt::Display for GroupByWithModifier {
@@ -2562,6 +2567,9 @@ impl fmt::Display for GroupByWithModifier {
25622567
GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"),
25632568
GroupByWithModifier::Cube => write!(f, "WITH CUBE"),
25642569
GroupByWithModifier::Totals => write!(f, "WITH TOTALS"),
2570+
GroupByWithModifier::GroupingSets(expr) => {
2571+
write!(f, "{expr}")
2572+
}
25652573
}
25662574
}
25672575
}

src/dialect/clickhouse.rs

+10
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,14 @@ impl Dialect for ClickHouseDialect {
7979
fn supports_from_first_select(&self) -> bool {
8080
true
8181
}
82+
83+
/// See <https://clickhouse.com/docs/en/sql-reference/aggregate-functions/grouping_function#grouping-sets>
84+
fn supports_group_by_expr(&self) -> bool {
85+
true
86+
}
87+
88+
/// See <https://clickhouse.com/docs/en/sql-reference/statements/select/group-by#rollup-modifier>
89+
fn supports_group_by_with_modifier(&self) -> bool {
90+
true
91+
}
8292
}

src/dialect/generic.rs

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ impl Dialect for GenericDialect {
4848
true
4949
}
5050

51+
fn supports_group_by_with_modifier(&self) -> bool {
52+
true
53+
}
54+
5155
fn supports_connect_by(&self) -> bool {
5256
true
5357
}

src/dialect/hive.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -52,18 +52,23 @@ impl Dialect for HiveDialect {
5252
true
5353
}
5454

55-
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
55+
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362061#Tutorial-BuiltInOperators>
5656
fn supports_bang_not_operator(&self) -> bool {
5757
true
5858
}
5959

60-
/// See Hive <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
60+
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362036#LanguageManualDML-Loadingfilesintotables>
6161
fn supports_load_data(&self) -> bool {
6262
true
6363
}
6464

65-
/// See Hive <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
65+
/// See <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
6666
fn supports_table_sample_before_alias(&self) -> bool {
6767
true
6868
}
69+
70+
/// See <https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=30151323#EnhancedAggregation,Cube,GroupingandRollup-CubesandRollups>
71+
fn supports_group_by_with_modifier(&self) -> bool {
72+
true
73+
}
6974
}

src/dialect/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,12 @@ pub trait Dialect: Debug + Any {
245245
false
246246
}
247247

248+
/// Returns true if the dialect supports `GROUP BY` modifiers prefixed by a `WITH` keyword.
249+
/// Example: `GROUP BY value WITH ROLLUP`.
250+
fn supports_group_by_with_modifier(&self) -> bool {
251+
false
252+
}
253+
248254
/// Returns true if the dialect supports CONNECT BY.
249255
fn supports_connect_by(&self) -> bool {
250256
false

src/parser/mod.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -9158,7 +9158,7 @@ impl<'a> Parser<'a> {
91589158
};
91599159

91609160
let mut modifiers = vec![];
9161-
if dialect_of!(self is ClickHouseDialect | GenericDialect) {
9161+
if self.dialect.supports_group_by_with_modifier() {
91629162
loop {
91639163
if !self.parse_keyword(Keyword::WITH) {
91649164
break;
@@ -9181,6 +9181,14 @@ impl<'a> Parser<'a> {
91819181
});
91829182
}
91839183
}
9184+
if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
9185+
self.expect_token(&Token::LParen)?;
9186+
let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?;
9187+
self.expect_token(&Token::RParen)?;
9188+
modifiers.push(GroupByWithModifier::GroupingSets(Expr::GroupingSets(
9189+
result,
9190+
)));
9191+
};
91849192
let group_by = match expressions {
91859193
None => GroupByExpr::All(modifiers),
91869194
Some(exprs) => GroupByExpr::Expressions(exprs, modifiers),

tests/sqlparser_clickhouse.rs

-55
Original file line numberDiff line numberDiff line change
@@ -1069,61 +1069,6 @@ fn parse_create_materialized_view() {
10691069
clickhouse_and_generic().verified_stmt(sql);
10701070
}
10711071

1072-
#[test]
1073-
fn parse_group_by_with_modifier() {
1074-
let clauses = ["x", "a, b", "ALL"];
1075-
let modifiers = [
1076-
"WITH ROLLUP",
1077-
"WITH CUBE",
1078-
"WITH TOTALS",
1079-
"WITH ROLLUP WITH CUBE",
1080-
];
1081-
let expected_modifiers = [
1082-
vec![GroupByWithModifier::Rollup],
1083-
vec![GroupByWithModifier::Cube],
1084-
vec![GroupByWithModifier::Totals],
1085-
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
1086-
];
1087-
for clause in &clauses {
1088-
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
1089-
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
1090-
match clickhouse_and_generic().verified_stmt(&sql) {
1091-
Statement::Query(query) => {
1092-
let group_by = &query.body.as_select().unwrap().group_by;
1093-
if clause == &"ALL" {
1094-
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
1095-
} else {
1096-
assert_eq!(
1097-
group_by,
1098-
&GroupByExpr::Expressions(
1099-
clause
1100-
.split(", ")
1101-
.map(|c| Identifier(Ident::new(c)))
1102-
.collect(),
1103-
expected_modifier.to_vec()
1104-
)
1105-
);
1106-
}
1107-
}
1108-
_ => unreachable!(),
1109-
}
1110-
}
1111-
}
1112-
1113-
// invalid cases
1114-
let invalid_cases = [
1115-
"SELECT * FROM t GROUP BY x WITH",
1116-
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
1117-
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
1118-
"SELECT * FROM t GROUP BY WITH ROLLUP",
1119-
];
1120-
for sql in invalid_cases {
1121-
clickhouse_and_generic()
1122-
.parse_sql_statements(sql)
1123-
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
1124-
}
1125-
}
1126-
11271072
#[test]
11281073
fn parse_select_order_by_with_fill_interpolate() {
11291074
let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \

tests/sqlparser_common.rs

+86
Original file line numberDiff line numberDiff line change
@@ -2448,6 +2448,92 @@ fn parse_select_group_by_all() {
24482448
);
24492449
}
24502450

2451+
#[test]
2452+
fn parse_group_by_with_modifier() {
2453+
let clauses = ["x", "a, b", "ALL"];
2454+
let modifiers = [
2455+
"WITH ROLLUP",
2456+
"WITH CUBE",
2457+
"WITH TOTALS",
2458+
"WITH ROLLUP WITH CUBE",
2459+
];
2460+
let expected_modifiers = [
2461+
vec![GroupByWithModifier::Rollup],
2462+
vec![GroupByWithModifier::Cube],
2463+
vec![GroupByWithModifier::Totals],
2464+
vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube],
2465+
];
2466+
let dialects = all_dialects_where(|d| d.supports_group_by_with_modifier());
2467+
2468+
for clause in &clauses {
2469+
for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) {
2470+
let sql = format!("SELECT * FROM t GROUP BY {clause} {modifier}");
2471+
match dialects.verified_stmt(&sql) {
2472+
Statement::Query(query) => {
2473+
let group_by = &query.body.as_select().unwrap().group_by;
2474+
if clause == &"ALL" {
2475+
assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec()));
2476+
} else {
2477+
assert_eq!(
2478+
group_by,
2479+
&GroupByExpr::Expressions(
2480+
clause
2481+
.split(", ")
2482+
.map(|c| Identifier(Ident::new(c)))
2483+
.collect(),
2484+
expected_modifier.to_vec()
2485+
)
2486+
);
2487+
}
2488+
}
2489+
_ => unreachable!(),
2490+
}
2491+
}
2492+
}
2493+
2494+
// invalid cases
2495+
let invalid_cases = [
2496+
"SELECT * FROM t GROUP BY x WITH",
2497+
"SELECT * FROM t GROUP BY x WITH ROLLUP CUBE",
2498+
"SELECT * FROM t GROUP BY x WITH WITH ROLLUP",
2499+
"SELECT * FROM t GROUP BY WITH ROLLUP",
2500+
];
2501+
for sql in invalid_cases {
2502+
dialects
2503+
.parse_sql_statements(sql)
2504+
.expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH");
2505+
}
2506+
}
2507+
2508+
#[test]
2509+
fn parse_group_by_special_grouping_sets() {
2510+
let sql = "SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ())";
2511+
match all_dialects().verified_stmt(sql) {
2512+
Statement::Query(query) => {
2513+
let group_by = &query.body.as_select().unwrap().group_by;
2514+
assert_eq!(
2515+
group_by,
2516+
&GroupByExpr::Expressions(
2517+
vec![
2518+
Expr::Identifier(Ident::new("a")),
2519+
Expr::Identifier(Ident::new("b"))
2520+
],
2521+
vec![GroupByWithModifier::GroupingSets(Expr::GroupingSets(vec![
2522+
vec![
2523+
Expr::Identifier(Ident::new("a")),
2524+
Expr::Identifier(Ident::new("b"))
2525+
],
2526+
vec![Expr::Identifier(Ident::new("a")),],
2527+
vec![Expr::Identifier(Ident::new("b"))],
2528+
vec![]
2529+
]))]
2530+
)
2531+
);
2532+
}
2533+
_ => unreachable!(),
2534+
}
2535+
}
2536+
24512537
#[test]
24522538
fn parse_select_having() {
24532539
let sql = "SELECT foo FROM bar GROUP BY foo HAVING COUNT(*) > 1";

0 commit comments

Comments
 (0)