Skip to content

Commit 00b3b8b

Browse files
git-hulk authored and ayman-sigma committed
Add support of parsing OPTIMIZE TABLE statement for ClickHouse (apache#1359)
1 parent 6e79ce0 commit 00b3b8b

File tree

5 files changed

+175
-10
lines changed

5 files changed

+175
-10
lines changed

src/ast/ddl.rs

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,20 +1296,45 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef {
12961296
}
12971297
}
12981298

1299-
/// PARTITION statement used in ALTER TABLE et al. such as in Hive SQL
1299+
/// PARTITION clause used in ALTER TABLE and similar statements, such as in Hive and ClickHouse SQL.
1300+
/// For example, ClickHouse's OPTIMIZE TABLE supports syntax like PARTITION ID 'partition_id' and PARTITION expr.
1301+
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
13001302
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
13011303
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
13021304
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1303-
pub struct Partition {
1304-
pub partitions: Vec<Expr>,
1305+
pub enum Partition {
1306+
Identifier(Ident),
1307+
Expr(Expr),
1308+
Partitions(Vec<Expr>),
13051309
}
13061310

13071311
impl fmt::Display for Partition {
13081312
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1309-
write!(
1310-
f,
1311-
"PARTITION ({})",
1312-
display_comma_separated(&self.partitions)
1313-
)
1313+
match self {
1314+
Partition::Identifier(id) => write!(f, "PARTITION ID {id}"),
1315+
Partition::Expr(expr) => write!(f, "PARTITION {expr}"),
1316+
Partition::Partitions(partitions) => {
1317+
write!(f, "PARTITION ({})", display_comma_separated(partitions))
1318+
}
1319+
}
1320+
}
1321+
}
1322+
1323+
/// DEDUPLICATE clause used in statements such as ClickHouse's OPTIMIZE TABLE.
1324+
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
1325+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1326+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1327+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1328+
pub enum Deduplicate {
1329+
All,
1330+
ByExpression(Expr),
1331+
}
1332+
1333+
impl fmt::Display for Deduplicate {
1334+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1335+
match self {
1336+
Deduplicate::All => write!(f, "DEDUPLICATE"),
1337+
Deduplicate::ByExpression(expr) => write!(f, "DEDUPLICATE BY {expr}"),
1338+
}
13141339
}
13151340
}

src/ast/mod.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pub use self::data_type::{
3333
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
3434
pub use self::ddl::{
3535
AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
36-
ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs,
36+
ColumnOptionDef, ConstraintCharacteristics, Deduplicate, DeferrableInitial, GeneratedAs,
3737
GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition,
3838
ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef,
3939
UserDefinedTypeRepresentation, ViewColumnDef,
@@ -2852,6 +2852,18 @@ pub enum Statement {
28522852
to: Ident,
28532853
with: Vec<SqlOption>,
28542854
},
2855+
/// ```sql
2856+
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
2857+
/// ```
2858+
///
2859+
/// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
2860+
OptimizeTable {
2861+
name: ObjectName,
2862+
on_cluster: Option<Ident>,
2863+
partition: Option<Partition>,
2864+
include_final: bool,
2865+
deduplicate: Option<Deduplicate>,
2866+
},
28552867
}
28562868

28572869
impl fmt::Display for Statement {
@@ -4304,6 +4316,28 @@ impl fmt::Display for Statement {
43044316

43054317
Ok(())
43064318
}
4319+
Statement::OptimizeTable {
4320+
name,
4321+
on_cluster,
4322+
partition,
4323+
include_final,
4324+
deduplicate,
4325+
} => {
4326+
write!(f, "OPTIMIZE TABLE {name}")?;
4327+
if let Some(on_cluster) = on_cluster {
4328+
write!(f, " ON CLUSTER {on_cluster}", on_cluster = on_cluster)?;
4329+
}
4330+
if let Some(partition) = partition {
4331+
write!(f, " {partition}", partition = partition)?;
4332+
}
4333+
if *include_final {
4334+
write!(f, " FINAL")?;
4335+
}
4336+
if let Some(deduplicate) = deduplicate {
4337+
write!(f, " {deduplicate}")?;
4338+
}
4339+
Ok(())
4340+
}
43074341
}
43084342
}
43094343
}

src/keywords.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ define_keywords!(
218218
DECADE,
219219
DECIMAL,
220220
DECLARE,
221+
DEDUPLICATE,
221222
DEFAULT,
222223
DEFAULT_DDL_COLLATION,
223224
DEFERRABLE,
@@ -301,6 +302,7 @@ define_keywords!(
301302
FILE_FORMAT,
302303
FILL,
303304
FILTER,
305+
FINAL,
304306
FIRST,
305307
FIRST_VALUE,
306308
FIXEDSTRING,
@@ -354,6 +356,7 @@ define_keywords!(
354356
HOSTS,
355357
HOUR,
356358
HOURS,
359+
ID,
357360
IDENTITY,
358361
IF,
359362
IGNORE,

src/parser/mod.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,10 @@ impl<'a> Parser<'a> {
551551
Keyword::LOAD if dialect_of!(self is DuckDbDialect | GenericDialect) => {
552552
Ok(self.parse_load()?)
553553
}
554+
// `OPTIMIZE` is ClickHouse-specific: https://clickhouse.com/docs/en/sql-reference/statements/optimize
555+
Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
556+
Ok(self.parse_optimize_table()?)
557+
}
554558
_ => self.expected("an SQL statement", next_token),
555559
},
556560
Token::LParen => {
@@ -6284,7 +6288,7 @@ impl<'a> Parser<'a> {
62846288
self.expect_token(&Token::LParen)?;
62856289
let partitions = self.parse_comma_separated(Parser::parse_expr)?;
62866290
self.expect_token(&Token::RParen)?;
6287-
Ok(Partition { partitions })
6291+
Ok(Partition::Partitions(partitions))
62886292
}
62896293

62906294
pub fn parse_alter_table_operation(&mut self) -> Result<AlterTableOperation, ParserError> {
@@ -11194,6 +11198,45 @@ impl<'a> Parser<'a> {
1119411198
Ok(Statement::Load { extension_name })
1119511199
}
1119611200

11201+
/// ```sql
11202+
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
11203+
/// ```
11204+
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
11205+
pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
11206+
self.expect_keyword(Keyword::TABLE)?;
11207+
let name = self.parse_object_name(false)?;
11208+
let on_cluster = self.parse_optional_on_cluster()?;
11209+
11210+
let partition = if self.parse_keyword(Keyword::PARTITION) {
11211+
if self.parse_keyword(Keyword::ID) {
11212+
Some(Partition::Identifier(self.parse_identifier(false)?))
11213+
} else {
11214+
Some(Partition::Expr(self.parse_expr()?))
11215+
}
11216+
} else {
11217+
None
11218+
};
11219+
11220+
let include_final = self.parse_keyword(Keyword::FINAL);
11221+
let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
11222+
if self.parse_keyword(Keyword::BY) {
11223+
Some(Deduplicate::ByExpression(self.parse_expr()?))
11224+
} else {
11225+
Some(Deduplicate::All)
11226+
}
11227+
} else {
11228+
None
11229+
};
11230+
11231+
Ok(Statement::OptimizeTable {
11232+
name,
11233+
on_cluster,
11234+
partition,
11235+
include_final,
11236+
deduplicate,
11237+
})
11238+
}
11239+
1119711240
/// ```sql
1119811241
/// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] <sequence_name>
1119911242
/// ```

tests/sqlparser_clickhouse.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use sqlparser::ast::Value::Number;
2525
use sqlparser::ast::*;
2626
use sqlparser::dialect::ClickHouseDialect;
2727
use sqlparser::dialect::GenericDialect;
28+
use sqlparser::parser::ParserError::ParserError;
2829

2930
#[test]
3031
fn parse_map_access_expr() {
@@ -221,6 +222,65 @@ fn parse_create_table() {
221222
);
222223
}
223224

225+
#[test]
226+
fn parse_optimize_table() {
227+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0");
228+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE db.t0");
229+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster'");
230+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster' FINAL");
231+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE");
232+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE");
233+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE BY id");
234+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE BY id");
235+
clickhouse_and_generic()
236+
.verified_stmt("OPTIMIZE TABLE t0 PARTITION tuple('2023-04-22') DEDUPLICATE BY id");
237+
match clickhouse_and_generic().verified_stmt(
238+
"OPTIMIZE TABLE t0 ON CLUSTER cluster PARTITION ID '2024-07' FINAL DEDUPLICATE BY id",
239+
) {
240+
Statement::OptimizeTable {
241+
name,
242+
on_cluster,
243+
partition,
244+
include_final,
245+
deduplicate,
246+
..
247+
} => {
248+
assert_eq!(name.to_string(), "t0");
249+
assert_eq!(on_cluster, Some(Ident::new("cluster")));
250+
assert_eq!(
251+
partition,
252+
Some(Partition::Identifier(Ident::with_quote('\'', "2024-07")))
253+
);
254+
assert!(include_final);
255+
assert_eq!(
256+
deduplicate,
257+
Some(Deduplicate::ByExpression(Identifier(Ident::new("id"))))
258+
);
259+
}
260+
_ => unreachable!(),
261+
}
262+
263+
// negative cases
264+
assert_eq!(
265+
clickhouse_and_generic()
266+
.parse_sql_statements("OPTIMIZE TABLE t0 DEDUPLICATE BY")
267+
.unwrap_err(),
268+
ParserError("Expected: an expression:, found: EOF".to_string())
269+
);
270+
assert_eq!(
271+
clickhouse_and_generic()
272+
.parse_sql_statements("OPTIMIZE TABLE t0 PARTITION")
273+
.unwrap_err(),
274+
ParserError("Expected: an expression:, found: EOF".to_string())
275+
);
276+
assert_eq!(
277+
clickhouse_and_generic()
278+
.parse_sql_statements("OPTIMIZE TABLE t0 PARTITION ID")
279+
.unwrap_err(),
280+
ParserError("Expected: identifier, found: EOF".to_string())
281+
);
282+
}
283+
224284
fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
225285
ColumnDef {
226286
name,

0 commit comments

Comments
 (0)