Skip to content

Commit 9ee8988

Browse files
committed
Add support for parsing the OPTIMIZE TABLE statement for ClickHouse
ClickHouse supports using `OPTIMIZE TABLE` to initialize an unscheduled merge of data parts for tables. The syntax is as follows:

```SQL
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
```

See: https://clickhouse.com/docs/en/sql-reference/statements/optimize
1 parent bc15f7b commit 9ee8988

File tree

5 files changed

+178
-10
lines changed

5 files changed

+178
-10
lines changed

src/ast/ddl.rs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,20 +1296,43 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef {
12961296
}
12971297
}
12981298

1299-
/// PARTITION statement used in ALTER TABLE et al. such as in Hive SQL
1299+
/// PARTITION statement used in ALTER TABLE et al. such as in Hive and ClickHouse SQL
13001300
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
13011301
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
13021302
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1303-
pub struct Partition {
1304-
pub partitions: Vec<Expr>,
1303+
pub enum Partition {
1304+
ID(Ident),
1305+
Expression(Expr),
1306+
Partitions(Vec<Expr>),
13051307
}
13061308

13071309
impl fmt::Display for Partition {
13081310
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1309-
write!(
1310-
f,
1311-
"PARTITION ({})",
1312-
display_comma_separated(&self.partitions)
1313-
)
1311+
match self {
1312+
Partition::ID(id) => write!(f, "PARTITION ID {id}"),
1313+
Partition::Expression(expr) => write!(f, "PARTITION {expr}"),
1314+
Partition::Partitions(partitions) => {
1315+
write!(f, "PARTITION ({})", display_comma_separated(partitions))
1316+
}
1317+
}
1318+
}
1319+
}
1320+
1321+
/// DEDUPLICATE statement used in OPTIMIZE TABLE et al. such as in ClickHouse SQL
1322+
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
1323+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1324+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1325+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1326+
pub enum Deduplicate {
1327+
All,
1328+
ByExpression(Expr),
1329+
}
1330+
1331+
impl fmt::Display for Deduplicate {
1332+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1333+
match self {
1334+
Deduplicate::All => write!(f, "DEDUPLICATE"),
1335+
Deduplicate::ByExpression(expr) => write!(f, "DEDUPLICATE BY {expr}"),
1336+
}
13141337
}
13151338
}

src/ast/mod.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pub use self::data_type::{
3333
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
3434
pub use self::ddl::{
3535
AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
36-
ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs,
36+
ColumnOptionDef, ConstraintCharacteristics, Deduplicate, DeferrableInitial, GeneratedAs,
3737
GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition,
3838
ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef,
3939
UserDefinedTypeRepresentation, ViewColumnDef,
@@ -2827,6 +2827,18 @@ pub enum Statement {
28272827
to: Ident,
28282828
with: Vec<SqlOption>,
28292829
},
2830+
/// ```sql
2831+
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
2832+
/// ```
2833+
///
2834+
/// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
2835+
OptimizeTable {
2836+
name: ObjectName,
2837+
on_cluster: Option<Ident>,
2838+
partition: Option<Partition>,
2839+
include_final: bool,
2840+
deduplicate: Option<Deduplicate>,
2841+
},
28302842
}
28312843

28322844
impl fmt::Display for Statement {
@@ -4274,6 +4286,28 @@ impl fmt::Display for Statement {
42744286

42754287
Ok(())
42764288
}
4289+
Statement::OptimizeTable {
4290+
name,
4291+
on_cluster,
4292+
partition,
4293+
include_final,
4294+
deduplicate,
4295+
} => {
4296+
write!(f, "OPTIMIZE TABLE {name}")?;
4297+
if let Some(on_cluster) = on_cluster {
4298+
write!(f, " ON CLUSTER {on_cluster}", on_cluster = on_cluster)?;
4299+
}
4300+
if let Some(partition) = partition {
4301+
write!(f, " {partition}", partition = partition)?;
4302+
}
4303+
if *include_final {
4304+
write!(f, " FINAL")?;
4305+
}
4306+
if let Some(deduplicate) = deduplicate {
4307+
write!(f, " {deduplicate}")?;
4308+
}
4309+
Ok(())
4310+
}
42774311
}
42784312
}
42794313
}

src/keywords.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ define_keywords!(
218218
DECADE,
219219
DECIMAL,
220220
DECLARE,
221+
DEDUPLICATE,
221222
DEFAULT,
222223
DEFAULT_DDL_COLLATION,
223224
DEFERRABLE,
@@ -301,6 +302,7 @@ define_keywords!(
301302
FILE_FORMAT,
302303
FILL,
303304
FILTER,
305+
FINAL,
304306
FIRST,
305307
FIRST_VALUE,
306308
FIXEDSTRING,
@@ -354,6 +356,7 @@ define_keywords!(
354356
HOSTS,
355357
HOUR,
356358
HOURS,
359+
ID,
357360
IDENTITY,
358361
IF,
359362
IGNORE,

src/parser/mod.rs

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,10 @@ impl<'a> Parser<'a> {
551551
Keyword::LOAD if dialect_of!(self is DuckDbDialect | GenericDialect) => {
552552
Ok(self.parse_load()?)
553553
}
554+
// `OPTIMIZE` is ClickHouse-specific, see https://clickhouse.com/docs/en/sql-reference/statements/optimize
555+
Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
556+
Ok(self.parse_optimize_table()?)
557+
}
554558
_ => self.expected("an SQL statement", next_token),
555559
},
556560
Token::LParen => {
@@ -6271,7 +6275,7 @@ impl<'a> Parser<'a> {
62716275
self.expect_token(&Token::LParen)?;
62726276
let partitions = self.parse_comma_separated(Parser::parse_expr)?;
62736277
self.expect_token(&Token::RParen)?;
6274-
Ok(Partition { partitions })
6278+
Ok(Partition::Partitions(partitions))
62756279
}
62766280

62776281
pub fn parse_alter_table_operation(&mut self) -> Result<AlterTableOperation, ParserError> {
@@ -11159,6 +11163,50 @@ impl<'a> Parser<'a> {
1115911163
Ok(Statement::Load { extension_name })
1116011164
}
1116111165

11166+
/// ```sql
11167+
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
11168+
/// ```
11169+
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
11170+
pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
11171+
self.expect_keyword(Keyword::TABLE)?;
11172+
let name = self.parse_object_name(false)?;
11173+
11174+
let on_cluster = if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) {
11175+
Some(self.parse_identifier(false)?)
11176+
} else {
11177+
None
11178+
};
11179+
11180+
let partition = if self.parse_keyword(Keyword::PARTITION) {
11181+
if self.parse_keyword(Keyword::ID) {
11182+
Some(Partition::ID(self.parse_identifier(false)?))
11183+
} else {
11184+
Some(Partition::Expression(self.parse_expr()?))
11185+
}
11186+
} else {
11187+
None
11188+
};
11189+
11190+
let include_final = self.parse_keyword(Keyword::FINAL);
11191+
let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
11192+
if self.parse_keyword(Keyword::BY) {
11193+
Some(Deduplicate::ByExpression(self.parse_expr()?))
11194+
} else {
11195+
Some(Deduplicate::All)
11196+
}
11197+
} else {
11198+
None
11199+
};
11200+
11201+
Ok(Statement::OptimizeTable {
11202+
name,
11203+
on_cluster,
11204+
partition,
11205+
include_final,
11206+
deduplicate,
11207+
})
11208+
}
11209+
1116211210
/// ```sql
1116311211
/// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] <sequence_name>
1116411212
/// ```

tests/sqlparser_clickhouse.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use sqlparser::ast::Value::Number;
2525
use sqlparser::ast::*;
2626
use sqlparser::dialect::ClickHouseDialect;
2727
use sqlparser::dialect::GenericDialect;
28+
use sqlparser::parser::ParserError::ParserError;
2829

2930
#[test]
3031
fn parse_map_access_expr() {
@@ -221,6 +222,65 @@ fn parse_create_table() {
221222
);
222223
}
223224

225+
#[test]
226+
fn parse_optimize_table() {
227+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0");
228+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE db.t0");
229+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster'");
230+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster' FINAL");
231+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE");
232+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE");
233+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE BY id");
234+
clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE BY id");
235+
clickhouse_and_generic()
236+
.verified_stmt("OPTIMIZE TABLE t0 PARTITION tuple('2023-04-22') DEDUPLICATE BY id");
237+
match clickhouse_and_generic().verified_stmt(
238+
"OPTIMIZE TABLE t0 ON CLUSTER cluster PARTITION ID '2024-07' FINAL DEDUPLICATE BY id",
239+
) {
240+
Statement::OptimizeTable {
241+
name,
242+
on_cluster,
243+
partition,
244+
include_final,
245+
deduplicate,
246+
..
247+
} => {
248+
assert_eq!(name.to_string(), "t0");
249+
assert_eq!(on_cluster, Some(Ident::new("cluster")));
250+
assert_eq!(
251+
partition,
252+
Some(Partition::ID(Ident::with_quote('\'', "2024-07")))
253+
);
254+
assert!(include_final);
255+
assert_eq!(
256+
deduplicate,
257+
Some(Deduplicate::ByExpression(Identifier(Ident::new("id"))))
258+
);
259+
}
260+
_ => unreachable!(),
261+
}
262+
263+
// negative cases
264+
assert_eq!(
265+
clickhouse_and_generic()
266+
.parse_sql_statements("OPTIMIZE TABLE t0 DEDUPLICATE BY")
267+
.unwrap_err(),
268+
ParserError("Expected: an expression:, found: EOF".to_string())
269+
);
270+
assert_eq!(
271+
clickhouse_and_generic()
272+
.parse_sql_statements("OPTIMIZE TABLE t0 PARTITION")
273+
.unwrap_err(),
274+
ParserError("Expected: an expression:, found: EOF".to_string())
275+
);
276+
assert_eq!(
277+
clickhouse_and_generic()
278+
.parse_sql_statements("OPTIMIZE TABLE t0 PARTITION ID")
279+
.unwrap_err(),
280+
ParserError("Expected: identifier, found: EOF".to_string())
281+
);
282+
}
283+
224284
fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
225285
ColumnDef {
226286
name,

0 commit comments

Comments
 (0)