Skip to content

Commit 08fadd1

Browse files
committed
Add support for ClickHouse FORMAT on INSERT
Adds support for the `SETTINGS` and `FORMAT` keywords used by ClickHouse when inserting data using a syntax other than SQL. This can happen, for example, when using the ClickHouse CLI tool to pipe input from files or similar sources.
1 parent fe36020 commit 08fadd1

File tree

8 files changed

+157
-35
lines changed

8 files changed

+157
-35
lines changed

src/ast/dml.rs

+26-4
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ pub use super::ddl::{ColumnDef, TableConstraint};
3333

3434
use super::{
3535
display_comma_separated, display_separated, ClusteredBy, CommentDef, Expr, FileFormat,
36-
FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
36+
FormatClause, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
3737
InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens,
38-
OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine,
39-
TableWithJoins, Tag, WrappedCollection,
38+
OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption, SqliteOnConflict,
39+
TableEngine, TableWithJoins, Tag, WrappedCollection,
4040
};
4141

4242
/// CREATE INDEX statement.
@@ -495,6 +495,20 @@ pub struct Insert {
495495
pub priority: Option<MysqlInsertPriority>,
496496
/// Only for mysql
497497
pub insert_alias: Option<InsertAliases>,
498+
/// Settings used together with a specified `FORMAT`.
499+
///
500+
/// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'`
501+
///
502+
/// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into)
503+
/// [ClickHouse Formats](https://clickhouse.com/docs/en/interfaces/formats)
504+
pub settings: Option<Vec<Setting>>,
505+
/// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`,
506+
/// `JSON`, `JSONAsString`, `LineAsString` and more.
507+
///
508+
/// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}`
509+
///
510+
/// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data)
511+
pub format_clause: Option<FormatClause>,
498512
}
499513

500514
impl Display for Insert {
@@ -547,7 +561,15 @@ impl Display for Insert {
547561
write!(f, "{source}")?;
548562
}
549563

550-
if self.source.is_none() && self.columns.is_empty() {
564+
if let Some(settings) = &self.settings {
565+
write!(f, "SETTINGS {} ", display_comma_separated(settings))?;
566+
}
567+
568+
if let Some(format_clause) = &self.format_clause {
569+
write!(f, "{format_clause}")?;
570+
}
571+
572+
if self.source.is_none() && self.columns.is_empty() && self.format_clause.is_none() {
551573
write!(f, "DEFAULT VALUES")?;
552574
}
553575

src/ast/query.rs

+13-2
Original file line numberDiff line numberDiff line change
@@ -2465,14 +2465,25 @@ impl fmt::Display for GroupByExpr {
24652465
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
24662466
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
24672467
pub enum FormatClause {
2468-
Identifier(Ident),
2468+
Identifier {
2469+
ident: Ident,
2470+
expr: Option<Vec<Expr>>,
2471+
},
24692472
Null,
24702473
}
24712474

24722475
impl fmt::Display for FormatClause {
24732476
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
24742477
match self {
2475-
FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident),
2478+
FormatClause::Identifier { ident, expr } => {
2479+
write!(f, "FORMAT {}", ident)?;
2480+
2481+
if let Some(exprs) = expr {
2482+
write!(f, " {}", display_comma_separated(exprs))?;
2483+
}
2484+
2485+
Ok(())
2486+
}
24762487
FormatClause::Null => write!(f, "FORMAT NULL"),
24772488
}
24782489
}

src/ast/spans.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -1148,9 +1148,11 @@ impl Spanned for Insert {
11481148
table: _, // bool
11491149
on,
11501150
returning,
1151-
replace_into: _, // bool
1152-
priority: _, // todo, mysql specific
1153-
insert_alias: _, // todo, mysql specific
1151+
replace_into: _, // bool
1152+
priority: _, // todo, mysql specific
1153+
insert_alias: _, // todo, mysql specific
1154+
settings: _, // todo, clickhouse specific
1155+
format_clause: _, // todo, clickhouse specific
11541156
} = self;
11551157

11561158
union_spans(

src/dialect/clickhouse.rs

+8
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,12 @@ impl Dialect for ClickHouseDialect {
5050
fn supports_limit_comma(&self) -> bool {
5151
true
5252
}
53+
54+
// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
55+
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
56+
//
57+
// [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats)
58+
fn supports_dictionary_syntax(&self) -> bool {
59+
true
60+
}
5361
}

src/keywords.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
931931
Keyword::PREWHERE,
932932
// for ClickHouse SELECT * FROM t SETTINGS ...
933933
Keyword::SETTINGS,
934-
// for ClickHouse SELECT * FROM t FORMAT...
934+
// for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT...
935935
Keyword::FORMAT,
936936
// for Snowflake START WITH .. CONNECT BY
937937
Keyword::START,

src/parser/mod.rs

+71-22
Original file line numberDiff line numberDiff line change
@@ -9570,12 +9570,7 @@ impl<'a> Parser<'a> {
95709570
let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect)
95719571
&& self.parse_keyword(Keyword::FORMAT)
95729572
{
9573-
if self.parse_keyword(Keyword::NULL) {
9574-
Some(FormatClause::Null)
9575-
} else {
9576-
let ident = self.parse_identifier()?;
9577-
Some(FormatClause::Identifier(ident))
9578-
}
9573+
Some(self.parse_format_clause(false)?)
95799574
} else {
95809575
None
95819576
};
@@ -11824,30 +11819,53 @@ impl<'a> Parser<'a> {
1182411819

1182511820
let is_mysql = dialect_of!(self is MySqlDialect);
1182611821

11827-
let (columns, partitioned, after_columns, source) =
11828-
if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
11829-
(vec![], None, vec![], None)
11822+
let (columns, partitioned, after_columns, source) = if self
11823+
.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES])
11824+
{
11825+
(vec![], None, vec![], None)
11826+
} else {
11827+
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
11828+
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
11829+
11830+
let partitioned = self.parse_insert_partition()?;
11831+
// Hive allows you to specify columns after partitions as well if you want.
11832+
let after_columns = if dialect_of!(self is HiveDialect) {
11833+
self.parse_parenthesized_column_list(Optional, false)?
11834+
} else {
11835+
vec![]
11836+
};
11837+
(columns, partitioned, after_columns)
1183011838
} else {
11831-
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
11832-
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
11839+
Default::default()
11840+
};
1183311841

11834-
let partitioned = self.parse_insert_partition()?;
11835-
// Hive allows you to specify columns after partitions as well if you want.
11836-
let after_columns = if dialect_of!(self is HiveDialect) {
11837-
self.parse_parenthesized_column_list(Optional, false)?
11838-
} else {
11839-
vec![]
11840-
};
11841-
(columns, partitioned, after_columns)
11842+
let source =
11843+
if self.peek_keyword(Keyword::FORMAT) || self.peek_keyword(Keyword::SETTINGS) {
11844+
None
1184211845
} else {
11843-
Default::default()
11846+
Some(self.parse_query()?)
1184411847
};
1184511848

11846-
let source = Some(self.parse_query()?);
11849+
(columns, partitioned, after_columns, source)
11850+
};
11851+
11852+
let (format_clause, settings) = if dialect_of!(self is ClickHouseDialect | GenericDialect)
11853+
{
11854+
// Settings always comes before `FORMAT` for ClickHouse:
11855+
// <https://clickhouse.com/docs/en/sql-reference/statements/insert-into>
11856+
let settings = self.parse_settings()?;
1184711857

11848-
(columns, partitioned, after_columns, source)
11858+
let format = if self.parse_keyword(Keyword::FORMAT) {
11859+
Some(self.parse_format_clause(true)?)
11860+
} else {
11861+
None
1184911862
};
1185011863

11864+
(format, settings)
11865+
} else {
11866+
(None, None)
11867+
};
11868+
1185111869
let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect)
1185211870
&& self.parse_keyword(Keyword::AS)
1185311871
{
@@ -11931,10 +11949,41 @@ impl<'a> Parser<'a> {
1193111949
replace_into,
1193211950
priority,
1193311951
insert_alias,
11952+
settings,
11953+
format_clause,
1193411954
}))
1193511955
}
1193611956
}
1193711957

11958+
// Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and
11959+
// `INSERT` and also when using the CLI for pipes. It may or may not take an additional
11960+
// expression after the format so we try to parse the expression but allow failure.
11961+
//
11962+
// Since we know we never take an additional expression in `SELECT` context we only try
11963+
// to parse if `can_have_expression` is true.
11964+
//
11965+
// <https://clickhouse.com/docs/en/interfaces/formats>
11966+
pub fn parse_format_clause(
11967+
&mut self,
11968+
can_have_expression: bool,
11969+
) -> Result<FormatClause, ParserError> {
11970+
if self.parse_keyword(Keyword::NULL) {
11971+
Ok(FormatClause::Null)
11972+
} else {
11973+
let ident = self.parse_identifier()?;
11974+
let expr = if can_have_expression {
11975+
match self.try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) {
11976+
Ok(expr) => Some(expr),
11977+
_ => None,
11978+
}
11979+
} else {
11980+
None
11981+
};
11982+
11983+
Ok(FormatClause::Identifier { ident, expr })
11984+
}
11985+
}
11986+
1193811987
/// Returns true if the immediate tokens look like the
1193911988
/// beginning of a subquery. `(SELECT ...`
1194011989
fn peek_subquery_start(&mut self) -> bool {

tests/sqlparser_clickhouse.rs

+25-1
Original file line numberDiff line numberDiff line change
@@ -1378,7 +1378,10 @@ fn test_query_with_format_clause() {
13781378
} else {
13791379
assert_eq!(
13801380
query.format_clause,
1381-
Some(FormatClause::Identifier(Ident::new(*format)))
1381+
Some(FormatClause::Identifier {
1382+
ident: Ident::new(*format),
1383+
expr: None
1384+
})
13821385
);
13831386
}
13841387
}
@@ -1398,6 +1401,27 @@ fn test_query_with_format_clause() {
13981401
}
13991402
}
14001403

1404+
#[test]
1405+
fn test_insert_query_with_format_clause() {
1406+
let cases = [
1407+
r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#,
1408+
r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#,
1409+
r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#,
1410+
r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#,
1411+
r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#,
1412+
r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#,
1413+
r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#,
1414+
r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#,
1415+
r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
1416+
r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#,
1417+
r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
1418+
];
1419+
1420+
for sql in &cases {
1421+
clickhouse_and_generic().verified_stmt(sql);
1422+
}
1423+
}
1424+
14011425
#[test]
14021426
fn parse_create_table_on_commit_and_as_query() {
14031427
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#;

tests/sqlparser_postgres.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -4430,7 +4430,9 @@ fn test_simple_postgres_insert_with_alias() {
44304430
returning: None,
44314431
replace_into: false,
44324432
priority: None,
4433-
insert_alias: None
4433+
insert_alias: None,
4434+
settings: None,
4435+
format_clause: None,
44344436
})
44354437
)
44364438
}
@@ -4500,7 +4502,9 @@ fn test_simple_postgres_insert_with_alias() {
45004502
returning: None,
45014503
replace_into: false,
45024504
priority: None,
4503-
insert_alias: None
4505+
insert_alias: None,
4506+
settings: None,
4507+
format_clause: None,
45044508
})
45054509
)
45064510
}
@@ -4567,6 +4571,8 @@ fn test_simple_insert_with_quoted_alias() {
45674571
replace_into: false,
45684572
priority: None,
45694573
insert_alias: None,
4574+
settings: None,
4575+
format_clause: None,
45704576
})
45714577
)
45724578
}

0 commit comments

Comments
 (0)