Skip to content

Commit f661dbc

Browse files
bombsimon authored and ayman-sigma committed
Add support for ClickHouse FORMAT on INSERT (apache#1628)
1 parent bbece90 commit f661dbc

File tree

10 files changed

+155
-36
lines changed

10 files changed

+155
-36
lines changed

src/ast/dml.rs

+25-6
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@ use sqlparser_derive::{Visit, VisitMut};
3232
pub use super::ddl::{ColumnDef, TableConstraint};
3333

3434
use super::{
35-
display_comma_separated, display_separated, Assignment, ClusteredBy, CommentDef, Expr,
36-
FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
37-
InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens,
38-
OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine,
39-
TableObject, TableWithJoins, Tag, WrappedCollection,
35+
display_comma_separated, display_separated, query::InputFormatClause, Assignment, ClusteredBy,
36+
CommentDef, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat,
37+
HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert,
38+
OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption,
39+
SqliteOnConflict, TableEngine, TableObject, TableWithJoins, Tag, WrappedCollection,
4040
};
4141

4242
/// CREATE INDEX statement.
@@ -497,6 +497,19 @@ pub struct Insert {
497497
pub priority: Option<MysqlInsertPriority>,
498498
/// Only for mysql
499499
pub insert_alias: Option<InsertAliases>,
500+
/// Settings used for ClickHouse.
501+
///
502+
/// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'`
503+
///
504+
/// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into)
505+
pub settings: Option<Vec<Setting>>,
506+
/// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`,
507+
/// `JSON`, `JSONAsString`, `LineAsString` and more.
508+
///
509+
/// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}`
510+
///
511+
/// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data)
512+
pub format_clause: Option<InputFormatClause>,
500513
}
501514

502515
impl Display for Insert {
@@ -545,12 +558,18 @@ impl Display for Insert {
545558
write!(f, "({}) ", display_comma_separated(&self.after_columns))?;
546559
}
547560

561+
if let Some(settings) = &self.settings {
562+
write!(f, "SETTINGS {} ", display_comma_separated(settings))?;
563+
}
564+
548565
if let Some(source) = &self.source {
549566
write!(f, "{source}")?;
550567
} else if !self.assignments.is_empty() {
551568
write!(f, "SET ")?;
552569
write!(f, "{}", display_comma_separated(&self.assignments))?;
553-
} else if self.source.is_none() && self.columns.is_empty() {
570+
} else if let Some(format_clause) = &self.format_clause {
571+
write!(f, "{format_clause}")?;
572+
} else if self.columns.is_empty() {
554573
write!(f, "DEFAULT VALUES")?;
555574
}
556575

src/ast/mod.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ pub use self::operator::{BinaryOperator, UnaryOperator};
6161
pub use self::query::{
6262
AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode,
6363
ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml,
64-
FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate,
65-
InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn,
66-
JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, LateralView,
67-
LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
64+
FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem,
65+
InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator,
66+
JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn,
67+
LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
6868
NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn,
6969
OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem,
7070
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,

src/ast/query.rs

+23
Original file line numberDiff line numberDiff line change
@@ -2480,6 +2480,29 @@ impl fmt::Display for FormatClause {
24802480
}
24812481
}
24822482

2483+
/// FORMAT identifier in input context, specific to ClickHouse.
2484+
///
2485+
/// [ClickHouse]: <https://clickhouse.com/docs/en/interfaces/formats>
2486+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
2487+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
2488+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
2489+
pub struct InputFormatClause {
2490+
pub ident: Ident,
2491+
pub values: Vec<Expr>,
2492+
}
2493+
2494+
impl fmt::Display for InputFormatClause {
2495+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2496+
write!(f, "FORMAT {}", self.ident)?;
2497+
2498+
if !self.values.is_empty() {
2499+
write!(f, " {}", display_comma_separated(self.values.as_slice()))?;
2500+
}
2501+
2502+
Ok(())
2503+
}
2504+
}
2505+
24832506
/// FOR XML or FOR JSON clause, specific to MSSQL
24842507
/// (formats the output of a query as XML or JSON)
24852508
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

src/ast/spans.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,8 @@ impl Spanned for Insert {
11561156
priority: _, // todo, mysql specific
11571157
insert_alias: _, // todo, mysql specific
11581158
assignments,
1159+
settings: _, // todo, clickhouse specific
1160+
format_clause: _, // todo, clickhouse specific
11591161
} = self;
11601162

11611163
union_spans(

src/dialect/clickhouse.rs

+12
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,16 @@ impl Dialect for ClickHouseDialect {
5454
fn supports_insert_table_function(&self) -> bool {
5555
true
5656
}
57+
58+
fn supports_insert_format(&self) -> bool {
59+
true
60+
}
61+
62+
// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
63+
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
64+
//
65+
// [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats)
66+
fn supports_dictionary_syntax(&self) -> bool {
67+
true
68+
}
5769
}

src/dialect/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,11 @@ pub trait Dialect: Debug + Any {
797797
fn supports_insert_table_function(&self) -> bool {
798798
false
799799
}
800+
801+
/// Does the dialect support insert formats, e.g. `INSERT INTO ... FORMAT <format>`
802+
fn supports_insert_format(&self) -> bool {
803+
false
804+
}
800805
}
801806

802807
/// This represents the operators for which precedence must be defined

src/keywords.rs

-2
Original file line numberDiff line numberDiff line change
@@ -949,9 +949,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
949949
Keyword::PARTITION,
950950
// for Clickhouse PREWHERE
951951
Keyword::PREWHERE,
952-
// for ClickHouse SELECT * FROM t SETTINGS ...
953952
Keyword::SETTINGS,
954-
// for ClickHouse SELECT * FROM t FORMAT...
955953
Keyword::FORMAT,
956954
// for Snowflake START WITH .. CONNECT BY
957955
Keyword::START,

src/parser/mod.rs

+56-22
Original file line numberDiff line numberDiff line change
@@ -12043,35 +12043,55 @@ impl<'a> Parser<'a> {
1204312043

1204412044
let is_mysql = dialect_of!(self is MySqlDialect);
1204512045

12046-
let (columns, partitioned, after_columns, source, assignments) =
12047-
if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
12048-
(vec![], None, vec![], None, vec![])
12049-
} else {
12050-
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
12051-
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
12046+
let (columns, partitioned, after_columns, source, assignments) = if self
12047+
.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES])
12048+
{
12049+
(vec![], None, vec![], None, vec![])
12050+
} else {
12051+
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
12052+
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
1205212053

12053-
let partitioned = self.parse_insert_partition()?;
12054-
// Hive allows you to specify columns after partitions as well if you want.
12055-
let after_columns = if dialect_of!(self is HiveDialect) {
12056-
self.parse_parenthesized_column_list(Optional, false)?
12057-
} else {
12058-
vec![]
12059-
};
12060-
(columns, partitioned, after_columns)
12054+
let partitioned = self.parse_insert_partition()?;
12055+
// Hive allows you to specify columns after partitions as well if you want.
12056+
let after_columns = if dialect_of!(self is HiveDialect) {
12057+
self.parse_parenthesized_column_list(Optional, false)?
1206112058
} else {
12062-
Default::default()
12059+
vec![]
1206312060
};
12061+
(columns, partitioned, after_columns)
12062+
} else {
12063+
Default::default()
12064+
};
1206412065

12065-
let (source, assignments) =
12066-
if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
12067-
(None, self.parse_comma_separated(Parser::parse_assignment)?)
12068-
} else {
12069-
(Some(self.parse_query()?), vec![])
12070-
};
12066+
let (source, assignments) = if self.peek_keyword(Keyword::FORMAT)
12067+
|| self.peek_keyword(Keyword::SETTINGS)
12068+
{
12069+
(None, vec![])
12070+
} else if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
12071+
(None, self.parse_comma_separated(Parser::parse_assignment)?)
12072+
} else {
12073+
(Some(self.parse_query()?), vec![])
12074+
};
12075+
12076+
(columns, partitioned, after_columns, source, assignments)
12077+
};
1207112078

12072-
(columns, partitioned, after_columns, source, assignments)
12079+
let (format_clause, settings) = if self.dialect.supports_insert_format() {
12080+
// Settings always comes before `FORMAT` for ClickHouse:
12081+
// <https://clickhouse.com/docs/en/sql-reference/statements/insert-into>
12082+
let settings = self.parse_settings()?;
12083+
12084+
let format = if self.parse_keyword(Keyword::FORMAT) {
12085+
Some(self.parse_input_format_clause()?)
12086+
} else {
12087+
None
1207312088
};
1207412089

12090+
(format, settings)
12091+
} else {
12092+
Default::default()
12093+
};
12094+
1207512095
let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect)
1207612096
&& self.parse_keyword(Keyword::AS)
1207712097
{
@@ -12156,10 +12176,24 @@ impl<'a> Parser<'a> {
1215612176
replace_into,
1215712177
priority,
1215812178
insert_alias,
12179+
settings,
12180+
format_clause,
1215912181
}))
1216012182
}
1216112183
}
1216212184

12185+
// Parses input format clause used for [ClickHouse].
12186+
//
12187+
// <https://clickhouse.com/docs/en/interfaces/formats>
12188+
pub fn parse_input_format_clause(&mut self) -> Result<InputFormatClause, ParserError> {
12189+
let ident = self.parse_identifier()?;
12190+
let values = self
12191+
.maybe_parse(|p| p.parse_comma_separated(|p| p.parse_expr()))?
12192+
.unwrap_or_default();
12193+
12194+
Ok(InputFormatClause { ident, values })
12195+
}
12196+
1216312197
/// Returns true if the immediate tokens look like the
1216412198
/// beginning of a subquery. `(SELECT ...`
1216512199
fn peek_subquery_start(&mut self) -> bool {

tests/sqlparser_clickhouse.rs

+20
Original file line numberDiff line numberDiff line change
@@ -1404,6 +1404,26 @@ fn test_query_with_format_clause() {
14041404
}
14051405
}
14061406

1407+
#[test]
1408+
fn test_insert_query_with_format_clause() {
1409+
let cases = [
1410+
r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#,
1411+
r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#,
1412+
r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#,
1413+
r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#,
1414+
r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#,
1415+
r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#,
1416+
r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#,
1417+
r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
1418+
r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#,
1419+
r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
1420+
];
1421+
1422+
for sql in &cases {
1423+
clickhouse().verified_stmt(sql);
1424+
}
1425+
}
1426+
14071427
#[test]
14081428
fn parse_create_table_on_commit_and_as_query() {
14091429
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#;

tests/sqlparser_postgres.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -4431,7 +4431,9 @@ fn test_simple_postgres_insert_with_alias() {
44314431
returning: None,
44324432
replace_into: false,
44334433
priority: None,
4434-
insert_alias: None
4434+
insert_alias: None,
4435+
settings: None,
4436+
format_clause: None,
44354437
})
44364438
)
44374439
}
@@ -4502,7 +4504,9 @@ fn test_simple_postgres_insert_with_alias() {
45024504
returning: None,
45034505
replace_into: false,
45044506
priority: None,
4505-
insert_alias: None
4507+
insert_alias: None,
4508+
settings: None,
4509+
format_clause: None,
45064510
})
45074511
)
45084512
}
@@ -4570,6 +4574,8 @@ fn test_simple_insert_with_quoted_alias() {
45704574
replace_into: false,
45714575
priority: None,
45724576
insert_alias: None,
4577+
settings: None,
4578+
format_clause: None,
45734579
})
45744580
)
45754581
}

0 commit comments

Comments
 (0)