Skip to content

Commit c00b4fa

Browse files
committed
chore: Rebase
1 parent b09514e commit c00b4fa

File tree

8 files changed

+155
-38
lines changed

8 files changed

+155
-38
lines changed

src/ast/dml.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ pub use super::ddl::{ColumnDef, TableConstraint};
3333

3434
use super::{
3535
display_comma_separated, display_separated, Assignment, ClusteredBy, CommentDef, Expr,
36-
FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident,
37-
InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens,
38-
OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine,
39-
TableObject, TableWithJoins, Tag, WrappedCollection,
36+
FileFormat, FormatClause, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat,
37+
HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert,
38+
OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption,
39+
SqliteOnConflict, TableEngine, TableObject, TableWithJoins, Tag, WrappedCollection,
4040
};
4141

4242
/// CREATE INDEX statement.
@@ -497,6 +497,20 @@ pub struct Insert {
497497
pub priority: Option<MysqlInsertPriority>,
498498
/// Only for mysql
499499
pub insert_alias: Option<InsertAliases>,
500+
/// Settings used together with a specified `FORMAT`.
501+
///
502+
/// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'`
503+
///
504+
/// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into)
505+
/// [ClickHouse Formats](https://clickhouse.com/docs/en/interfaces/formats)
506+
pub settings: Option<Vec<Setting>>,
507+
/// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`,
508+
/// `JSON`, `JSONAsString`, `LineAsString` and more.
509+
///
510+
/// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}`
511+
///
512+
/// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data)
513+
pub format_clause: Option<FormatClause>,
500514
}
501515

502516
impl Display for Insert {
@@ -545,11 +559,17 @@ impl Display for Insert {
545559
write!(f, "({}) ", display_comma_separated(&self.after_columns))?;
546560
}
547561

562+
if let Some(settings) = &self.settings {
563+
write!(f, "SETTINGS {} ", display_comma_separated(settings))?;
564+
}
565+
548566
if let Some(source) = &self.source {
549567
write!(f, "{source}")?;
550568
} else if !self.assignments.is_empty() {
551569
write!(f, "SET ")?;
552570
write!(f, "{}", display_comma_separated(&self.assignments))?;
571+
} else if let Some(format_clause) = &self.format_clause {
572+
write!(f, "{format_clause}")?;
553573
} else if self.source.is_none() && self.columns.is_empty() {
554574
write!(f, "DEFAULT VALUES")?;
555575
}

src/ast/query.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2467,14 +2467,25 @@ impl fmt::Display for GroupByExpr {
24672467
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
24682468
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
24692469
pub enum FormatClause {
2470-
Identifier(Ident),
2470+
Identifier {
2471+
ident: Ident,
2472+
expr: Option<Vec<Expr>>,
2473+
},
24712474
Null,
24722475
}
24732476

24742477
impl fmt::Display for FormatClause {
24752478
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
24762479
match self {
2477-
FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident),
2480+
FormatClause::Identifier { ident, expr } => {
2481+
write!(f, "FORMAT {}", ident)?;
2482+
2483+
if let Some(exprs) = expr {
2484+
write!(f, " {}", display_comma_separated(exprs))?;
2485+
}
2486+
2487+
Ok(())
2488+
}
24782489
FormatClause::Null => write!(f, "FORMAT NULL"),
24792490
}
24802491
}

src/ast/spans.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,8 @@ impl Spanned for Insert {
11561156
priority: _, // todo, mysql specific
11571157
insert_alias: _, // todo, mysql specific
11581158
assignments,
1159+
settings: _, // todo, clickhouse specific
1160+
format_clause: _, // todo, clickhouse specific
11591161
} = self;
11601162

11611163
union_spans(

src/dialect/clickhouse.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,11 @@ impl Dialect for ClickHouseDialect {
5454
fn supports_insert_table_function(&self) -> bool {
5555
true
5656
}
57+
// ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting
58+
// with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected.
59+
//
60+
// [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats)
61+
fn supports_dictionary_syntax(&self) -> bool {
62+
true
63+
}
5764
}

src/keywords.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
951951
Keyword::PREWHERE,
952952
// for ClickHouse SELECT * FROM t SETTINGS ...
953953
Keyword::SETTINGS,
954-
// for ClickHouse SELECT * FROM t FORMAT...
954+
// for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT...
955955
Keyword::FORMAT,
956956
// for Snowflake START WITH .. CONNECT BY
957957
Keyword::START,

src/parser/mod.rs

Lines changed: 75 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9702,12 +9702,7 @@ impl<'a> Parser<'a> {
97029702
let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect)
97039703
&& self.parse_keyword(Keyword::FORMAT)
97049704
{
9705-
if self.parse_keyword(Keyword::NULL) {
9706-
Some(FormatClause::Null)
9707-
} else {
9708-
let ident = self.parse_identifier()?;
9709-
Some(FormatClause::Identifier(ident))
9710-
}
9705+
Some(self.parse_format_clause(false)?)
97119706
} else {
97129707
None
97139708
};
@@ -12033,35 +12028,56 @@ impl<'a> Parser<'a> {
1203312028

1203412029
let is_mysql = dialect_of!(self is MySqlDialect);
1203512030

12036-
let (columns, partitioned, after_columns, source, assignments) =
12037-
if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
12038-
(vec![], None, vec![], None, vec![])
12039-
} else {
12040-
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
12041-
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
12031+
let (columns, partitioned, after_columns, source, assignments) = if self
12032+
.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES])
12033+
{
12034+
(vec![], None, vec![], None, vec![])
12035+
} else {
12036+
let (columns, partitioned, after_columns) = if !self.peek_subquery_start() {
12037+
let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
1204212038

12043-
let partitioned = self.parse_insert_partition()?;
12044-
// Hive allows you to specify columns after partitions as well if you want.
12045-
let after_columns = if dialect_of!(self is HiveDialect) {
12046-
self.parse_parenthesized_column_list(Optional, false)?
12047-
} else {
12048-
vec![]
12049-
};
12050-
(columns, partitioned, after_columns)
12039+
let partitioned = self.parse_insert_partition()?;
12040+
// Hive allows you to specify columns after partitions as well if you want.
12041+
let after_columns = if dialect_of!(self is HiveDialect) {
12042+
self.parse_parenthesized_column_list(Optional, false)?
1205112043
} else {
12052-
Default::default()
12044+
vec![]
1205312045
};
12046+
(columns, partitioned, after_columns)
12047+
} else {
12048+
Default::default()
12049+
};
1205412050

12055-
let (source, assignments) =
12056-
if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
12057-
(None, self.parse_comma_separated(Parser::parse_assignment)?)
12058-
} else {
12059-
(Some(self.parse_query()?), vec![])
12060-
};
12051+
let (source, assignments) = if self.peek_keyword(Keyword::FORMAT)
12052+
|| self.peek_keyword(Keyword::SETTINGS)
12053+
{
12054+
(None, vec![])
12055+
} else if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) {
12056+
(None, self.parse_comma_separated(Parser::parse_assignment)?)
12057+
} else {
12058+
(Some(self.parse_query()?), vec![])
12059+
};
12060+
12061+
(columns, partitioned, after_columns, source, assignments)
12062+
};
12063+
12064+
let (format_clause, settings) = if dialect_of!(self is ClickHouseDialect | GenericDialect)
12065+
{
12066+
// `SETTINGS` always comes before `FORMAT` for ClickHouse:
12067+
// <https://clickhouse.com/docs/en/sql-reference/statements/insert-into>
12068+
let settings = self.parse_settings()?;
1206112069

12062-
(columns, partitioned, after_columns, source, assignments)
12070+
let format = if self.parse_keyword(Keyword::FORMAT) {
12071+
Some(self.parse_format_clause(true)?)
12072+
} else {
12073+
None
1206312074
};
1206412075

12076+
(format, settings)
12077+
} else {
12078+
(None, None)
12079+
};
12080+
1206512081
let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect)
1206612082
&& self.parse_keyword(Keyword::AS)
1206712083
{
@@ -12146,10 +12162,41 @@ impl<'a> Parser<'a> {
1214612162
replace_into,
1214712163
priority,
1214812164
insert_alias,
12165+
settings,
12166+
format_clause,
1214912167
}))
1215012168
}
1215112169
}
1215212170

12171+
// Parses the `FORMAT` clause used by ClickHouse. Formats are different when using `SELECT` and
12172+
// `INSERT` and also when using the CLI for pipes. It may or may not take an additional
12173+
// expression after the format so we try to parse the expression but allow failure.
12174+
//
12175+
// Since we know we never take an additional expression in `SELECT` context, we only try
12176+
// to parse if `can_have_expression` is true.
12177+
//
12178+
// <https://clickhouse.com/docs/en/interfaces/formats>
12179+
pub fn parse_format_clause(
12180+
&mut self,
12181+
can_have_expression: bool,
12182+
) -> Result<FormatClause, ParserError> {
12183+
if self.parse_keyword(Keyword::NULL) {
12184+
Ok(FormatClause::Null)
12185+
} else {
12186+
let ident = self.parse_identifier()?;
12187+
let expr = if can_have_expression {
12188+
match self.try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) {
12189+
Ok(expr) => Some(expr),
12190+
_ => None,
12191+
}
12192+
} else {
12193+
None
12194+
};
12195+
12196+
Ok(FormatClause::Identifier { ident, expr })
12197+
}
12198+
}
12199+
1215312200
/// Returns true if the immediate tokens look like the
1215412201
/// beginning of a subquery. `(SELECT ...`
1215512202
fn peek_subquery_start(&mut self) -> bool {

tests/sqlparser_clickhouse.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1384,7 +1384,10 @@ fn test_query_with_format_clause() {
13841384
} else {
13851385
assert_eq!(
13861386
query.format_clause,
1387-
Some(FormatClause::Identifier(Ident::new(*format)))
1387+
Some(FormatClause::Identifier {
1388+
ident: Ident::new(*format),
1389+
expr: None
1390+
})
13881391
);
13891392
}
13901393
}
@@ -1404,6 +1407,27 @@ fn test_query_with_format_clause() {
14041407
}
14051408
}
14061409

1410+
#[test]
1411+
fn test_insert_query_with_format_clause() {
1412+
let cases = [
1413+
r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#,
1414+
r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#,
1415+
r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#,
1416+
r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#,
1417+
r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#,
1418+
r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#,
1419+
r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#,
1420+
r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#,
1421+
r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
1422+
r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format' FORMAT Template"#,
1423+
r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
1424+
];
1425+
1426+
for sql in &cases {
1427+
clickhouse_and_generic().verified_stmt(sql);
1428+
}
1429+
}
1430+
14071431
#[test]
14081432
fn parse_create_table_on_commit_and_as_query() {
14091433
let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#;

tests/sqlparser_postgres.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4431,7 +4431,9 @@ fn test_simple_postgres_insert_with_alias() {
44314431
returning: None,
44324432
replace_into: false,
44334433
priority: None,
4434-
insert_alias: None
4434+
insert_alias: None,
4435+
settings: None,
4436+
format_clause: None,
44354437
})
44364438
)
44374439
}
@@ -4502,7 +4504,9 @@ fn test_simple_postgres_insert_with_alias() {
45024504
returning: None,
45034505
replace_into: false,
45044506
priority: None,
4505-
insert_alias: None
4507+
insert_alias: None,
4508+
settings: None,
4509+
format_clause: None,
45064510
})
45074511
)
45084512
}
@@ -4570,6 +4574,8 @@ fn test_simple_insert_with_quoted_alias() {
45704574
replace_into: false,
45714575
priority: None,
45724576
insert_alias: None,
4577+
settings: None,
4578+
format_clause: None,
45734579
})
45744580
)
45754581
}

0 commit comments

Comments
 (0)