Skip to content

Standardize CREATE TABLE options equals signs #1751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/ast/dml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ impl Display for CreateTable {
write!(f, " WITH ({})", display_comma_separated(&self.with_options))?;
}
if let Some(engine) = &self.engine {
write!(f, " ENGINE={engine}")?;
write!(f, " ENGINE = {engine}")?;
}
if let Some(comment_def) = &self.comment {
match comment_def {
Expand All @@ -378,7 +378,7 @@ impl Display for CreateTable {
}

if let Some(auto_increment_offset) = self.auto_increment_offset {
write!(f, " AUTO_INCREMENT {auto_increment_offset}")?;
write!(f, " AUTO_INCREMENT = {auto_increment_offset}")?;
}
if let Some(primary_key) = &self.primary_key {
write!(f, " PRIMARY KEY {}", primary_key)?;
Expand Down Expand Up @@ -433,35 +433,35 @@ impl Display for CreateTable {
if let Some(is_enabled) = self.enable_schema_evolution {
write!(
f,
" ENABLE_SCHEMA_EVOLUTION={}",
" ENABLE_SCHEMA_EVOLUTION = {}",
if is_enabled { "TRUE" } else { "FALSE" }
)?;
}

if let Some(is_enabled) = self.change_tracking {
write!(
f,
" CHANGE_TRACKING={}",
" CHANGE_TRACKING = {}",
if is_enabled { "TRUE" } else { "FALSE" }
)?;
}

if let Some(data_retention_time_in_days) = self.data_retention_time_in_days {
write!(
f,
" DATA_RETENTION_TIME_IN_DAYS={data_retention_time_in_days}",
" DATA_RETENTION_TIME_IN_DAYS = {data_retention_time_in_days}",
)?;
}

if let Some(max_data_extension_time_in_days) = self.max_data_extension_time_in_days {
write!(
f,
" MAX_DATA_EXTENSION_TIME_IN_DAYS={max_data_extension_time_in_days}",
" MAX_DATA_EXTENSION_TIME_IN_DAYS = {max_data_extension_time_in_days}",
)?;
}

if let Some(default_ddl_collation) = &self.default_ddl_collation {
write!(f, " DEFAULT_DDL_COLLATION='{default_ddl_collation}'",)?;
write!(f, " DEFAULT_DDL_COLLATION = '{default_ddl_collation}'",)?;
}

if let Some(with_aggregation_policy) = &self.with_aggregation_policy {
Expand All @@ -477,10 +477,10 @@ impl Display for CreateTable {
}

if let Some(default_charset) = &self.default_charset {
write!(f, " DEFAULT CHARSET={default_charset}")?;
write!(f, " DEFAULT CHARSET = {default_charset}")?;
}
if let Some(collation) = &self.collation {
write!(f, " COLLATE={collation}")?;
write!(f, " COLLATE = {collation}")?;
}

if self.on_commit.is_some() {
Expand Down
14 changes: 9 additions & 5 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6730,7 +6730,7 @@ impl<'a> Parser<'a> {
let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?;

let engine = if self.parse_keyword(Keyword::ENGINE) {
self.expect_token(&Token::Eq)?;
let _ = self.consume_token(&Token::Eq);
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => {
Expand Down Expand Up @@ -6787,8 +6787,10 @@ impl<'a> Parser<'a> {

let create_table_config = self.parse_optional_create_table_config()?;

let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) {
self.expect_token(&Token::Eq)?;
let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET])
|| self.parse_keyword(Keyword::CHARSET)
{
let _ = self.consume_token(&Token::Eq);
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => Some(w.value),
Expand All @@ -6798,8 +6800,10 @@ impl<'a> Parser<'a> {
None
};

let collation = if self.parse_keywords(&[Keyword::COLLATE]) {
self.expect_token(&Token::Eq)?;
let collation = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::COLLATE])
|| self.parse_keyword(Keyword::COLLATE)
{
let _ = self.consume_token(&Token::Eq);
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => Some(w.value),
Expand Down
14 changes: 7 additions & 7 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,10 +219,10 @@ fn parse_delimited_identifiers() {

#[test]
fn parse_create_table() {
clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x")"#);
clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x""#);
clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE = MergeTree ORDER BY ("x")"#);
clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE = MergeTree ORDER BY "x""#);
clickhouse().verified_stmt(
r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x" AS SELECT * FROM "t" WHERE true"#,
r#"CREATE TABLE "x" ("a" "int") ENGINE = MergeTree ORDER BY "x" AS SELECT * FROM "t" WHERE true"#,
);
}

Expand Down Expand Up @@ -589,7 +589,7 @@ fn parse_clickhouse_data_types() {

#[test]
fn parse_create_table_with_nullable() {
let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE=MergeTree ORDER BY (`k`)"#;
let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE = MergeTree ORDER BY (`k`)"#;
// ClickHouse has a case-sensitive definition of data type, but canonical representation is not
let canonical_sql = sql.replace("String", "STRING");

Expand Down Expand Up @@ -638,7 +638,7 @@ fn parse_create_table_with_nested_data_types() {
" k Array(Tuple(FixedString(128), Int128)),",
" l Tuple(a DateTime64(9), b Array(UUID)),",
" m Map(String, UInt16)",
") ENGINE=MergeTree ORDER BY (k)"
") ENGINE = MergeTree ORDER BY (k)"
);

match clickhouse().one_statement_parses_to(sql, "") {
Expand Down Expand Up @@ -714,7 +714,7 @@ fn parse_create_table_with_nested_data_types() {
fn parse_create_table_with_primary_key() {
match clickhouse_and_generic().verified_stmt(concat!(
r#"CREATE TABLE db.table (`i` INT, `k` INT)"#,
" ENGINE=SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')",
" ENGINE = SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')",
" PRIMARY KEY tuple(i)",
" ORDER BY tuple(i)",
)) {
Expand Down Expand Up @@ -798,7 +798,7 @@ fn parse_create_table_with_variant_default_expressions() {
" b DATETIME EPHEMERAL now(),",
" c DATETIME EPHEMERAL,",
" d STRING ALIAS toString(c)",
") ENGINE=MergeTree"
") ENGINE = MergeTree"
);
match clickhouse_and_generic().verified_stmt(sql) {
Statement::CreateTable(CreateTable { columns, .. }) => {
Expand Down
184 changes: 176 additions & 8 deletions tests/sqlparser_mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -860,11 +860,11 @@ fn parse_create_table_comment() {
#[test]
fn parse_create_table_auto_increment_offset() {
let canonical =
"CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT 123";
let with_equal =
"CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE=InnoDB AUTO_INCREMENT=123";
"CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE = InnoDB AUTO_INCREMENT = 123";
let without_equal =
"CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) ENGINE = InnoDB AUTO_INCREMENT 123";

for sql in [canonical, with_equal] {
for sql in [canonical, without_equal] {
match mysql().one_statement_parses_to(sql, canonical) {
Statement::CreateTable(CreateTable {
name,
Expand Down Expand Up @@ -916,7 +916,7 @@ fn parse_create_table_set_enum() {

#[test]
fn parse_create_table_engine_default_charset() {
let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3";
let sql = "CREATE TABLE foo (id INT(11)) ENGINE = InnoDB DEFAULT CHARSET = utf8mb3";
match mysql().verified_stmt(sql) {
Statement::CreateTable(CreateTable {
name,
Expand Down Expand Up @@ -949,7 +949,7 @@ fn parse_create_table_engine_default_charset() {

#[test]
fn parse_create_table_collate() {
let sql = "CREATE TABLE foo (id INT(11)) COLLATE=utf8mb4_0900_ai_ci";
let sql = "CREATE TABLE foo (id INT(11)) COLLATE = utf8mb4_0900_ai_ci";
match mysql().verified_stmt(sql) {
Statement::CreateTable(CreateTable {
name,
Expand All @@ -974,7 +974,7 @@ fn parse_create_table_collate() {

#[test]
fn parse_create_table_both_options_and_as_query() {
let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb4_0900_ai_ci AS SELECT 1";
let sql = "CREATE TABLE foo (id INT(11)) ENGINE = InnoDB DEFAULT CHARSET = utf8mb3 COLLATE = utf8mb4_0900_ai_ci AS SELECT 1";
match mysql_and_generic().verified_stmt(sql) {
Statement::CreateTable(CreateTable {
name,
Expand All @@ -994,7 +994,7 @@ fn parse_create_table_both_options_and_as_query() {
_ => unreachable!(),
}

let sql = r"CREATE TABLE foo (id INT(11)) ENGINE=InnoDB AS SELECT 1 DEFAULT CHARSET=utf8mb3";
let sql = r"CREATE TABLE foo (id INT(11)) ENGINE = InnoDB AS SELECT 1 DEFAULT CHARSET=utf8mb3";
assert!(matches!(
mysql_and_generic().parse_sql_statements(sql),
Err(ParserError::ParserError(_))
Expand Down Expand Up @@ -1047,6 +1047,174 @@ fn parse_create_table_gencol() {
mysql_and_generic().verified_stmt("CREATE TABLE t1 (a INT, b INT AS (a * 2) STORED)");
}

#[test]
fn parse_create_table_without_equals() {
mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) CHARSET utf8mb4",
"CREATE TABLE foo (id INT) DEFAULT CHARSET = utf8mb4",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) COLLATE utf8mb4_unicode_ci",
"CREATE TABLE foo (id INT) COLLATE = utf8mb4_unicode_ci",
Comment on lines +1058 to +1059
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking the mismatches aren't ideal if we would rather represent the input SQL faithfully?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I think ideally these options would all be in a vector similar to Vec<SqlOption>, but with a flag for the equals sign like I added for ALGORITHM and AUTO_INCREMENT, which would avoid these mismatches. Maybe I will see how #1747 shakes out and then revisit this, and mark this draft in the meantime.

);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) ENGINE InnoDB",
"CREATE TABLE foo (id INT) ENGINE = InnoDB",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) DEFAULT CHARSET utf8mb4",
"CREATE TABLE foo (id INT) DEFAULT CHARSET = utf8mb4",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) DEFAULT COLLATE utf8mb4_unicode_ci",
"CREATE TABLE foo (id INT) COLLATE = utf8mb4_unicode_ci",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) AUTO_INCREMENT 123",
"CREATE TABLE foo (id INT) AUTO_INCREMENT = 123",
);

// Test multiple options
mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) ENGINE InnoDB AUTO_INCREMENT 100 CHARSET utf8mb4 COLLATE utf8mb4_unicode_ci",
"CREATE TABLE foo (id INT) ENGINE = InnoDB AUTO_INCREMENT = 100 DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_unicode_ci",
);

// Test mixing options with and without equals sign
mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) ENGINE InnoDB AUTO_INCREMENT = 100 CHARSET = utf8mb4 COLLATE utf8mb4_unicode_ci",
"CREATE TABLE foo (id INT) ENGINE = InnoDB AUTO_INCREMENT = 100 DEFAULT CHARSET = utf8mb4 COLLATE = utf8mb4_unicode_ci",
);
}

#[test]
#[ignore = "not yet supported"]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to remove this, especially if it ends up being covered by #1747 or something.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think we can remove and add it back when we have support for the syntax

fn parse_unsupported_create_table_options() {
mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) AVG_ROW_LENGTH 100",
"CREATE TABLE foo (id INT) AVG_ROW_LENGTH = 100",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) KEY_BLOCK_SIZE 8",
"CREATE TABLE foo (id INT) KEY_BLOCK_SIZE = 8",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) MAX_ROWS 1000000",
"CREATE TABLE foo (id INT) MAX_ROWS = 1000000",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) MIN_ROWS 10",
"CREATE TABLE foo (id INT) MIN_ROWS = 10",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) CHECKSUM 0",
"CREATE TABLE foo (id INT) CHECKSUM = 0",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) DELAY_KEY_WRITE 1",
"CREATE TABLE foo (id INT) DELAY_KEY_WRITE = 1",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) COMPRESSION 'ZLIB'",
"CREATE TABLE foo (id INT) COMPRESSION = 'ZLIB'",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) CONNECTION 'connection_string'",
"CREATE TABLE foo (id INT) CONNECTION = 'connection_string'",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) DATA DIRECTORY '/var/lib/mysql/data'",
"CREATE TABLE foo (id INT) DATA DIRECTORY = '/var/lib/mysql/data'",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) INDEX DIRECTORY '/var/lib/mysql/index'",
"CREATE TABLE foo (id INT) INDEX DIRECTORY = '/var/lib/mysql/index'",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) ENCRYPTION 'Y'",
"CREATE TABLE foo (id INT) ENCRYPTION = 'Y'",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) PACK_KEYS DEFAULT",
"CREATE TABLE foo (id INT) PACK_KEYS = DEFAULT",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) ROW_FORMAT DYNAMIC",
"CREATE TABLE foo (id INT) ROW_FORMAT = DYNAMIC",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) STATS_AUTO_RECALC DEFAULT",
"CREATE TABLE foo (id INT) STATS_AUTO_RECALC = DEFAULT",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) STATS_PERSISTENT DEFAULT",
"CREATE TABLE foo (id INT) STATS_PERSISTENT = DEFAULT",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) STATS_SAMPLE_PAGES 50",
"CREATE TABLE foo (id INT) STATS_SAMPLE_PAGES = 50",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) INSERT_METHOD NO",
"CREATE TABLE foo (id INT) INSERT_METHOD = NO",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) INSERT_METHOD FIRST",
"CREATE TABLE foo (id INT) INSERT_METHOD = FIRST",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) ENGINE_ATTRIBUTE 'custom-option'",
"CREATE TABLE foo (id INT) ENGINE_ATTRIBUTE = 'custom-option'",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) SECONDARY_ENGINE_ATTRIBUTE 'custom-option'",
"CREATE TABLE foo (id INT) SECONDARY_ENGINE_ATTRIBUTE = 'custom-option'",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) AUTOEXTEND_SIZE 4M",
"CREATE TABLE foo (id INT) AUTOEXTEND_SIZE = 4M",
);

mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) PASSWORD 'secret'",
"CREATE TABLE foo (id INT) PASSWORD = 'secret'",
);

// Test UNION
mysql_and_generic().one_statement_parses_to(
"CREATE TABLE foo (id INT) UNION (t1,t2)",
"CREATE TABLE foo (id INT) UNION = (t1,t2)",
);

// Test START TRANSACTION which doesn't take a value at all
mysql_and_generic().verified_stmt("CREATE TABLE foo (id INT) START TRANSACTION");
}

#[test]
fn parse_quote_identifiers() {
let sql = "CREATE TABLE `PRIMARY` (`BEGIN` INT PRIMARY KEY)";
Expand Down
Loading