Skip to content

Add support of the ENUM8|ENUM16 for ClickHouse dialect #1574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,21 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};

use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
use crate::ast::{display_comma_separated, Expr, ObjectName, StructField, UnionField};

use super::{value::escape_single_quote_string, ColumnDef};

/// A single member in the value list of an enum data type.
///
/// A member is either a bare string label or a string label paired with an
/// explicit value expression.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum EnumMember {
/// A member given only by its label, e.g. `'a'`.
Name(String),
/// A member given by its label and an explicit value, e.g. `'a' = 1`.
///
/// ClickHouse allows specifying an integer value for each enum member.
///
/// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
NamedValue(String, Expr),
}

/// SQL data types
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -334,7 +345,7 @@ pub enum DataType {
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
Nested(Vec<ColumnDef>),
/// Enums
Enum(Vec<String>),
Enum(Vec<EnumMember>, Option<u8>),
/// Set
Set(Vec<String>),
/// Struct
Expand Down Expand Up @@ -546,13 +557,24 @@ impl fmt::Display for DataType {
write!(f, "{}({})", ty, modifiers.join(", "))
}
}
DataType::Enum(vals) => {
write!(f, "ENUM(")?;
DataType::Enum(vals, bits) => {
match bits {
Some(bits) => write!(f, "ENUM{}", bits),
None => write!(f, "ENUM"),
}?;
write!(f, "(")?;
for (i, v) in vals.iter().enumerate() {
if i != 0 {
write!(f, ", ")?;
}
write!(f, "'{}'", escape_single_quote_string(v))?;
match v {
EnumMember::Name(name) => {
write!(f, "'{}'", escape_single_quote_string(name))?
}
EnumMember::NamedValue(name, value) => {
write!(f, "'{}' = {}", escape_single_quote_string(name), value)?
}
}
}
write!(f, ")")
}
Expand Down
2 changes: 1 addition & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ use sqlparser_derive::{Visit, VisitMut};
use crate::tokenizer::Span;

pub use self::data_type::{
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumMember, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue, Use};
Expand Down
2 changes: 2 additions & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ define_keywords!(
ENFORCED,
ENGINE,
ENUM,
ENUM16,
ENUM8,
EPHEMERAL,
EPOCH,
EQUALS,
Expand Down
91 changes: 55 additions & 36 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1049,18 +1049,18 @@ impl<'a> Parser<'a> {
| Keyword::CURRENT_USER
| Keyword::SESSION_USER
| Keyword::USER
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
Copy link
Member Author

@git-hulk git-hulk Nov 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The following parts were auto-formatted by running `cargo fmt`.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why it only happened now 🤔 Is it possible you are using nightly Rust?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alamb aha, I'm using 1.82.0 on my side.

{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::None,
null_treatment: None,
filter: None,
over: None,
within_group: vec![],
})))
}
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::None,
null_treatment: None,
filter: None,
over: None,
within_group: vec![],
})))
}
Keyword::CURRENT_TIMESTAMP
| Keyword::CURRENT_TIME
| Keyword::CURRENT_DATE
Expand All @@ -1075,18 +1075,18 @@ impl<'a> Parser<'a> {
Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
Keyword::EXISTS
// Support parsing Databricks has a function named `exists`.
if !dialect_of!(self is DatabricksDialect)
|| matches!(
// Support parsing Databricks has a function named `exists`.
if !dialect_of!(self is DatabricksDialect)
|| matches!(
self.peek_nth_token(1).token,
Token::Word(Word {
keyword: Keyword::SELECT | Keyword::WITH,
..
})
) =>
{
Ok(Some(self.parse_exists_expr(false)?))
}
{
Ok(Some(self.parse_exists_expr(false)?))
}
Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)),
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
Expand All @@ -1103,22 +1103,22 @@ impl<'a> Parser<'a> {
Ok(Some(self.parse_array_expr(true)?))
}
Keyword::ARRAY
if self.peek_token() == Token::LParen
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
{
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
})))
}
if self.peek_token() == Token::LParen
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
{
self.expect_token(&Token::LParen)?;
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
filter: None,
null_treatment: None,
over: None,
within_group: vec![],
})))
}
Keyword::NOT => Ok(Some(self.parse_not()?)),
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
Ok(Some(self.parse_match_against()?))
Expand Down Expand Up @@ -5023,7 +5023,7 @@ impl<'a> Parser<'a> {
return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}")))
}
}
},
}
};
Ok(owner)
}
Expand Down Expand Up @@ -7997,6 +7997,23 @@ impl<'a> Parser<'a> {
}
}

/// Parse the parenthesized, comma-separated member list of an enum type.
///
/// Each member is a string literal, optionally followed by `=` and a
/// number, e.g. `('a', 'b')` or `('a' = 1, 'b' = 2)`. A member without
/// `=` becomes [`EnumMember::Name`]; one with `=` becomes
/// [`EnumMember::NamedValue`].
///
/// # Errors
///
/// Returns a [`ParserError`] if the opening or closing parenthesis is
/// missing, if a member does not start with a string literal, or if `=`
/// is not followed by a number.
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
    self.expect_token(&Token::LParen)?;
    let members = self.parse_comma_separated(|p| {
        let label = p.parse_literal_string()?;
        // No `=` after the label: this is a plain, unnumbered member.
        if !p.consume_token(&Token::Eq) {
            return Ok(EnumMember::Name(label));
        }
        p.parse_number()
            .map(|number| EnumMember::NamedValue(label, number))
    })?;
    self.expect_token(&Token::RParen)?;

    Ok(members)
}

/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
Expand Down Expand Up @@ -8235,7 +8252,9 @@ impl<'a> Parser<'a> {
Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
self.parse_exact_number_optional_precision_scale()?,
)),
Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)),
Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)),
Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))),
Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
Keyword::ARRAY => {
if dialect_of!(self is SnowflakeDialect) {
Expand Down
87 changes: 84 additions & 3 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ mod test_utils;
use pretty_assertions::assert_eq;
use sqlparser::ast::ColumnOption::Comment;
use sqlparser::ast::Expr::{Identifier, UnaryOp};
use sqlparser::ast::Value::Number;
use sqlparser::test_utils::all_dialects_except;

#[test]
Expand Down Expand Up @@ -9250,7 +9251,7 @@ fn parse_cache_table() {
format!(
"CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) {sql}",
)
.as_str()
.as_str()
),
Statement::Cache {
table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
Expand All @@ -9275,7 +9276,7 @@ fn parse_cache_table() {
format!(
"CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) AS {sql}",
)
.as_str()
.as_str()
),
Statement::Cache {
table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
Expand Down Expand Up @@ -11459,7 +11460,7 @@ fn parse_explain_with_option_list() {
}),
},
];
run_explain_analyze (
run_explain_analyze(
all_dialects_where(|d| d.supports_explain_with_utility_options()),
"EXPLAIN (ANALYZE, VERBOSE true, WAL OFF, FORMAT YAML, USER_DEF_NUM -100.1) SELECT sqrt(id) FROM foo",
false,
Expand Down Expand Up @@ -12459,3 +12460,83 @@ fn parse_create_table_with_bit_types() {
_ => unreachable!(),
}
}

#[test]
fn parse_create_table_with_enum_types() {
    // Builds the `'label' = number` form of an enum member.
    let named = |label: &str, number: &str| {
        EnumMember::NamedValue(
            label.to_string(),
            Expr::Value(Number(number.parse().unwrap(), false)),
        )
    };
    // Builds a column definition with no collation and no options.
    let column = |ident: &str, data_type: DataType| ColumnDef {
        name: Ident::new(ident),
        data_type,
        collation: None,
        options: vec![],
    };

    // ENUM8 / ENUM16 with explicit values, plus a plain ENUM with bare labels.
    let sql = "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = 2), bar ENUM16('a' = 1, 'b' = 2), baz ENUM('a', 'b'))";
    let (name, columns) = match all_dialects().verified_stmt(sql) {
        Statement::CreateTable(CreateTable { name, columns, .. }) => (name, columns),
        _ => unreachable!(),
    };
    assert_eq!(name.to_string(), "t0");

    let expected_columns = vec![
        column(
            "foo",
            DataType::Enum(vec![named("a", "1"), named("b", "2")], Some(8)),
        ),
        column(
            "bar",
            DataType::Enum(vec![named("a", "1"), named("b", "2")], Some(16)),
        ),
        column(
            "baz",
            DataType::Enum(
                vec![
                    EnumMember::Name("a".to_string()),
                    EnumMember::Name("b".to_string()),
                ],
                None,
            ),
        ),
    ];
    assert_eq!(expected_columns, columns);

    // Malformed member lists and the parser error each one must produce.
    let invalid_cases = [
        // The value after `=` is missing.
        (
            "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = ))",
            "Expected: a value, found: )",
        ),
        // The member name is not a string literal.
        (
            "CREATE TABLE t0 (foo ENUM8('a' = 1, 2))",
            "Expected: literal string, found: 2",
        ),
    ];
    for &(bad_sql, message) in &invalid_cases {
        assert_eq!(
            all_dialects().parse_sql_statements(bad_sql).unwrap_err(),
            ParserError::ParserError(message.to_string())
        );
    }
}
14 changes: 10 additions & 4 deletions tests/sqlparser_mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ fn table_constraint_unique_primary_ctor(
#[test]
fn parse_create_table_primary_and_unique_key() {
let sqls = ["UNIQUE KEY", "PRIMARY KEY"]
.map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
.map(|key_ty| format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));

let index_type_display = [Some(KeyOrIndexDisplay::Key), None];

Expand Down Expand Up @@ -753,7 +753,7 @@ fn parse_create_table_primary_and_unique_key() {
#[test]
fn parse_create_table_primary_and_unique_key_with_index_options() {
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
.map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
.map(|key_ty| format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));

let index_type_display = [Some(KeyOrIndexDisplay::Index), None];

Expand Down Expand Up @@ -827,7 +827,7 @@ fn parse_create_table_primary_and_unique_key_with_index_type() {
#[test]
fn parse_create_table_primary_and_unique_key_characteristic_test() {
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
.map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
.map(|key_ty| format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
for sql in &sqls {
mysql_and_generic().verified_stmt(sql);
}
Expand Down Expand Up @@ -890,7 +890,13 @@ fn parse_create_table_set_enum() {
},
ColumnDef {
name: Ident::new("baz"),
data_type: DataType::Enum(vec!["a".to_string(), "b".to_string()]),
data_type: DataType::Enum(
vec![
EnumMember::Name("a".to_string()),
EnumMember::Name("b".to_string())
],
None
),
collation: None,
options: vec![],
}
Expand Down
Loading