Skip to content

Commit 39d0109

Browse files
committed
Add support for the ENUM8 and ENUM16 types in the ClickHouse dialect
For the documentation, please refer to: https://clickhouse.com/docs/en/sql-reference/data-types/enum
1 parent 4ab3ab9 commit 39d0109

File tree

8 files changed

+188
-46
lines changed

8 files changed

+188
-46
lines changed

src/ast/data_type.rs

+25-5
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,21 @@ use serde::{Deserialize, Serialize};
2525
#[cfg(feature = "visitor")]
2626
use sqlparser_derive::{Visit, VisitMut};
2727

28-
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
28+
use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField, Value};
2929

3030
use super::{value::escape_single_quote_string, ColumnDef};
3131

32+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
33+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
34+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
35+
pub enum EnumValue {
36+
String(String),
37+
/// ClickHouse allows specifying an integer value for each enum value.
38+
///
39+
/// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
40+
Pair(String, Value),
41+
}
42+
3243
/// SQL data types
3344
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
3445
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -324,7 +335,7 @@ pub enum DataType {
324335
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
325336
Nested(Vec<ColumnDef>),
326337
/// Enums
327-
Enum(Vec<String>),
338+
Enum(Vec<EnumValue>, Option<i64>),
328339
/// Set
329340
Set(Vec<String>),
330341
/// Struct
@@ -532,13 +543,22 @@ impl fmt::Display for DataType {
532543
write!(f, "{}({})", ty, modifiers.join(", "))
533544
}
534545
}
535-
DataType::Enum(vals) => {
536-
write!(f, "ENUM(")?;
546+
DataType::Enum(vals, bits) => {
547+
match bits {
548+
Some(bits) => write!(f, "ENUM{}", bits),
549+
None => write!(f, "ENUM"),
550+
}?;
551+
write!(f, "(")?;
537552
for (i, v) in vals.iter().enumerate() {
538553
if i != 0 {
539554
write!(f, ", ")?;
540555
}
541-
write!(f, "'{}'", escape_single_quote_string(v))?;
556+
match v {
557+
EnumValue::String(v) => write!(f, "'{}'", escape_single_quote_string(v))?,
558+
EnumValue::Pair(v, i) => {
559+
write!(f, "'{}' = {}", escape_single_quote_string(v), i)?
560+
}
561+
}
542562
}
543563
write!(f, ")")
544564
}

src/ast/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use sqlparser_derive::{Visit, VisitMut};
4040
use crate::tokenizer::Span;
4141

4242
pub use self::data_type::{
43-
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
43+
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumValue, ExactNumberInfo,
4444
StructBracketKind, TimezoneInfo,
4545
};
4646
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue, Use};

src/dialect/clickhouse.rs

+6
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,10 @@ impl Dialect for ClickHouseDialect {
5050
fn supports_limit_comma(&self) -> bool {
5151
true
5252
}
53+
54+
/// ClickHouse supports `Enum8` and `Enum16` types.
55+
/// See [ClickHouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
56+
fn supports_enum_type_with_bits(&self) -> bool {
57+
true
58+
}
5359
}

src/dialect/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,11 @@ pub trait Dialect: Debug + Any {
697697
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
698698
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
699699
}
700+
701+
/// Returns true if the dialect supports enum types with an explicit bit width, such as `Enum8` and `Enum16`.
702+
fn supports_enum_type_with_bits(&self) -> bool {
703+
false
704+
}
700705
}
701706

702707
/// This represents the operators for which precedence must be defined

src/keywords.rs

+2
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ define_keywords!(
285285
ENFORCED,
286286
ENGINE,
287287
ENUM,
288+
ENUM16,
289+
ENUM8,
288290
EPHEMERAL,
289291
EPOCH,
290292
EQUALS,

src/parser/mod.rs

+59-36
Original file line numberDiff line numberDiff line change
@@ -1049,18 +1049,18 @@ impl<'a> Parser<'a> {
10491049
| Keyword::CURRENT_USER
10501050
| Keyword::SESSION_USER
10511051
| Keyword::USER
1052-
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
1053-
{
1054-
Ok(Some(Expr::Function(Function {
1055-
name: ObjectName(vec![w.to_ident(w_span)]),
1056-
parameters: FunctionArguments::None,
1057-
args: FunctionArguments::None,
1058-
null_treatment: None,
1059-
filter: None,
1060-
over: None,
1061-
within_group: vec![],
1062-
})))
1063-
}
1052+
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
1053+
{
1054+
Ok(Some(Expr::Function(Function {
1055+
name: ObjectName(vec![w.to_ident(w_span)]),
1056+
parameters: FunctionArguments::None,
1057+
args: FunctionArguments::None,
1058+
null_treatment: None,
1059+
filter: None,
1060+
over: None,
1061+
within_group: vec![],
1062+
})))
1063+
}
10641064
Keyword::CURRENT_TIMESTAMP
10651065
| Keyword::CURRENT_TIME
10661066
| Keyword::CURRENT_DATE
@@ -1075,18 +1075,18 @@ impl<'a> Parser<'a> {
10751075
Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
10761076
Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
10771077
Keyword::EXISTS
1078-
// Support parsing Databricks has a function named `exists`.
1079-
if !dialect_of!(self is DatabricksDialect)
1080-
|| matches!(
1078+
// Databricks has a function named `exists`, so for that dialect only parse an EXISTS expression when the peeked token is SELECT or WITH.
1079+
if !dialect_of!(self is DatabricksDialect)
1080+
|| matches!(
10811081
self.peek_nth_token(1).token,
10821082
Token::Word(Word {
10831083
keyword: Keyword::SELECT | Keyword::WITH,
10841084
..
10851085
})
10861086
) =>
1087-
{
1088-
Ok(Some(self.parse_exists_expr(false)?))
1089-
}
1087+
{
1088+
Ok(Some(self.parse_exists_expr(false)?))
1089+
}
10901090
Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)),
10911091
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
10921092
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
@@ -1103,22 +1103,22 @@ impl<'a> Parser<'a> {
11031103
Ok(Some(self.parse_array_expr(true)?))
11041104
}
11051105
Keyword::ARRAY
1106-
if self.peek_token() == Token::LParen
1107-
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
1108-
{
1109-
self.expect_token(&Token::LParen)?;
1110-
let query = self.parse_query()?;
1111-
self.expect_token(&Token::RParen)?;
1112-
Ok(Some(Expr::Function(Function {
1113-
name: ObjectName(vec![w.to_ident(w_span)]),
1114-
parameters: FunctionArguments::None,
1115-
args: FunctionArguments::Subquery(query),
1116-
filter: None,
1117-
null_treatment: None,
1118-
over: None,
1119-
within_group: vec![],
1120-
})))
1121-
}
1106+
if self.peek_token() == Token::LParen
1107+
&& !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
1108+
{
1109+
self.expect_token(&Token::LParen)?;
1110+
let query = self.parse_query()?;
1111+
self.expect_token(&Token::RParen)?;
1112+
Ok(Some(Expr::Function(Function {
1113+
name: ObjectName(vec![w.to_ident(w_span)]),
1114+
parameters: FunctionArguments::None,
1115+
args: FunctionArguments::Subquery(query),
1116+
filter: None,
1117+
null_treatment: None,
1118+
over: None,
1119+
within_group: vec![],
1120+
})))
1121+
}
11221122
Keyword::NOT => Ok(Some(self.parse_not()?)),
11231123
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
11241124
Ok(Some(self.parse_match_against()?))
@@ -5022,7 +5022,7 @@ impl<'a> Parser<'a> {
50225022
return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}")))
50235023
}
50245024
}
5025-
},
5025+
}
50265026
};
50275027
Ok(owner)
50285028
}
@@ -7979,6 +7979,27 @@ impl<'a> Parser<'a> {
79797979
}
79807980
}
79817981

7982+
pub fn parse_enum_values(&mut self) -> Result<Vec<EnumValue>, ParserError> {
7983+
self.expect_token(&Token::LParen)?;
7984+
let values = self.parse_comma_separated(Parser::parse_enum_value)?;
7985+
self.expect_token(&Token::RParen)?;
7986+
Ok(values)
7987+
}
7988+
7989+
pub fn parse_enum_value(&mut self) -> Result<EnumValue, ParserError> {
7990+
let str = self.parse_literal_string()?;
7991+
let value = match self.peek_token().token {
7992+
Token::Eq => {
7993+
// Consume the `=` token
7994+
self.next_token();
7995+
let value = self.parse_number_value()?;
7996+
EnumValue::Pair(str, value)
7997+
}
7998+
_ => EnumValue::String(str),
7999+
};
8000+
Ok(value)
8001+
}
8002+
79828003
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
79838004
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
79848005
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@@ -8210,7 +8231,9 @@ impl<'a> Parser<'a> {
82108231
Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
82118232
self.parse_exact_number_optional_precision_scale()?,
82128233
)),
8213-
Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)),
8234+
Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)),
8235+
Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))),
8236+
Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
82148237
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
82158238
Keyword::ARRAY => {
82168239
if dialect_of!(self is SnowflakeDialect) {

tests/sqlparser_clickhouse.rs

+80
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,86 @@ fn parse_explain_table() {
16211621
}
16221622
}
16231623

1624+
#[test]
1625+
fn parse_create_table_with_enum_types() {
1626+
let sql = "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = 2), bar ENUM16('a' = 1, 'b' = 2), baz ENUM('a', 'b'))";
1627+
match clickhouse().verified_stmt(sql) {
1628+
Statement::CreateTable(CreateTable { name, columns, .. }) => {
1629+
assert_eq!(name.to_string(), "t0");
1630+
assert_eq!(
1631+
vec![
1632+
ColumnDef {
1633+
name: Ident::new("foo"),
1634+
data_type: DataType::Enum(
1635+
vec![
1636+
EnumValue::Pair(
1637+
"a".to_string(),
1638+
Number("1".parse().unwrap(), false)
1639+
),
1640+
EnumValue::Pair(
1641+
"b".to_string(),
1642+
Number("2".parse().unwrap(), false)
1643+
)
1644+
],
1645+
Some(8)
1646+
),
1647+
collation: None,
1648+
options: vec![],
1649+
},
1650+
ColumnDef {
1651+
name: Ident::new("bar"),
1652+
data_type: DataType::Enum(
1653+
vec![
1654+
EnumValue::Pair(
1655+
"a".to_string(),
1656+
Number("1".parse().unwrap(), false)
1657+
),
1658+
EnumValue::Pair(
1659+
"b".to_string(),
1660+
Number("2".parse().unwrap(), false)
1661+
)
1662+
],
1663+
Some(16)
1664+
),
1665+
collation: None,
1666+
options: vec![],
1667+
},
1668+
ColumnDef {
1669+
name: Ident::new("baz"),
1670+
data_type: DataType::Enum(
1671+
vec![
1672+
EnumValue::String("a".to_string()),
1673+
EnumValue::String("b".to_string())
1674+
],
1675+
None
1676+
),
1677+
collation: None,
1678+
options: vec![],
1679+
}
1680+
],
1681+
columns
1682+
);
1683+
}
1684+
_ => unreachable!(),
1685+
}
1686+
1687+
// invalid case: the value of an enum pair is missing
1688+
assert_eq!(
1689+
clickhouse_and_generic()
1690+
.parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = ))")
1691+
.unwrap_err(),
1692+
ParserError("Expected: a value, found: )".to_string())
1693+
);
1694+
1695+
// invalid case: the enum name is not a string literal
1696+
assert_eq!(
1697+
clickhouse_and_generic()
1698+
.parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 2))")
1699+
.unwrap_err(),
1700+
ParserError("Expected: literal string, found: 2".to_string())
1701+
);
1702+
}
1703+
16241704
fn clickhouse() -> TestedDialects {
16251705
TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
16261706
}

tests/sqlparser_mysql.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,7 @@ fn table_constraint_unique_primary_ctor(
684684
#[test]
685685
fn parse_create_table_primary_and_unique_key() {
686686
let sqls = ["UNIQUE KEY", "PRIMARY KEY"]
687-
.map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
687+
.map(|key_ty| format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
688688

689689
let index_type_display = [Some(KeyOrIndexDisplay::Key), None];
690690

@@ -752,7 +752,7 @@ fn parse_create_table_primary_and_unique_key() {
752752
#[test]
753753
fn parse_create_table_primary_and_unique_key_with_index_options() {
754754
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
755-
.map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
755+
.map(|key_ty| format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
756756

757757
let index_type_display = [Some(KeyOrIndexDisplay::Index), None];
758758

@@ -826,7 +826,7 @@ fn parse_create_table_primary_and_unique_key_with_index_type() {
826826
#[test]
827827
fn parse_create_table_primary_and_unique_key_characteristic_test() {
828828
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
829-
.map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
829+
.map(|key_ty| format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
830830
for sql in &sqls {
831831
mysql_and_generic().verified_stmt(sql);
832832
}
@@ -889,7 +889,13 @@ fn parse_create_table_set_enum() {
889889
},
890890
ColumnDef {
891891
name: Ident::new("baz"),
892-
data_type: DataType::Enum(vec!["a".to_string(), "b".to_string()]),
892+
data_type: DataType::Enum(
893+
vec![
894+
EnumValue::String("a".to_string()),
895+
EnumValue::String("b".to_string())
896+
],
897+
None
898+
),
893899
collation: None,
894900
options: vec![],
895901
}

0 commit comments

Comments
 (0)