From 3f707160946e7d9557a7103e836c27f256dcd527 Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 21 Feb 2025 14:46:51 -0800 Subject: [PATCH] Parse SIGNED INTEGER type in MySQL CAST MySQL doesn't have the same set of possible CAST types as for e.g. column definitions. For example, it raises a syntax error for `CAST(1 AS INTEGER SIGNED)` and instead expects `CAST(1 AS SIGNED INTEGER)`. We retain the current somewhat permissive datatype parsing behavior (e.g. allowing `CAST(1 AS BIGINT)` even though MySQL would raise a syntax error), and add datatypes for this specific case (`SIGNED [INTEGER]` and `UNSIGNED [INTEGER]`). To keep the naming consistent across datatypes, we also rename `UnsignedInt` to `IntUnsigned` and similar for other unsigned datatypes. This means they display in the same way they are written, i.e. `IntUnsigned = INT UNSIGNED` instead of `UnsignedInt = INT UNSIGNED`. Closes #1589 --- src/ast/data_type.rs | 70 +++++++++++++++++++++++++++++----------- src/ast/mod.rs | 5 +-- src/keywords.rs | 1 + src/parser/mod.rs | 32 ++++++++++++------ tests/sqlparser_mysql.rs | 28 +++++++++++++--- 5 files changed, 101 insertions(+), 35 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index cae8ca8f0..57bc67441 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -132,19 +132,19 @@ pub enum DataType { /// Tiny integer with optional display width e.g. TINYINT or TINYINT(3) TinyInt(Option), /// Unsigned tiny integer with optional display width e.g. TINYINT UNSIGNED or TINYINT(3) UNSIGNED - UnsignedTinyInt(Option), + TinyIntUnsigned(Option), /// Int2 as alias for SmallInt in [postgresql] /// Note: Int2 mean 2 bytes in postgres (not 2 bits) /// Int2 with optional display width e.g. INT2 or INT2(5) /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Int2(Option), - /// Unsigned Int2 with optional display width e.g. 
INT2 Unsigned or INT2(5) Unsigned - UnsignedInt2(Option), + /// Unsigned Int2 with optional display width e.g. INT2 UNSIGNED or INT2(5) UNSIGNED + Int2Unsigned(Option), /// Small integer with optional display width e.g. SMALLINT or SMALLINT(5) SmallInt(Option), /// Unsigned small integer with optional display width e.g. SMALLINT UNSIGNED or SMALLINT(5) UNSIGNED - UnsignedSmallInt(Option), + SmallIntUnsigned(Option), /// MySQL medium integer ([1]) with optional display width e.g. MEDIUMINT or MEDIUMINT(5) /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html @@ -152,7 +152,7 @@ pub enum DataType { /// Unsigned medium integer ([1]) with optional display width e.g. MEDIUMINT UNSIGNED or MEDIUMINT(5) UNSIGNED /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html - UnsignedMediumInt(Option), + MediumIntUnsigned(Option), /// Int with optional display width e.g. INT or INT(11) Int(Option), /// Int4 as alias for Integer in [postgresql] @@ -197,11 +197,11 @@ pub enum DataType { /// Integer with optional display width e.g. INTEGER or INTEGER(11) Integer(Option), /// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED - UnsignedInt(Option), + IntUnsigned(Option), /// Unsigned int4 with optional display width e.g. INT4 UNSIGNED or INT4(11) UNSIGNED - UnsignedInt4(Option), + Int4Unsigned(Option), /// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED - UnsignedInteger(Option), + IntegerUnsigned(Option), /// Unsigned integer type in [clickhouse] /// Note: UInt8 mean 8 bits in [clickhouse] /// @@ -235,9 +235,29 @@ pub enum DataType { /// Big integer with optional display width e.g. BIGINT or BIGINT(20) BigInt(Option), /// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED - UnsignedBigInt(Option), + BigIntUnsigned(Option), /// Unsigned Int8 with optional display width e.g. 
INT8 UNSIGNED or INT8(11) UNSIGNED - UnsignedInt8(Option), + Int8Unsigned(Option), + /// Signed integer as used in [MySQL CAST] target types, without optional `INTEGER` suffix: + /// `SIGNED` + /// + /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html + Signed, + /// Signed integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix: + /// `SIGNED INTEGER` + /// + /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html + SignedInteger, + /// Unsigned integer as used in [MySQL CAST] target types, without optional `INTEGER` suffix: + /// `UNSIGNED` + /// + /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html + Unsigned, + /// Unsigned integer as used in [MySQL CAST] target types, with optional `INTEGER` suffix: + /// `UNSIGNED INTEGER` + /// + /// [MySQL CAST]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html + UnsignedInteger, /// Float4 as alias for Real in [postgresql] /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html @@ -433,29 +453,29 @@ impl fmt::Display for DataType { DataType::TinyInt(zerofill) => { format_type_with_optional_length(f, "TINYINT", zerofill, false) } - DataType::UnsignedTinyInt(zerofill) => { + DataType::TinyIntUnsigned(zerofill) => { format_type_with_optional_length(f, "TINYINT", zerofill, true) } DataType::Int2(zerofill) => { format_type_with_optional_length(f, "INT2", zerofill, false) } - DataType::UnsignedInt2(zerofill) => { + DataType::Int2Unsigned(zerofill) => { format_type_with_optional_length(f, "INT2", zerofill, true) } DataType::SmallInt(zerofill) => { format_type_with_optional_length(f, "SMALLINT", zerofill, false) } - DataType::UnsignedSmallInt(zerofill) => { + DataType::SmallIntUnsigned(zerofill) => { format_type_with_optional_length(f, "SMALLINT", zerofill, true) } DataType::MediumInt(zerofill) => { format_type_with_optional_length(f, "MEDIUMINT", zerofill, false) } - DataType::UnsignedMediumInt(zerofill) => { +
DataType::MediumIntUnsigned(zerofill) => { format_type_with_optional_length(f, "MEDIUMINT", zerofill, true) } DataType::Int(zerofill) => format_type_with_optional_length(f, "INT", zerofill, false), - DataType::UnsignedInt(zerofill) => { + DataType::IntUnsigned(zerofill) => { format_type_with_optional_length(f, "INT", zerofill, true) } DataType::Int4(zerofill) => { @@ -479,22 +499,22 @@ impl fmt::Display for DataType { DataType::Int256 => { write!(f, "Int256") } - DataType::UnsignedInt4(zerofill) => { + DataType::Int4Unsigned(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } DataType::Integer(zerofill) => { format_type_with_optional_length(f, "INTEGER", zerofill, false) } - DataType::UnsignedInteger(zerofill) => { + DataType::IntegerUnsigned(zerofill) => { format_type_with_optional_length(f, "INTEGER", zerofill, true) } DataType::BigInt(zerofill) => { format_type_with_optional_length(f, "BIGINT", zerofill, false) } - DataType::UnsignedBigInt(zerofill) => { + DataType::BigIntUnsigned(zerofill) => { format_type_with_optional_length(f, "BIGINT", zerofill, true) } - DataType::UnsignedInt8(zerofill) => { + DataType::Int8Unsigned(zerofill) => { format_type_with_optional_length(f, "INT8", zerofill, true) } DataType::UInt8 => { @@ -515,6 +535,18 @@ impl fmt::Display for DataType { DataType::UInt256 => { write!(f, "UInt256") } + DataType::Signed => { + write!(f, "SIGNED") + } + DataType::SignedInteger => { + write!(f, "SIGNED INTEGER") + } + DataType::Unsigned => { + write!(f, "UNSIGNED") + } + DataType::UnsignedInteger => { + write!(f, "UNSIGNED INTEGER") + } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), DataType::Float32 => write!(f, "Float32"), diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5263bfc3c..85d3ed91c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -814,8 +814,9 @@ pub enum Expr { kind: CastKind, expr: Box, data_type: DataType, - // Optional CAST(string_expression AS type FORMAT 
format_string_expression) as used by BigQuery - // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + /// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery] + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax format: Option, }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` diff --git a/src/keywords.rs b/src/keywords.rs index d62a038b8..020b404ed 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -790,6 +790,7 @@ define_keywords!( SHARE, SHARING, SHOW, + SIGNED, SIMILAR, SKIP, SLOW, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2d64ff3e1..ddcb6055f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8867,7 +8867,7 @@ impl<'a> Parser<'a> { Keyword::TINYINT => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedTinyInt(optional_precision?)) + Ok(DataType::TinyIntUnsigned(optional_precision?)) } else { Ok(DataType::TinyInt(optional_precision?)) } @@ -8875,7 +8875,7 @@ impl<'a> Parser<'a> { Keyword::INT2 => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt2(optional_precision?)) + Ok(DataType::Int2Unsigned(optional_precision?)) } else { Ok(DataType::Int2(optional_precision?)) } @@ -8883,7 +8883,7 @@ impl<'a> Parser<'a> { Keyword::SMALLINT => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedSmallInt(optional_precision?)) + Ok(DataType::SmallIntUnsigned(optional_precision?)) } else { Ok(DataType::SmallInt(optional_precision?)) } @@ -8891,7 +8891,7 @@ impl<'a> Parser<'a> { Keyword::MEDIUMINT => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - 
Ok(DataType::UnsignedMediumInt(optional_precision?)) + Ok(DataType::MediumIntUnsigned(optional_precision?)) } else { Ok(DataType::MediumInt(optional_precision?)) } @@ -8899,7 +8899,7 @@ impl<'a> Parser<'a> { Keyword::INT => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt(optional_precision?)) + Ok(DataType::IntUnsigned(optional_precision?)) } else { Ok(DataType::Int(optional_precision?)) } @@ -8907,7 +8907,7 @@ impl<'a> Parser<'a> { Keyword::INT4 => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt4(optional_precision?)) + Ok(DataType::Int4Unsigned(optional_precision?)) } else { Ok(DataType::Int4(optional_precision?)) } @@ -8915,7 +8915,7 @@ impl<'a> Parser<'a> { Keyword::INT8 => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt8(optional_precision?)) + Ok(DataType::Int8Unsigned(optional_precision?)) } else { Ok(DataType::Int8(optional_precision?)) } @@ -8928,7 +8928,7 @@ impl<'a> Parser<'a> { Keyword::INTEGER => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInteger(optional_precision?)) + Ok(DataType::IntegerUnsigned(optional_precision?)) } else { Ok(DataType::Integer(optional_precision?)) } @@ -8936,7 +8936,7 @@ impl<'a> Parser<'a> { Keyword::BIGINT => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedBigInt(optional_precision?)) + Ok(DataType::BigIntUnsigned(optional_precision?)) } else { Ok(DataType::BigInt(optional_precision?)) } @@ -9142,6 +9142,20 @@ impl<'a> Parser<'a> { let columns = self.parse_returns_table_columns()?; Ok(DataType::Table(columns)) } + Keyword::SIGNED => { + if self.parse_keyword(Keyword::INTEGER) { + Ok(DataType::SignedInteger) + } else { + 
Ok(DataType::Signed) + } + } + Keyword::UNSIGNED => { + if self.parse_keyword(Keyword::INTEGER) { + Ok(DataType::UnsignedInteger) + } else { + Ok(DataType::Unsigned) + } + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 861f782c6..4856bd894 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1359,27 +1359,27 @@ fn parse_create_table_unsigned() { vec![ ColumnDef { name: Ident::new("bar_tinyint"), - data_type: DataType::UnsignedTinyInt(Some(3)), + data_type: DataType::TinyIntUnsigned(Some(3)), options: vec![], }, ColumnDef { name: Ident::new("bar_smallint"), - data_type: DataType::UnsignedSmallInt(Some(5)), + data_type: DataType::SmallIntUnsigned(Some(5)), options: vec![], }, ColumnDef { name: Ident::new("bar_mediumint"), - data_type: DataType::UnsignedMediumInt(Some(13)), + data_type: DataType::MediumIntUnsigned(Some(13)), options: vec![], }, ColumnDef { name: Ident::new("bar_int"), - data_type: DataType::UnsignedInt(Some(11)), + data_type: DataType::IntUnsigned(Some(11)), options: vec![], }, ColumnDef { name: Ident::new("bar_bigint"), - data_type: DataType::UnsignedBigInt(Some(20)), + data_type: DataType::BigIntUnsigned(Some(20)), options: vec![], }, ], @@ -3339,3 +3339,21 @@ fn parse_drop_trigger() { } ); } + +#[test] +fn parse_cast_integers() { + mysql().verified_expr("CAST(foo AS UNSIGNED)"); + mysql().verified_expr("CAST(foo AS SIGNED)"); + mysql().verified_expr("CAST(foo AS UNSIGNED INTEGER)"); + mysql().verified_expr("CAST(foo AS SIGNED INTEGER)"); + + mysql() + .run_parser_method("CAST(foo AS UNSIGNED(3))", |p| p.parse_expr()) + .expect_err("CAST doesn't allow display width"); + mysql() + .run_parser_method("CAST(foo AS UNSIGNED(3) INTEGER)", |p| p.parse_expr()) + .expect_err("CAST doesn't allow display width"); + mysql() + .run_parser_method("CAST(foo AS UNSIGNED INTEGER(3))", |p| p.parse_expr()) + .expect_err("CAST doesn't allow display 
width"); +}