Skip to content

Add support for IS [NOT] [form] NORMALIZED #1655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 27 additions & 3 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ pub use self::trigger::{

pub use self::value::{
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
TrimWhereField, Value,
NormalizationForm, TrimWhereField, Value,
};

use crate::ast::helpers::stmt_data_loading::{
Expand Down Expand Up @@ -653,6 +653,12 @@ pub enum Expr {
IsDistinctFrom(Box<Expr>, Box<Expr>),
/// `IS NOT DISTINCT FROM` operator
IsNotDistinctFrom(Box<Expr>, Box<Expr>),
/// `<expr> IS [ NOT ] [ form ] NORMALIZED`
IsNormalized {
expr: Box<Expr>,
form: Option<NormalizationForm>,
negated: bool,
},
/// `[ NOT ] IN (val1, val2, ...)`
InList {
expr: Box<Expr>,
Expand Down Expand Up @@ -1118,7 +1124,7 @@ impl fmt::Display for LambdaFunction {
/// `OneOrManyWithParens` implements `Deref<Target = [T]>` and `IntoIterator`,
/// so you can call slice methods on it and iterate over items
/// # Examples
/// Acessing as a slice:
/// Accessing as a slice:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for these cleanups

/// ```
/// # use sqlparser::ast::OneOrManyWithParens;
/// let one = OneOrManyWithParens::One("a");
Expand Down Expand Up @@ -1419,6 +1425,24 @@ impl fmt::Display for Expr {
if *regexp { "REGEXP" } else { "RLIKE" },
pattern
),
Expr::IsNormalized {
expr,
form,
negated,
} => {
let not_ = if *negated { "NOT " } else { "" };
if form.is_none() {
write!(f, "{} IS {}NORMALIZED", expr, not_)
} else {
write!(
f,
"{} IS {}{} NORMALIZED",
expr,
not_,
form.as_ref().unwrap()
)
}
}
Expr::SimilarTo {
negated,
expr,
Expand Down Expand Up @@ -7749,7 +7773,7 @@ where
/// ```sql
/// EXPLAIN (ANALYZE, VERBOSE TRUE, FORMAT TEXT) SELECT * FROM my_table;
///
/// VACCUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table;
/// VACUUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table;
/// ```
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down
4 changes: 2 additions & 2 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2821,10 +2821,10 @@ impl fmt::Display for ValueTableMode {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum UpdateTableFromKind {
/// Update Statment where the 'FROM' clause is before the 'SET' keyword (Supported by Snowflake)
/// Update Statement where the 'FROM' clause is before the 'SET' keyword (Supported by Snowflake)
/// For Example: `UPDATE FROM t1 SET t1.name='aaa'`
BeforeSet(TableWithJoins),
/// Update Statment where the 'FROM' clause is after the 'SET' keyword (Which is the standard way)
/// Update Statement where the 'FROM' clause is after the 'SET' keyword (Which is the standard way)
/// For Example: `UPDATE SET t1.name='aaa' FROM t1`
AfterSet(TableWithJoins),
}
7 changes: 6 additions & 1 deletion src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,12 @@ impl Spanned for Expr {
escape_char: _,
any: _,
} => expr.span().union(&pattern.span()),
Expr::RLike { .. } => Span::empty(),
Expr::IsNormalized {
expr,
form: _,
negated: _,
} => expr.span(),
Expr::SimilarTo {
negated: _,
expr,
Expand Down Expand Up @@ -1359,7 +1365,6 @@ impl Spanned for Expr {
Expr::Array(array) => array.span(),
Expr::MatchAgainst { .. } => Span::empty(),
Expr::JsonAccess { value, path } => value.span().union(&path.span()),
Expr::RLike { .. } => Span::empty(),
Expr::AnyOp {
left,
compare_op: _,
Expand Down
29 changes: 29 additions & 0 deletions src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,35 @@ impl fmt::Display for DateTimeField {
}
}

/// A Unicode normalization form, as named by the optional `form` in
/// `<expr> IS [ NOT ] [ form ] NORMALIZED`.
///
/// The Unicode Standard defines four normalization forms. Each one is intended to
/// eliminate certain distinctions between visually or functionally identical characters.
///
/// The declaration order of the variants is what the derived `PartialOrd`/`Ord`
/// implementations compare by, so reordering them changes comparison results.
///
/// See [Unicode Normalization Forms](https://unicode.org/reports/tr15/) for details.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum NormalizationForm {
    /// Form C: canonical decomposition, followed by canonical composition.
    NFC,
    /// Form D: canonical decomposition only.
    NFD,
    /// Form KC: compatibility decomposition, followed by canonical composition.
    NFKC,
    /// Form KD: compatibility decomposition only.
    NFKD,
}

impl fmt::Display for NormalizationForm {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
NormalizationForm::NFC => write!(f, "NFC"),
NormalizationForm::NFD => write!(f, "NFD"),
NormalizationForm::NFKC => write!(f, "NFKC"),
NormalizationForm::NFKD => write!(f, "NFKD"),
}
}
}

pub struct EscapeQuotedString<'a> {
string: &'a str,
quote: char,
Expand Down
5 changes: 5 additions & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,10 @@ define_keywords!(
NESTED,
NEW,
NEXT,
NFC,
NFD,
NFKC,
NFKD,
NO,
NOBYPASSRLS,
NOCREATEDB,
Expand All @@ -539,6 +543,7 @@ define_keywords!(
NOORDER,
NOREPLICATION,
NORMALIZE,
NORMALIZED,
NOSCAN,
NOSUPERUSER,
NOT,
Expand Down
41 changes: 35 additions & 6 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3183,9 +3183,11 @@ impl<'a> Parser<'a> {
{
let expr2 = self.parse_expr()?;
Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2)))
} else if let Ok(is_normalized) = self.parse_unicode_is_normalized(expr) {
Ok(is_normalized)
} else {
self.expected(
"[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS",
"[NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS",
self.peek_token(),
)
}
Expand Down Expand Up @@ -3850,7 +3852,7 @@ impl<'a> Parser<'a> {
/// If the current token is the `expected` keyword, consume the token.
/// Otherwise, return an error.
///
// todo deprecate infavor of expected_keyword_is
// todo deprecate in favor of expected_keyword_is
pub fn expect_keyword(&mut self, expected: Keyword) -> Result<TokenWithSpan, ParserError> {
if self.parse_keyword(expected) {
Ok(self.get_current_token().clone())
Expand Down Expand Up @@ -8452,6 +8454,33 @@ impl<'a> Parser<'a> {
}
}

/// Parse the `[ NOT ] [ form ] NORMALIZED` tail of a Unicode normalization
/// predicate and wrap `expr` in [`Expr::IsNormalized`].
///
/// Steps, in order:
/// 1. an optional `NOT` keyword sets `negated`;
/// 2. an optional form keyword (`NFC`, `NFD`, `NFKC`, `NFKD`) is attempted through
///    `maybe_parse` (presumably yielding `None` when no form keyword follows —
///    confirm against `maybe_parse`'s definition);
/// 3. the `NORMALIZED` keyword is required.
///
/// # Errors
/// Returns the error produced by `expected` when `NORMALIZED` does not follow, and
/// propagates any error returned by `maybe_parse`. This function does not itself
/// rewind tokens it consumed before failing.
pub fn parse_unicode_is_normalized(&mut self, expr: Expr) -> Result<Expr, ParserError> {
    let negated = self.parse_keyword(Keyword::NOT);

    let form = self.maybe_parse(|p| {
        let form_keyword = p.parse_one_of_keywords(&[
            Keyword::NFC,
            Keyword::NFD,
            Keyword::NFKC,
            Keyword::NFKD,
        ]);
        match form_keyword {
            Some(Keyword::NFC) => Ok(NormalizationForm::NFC),
            Some(Keyword::NFD) => Ok(NormalizationForm::NFD),
            Some(Keyword::NFKC) => Ok(NormalizationForm::NFKC),
            Some(Keyword::NFKD) => Ok(NormalizationForm::NFKD),
            _ => p.expected("unicode normalization form", p.peek_token()),
        }
    })?;

    // Guard clause: without the closing keyword this is not an IS NORMALIZED predicate.
    if !self.parse_keyword(Keyword::NORMALIZED) {
        return self.expected("unicode normalization form", self.peek_token());
    }

    Ok(Expr::IsNormalized {
        expr: Box::new(expr),
        form,
        negated,
    })
}

pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
self.expect_token(&Token::LParen)?;
let values = self.parse_comma_separated(|parser| {
Expand Down Expand Up @@ -8957,7 +8986,7 @@ impl<'a> Parser<'a> {
}
}

/// Parse a table object for insetion
/// Parse a table object for insertion
/// e.g. `some_database.some_table` or `FUNCTION some_table_func(...)`
pub fn parse_table_object(&mut self) -> Result<TableObject, ParserError> {
if self.dialect.supports_insert_table_function() && self.parse_keyword(Keyword::FUNCTION) {
Expand Down Expand Up @@ -11867,7 +11896,7 @@ impl<'a> Parser<'a> {
} else {
let mut name = self.parse_grantee_name()?;
if self.consume_token(&Token::Colon) {
// Redshift supports namespace prefix for extenrnal users and groups:
// Redshift supports namespace prefix for external users and groups:
// <Namespace>:<GroupName> or <Namespace>:<UserName>
// https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-access-control-native-idp.html
let ident = self.parse_identifier()?;
Expand Down Expand Up @@ -12863,7 +12892,7 @@ impl<'a> Parser<'a> {
Ok(WithFill { from, to, step })
}

// Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect)
// Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect)
// that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier
pub fn parse_interpolations(&mut self) -> Result<Option<Interpolate>, ParserError> {
if !self.parse_keyword(Keyword::INTERPOLATE) {
Expand Down Expand Up @@ -14372,7 +14401,7 @@ mod tests {
assert_eq!(
ast,
Err(ParserError::ParserError(
"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16"
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: a at Line: 1, Column: 16"
.to_string()
))
);
Expand Down
84 changes: 80 additions & 4 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4579,7 +4579,7 @@ fn run_explain_analyze(
expected_verbose: bool,
expected_analyze: bool,
expected_format: Option<AnalyzeFormat>,
exepcted_options: Option<Vec<UtilityOption>>,
expected_options: Option<Vec<UtilityOption>>,
) {
match dialect.verified_stmt(query) {
Statement::Explain {
Expand All @@ -4595,7 +4595,7 @@ fn run_explain_analyze(
assert_eq!(verbose, expected_verbose);
assert_eq!(analyze, expected_analyze);
assert_eq!(format, expected_format);
assert_eq!(options, exepcted_options);
assert_eq!(options, expected_options);
assert!(!query_plan);
assert!(!estimate);
assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string());
Expand Down Expand Up @@ -9296,6 +9296,46 @@ fn parse_is_boolean() {
verified_expr(sql)
);

let sql = "a IS NORMALIZED";
assert_eq!(
IsNormalized {
expr: Box::new(Identifier(Ident::new("a"))),
form: None,
negated: false,
},
verified_expr(sql)
);

let sql = "a IS NOT NORMALIZED";
assert_eq!(
IsNormalized {
expr: Box::new(Identifier(Ident::new("a"))),
form: None,
negated: true,
},
verified_expr(sql)
);

let sql = "a IS NFKC NORMALIZED";
assert_eq!(
IsNormalized {
expr: Box::new(Identifier(Ident::new("a"))),
form: Some(NormalizationForm::NFKC),
negated: false,
},
verified_expr(sql)
);

let sql = "a IS NOT NFKD NORMALIZED";
assert_eq!(
IsNormalized {
expr: Box::new(Identifier(Ident::new("a"))),
form: Some(NormalizationForm::NFKD),
negated: true,
},
verified_expr(sql)
);

let sql = "a IS UNKNOWN";
assert_eq!(
IsUnknown(Box::new(Identifier(Ident::new("a")))),
Expand All @@ -9314,14 +9354,50 @@ fn parse_is_boolean() {
verified_stmt("SELECT f FROM foo WHERE field IS FALSE");
verified_stmt("SELECT f FROM foo WHERE field IS NOT FALSE");

verified_stmt("SELECT f FROM foo WHERE field IS NORMALIZED");
verified_stmt("SELECT f FROM foo WHERE field IS NFC NORMALIZED");
verified_stmt("SELECT f FROM foo WHERE field IS NFD NORMALIZED");
verified_stmt("SELECT f FROM foo WHERE field IS NOT NORMALIZED");
verified_stmt("SELECT f FROM foo WHERE field IS NOT NFKC NORMALIZED");

verified_stmt("SELECT f FROM foo WHERE field IS UNKNOWN");
verified_stmt("SELECT f FROM foo WHERE field IS NOT UNKNOWN");

let sql = "SELECT f from foo where field is 0";
let res = parse_sql_statements(sql);
assert_eq!(
ParserError::ParserError(
"Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0"
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: 0"
.to_string()
),
res.unwrap_err()
);

let sql = "SELECT s, s IS XYZ NORMALIZED FROM foo";
let res = parse_sql_statements(sql);
assert_eq!(
ParserError::ParserError(
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: XYZ"
.to_string()
),
res.unwrap_err()
);

let sql = "SELECT s, s IS NFKC FROM foo";
let res = parse_sql_statements(sql);
assert_eq!(
ParserError::ParserError(
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: FROM"
.to_string()
),
res.unwrap_err()
);

let sql = "SELECT s, s IS TRIM(' NFKC ') FROM foo";
let res = parse_sql_statements(sql);
assert_eq!(
ParserError::ParserError(
"Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: TRIM"
.to_string()
),
res.unwrap_err()
Expand Down Expand Up @@ -12982,7 +13058,7 @@ fn test_trailing_commas_in_from() {
let sql = "SELECT a FROM b, WHERE c = 1";
let _ = dialects.parse_sql_statements(sql).unwrap();

// nasted
// nested
let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),";
let _ = dialects.parse_sql_statements(sql).unwrap();

Expand Down
2 changes: 1 addition & 1 deletion tests/sqlparser_mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2572,7 +2572,7 @@ fn parse_kill() {
}

#[test]
fn parse_table_colum_option_on_update() {
fn parse_table_column_option_on_update() {
let sql1 = "CREATE TABLE foo (`modification_time` DATETIME ON UPDATE CURRENT_TIMESTAMP())";
match mysql().verified_stmt(sql1) {
Statement::CreateTable(CreateTable { name, columns, .. }) => {
Expand Down
Loading