Skip to content

Ignore escaped LIKE wildcards in MySQL #1735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,33 @@ pub trait Dialect: Debug + Any {
false
}

/// Determine whether the dialect strips the backslash when escaping LIKE wildcards (%, _).
///
/// [MySQL] has a special case when escaping single quoted strings which leaves these unescaped
/// so they can be used in LIKE patterns without double-escaping (as is necessary in other
/// escaping dialects, such as [Snowflake]). Generally, special characters have escaping rules
/// causing them to be replaced with a different byte sequences (e.g. `'\0'` becoming the zero
/// byte), and the default if an escaped character does not have a specific escaping rule is to
/// strip the backslash (e.g. there is no rule for `h`, so `'\h' = 'h'`). MySQL's special case
/// for ignoring LIKE wildcard escapes is to *not* strip the backslash, so that `'\%' = '\\%'`.
/// This applies to all string literals though, not just those used in LIKE patterns.
///
/// ```text
/// mysql> select '\_', hex('\\'), hex('_'), hex('\_');
/// +----+-----------+----------+-----------+
/// | \_ | hex('\\') | hex('_') | hex('\_') |
/// +----+-----------+----------+-----------+
/// | \_ | 5C | 5F | 5C5F |
/// +----+-----------+----------+-----------+
/// 1 row in set (0.00 sec)
/// ```
///
/// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/functions/like#usage-notes
fn ignores_wildcard_escapes(&self) -> bool {
false
}

/// Determine if the dialect supports string literals with `U&` prefix.
/// This is used to specify Unicode code points in string literals.
/// For example, in PostgreSQL, the following is a valid string literal:
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ impl Dialect for MySqlDialect {
true
}

fn ignores_wildcard_escapes(&self) -> bool {
true
}

fn supports_numeric_prefix(&self) -> bool {
true
}
Expand Down
22 changes: 20 additions & 2 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2011,8 +2011,13 @@ impl<'a> Tokenizer<'a> {
num_consecutive_quotes = 0;

if let Some(next) = chars.peek() {
if !self.unescape {
// In no-escape mode, the given query has to be saved completely including backslashes.
if !self.unescape
|| (self.dialect.ignores_wildcard_escapes()
&& (*next == '%' || *next == '_'))
{
// In no-escape mode, the given query has to be saved completely
// including backslashes. Similarly, with ignore_like_wildcard_escapes,
// the backslash is not stripped.
s.push(ch);
s.push(*next);
chars.next(); // consume next
Expand Down Expand Up @@ -3585,6 +3590,9 @@ mod tests {
(r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#),
(r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#),
(r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#),
(r#"'\q'"#, r#"\q"#, r#"q"#),
(r#"'\%\_'"#, r#"\%\_"#, r#"%_"#),
(r#"'\\%\\_'"#, r#"\\%\\_"#, r#"\%\_"#),
] {
let tokens = Tokenizer::new(&dialect, sql)
.with_unescape(false)
Expand Down Expand Up @@ -3618,6 +3626,16 @@ mod tests {

compare(expected, tokens);
}

// MySQL special case for LIKE escapes
for (sql, expected) in [(r#"'\%'"#, r#"\%"#), (r#"'\_'"#, r#"\_"#)] {
let dialect = MySqlDialect {};
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();

let expected = vec![Token::SingleQuotedString(expected.to_string())];

compare(expected, tokens);
}
}

#[test]
Expand Down
40 changes: 27 additions & 13 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10387,15 +10387,8 @@ fn parse_with_recursion_limit() {

#[test]
fn parse_escaped_string_with_unescape() {
fn assert_mysql_query_value(sql: &str, quoted: &str) {
let stmt = TestedDialects::new(vec![
Box::new(MySqlDialect {}),
Box::new(BigQueryDialect {}),
Box::new(SnowflakeDialect {}),
])
.one_statement_parses_to(sql, "");

match stmt {
fn assert_mysql_query_value(dialects: &TestedDialects, sql: &str, quoted: &str) {
match dialects.one_statement_parses_to(sql, "") {
Statement::Query(query) => match *query.body {
SetExpr::Select(value) => {
let expr = expr_from_projection(only(&value.projection));
Expand All @@ -10411,17 +10404,38 @@ fn parse_escaped_string_with_unescape() {
_ => unreachable!(),
};
}

let escaping_dialects =
&all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape());
let no_wildcard_exception = &all_dialects_where(|dialect| {
dialect.supports_string_literal_backslash_escape() && !dialect.ignores_wildcard_escapes()
});
let with_wildcard_exception = &all_dialects_where(|dialect| {
dialect.supports_string_literal_backslash_escape() && dialect.ignores_wildcard_escapes()
});

let sql = r"SELECT 'I\'m fine'";
assert_mysql_query_value(sql, "I'm fine");
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");

let sql = r#"SELECT 'I''m fine'"#;
assert_mysql_query_value(sql, "I'm fine");
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");

let sql = r#"SELECT 'I\"m fine'"#;
assert_mysql_query_value(sql, "I\"m fine");
assert_mysql_query_value(escaping_dialects, sql, "I\"m fine");

let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \h \ '";
assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ");
assert_mysql_query_value(
no_wildcard_exception,
sql,
"Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ",
);

// check MySQL doesn't remove backslash from escaped LIKE wildcards
assert_mysql_query_value(
with_wildcard_exception,
sql,
"Testing: \0 \\ \\% \\_ \u{8} \n \r \t \u{1a} \u{7} h ",
);
}

#[test]
Expand Down
11 changes: 11 additions & 0 deletions tests/sqlparser_mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2627,6 +2627,17 @@ fn parse_rlike_and_regexp() {
}
}

#[test]
fn parse_like_with_escape() {
// verify backslash is not stripped for escaped wildcards
mysql().verified_only_select(r#"SELECT 'a\%c' LIKE 'a\%c'"#);
mysql().verified_only_select(r#"SELECT 'a\_c' LIKE 'a\_c'"#);
mysql().verified_only_select(r#"SELECT '%\_\%' LIKE '%\_\%'"#);
mysql().verified_only_select(r#"SELECT '\_\%' LIKE CONCAT('\_', '\%')"#);
mysql().verified_only_select(r#"SELECT 'a%c' LIKE 'a$%c' ESCAPE '$'"#);
mysql().verified_only_select(r#"SELECT 'a_c' LIKE 'a#_c' ESCAPE '#'"#);
}

#[test]
fn parse_kill() {
let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5");
Expand Down