Skip to content

Commit cc259bd

Browse files
mvzinkayman-sigma
authored and committed
Ignore escaped LIKE wildcards in MySQL (apache#1735)
1 parent 18aff60 commit cc259bd

File tree

5 files changed

+89
-15
lines changed

5 files changed

+89
-15
lines changed

src/dialect/mod.rs

+27
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,33 @@ pub trait Dialect: Debug + Any {
201201
false
202202
}
203203

204+
/// Determine whether the dialect keeps (rather than strips) the backslash in escaped LIKE wildcards (`\%`, `\_`).
205+
///
206+
/// [MySQL] has a special case when escaping single quoted strings which leaves these unescaped
207+
/// so they can be used in LIKE patterns without double-escaping (as is necessary in other
208+
/// escaping dialects, such as [Snowflake]). Generally, special characters have escaping rules
209+
/// causing them to be replaced with a different byte sequence (e.g. `'\0'` becoming the zero
210+
/// byte), and the default if an escaped character does not have a specific escaping rule is to
211+
/// strip the backslash (e.g. there is no rule for `h`, so `'\h' = 'h'`). MySQL's special case
212+
/// for ignoring LIKE wildcard escapes is to *not* strip the backslash, so that `'\%' = '\\%'`.
213+
/// This applies to all string literals though, not just those used in LIKE patterns.
214+
///
215+
/// ```text
216+
/// mysql> select '\_', hex('\\'), hex('_'), hex('\_');
217+
/// +----+-----------+----------+-----------+
218+
/// | \_ | hex('\\') | hex('_') | hex('\_') |
219+
/// +----+-----------+----------+-----------+
220+
/// | \_ | 5C | 5F | 5C5F |
221+
/// +----+-----------+----------+-----------+
222+
/// 1 row in set (0.00 sec)
223+
/// ```
224+
///
225+
/// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
226+
/// [Snowflake]: https://docs.snowflake.com/en/sql-reference/functions/like#usage-notes
227+
fn ignores_wildcard_escapes(&self) -> bool {
228+
false
229+
}
230+
204231
/// Determine if the dialect supports string literals with `U&` prefix.
205232
/// This is used to specify Unicode code points in string literals.
206233
/// For example, in PostgreSQL, the following is a valid string literal:

src/dialect/mysql.rs

+4
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ impl Dialect for MySqlDialect {
6262
true
6363
}
6464

65+
fn ignores_wildcard_escapes(&self) -> bool {
66+
true
67+
}
68+
6569
fn supports_numeric_prefix(&self) -> bool {
6670
true
6771
}

src/tokenizer.rs

+20-2
Original file line numberDiff line numberDiff line change
@@ -2011,8 +2011,13 @@ impl<'a> Tokenizer<'a> {
20112011
num_consecutive_quotes = 0;
20122012

20132013
if let Some(next) = chars.peek() {
2014-
if !self.unescape {
2015-
// In no-escape mode, the given query has to be saved completely including backslashes.
2014+
if !self.unescape
2015+
|| (self.dialect.ignores_wildcard_escapes()
2016+
&& (*next == '%' || *next == '_'))
2017+
{
2018+
// In no-escape mode, the given query has to be saved completely
2019+
// including backslashes. Similarly, with ignores_wildcard_escapes,
2020+
// the backslash is not stripped.
20162021
s.push(ch);
20172022
s.push(*next);
20182023
chars.next(); // consume next
@@ -3585,6 +3590,9 @@ mod tests {
35853590
(r#"'\\a\\b\'c'"#, r#"\\a\\b\'c"#, r#"\a\b'c"#),
35863591
(r#"'\'abcd'"#, r#"\'abcd"#, r#"'abcd"#),
35873592
(r#"'''a''b'"#, r#"''a''b"#, r#"'a'b"#),
3593+
(r#"'\q'"#, r#"\q"#, r#"q"#),
3594+
(r#"'\%\_'"#, r#"\%\_"#, r#"%_"#),
3595+
(r#"'\\%\\_'"#, r#"\\%\\_"#, r#"\%\_"#),
35883596
] {
35893597
let tokens = Tokenizer::new(&dialect, sql)
35903598
.with_unescape(false)
@@ -3618,6 +3626,16 @@ mod tests {
36183626

36193627
compare(expected, tokens);
36203628
}
3629+
3630+
// MySQL special case for LIKE escapes
3631+
for (sql, expected) in [(r#"'\%'"#, r#"\%"#), (r#"'\_'"#, r#"\_"#)] {
3632+
let dialect = MySqlDialect {};
3633+
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
3634+
3635+
let expected = vec![Token::SingleQuotedString(expected.to_string())];
3636+
3637+
compare(expected, tokens);
3638+
}
36213639
}
36223640

36233641
#[test]

tests/sqlparser_common.rs

+27-13
Original file line numberDiff line numberDiff line change
@@ -10390,15 +10390,8 @@ fn parse_with_recursion_limit() {
1039010390

1039110391
#[test]
1039210392
fn parse_escaped_string_with_unescape() {
10393-
fn assert_mysql_query_value(sql: &str, quoted: &str) {
10394-
let stmt = TestedDialects::new(vec![
10395-
Box::new(MySqlDialect {}),
10396-
Box::new(BigQueryDialect {}),
10397-
Box::new(SnowflakeDialect {}),
10398-
])
10399-
.one_statement_parses_to(sql, "");
10400-
10401-
match stmt {
10393+
fn assert_mysql_query_value(dialects: &TestedDialects, sql: &str, quoted: &str) {
10394+
match dialects.one_statement_parses_to(sql, "") {
1040210395
Statement::Query(query) => match *query.body {
1040310396
SetExpr::Select(value) => {
1040410397
let expr = expr_from_projection(only(&value.projection));
@@ -10414,17 +10407,38 @@ fn parse_escaped_string_with_unescape() {
1041410407
_ => unreachable!(),
1041510408
};
1041610409
}
10410+
10411+
let escaping_dialects =
10412+
&all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape());
10413+
let no_wildcard_exception = &all_dialects_where(|dialect| {
10414+
dialect.supports_string_literal_backslash_escape() && !dialect.ignores_wildcard_escapes()
10415+
});
10416+
let with_wildcard_exception = &all_dialects_where(|dialect| {
10417+
dialect.supports_string_literal_backslash_escape() && dialect.ignores_wildcard_escapes()
10418+
});
10419+
1041710420
let sql = r"SELECT 'I\'m fine'";
10418-
assert_mysql_query_value(sql, "I'm fine");
10421+
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");
1041910422

1042010423
let sql = r#"SELECT 'I''m fine'"#;
10421-
assert_mysql_query_value(sql, "I'm fine");
10424+
assert_mysql_query_value(escaping_dialects, sql, "I'm fine");
1042210425

1042310426
let sql = r#"SELECT 'I\"m fine'"#;
10424-
assert_mysql_query_value(sql, "I\"m fine");
10427+
assert_mysql_query_value(escaping_dialects, sql, "I\"m fine");
1042510428

1042610429
let sql = r"SELECT 'Testing: \0 \\ \% \_ \b \n \r \t \Z \a \h \ '";
10427-
assert_mysql_query_value(sql, "Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ");
10430+
assert_mysql_query_value(
10431+
no_wildcard_exception,
10432+
sql,
10433+
"Testing: \0 \\ % _ \u{8} \n \r \t \u{1a} \u{7} h ",
10434+
);
10435+
10436+
// check MySQL doesn't remove backslash from escaped LIKE wildcards
10437+
assert_mysql_query_value(
10438+
with_wildcard_exception,
10439+
sql,
10440+
"Testing: \0 \\ \\% \\_ \u{8} \n \r \t \u{1a} \u{7} h ",
10441+
);
1042810442
}
1042910443

1043010444
#[test]

tests/sqlparser_mysql.rs

+11
Original file line numberDiff line numberDiff line change
@@ -2627,6 +2627,17 @@ fn parse_rlike_and_regexp() {
26272627
}
26282628
}
26292629

2630+
#[test]
2631+
fn parse_like_with_escape() {
2632+
// verify backslash is not stripped for escaped wildcards
2633+
mysql().verified_only_select(r#"SELECT 'a\%c' LIKE 'a\%c'"#);
2634+
mysql().verified_only_select(r#"SELECT 'a\_c' LIKE 'a\_c'"#);
2635+
mysql().verified_only_select(r#"SELECT '%\_\%' LIKE '%\_\%'"#);
2636+
mysql().verified_only_select(r#"SELECT '\_\%' LIKE CONCAT('\_', '\%')"#);
2637+
mysql().verified_only_select(r#"SELECT 'a%c' LIKE 'a$%c' ESCAPE '$'"#);
2638+
mysql().verified_only_select(r#"SELECT 'a_c' LIKE 'a#_c' ESCAPE '#'"#);
2639+
}
2640+
26302641
#[test]
26312642
fn parse_kill() {
26322643
let stmt = mysql_and_generic().verified_stmt("KILL CONNECTION 5");

0 commit comments

Comments
 (0)