Skip to content

Commit 21c20b0

Browse files
hansott authored and ayman-sigma committed
Only support escape literals for Postgres, Redshift and generic dialect (apache#1674)
1 parent 9c81e06 commit 21c20b0

File tree

6 files changed, with 69 additions and 2 deletions.

src/dialect/generic.rs

+4
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,8 @@ impl Dialect for GenericDialect {
139139
fn supports_user_host_grantee(&self) -> bool {
140140
// GenericDialect unconditionally reports this capability as supported.
true
141141
}
142+
143+
/// GenericDialect opts in to the `E'...'` escape string constant syntax.
fn supports_string_escape_constant(&self) -> bool {
144+
// Overrides the trait default, which is `false`.
true
145+
}
142146
}

src/dialect/mod.rs

+7
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,13 @@ pub trait Dialect: Debug + Any {
840840
fn supports_timestamp_versioning(&self) -> bool {
841841
// Trait default: not supported unless a dialect overrides this method.
false
842842
}
843+
844+
/// Returns true if this dialect supports the E'...' syntax for string literals
/// (an `e` or `E` prefix immediately followed by a single-quoted string).
///
/// The default is `false`; a dialect opts in by overriding this method.
/// The tokenizer only takes its escape-string branch for a leading `e`/`E`
/// when this returns `true`.
845+
///
846+
/// Postgres: <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE>
847+
fn supports_string_escape_constant(&self) -> bool {
848+
false
849+
}
843850
}
844851

845852
/// This represents the operators for which precedence must be defined

src/dialect/postgresql.rs

+4
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,10 @@ impl Dialect for PostgreSqlDialect {
245245
fn supports_nested_comments(&self) -> bool {
246246
// PostgreSqlDialect unconditionally reports this capability as supported.
true
247247
}
248+
249+
/// PostgreSqlDialect opts in to the `E'...'` escape string constant syntax.
fn supports_string_escape_constant(&self) -> bool {
250+
// Overrides the trait default, which is `false`.
true
251+
}
248252
}
249253

250254
pub fn parse_create(parser: &mut Parser) -> Option<Result<Statement, ParserError>> {

src/dialect/redshift.rs

+4
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,8 @@ impl Dialect for RedshiftSqlDialect {
109109
fn supports_partiql(&self) -> bool {
110110
// RedshiftSqlDialect unconditionally reports this capability as supported.
true
111111
}
112+
113+
/// RedshiftSqlDialect opts in to the `E'...'` escape string constant syntax.
fn supports_string_escape_constant(&self) -> bool {
114+
// Overrides the trait default, which is `false`.
true
115+
}
112116
}

src/test_utils.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -240,13 +240,17 @@ impl TestedDialects {
240240

241241
/// Check that the tokenizer returns the expected tokens for the given SQL.
///
/// Every dialect in `self.dialects` tokenizes `sql` independently, and each
/// result is compared against `expected`.
///
/// # Panics
///
/// Panics if there are no dialects to test, if tokenizing fails for any
/// dialect, or if any dialect produces tokens that differ from `expected`.
242242
pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
243+
// Iterating an empty dialect list would assert nothing and pass vacuously,
// so fail loudly instead.
if self.dialects.is_empty() {
244+
panic!("No dialects to test");
245+
}
246+
243247
self.dialects.iter().for_each(|dialect| {
244248
let mut tokenizer = Tokenizer::new(&**dialect, sql);
245249
// Apply the configured unescape setting only when options were provided.
if let Some(options) = &self.options {
246250
tokenizer = tokenizer.with_unescape(options.unescape);
247251
}
248252
let tokens = tokenizer.tokenize().unwrap();
249-
assert_eq!(expected, tokens);
253+
// The failure message names the dialect whose output differed.
assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect);
250254
});
251255
}
252256
}

src/tokenizer.rs

+45-1
Original file line numberDiff line numberDiff line change
@@ -985,7 +985,7 @@ impl<'a> Tokenizer<'a> {
985985
}
986986
}
987987
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
988-
x @ 'e' | x @ 'E' => {
988+
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
989989
let starting_loc = chars.location();
990990
chars.next(); // consume, to check the next char
991991
match chars.peek() {
@@ -3573,4 +3573,48 @@ mod tests {
35733573
],
35743574
);
35753575
}
3576+
3577+
// For dialects that do not support escape string constants, a leading `e`/`E`
// must tokenize as an ordinary word, followed by a plain single-quoted string.
#[test]
3578+
fn test_string_escape_constant_not_supported() {
3579+
// Lowercase prefix.
all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to(
3580+
"select e'...'",
3581+
vec![
3582+
Token::make_keyword("select"),
3583+
Token::Whitespace(Whitespace::Space),
3584+
Token::make_word("e", None),
3585+
Token::SingleQuotedString("...".to_string()),
3586+
],
3587+
);
3588+
3589+
// Uppercase prefix: the word token keeps the original case.
all_dialects_where(|dialect| !dialect.supports_string_escape_constant()).tokenizes_to(
3590+
"select E'...'",
3591+
vec![
3592+
Token::make_keyword("select"),
3593+
Token::Whitespace(Whitespace::Space),
3594+
Token::make_word("E", None),
3595+
Token::SingleQuotedString("...".to_string()),
3596+
],
3597+
);
3598+
}
3599+
3600+
// For dialects that support escape string constants, `e'...'` / `E'...'` must
// tokenize as a single escaped string literal with its escapes resolved.
#[test]
3601+
fn test_string_escape_constant_supported() {
3602+
// The Rust literal "select e'\\''" is the SQL text `select e'\''`; the
// backslash-escaped quote is expected to come back as one `'` character.
all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to(
3603+
"select e'\\''",
3604+
vec![
3605+
Token::make_keyword("select"),
3606+
Token::Whitespace(Whitespace::Space),
3607+
Token::EscapedStringLiteral("'".to_string()),
3608+
],
3609+
);
3610+
3611+
// Same input with an uppercase prefix yields the same token.
all_dialects_where(|dialect| dialect.supports_string_escape_constant()).tokenizes_to(
3612+
"select E'\\''",
3613+
vec![
3614+
Token::make_keyword("select"),
3615+
Token::Whitespace(Whitespace::Space),
3616+
Token::EscapedStringLiteral("'".to_string()),
3617+
],
3618+
);
3619+
}
35763620
}

0 commit comments

Comments
 (0)