Skip to content

feat: mysql no-escape mode #870

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 47 additions & 6 deletions src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
Value::DoubleQuotedString(v) => write!(f, "\"{v}\""),
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)),
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
Value::DollarQuotedString(v) => write!(f, "{v}"),
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Expand Down Expand Up @@ -187,12 +187,49 @@ pub struct EscapeQuotedString<'a> {

impl<'a> fmt::Display for EscapeQuotedString<'a> {
/// Writes `self.string` to `f`, escaping occurrences of `self.quote` so the
/// result can be placed between a pair of `self.quote` characters.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// NOTE(review): the `for` loop fragment below doubles every quote character
// unconditionally. It appears here next to the `while let` loop that
// supersedes it, and its closing braces are not visible in this view --
// confirm which version is intended before editing.
for c in self.string.chars() {
if c == self.quote {
write!(f, "{q}{q}", q = self.quote)?;
} else {
write!(f, "{c}")?;
// EscapeQuotedString doesn't know which escape mode was chosen by
// the user, so this code must correctly display strings without
// knowing whether the strings are already escaped or not.
//
// If the quote symbol in the string is repeated twice, OR if the
// quote symbol follows a backslash, display the chars without any
// additional escaping. However, if the quote symbol appears on its
// own between ordinary chars, `fmt()` should display it twice.
//
// The following table has examples
//
// | original query | mode      | AST Node                                           | serialized   |
// | -------------- | --------- | -------------------------------------------------- | ------------ |
// | `"A""B""A"`    | no-escape | `DoubleQuotedString(String::from("A\"\"B\"\"A"))`  | `"A""B""A"`  |
// | `"A""B""A"`    | default   | `DoubleQuotedString(String::from("A\"B\"A"))`      | `"A""B""A"`  |
// | `"A\"B\"A"`    | no-escape | `DoubleQuotedString(String::from("A\\\"B\\\"A"))`  | `"A\"B\"A"`  |
// | `"A\"B\"A"`    | default   | `DoubleQuotedString(String::from("A\"B\"A"))`      | `"A""B""A"`  |
let quote = self.quote;
// `char::default()` (NUL) stands in for "no previous character" on the
// first iteration.
let mut previous_char = char::default();
// Peekable so a quote can be inspected together with the char after it.
let mut peekable_chars = self.string.chars().peekable();
while let Some(&ch) = peekable_chars.peek() {
match ch {
char if char == quote => {
// Quote preceded by a backslash: emit it unchanged.
// NOTE(review): `continue` bypasses the `previous_char = ch`
// assignment at the bottom of the loop, so `previous_char`
// is still `\\` on the next iteration.
if previous_char == '\\' {
write!(f, "{char}")?;
peekable_chars.next();
continue;
}
peekable_chars.next();
// Both arms below emit the quote twice. The only difference is
// that when the next char is also a quote (an already doubled
// quote) that second quote is consumed, so the pair is not
// doubled again.
if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) {
write!(f, "{char}{char}")?;
peekable_chars.next();
} else {
write!(f, "{char}{char}")?;
}
}
// Any other character is copied through verbatim.
_ => {
write!(f, "{ch}")?;
peekable_chars.next();
}
}
// Remember the char just handled for the backslash check above.
previous_char = ch;
}
Ok(())
}
Expand All @@ -206,6 +243,10 @@ pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> {
escape_quoted_string(s, '\'')
}

/// Builds an [`EscapeQuotedString`] for `s` that uses the double quote
/// (`"`) as its quote character, by delegating to `escape_quoted_string`.
pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> {
    let double_quote = '"';
    escape_quoted_string(s, double_quote)
}

pub struct EscapeEscapedStringLiteral<'a>(&'a str);

impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> {
Expand Down
3 changes: 1 addition & 2 deletions src/ast/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,7 @@ mod tests {

fn do_visit(sql: &str) -> Vec<String> {
let dialect = GenericDialect {};
let mut tokenizer = Tokenizer::new(&dialect, sql);
let tokens = tokenizer.tokenize().unwrap();
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
let s = Parser::new(&dialect)
.with_tokens(tokens)
.parse_statement()
Expand Down
68 changes: 58 additions & 10 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,52 @@ impl std::error::Error for ParserError {}
// By default, allow expressions up to this deep before erroring
const DEFAULT_REMAINING_DEPTH: usize = 50;

#[derive(Debug, Default, Clone, PartialEq, Eq)]
/// Options that control how the [`Parser`] parses SQL text
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserOptions {
    /// Whether a trailing comma is allowed. See
    /// [`ParserOptions::with_trailing_commas`] for an example.
    pub trailing_commas: bool,
    /// Controls how literal values are unescaped. See
    /// [`Tokenizer::with_unescape`] for more details.
    pub unescape: bool,
}

impl Default for ParserOptions {
    /// Trailing commas are disallowed and unescaping is enabled.
    fn default() -> Self {
        ParserOptions {
            trailing_commas: false,
            unescape: true,
        }
    }
}

impl ParserOptions {
    /// Create a new [`ParserOptions`] holding the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set if trailing commas are allowed.
    ///
    /// With `false` (the default) the SQL below is rejected; with `true`
    /// it parses.
    ///
    /// ```sql
    /// SELECT
    ///  foo,
    ///  bar,
    /// FROM baz
    /// ```
    pub fn with_trailing_commas(self, trailing_commas: bool) -> Self {
        Self {
            trailing_commas,
            ..self
        }
    }

    /// Set if literal values are unescaped. Defaults to true. See
    /// [`Tokenizer::with_unescape`] for more details.
    pub fn with_unescape(self, unescape: bool) -> Self {
        Self { unescape, ..self }
    }
}

pub struct Parser<'a> {
Expand All @@ -206,8 +249,9 @@ pub struct Parser<'a> {
index: usize,
/// The current dialect to use
dialect: &'a dyn Dialect,
/// Additional options that allow you to mix & match behavior otherwise
/// constrained to certain dialects (e.g. trailing commas)
/// Additional options that allow you to mix & match behavior
/// otherwise constrained to certain dialects (e.g. trailing
/// commas) and/or format of parse (e.g. unescaping)
options: ParserOptions,
/// ensure the stack does not overflow by limiting recursion depth
recursion_counter: RecursionCounter,
Expand Down Expand Up @@ -267,17 +311,20 @@ impl<'a> Parser<'a> {
/// Specify additional parser options
///
///
/// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to
/// mix & match behavior otherwise constrained to certain dialects (e.g. trailing
/// commas).
/// [`Parser`] supports additional options ([`ParserOptions`])
/// that allow you to mix & match behavior otherwise constrained
/// to certain dialects (e.g. trailing commas).
///
/// Example:
/// ```
/// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect};
/// # fn main() -> Result<(), ParserError> {
/// let dialect = GenericDialect{};
/// let options = ParserOptions::new()
/// .with_trailing_commas(true)
/// .with_unescape(false);
/// let result = Parser::new(&dialect)
/// .with_options(ParserOptions { trailing_commas: true })
/// .with_options(options)
/// .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")?
/// .parse_statements();
/// assert!(matches!(result, Ok(_)));
Expand Down Expand Up @@ -317,8 +364,9 @@ impl<'a> Parser<'a> {
/// See example on [`Parser::new()`] for an example
pub fn try_with_sql(self, sql: &str) -> Result<Self, ParserError> {
// Log the raw SQL text at debug level before tokenizing it.
debug!("Parsing sql '{}'...", sql);
// NOTE(review): the next two statements tokenize `sql` without applying
// `self.options.unescape`, and the `tokens` binding they produce is
// shadowed by the `let tokens` that follows. They appear to be the
// pre-change form of that statement -- confirm whether they should remain.
let mut tokenizer = Tokenizer::new(self.dialect, sql);
let tokens = tokenizer.tokenize()?;
// Tokenize with the parser's `unescape` option forwarded to the tokenizer;
// a tokenizer error is returned to the caller through `?`.
let tokens = Tokenizer::new(self.dialect, sql)
.with_unescape(self.options.unescape)
.tokenize()?;
// Hand the token stream to the parser and return it for further chaining.
Ok(self.with_tokens(tokens))
}

Expand Down Expand Up @@ -3654,7 +3702,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::RParen)?;
Ok(Some(ColumnOption::Check(expr)))
} else if self.parse_keyword(Keyword::AUTO_INCREMENT)
&& dialect_of!(self is MySqlDialect | GenericDialect)
&& dialect_of!(self is MySqlDialect | GenericDialect)
{
// Support AUTO_INCREMENT for MySQL
Ok(Some(ColumnOption::DialectSpecific(vec![
Expand Down
Loading