apache · alamb · Jul 19, 2023 · May 8, 2023 · May 8, 2023 · May 8, 2023
diff --git a/src/ast/value.rs b/src/ast/value.rs
@@ -71,7 +71,7 @@ impl fmt::Display for Value {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
             Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
-            Value::DoubleQuotedString(v) => write!(f, "\"{v}\""),
+            Value::DoubleQuotedString(v) => write!(f, "\"{}\"", escape_double_quote_string(v)),
             Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
             Value::DollarQuotedString(v) => write!(f, "{v}"),
             Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
@@ -187,12 +187,49 @@ pub struct EscapeQuotedString<'a> {
 
 impl<'a> fmt::Display for EscapeQuotedString<'a> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        for c in self.string.chars() {
-            if c == self.quote {
-                write!(f, "{q}{q}", q = self.quote)?;
-            } else {
-                write!(f, "{c}")?;
+        // EscapeQuotedString doesn't know which escape mode was
+        // chosen by the user, so this code must correctly display
+        // strings without knowing whether they are already escaped
+        // or not.
+        //
+        // If the quote symbol appears twice in a row, OR if the quote
+        // symbol follows a backslash, display the chars as they are,
+        // without adding any escape. However, if a lone quote symbol
+        // sits between ordinary chars, `fmt()` displays it twice.
+        //
+        // The following table has examples
+        //
+        // | original query | mode      | AST Node                                           | serialized   |
+        // | -------------  | --------- | -------------------------------------------------- | ------------ |
+        // | `"A""B""A"`    | no-escape | `DoubleQuotedString(String::from("A\"\"B\"\"A"))`  | `"A""B""A"`  |
+        // | `"A""B""A"`    | default   | `DoubleQuotedString(String::from("A\"B\"A"))`      | `"A""B""A"`  |
+        // | `"A\"B\"A"`    | no-escape | `DoubleQuotedString(String::from("A\\\"B\\\"A"))`  | `"A\"B\"A"`  |
+        // | `"A\"B\"A"`    | default   | `DoubleQuotedString(String::from("A\"B\"A"))`      | `"A""B""A"`  |
+        let quote = self.quote;
+        let mut previous_char = char::default();
+        let mut peekable_chars = self.string.chars().peekable();
+        while let Some(&ch) = peekable_chars.peek() {
+            match ch {
+                char if char == quote => {
+                    if previous_char == '\\' {
+                        write!(f, "{char}")?;
+                        peekable_chars.next();
+                        continue;
+                    }
+                    peekable_chars.next();
+                    if peekable_chars.peek().map(|c| *c == quote).unwrap_or(false) {
+                        write!(f, "{char}{char}")?;
+                        peekable_chars.next();
+                    } else {
+                        write!(f, "{char}{char}")?;
+                    }
+                }
+                _ => {
+                    write!(f, "{ch}")?;
+                    peekable_chars.next();
+                }
             }
+            previous_char = ch;
         }
         Ok(())
     }
@@ -206,6 +243,10 @@ pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> {
     escape_quoted_string(s, '\'')
 }
 
+pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> {
+    escape_quoted_string(s, '\"')
+}
+
 pub struct EscapeEscapedStringLiteral<'a>(&'a str);
 
 impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> {

diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs
@@ -632,8 +632,7 @@ mod tests {
 
     fn do_visit(sql: &str) -> Vec<String> {
         let dialect = GenericDialect {};
-        let mut tokenizer = Tokenizer::new(&dialect, sql);
-        let tokens = tokenizer.tokenize().unwrap();
+        let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
         let s = Parser::new(&dialect)
             .with_tokens(tokens)
             .parse_statement()

diff --git a/src/parser.rs b/src/parser.rs
@@ -195,9 +195,52 @@ impl std::error::Error for ParserError {}
 // By default, allow expressions up to this deep before erroring
 const DEFAULT_REMAINING_DEPTH: usize = 50;
 
-#[derive(Debug, Default, Clone, PartialEq, Eq)]
+/// Options that control how the [`Parser`] parses SQL text
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ParserOptions {
     pub trailing_commas: bool,
+    /// Controls how literal values are unescaped. See
+    /// [`Tokenizer::with_unescape`] for more details.
+    pub unescape: bool,
+}
+
+impl Default for ParserOptions {
+    fn default() -> Self {
+        Self {
+            trailing_commas: false,
+            unescape: true,
+        }
+    }
+}
+
+impl ParserOptions {
+    /// Create a new [`ParserOptions`]
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Set if trailing commas are allowed.
+    ///
+    /// If this option is `false` (the default), the following SQL will
+    /// not parse. If the option is `true`, the SQL will parse.
+    ///
+    /// ```sql
+    ///  SELECT
+    ///   foo,
+    ///   bar,
+    ///  FROM baz
+    /// ```
+    pub fn with_trailing_commas(mut self, trailing_commas: bool) -> Self {
+        self.trailing_commas = trailing_commas;
+        self
+    }
+
+    /// Set if literal values are unescaped. Defaults to true. See
+    /// [`Tokenizer::with_unescape`] for more details.
+    pub fn with_unescape(mut self, unescape: bool) -> Self {
+        self.unescape = unescape;
+        self
+    }
 }
 
 pub struct Parser<'a> {
@@ -206,8 +249,9 @@ pub struct Parser<'a> {
     index: usize,
     /// The current dialect to use
     dialect: &'a dyn Dialect,
-    /// Additional options that allow you to mix & match behavior otherwise
-    /// constrained to certain dialects (e.g. trailing commas)
+    /// Additional options that allow you to mix & match behavior
+    /// otherwise constrained to certain dialects (e.g. trailing
+    /// commas) and/or control how input is parsed (e.g. unescaping)
     options: ParserOptions,
     /// ensure the stack does not overflow by limiting recursion depth
     recursion_counter: RecursionCounter,
@@ -267,17 +311,20 @@ impl<'a> Parser<'a> {
     /// Specify additional parser options
     ///
     ///
-    /// [`Parser`] supports additional options ([`ParserOptions`]) that allow you to
-    /// mix & match behavior otherwise constrained to certain dialects (e.g. trailing
-    /// commas).
+    /// [`Parser`] supports additional options ([`ParserOptions`])
+    /// that allow you to mix & match behavior otherwise constrained
+    /// to certain dialects (e.g. trailing commas).
     ///
     /// Example:
     /// ```
     /// # use sqlparser::{parser::{Parser, ParserError, ParserOptions}, dialect::GenericDialect};
     /// # fn main() -> Result<(), ParserError> {
     /// let dialect = GenericDialect{};
+    /// let options = ParserOptions::new()
+    ///    .with_trailing_commas(true)
+    ///    .with_unescape(false);
     /// let result = Parser::new(&dialect)
-    ///   .with_options(ParserOptions { trailing_commas: true })
+    ///   .with_options(options)
     ///   .try_with_sql("SELECT a, b, COUNT(*), FROM foo GROUP BY a, b,")?
     ///   .parse_statements();
     ///   assert!(matches!(result, Ok(_)));
@@ -317,8 +364,9 @@ impl<'a> Parser<'a> {
     /// See example on [`Parser::new()`] for an example
     pub fn try_with_sql(self, sql: &str) -> Result<Self, ParserError> {
         debug!("Parsing sql '{}'...", sql);
-        let mut tokenizer = Tokenizer::new(self.dialect, sql);
-        let tokens = tokenizer.tokenize()?;
+        let tokens = Tokenizer::new(self.dialect, sql)
+            .with_unescape(self.options.unescape)
+            .tokenize()?;
         Ok(self.with_tokens(tokens))
     }
 
@@ -3654,7 +3702,7 @@ impl<'a> Parser<'a> {
             self.expect_token(&Token::RParen)?;
             Ok(Some(ColumnOption::Check(expr)))
         } else if self.parse_keyword(Keyword::AUTO_INCREMENT)
-            && dialect_of!(self is MySqlDialect |  GenericDialect)
+            && dialect_of!(self is MySqlDialect | GenericDialect)
         {
             // Support AUTO_INCREMENT for MySQL
             Ok(Some(ColumnOption::DialectSpecific(vec![