From 113722efb230072d31b14eb991b947761006f43e Mon Sep 17 00:00:00 2001
From: Markus Westerlind
Date: Fri, 4 Feb 2022 17:28:22 +0100
Subject: [PATCH 1/5] fix: Handle double quotes inside quoted identifiers correctly

This fixes #410 for standard SQL; however, I don't know enough about other
dialects to know if they handle this differently. May need more extensive
testing as well.
---
 src/ast/mod.rs              | 15 +++++++++++++--
 src/tokenizer.rs            | 22 ++++++++++++++++++++--
 tests/sqlparser_postgres.rs |  5 +++++
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 477a22890..81d98a820 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -23,7 +23,7 @@ use alloc::{
     string::{String, ToString},
     vec::Vec,
 };
-use core::fmt;
+use core::fmt::{self, Write};
 
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
@@ -127,7 +127,18 @@ impl From<&str> for Ident {
 impl fmt::Display for Ident {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self.quote_style {
-            Some(q) if q == '"' || q == '\'' || q == '`' => write!(f, "{}{}{}", q, self.value, q),
+            Some(q) if q == '"' || q == '\'' || q == '`' => {
+                f.write_char(q)?;
+                let mut first = true;
+                for s in self.value.split_inclusive(q) {
+                    if !first {
+                        f.write_char(q)?;
+                    }
+                    first = false;
+                    f.write_str(s)?;
+                }
+                f.write_char(q)
+            }
             Some(q) if q == '[' => write!(f, "[{}]", self.value),
             None => f.write_str(&self.value),
             _ => panic!("unexpected quote style"),
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 296bcc64b..24c91ef95 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -418,8 +418,26 @@ impl<'a> Tokenizer<'a> {
             quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
                 chars.next(); // consume the opening quote
                 let quote_end = Word::matching_end_quote(quote_start);
-                let s = peeking_take_while(chars, |ch| ch != quote_end);
-                if chars.next() == Some(quote_end) {
+                let mut last_char = None;
+                let s = {
+                    let mut s = String::new();
+                    while let Some(ch) = chars.next() {
+                        if ch == quote_end {
+                            if chars.peek() == Some(&quote_end) {
+                                chars.next();
+                                s.push(ch);
+                            } else {
+                                last_char = Some(quote_end);
+                                break;
+                            }
+                        } else {
+                            s.push(ch);
+                        }
+                    }
+                    s
+                };
+
+                if last_char == Some(quote_end) {
                     Ok(Some(Token::make_word(&s, Some(quote_start))))
                 } else {
                     self.tokenizer_error(format!(
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index 60d9c1cb4..0c1bb627c 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -834,6 +834,11 @@ fn parse_comments() {
     }
 }
 
+#[test]
+fn parse_quoted_identifier() {
+    pg_and_generic().verified_stmt(r#"SELECT "quoted "" ident""#);
+}
+
 fn pg() -> TestedDialects {
     TestedDialects {
         dialects: vec![Box::new(PostgreSqlDialect {})],

From 96e695fd2430d0e732d03126295ef3475a483f52 Mon Sep 17 00:00:00 2001
From: Markus Westerlind
Date: Mon, 7 Feb 2022 12:11:09 +0100
Subject: [PATCH 2/5] refactor: Make quoted identifier parsing a separate function

---
 src/tokenizer.rs | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 24c91ef95..0f13f53b4 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -418,24 +418,7 @@ impl<'a> Tokenizer<'a> {
             quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
                 chars.next(); // consume the opening quote
                 let quote_end = Word::matching_end_quote(quote_start);
-                let mut last_char = None;
-                let s = {
-                    let mut s = String::new();
-                    while let Some(ch) = chars.next() {
-                        if ch == quote_end {
-                            if chars.peek() == Some(&quote_end) {
-                                chars.next();
-                                s.push(ch);
-                            } else {
-                                last_char = Some(quote_end);
-                                break;
-                            }
-                        } else {
-                            s.push(ch);
-                        }
-                    }
-                    s
-                };
+                let (s, last_char) = parse_quoted_ident(chars, quote_end);
 
                 if last_char == Some(quote_end) {
                     Ok(Some(Token::make_word(&s, Some(quote_start))))
@@ -746,6 +729,25 @@ fn peeking_take_while(
     s
 }
 
+fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
+    let mut last_char = None;
+    let mut s = String::new();
+    while let Some(ch) = chars.next() {
+        if ch == quote_end {
+            if chars.peek() == Some(&quote_end) {
+                chars.next();
+                s.push(ch);
+            } else {
+                last_char = Some(quote_end);
+                break;
+            }
+        } else {
+            s.push(ch);
+        }
+    }
+    (s, last_char)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From e344aac30f8be12073a5f605f6429f1feec6a51e Mon Sep 17 00:00:00 2001
From: Markus Westerlind
Date: Mon, 7 Feb 2022 12:21:12 +0100
Subject: [PATCH 3/5] test: Check that quoted identifier tokenization works

Added `pretty_assertions` so that the `assert_eq!` output in the tokenization
test is readable.
---
 Cargo.toml       |  1 +
 src/lib.rs       |  4 ++++
 src/tokenizer.rs | 18 ++++++++++++++++++
 3 files changed, 23 insertions(+)

diff --git a/Cargo.toml b/Cargo.toml
index bf65c052b..d53e3aa9e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -36,6 +36,7 @@ serde_json = { version = "1.0", optional = true }
 [dev-dependencies]
 simple_logger = "1.9"
 matches = "0.1"
+pretty_assertions = "1"
 
 [package.metadata.release]
 # Instruct `cargo release` to not run `cargo publish` locally:
diff --git a/src/lib.rs b/src/lib.rs
index f04ae07a9..880c6d6e1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -39,6 +39,10 @@
 #[cfg(not(feature = "std"))]
 extern crate alloc;
 
+#[macro_use]
+#[cfg(test)]
+extern crate pretty_assertions;
+
 pub mod ast;
 #[macro_use]
 pub mod dialect;
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 0f13f53b4..faf99cd4e 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1296,6 +1296,24 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_quoted_identifier() {
+        let sql = r#" "a "" b" "a """ "c """"" "#;
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"a " b"#.into(), Some('"')),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"a ""#.into(), Some('"')),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word(r#"c """#.into(), Some('"')),
+            Token::Whitespace(Whitespace::Space),
+        ];
+        compare(expected, tokens);
+    }
+
     fn compare(expected: Vec<Token>, actual: Vec<Token>) {
         //println!("------------------------------");
         //println!("tokens = {:?}", actual);

From f85c3991cd80cf62d7deae1e976143ca6207b3bc Mon Sep 17 00:00:00 2001
From: Markus Westerlind
Date: Mon, 7 Feb 2022 12:37:13 +0100
Subject: [PATCH 4/5] test: Check that quoted identifiers work in mysql

---
 tests/sqlparser_mysql.rs | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index f67d05c34..c1f558596 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -178,6 +178,37 @@ fn parse_quote_identifiers() {
     }
 }
 
+#[test]
+fn parse_quote_identifiers_2() {
+    let sql = "SELECT `quoted `` identifier`";
+    assert_eq!(
+        mysql().verified_stmt(sql),
+        Statement::Query(Box::new(Query {
+            with: None,
+            body: SetExpr::Select(Box::new(Select {
+                distinct: false,
+                top: None,
+                projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
+                    value: "quoted ` identifier".into(),
+                    quote_style: Some('`'),
+                }))],
+                from: vec![],
+                lateral_views: vec![],
+                selection: None,
+                group_by: vec![],
+                cluster_by: vec![],
+                distribute_by: vec![],
+                sort_by: vec![],
+                having: None,
+            })),
+            order_by: vec![],
+            limit: None,
+            offset: None,
+            fetch: None,
+        }))
+    );
+}
+
 #[test]
 fn parse_unterminated_escape() {
     let sql = r#"SELECT 'I\'m not fine\'"#;

From 2c9cd207e972c0271cbf7ff77b91d7e50eefaebe Mon Sep 17 00:00:00 2001
From: Markus Westerlind
Date: Mon, 7 Feb 2022 15:56:27 +0100
Subject: [PATCH 5/5] chore: cargo clippy

---
 src/tokenizer.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index faf99cd4e..05b530216 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1304,11 +1304,11 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
             Token::Whitespace(Whitespace::Space),
-            Token::make_word(r#"a " b"#.into(), Some('"')),
+            Token::make_word(r#"a " b"#, Some('"')),
             Token::Whitespace(Whitespace::Space),
-            Token::make_word(r#"a ""#.into(), Some('"')),
+            Token::make_word(r#"a ""#, Some('"')),
             Token::Whitespace(Whitespace::Space),
-            Token::make_word(r#"c """#.into(), Some('"')),
+            Token::make_word(r#"c """#, Some('"')),
             Token::Whitespace(Whitespace::Space),
         ];
         compare(expected, tokens);
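
For reference, below is a minimal standalone sketch (not part of the patches and not the sqlparser API) of the escaping rule the series implements: inside a delimited identifier, a doubled closing quote is kept as a single literal quote character. The function name `read_quoted_ident` and the sample input are assumptions made only for this illustration.

use std::iter::Peekable;
use std::str::Chars;

// Reads characters up to the unescaped closing quote; a doubled quote is
// collapsed to one literal quote character. Returns the identifier body and
// whether the closing quote was actually found. (Illustrative only; the
// patched `parse_quoted_ident` returns the last char instead of a bool.)
fn read_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, bool) {
    let mut s = String::new();
    while let Some(ch) = chars.next() {
        if ch == quote_end {
            if chars.peek() == Some(&quote_end) {
                chars.next(); // consume the second quote of the pair
                s.push(ch); // keep a single literal quote
            } else {
                return (s, true); // unescaped quote terminates the identifier
            }
        } else {
            s.push(ch);
        }
    }
    (s, false) // input ended before the closing quote
}

fn main() {
    // Input starts just after the opening quote, as the tokenizer would see it.
    let mut chars = r#"quoted "" ident" FROM t"#.chars().peekable();
    let (ident, closed) = read_quoted_ident(&mut chars, '"');
    assert_eq!(ident, r#"quoted " ident"#);
    assert!(closed);
}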