Skip to content

Commit 34fedf3

Browse files
authored
fix: Handle double quotes inside quoted identifiers correctly (#411)
* fix: Handle double quotes inside quoted identifiers correctly This fixes #410 for standard SQL, however I don't know enough about other dialects to know if they handle this differently. May need more extensive testing as well. * refactor: Make quoted identifier parsing a separate function * test: Check that quoted identifier tokenization works Added `pretty_assertions` so that the `assert_eq!` in the tokenization is readable * test: Check that quoted identifiers work in mysql * chore: cargo clippy
1 parent 2614576 commit 34fedf3

File tree

6 files changed

+94
-4
lines changed

6 files changed

+94
-4
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ serde_json = { version = "1.0", optional = true }
3636
[dev-dependencies]
3737
simple_logger = "2.1"
3838
matches = "0.1"
39+
pretty_assertions = "1"
3940

4041
[package.metadata.release]
4142
# Instruct `cargo release` to not run `cargo publish` locally:

src/ast/mod.rs

+13-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use alloc::{
2323
string::{String, ToString},
2424
vec::Vec,
2525
};
26-
use core::fmt;
26+
use core::fmt::{self, Write};
2727

2828
#[cfg(feature = "serde")]
2929
use serde::{Deserialize, Serialize};
@@ -127,7 +127,18 @@ impl From<&str> for Ident {
127127
impl fmt::Display for Ident {
128128
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
129129
match self.quote_style {
130-
Some(q) if q == '"' || q == '\'' || q == '`' => write!(f, "{}{}{}", q, self.value, q),
130+
Some(q) if q == '"' || q == '\'' || q == '`' => {
131+
f.write_char(q)?;
132+
let mut first = true;
133+
for s in self.value.split_inclusive(q) {
134+
if !first {
135+
f.write_char(q)?;
136+
}
137+
first = false;
138+
f.write_str(s)?;
139+
}
140+
f.write_char(q)
141+
}
131142
Some(q) if q == '[' => write!(f, "[{}]", self.value),
132143
None => f.write_str(&self.value),
133144
_ => panic!("unexpected quote style"),

src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
#[cfg(not(feature = "std"))]
4040
extern crate alloc;
4141

42+
#[macro_use]
43+
#[cfg(test)]
44+
extern crate pretty_assertions;
45+
4246
pub mod ast;
4347
#[macro_use]
4448
pub mod dialect;

src/tokenizer.rs

+40-2
Original file line numberDiff line numberDiff line change
@@ -418,8 +418,9 @@ impl<'a> Tokenizer<'a> {
418418
quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
419419
chars.next(); // consume the opening quote
420420
let quote_end = Word::matching_end_quote(quote_start);
421-
let s = peeking_take_while(chars, |ch| ch != quote_end);
422-
if chars.next() == Some(quote_end) {
421+
let (s, last_char) = parse_quoted_ident(chars, quote_end);
422+
423+
if last_char == Some(quote_end) {
423424
Ok(Some(Token::make_word(&s, Some(quote_start))))
424425
} else {
425426
self.tokenizer_error(format!(
@@ -728,6 +729,25 @@ fn peeking_take_while(
728729
s
729730
}
730731

732+
/// Collects the body of a quoted identifier from `chars`, stopping at the
/// first `quote_end` that is not immediately followed by another `quote_end`.
///
/// A doubled `quote_end` is treated as an escaped quote: both characters are
/// consumed and a single `quote_end` is appended to the result.
///
/// Returns the collected text together with `Some(quote_end)` when the closing
/// quote was found and consumed, or `None` when the input ran out first.
/// The visible call site consumes the opening quote before calling this.
fn parse_quoted_ident(chars: &mut Peekable<Chars<'_>>, quote_end: char) -> (String, Option<char>) {
733+
let mut last_char = None;
734+
let mut s = String::new();
735+
// `for` cannot be used here because the body needs `chars.peek()`.
while let Some(ch) = chars.next() {
736+
if ch == quote_end {
737+
// Look one character ahead to tell an escaped quote from the terminator.
if chars.peek() == Some(&quote_end) {
738+
// Escaped quote: drop the second quote and keep one in the output.
chars.next();
739+
s.push(ch);
740+
} else {
741+
// Closing quote: record it so the caller can tell it was terminated.
last_char = Some(quote_end);
742+
break;
743+
}
744+
} else {
745+
s.push(ch);
746+
}
747+
}
748+
(s, last_char)
749+
}
750+
731751
#[cfg(test)]
732752
mod tests {
733753
use super::*;
@@ -1276,6 +1296,24 @@ mod tests {
12761296
compare(expected, tokens);
12771297
}
12781298

1299+
// Checks that a doubled double quote inside a double-quoted identifier is
// tokenized as one literal quote character, including when the doubled quote
// sits directly before the closing quote.
#[test]
1300+
fn tokenize_quoted_identifier() {
1301+
// Three quoted identifiers separated by single spaces: `a " b`, `a "`, `c ""`.
let sql = r#" "a "" b" "a """ "c """"" "#;
1302+
let dialect = GenericDialect {};
1303+
let mut tokenizer = Tokenizer::new(&dialect, sql);
1304+
let tokens = tokenizer.tokenize().unwrap();
1305+
let expected = vec![
1306+
Token::Whitespace(Whitespace::Space),
1307+
Token::make_word(r#"a " b"#, Some('"')),
1308+
Token::Whitespace(Whitespace::Space),
1309+
Token::make_word(r#"a ""#, Some('"')),
1310+
Token::Whitespace(Whitespace::Space),
1311+
Token::make_word(r#"c """#, Some('"')),
1312+
Token::Whitespace(Whitespace::Space),
1313+
];
1314+
compare(expected, tokens);
1315+
}
1316+
12791317
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
12801318
//println!("------------------------------");
12811319
//println!("tokens = {:?}", actual);

tests/sqlparser_mysql.rs

+31
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,37 @@ fn parse_quote_identifiers() {
178178
}
179179
}
180180

181+
// Checks that a doubled backtick inside a backtick-quoted identifier yields
// an `Ident` whose value contains a single backtick, with the quote style
// recorded as '`'.
// NOTE(review): `verified_stmt` presumably also checks that the statement
// serializes back to the same SQL — confirm against the test utilities.
#[test]
182+
fn parse_quote_identifiers_2() {
183+
let sql = "SELECT `quoted `` identifier`";
184+
assert_eq!(
185+
mysql().verified_stmt(sql),
186+
Statement::Query(Box::new(Query {
187+
with: None,
188+
body: SetExpr::Select(Box::new(Select {
189+
distinct: false,
190+
top: None,
191+
projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident {
192+
// The escaped pair in the SQL text collapses to one backtick here.
value: "quoted ` identifier".into(),
193+
quote_style: Some('`'),
194+
}))],
195+
from: vec![],
196+
lateral_views: vec![],
197+
selection: None,
198+
group_by: vec![],
199+
cluster_by: vec![],
200+
distribute_by: vec![],
201+
sort_by: vec![],
202+
having: None,
203+
})),
204+
order_by: vec![],
205+
limit: None,
206+
offset: None,
207+
fetch: None,
208+
}))
209+
);
210+
}
211+
181212
#[test]
182213
fn parse_unterminated_escape() {
183214
let sql = r#"SELECT 'I\'m not fine\'"#;

tests/sqlparser_postgres.rs

+5
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,11 @@ fn parse_comments() {
891891
}
892892
}
893893

894+
// Runs `verified_stmt` on a SELECT whose double-quoted identifier contains a
// doubled double quote, using the dialects returned by `pg_and_generic()`.
// NOTE(review): `verified_stmt` presumably asserts a parse/serialize round
// trip — confirm against the test utilities.
#[test]
895+
fn parse_quoted_identifier() {
896+
pg_and_generic().verified_stmt(r#"SELECT "quoted "" ident""#);
897+
}
898+
894899
fn pg() -> TestedDialects {
895900
TestedDialects {
896901
dialects: vec![Box::new(PostgreSqlDialect {})],

0 commit comments

Comments
 (0)