Skip to content

Commit 45cee8b

Browse files
committed
Correctly look for the end delimiter of a dollar-quoted string
Currently the tokenizer throws an error for ```sql SELECT $abc$x$ab$abc$ ```, even though `$abc$x$ab$abc$` is a valid dollar-quoted string (tag `abc`, contents `x$ab`). The existing logic is also quite difficult to read, so this change simplifies it.
1 parent 182dc31 commit 45cee8b

File tree

1 file changed

+129
-47
lines changed

1 file changed

+129
-47
lines changed

src/tokenizer.rs

Lines changed: 129 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,46 +1323,33 @@ impl<'a> Tokenizer<'a> {
13231323
if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
13241324
chars.next();
13251325

1326-
'searching_for_end: loop {
1327-
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
1328-
match chars.peek() {
1329-
Some('$') => {
1330-
chars.next();
1331-
let mut maybe_s = String::from("$");
1332-
for c in value.chars() {
1333-
if let Some(next_char) = chars.next() {
1334-
maybe_s.push(next_char);
1335-
if next_char != c {
1336-
// This doesn't match the dollar quote delimiter so this
1337-
// is not the end of the string.
1338-
s.push_str(&maybe_s);
1339-
continue 'searching_for_end;
1340-
}
1341-
} else {
1342-
return self.tokenizer_error(
1343-
chars.location(),
1344-
"Unterminated dollar-quoted, expected $",
1345-
);
1326+
let mut temp = String::new();
1327+
let end_delimiter = format!("${}$", value);
1328+
1329+
loop {
1330+
match chars.next() {
1331+
Some(ch) => {
1332+
temp.push(ch);
1333+
1334+
if temp.ends_with(&end_delimiter) {
1335+
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
1336+
s.push_str(temp);
13461337
}
1347-
}
1348-
if chars.peek() == Some(&'$') {
1349-
chars.next();
1350-
maybe_s.push('$');
1351-
// maybe_s matches the end delimiter
1352-
break 'searching_for_end;
1353-
} else {
1354-
// This also doesn't match the dollar quote delimiter as there are
1355-
// more characters before the second dollar so this is not the end
1356-
// of the string.
1357-
s.push_str(&maybe_s);
1358-
continue 'searching_for_end;
1338+
break;
13591339
}
13601340
}
1361-
_ => {
1341+
None => {
1342+
if temp.ends_with(&end_delimiter) {
1343+
if let Some(temp) = temp.strip_suffix(&end_delimiter) {
1344+
s.push_str(temp);
1345+
}
1346+
break;
1347+
}
1348+
13621349
return self.tokenizer_error(
13631350
chars.location(),
13641351
"Unterminated dollar-quoted, expected $",
1365-
)
1352+
);
13661353
}
13671354
}
13681355
}
@@ -2305,20 +2292,67 @@ mod tests {
23052292

23062293
#[test]
23072294
fn tokenize_dollar_quoted_string_tagged() {
2308-
let sql = String::from(
2309-
"SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
2310-
);
2311-
let dialect = GenericDialect {};
2312-
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2313-
let expected = vec![
2314-
Token::make_keyword("SELECT"),
2315-
Token::Whitespace(Whitespace::Space),
2316-
Token::DollarQuotedString(DollarQuotedString {
2317-
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
2318-
tag: Some("tag".into()),
2319-
}),
2295+
let test_cases = vec![
2296+
(
2297+
String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"),
2298+
vec![
2299+
Token::make_keyword("SELECT"),
2300+
Token::Whitespace(Whitespace::Space),
2301+
Token::DollarQuotedString(DollarQuotedString {
2302+
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
2303+
tag: Some("tag".into()),
2304+
})
2305+
]
2306+
),
2307+
(
2308+
String::from("SELECT $abc$x$ab$abc$"),
2309+
vec![
2310+
Token::make_keyword("SELECT"),
2311+
Token::Whitespace(Whitespace::Space),
2312+
Token::DollarQuotedString(DollarQuotedString {
2313+
value: "x$ab".into(),
2314+
tag: Some("abc".into()),
2315+
})
2316+
]
2317+
),
2318+
(
2319+
String::from("SELECT $abc$$abc$"),
2320+
vec![
2321+
Token::make_keyword("SELECT"),
2322+
Token::Whitespace(Whitespace::Space),
2323+
Token::DollarQuotedString(DollarQuotedString {
2324+
value: "".into(),
2325+
tag: Some("abc".into()),
2326+
})
2327+
]
2328+
),
2329+
(
2330+
String::from("0$abc$$abc$1"),
2331+
vec![
2332+
Token::Number("0".into(), false),
2333+
Token::DollarQuotedString(DollarQuotedString {
2334+
value: "".into(),
2335+
tag: Some("abc".into()),
2336+
}),
2337+
Token::Number("1".into(), false),
2338+
]
2339+
),
2340+
(
2341+
String::from("$function$abc$q$data$q$$function$"),
2342+
vec![
2343+
Token::DollarQuotedString(DollarQuotedString {
2344+
value: "abc$q$data$q$".into(),
2345+
tag: Some("function".into()),
2346+
}),
2347+
]
2348+
),
23202349
];
2321-
compare(expected, tokens);
2350+
2351+
let dialect = GenericDialect {};
2352+
for (sql, expected) in test_cases {
2353+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2354+
compare(expected, tokens);
2355+
}
23222356
}
23232357

23242358
#[test]
@@ -2337,6 +2371,22 @@ mod tests {
23372371
);
23382372
}
23392373

2374+
#[test]
2375+
fn tokenize_dollar_quoted_string_tagged_unterminated_mirror() {
2376+
let sql = String::from("SELECT $abc$abc$");
2377+
let dialect = GenericDialect {};
2378+
assert_eq!(
2379+
Tokenizer::new(&dialect, &sql).tokenize(),
2380+
Err(TokenizerError {
2381+
message: "Unterminated dollar-quoted, expected $".into(),
2382+
location: Location {
2383+
line: 1,
2384+
column: 17
2385+
}
2386+
})
2387+
);
2388+
}
2389+
23402390
#[test]
23412391
fn tokenize_dollar_placeholder() {
23422392
let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC");
@@ -2361,6 +2411,38 @@ mod tests {
23612411
);
23622412
}
23632413

2414+
#[test]
2415+
fn tokenize_nested_dollar_quoted_strings() {
2416+
let sql = String::from("SELECT $tag$dollar $nested$ string$tag$");
2417+
let dialect = GenericDialect {};
2418+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2419+
let expected = vec![
2420+
Token::make_keyword("SELECT"),
2421+
Token::Whitespace(Whitespace::Space),
2422+
Token::DollarQuotedString(DollarQuotedString {
2423+
value: "dollar $nested$ string".into(),
2424+
tag: Some("tag".into()),
2425+
}),
2426+
];
2427+
compare(expected, tokens);
2428+
}
2429+
2430+
#[test]
2431+
fn tokenize_dollar_quoted_string_untagged_empty() {
2432+
let sql = String::from("SELECT $$$$");
2433+
let dialect = GenericDialect {};
2434+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
2435+
let expected = vec![
2436+
Token::make_keyword("SELECT"),
2437+
Token::Whitespace(Whitespace::Space),
2438+
Token::DollarQuotedString(DollarQuotedString {
2439+
value: "".into(),
2440+
tag: None,
2441+
}),
2442+
];
2443+
compare(expected, tokens);
2444+
}
2445+
23642446
#[test]
23652447
fn tokenize_dollar_quoted_string_untagged() {
23662448
let sql =

0 commit comments

Comments
 (0)