Skip to content

Fix dollar quoted string tokenizer #1193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 105 additions & 25 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1119,38 +1119,49 @@ impl<'a> Tokenizer<'a> {

if let Some('$') = chars.peek() {
chars.next();
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));

match chars.peek() {
Some('$') => {
chars.next();
for c in value.chars() {
let next_char = chars.next();
if Some(c) != next_char {
return self.tokenizer_error(
chars.location(),
format!(
"Unterminated dollar-quoted string at or near \"{value}\""
),
);
'searching_for_end: loop {
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
match chars.peek() {
Some('$') => {
chars.next();
let mut maybe_s = String::from("$");
for c in value.chars() {
if let Some(next_char) = chars.next() {
maybe_s.push(next_char);
if next_char != c {
// This doesn't match the dollar quote delimiter so this
// is not the end of the string.
s.push_str(&maybe_s);
continue 'searching_for_end;
}
} else {
return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted, expected $",
);
}
}
if chars.peek() == Some(&'$') {
chars.next();
maybe_s.push('$');
// maybe_s matches the end delimiter
break 'searching_for_end;
} else {
// This also doesn't match the dollar quote delimiter as there are
// more characters before the second dollar so this is not the end
// of the string.
s.push_str(&maybe_s);
continue 'searching_for_end;
}
}

if let Some('$') = chars.peek() {
chars.next();
} else {
_ => {
return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted string, expected $",
);
"Unterminated dollar-quoted, expected $",
)
}
}
_ => {
return self.tokenizer_error(
chars.location(),
"Unterminated dollar-quoted, expected $",
);
}
}
} else {
return Ok(Token::Placeholder(String::from("$") + &value));
Expand Down Expand Up @@ -1906,6 +1917,75 @@ mod tests {
compare(expected, tokens);
}

/// A string opened with `$tag$` must run until the matching `$tag$`, even when
/// the body contains lone `$`, a `$...$` pair with different text, and `$$`.
#[test]
fn tokenize_dollar_quoted_string_tagged() {
    let dialect = GenericDialect {};
    let query = String::from(
        "SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
    );

    // Everything between the opening and the closing `$tag$` is the string body;
    // note the body itself ends in `$$`, directly followed by the closing `$tag$`.
    let body = DollarQuotedString {
        value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
        tag: Some("tag".into()),
    };
    let want = vec![
        Token::make_keyword("SELECT"),
        Token::Whitespace(Whitespace::Space),
        Token::DollarQuotedString(body),
    ];

    let got = Tokenizer::new(&dialect, &query).tokenize().unwrap();
    compare(want, got);
}

/// A string opened with `$tag$` whose input never contains a closing `$tag$`
/// (it ends in `$different tag$` instead) must be reported as unterminated.
#[test]
fn tokenize_dollar_quoted_string_tagged_unterminated() {
    let dialect = GenericDialect {};
    let query = String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$different tag$");

    // The input is 90 characters long, so the expected column is one past its end.
    let unterminated = TokenizerError {
        message: "Unterminated dollar-quoted, expected $".into(),
        location: Location {
            line: 1,
            column: 91,
        },
    };

    assert_eq!(
        Tokenizer::new(&dialect, &query).tokenize(),
        Err(unterminated)
    );
}

/// An untagged `$$ ... $$` string may contain lone `$` characters and
/// `$...$` runs; only the next `$$` closes it.
#[test]
fn tokenize_dollar_quoted_string_untagged() {
    let dialect = GenericDialect {};
    let query =
        String::from("SELECT $$within dollar '$' quoted strings have $tags like this$ $$");

    // The trailing space before the closing `$$` belongs to the string body.
    let body = DollarQuotedString {
        value: "within dollar '$' quoted strings have $tags like this$ ".into(),
        tag: None,
    };
    let want = vec![
        Token::make_keyword("SELECT"),
        Token::Whitespace(Whitespace::Space),
        Token::DollarQuotedString(body),
    ];

    let got = Tokenizer::new(&dialect, &query).tokenize().unwrap();
    compare(want, got);
}

/// An untagged string opened with `$$` whose input never contains a second
/// `$$` must be reported as unterminated.
#[test]
fn tokenize_dollar_quoted_string_untagged_unterminated() {
    let dialect = GenericDialect {};
    let query = String::from(
        "SELECT $$dollar '$' quoted strings have $tags like this$ or like this $different tag$",
    );

    // The input is 85 characters long, so the expected column is one past its end.
    let unterminated = TokenizerError {
        message: "Unterminated dollar-quoted string".into(),
        location: Location {
            line: 1,
            column: 86,
        },
    };

    assert_eq!(
        Tokenizer::new(&dialect, &query).tokenize(),
        Err(unterminated)
    );
}

#[test]
fn tokenize_right_arrow() {
let sql = String::from("FUNCTION(key=>value)");
Expand Down
Loading