Skip to content

Commit 12ccf48

Browse files
ZacJW authored and JichaoS committed
Fix dollar quoted string tokenizer (apache#1193)
1 parent 1858331 commit 12ccf48

File tree

1 file changed

+105
-25
lines changed

1 file changed

+105
-25
lines changed

src/tokenizer.rs

+105 -25
Original file line number | Diff line number | Diff line change
@@ -1119,38 +1119,49 @@ impl<'a> Tokenizer<'a> {
11191119

11201120
if let Some('$') = chars.peek() {
11211121
chars.next();
1122-
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
11231122

1124-
match chars.peek() {
1125-
Some('$') => {
1126-
chars.next();
1127-
for c in value.chars() {
1128-
let next_char = chars.next();
1129-
if Some(c) != next_char {
1130-
return self.tokenizer_error(
1131-
chars.location(),
1132-
format!(
1133-
"Unterminated dollar-quoted string at or near \"{value}\""
1134-
),
1135-
);
1123+
'searching_for_end: loop {
1124+
s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
1125+
match chars.peek() {
1126+
Some('$') => {
1127+
chars.next();
1128+
let mut maybe_s = String::from("$");
1129+
for c in value.chars() {
1130+
if let Some(next_char) = chars.next() {
1131+
maybe_s.push(next_char);
1132+
if next_char != c {
1133+
// This doesn't match the dollar quote delimiter so this
1134+
// is not the end of the string.
1135+
s.push_str(&maybe_s);
1136+
continue 'searching_for_end;
1137+
}
1138+
} else {
1139+
return self.tokenizer_error(
1140+
chars.location(),
1141+
"Unterminated dollar-quoted, expected $",
1142+
);
1143+
}
1144+
}
1145+
if chars.peek() == Some(&'$') {
1146+
chars.next();
1147+
maybe_s.push('$');
1148+
// maybe_s matches the end delimiter
1149+
break 'searching_for_end;
1150+
} else {
1151+
// This also doesn't match the dollar quote delimiter as there are
1152+
// more characters before the second dollar so this is not the end
1153+
// of the string.
1154+
s.push_str(&maybe_s);
1155+
continue 'searching_for_end;
11361156
}
11371157
}
1138-
1139-
if let Some('$') = chars.peek() {
1140-
chars.next();
1141-
} else {
1158+
_ => {
11421159
return self.tokenizer_error(
11431160
chars.location(),
1144-
"Unterminated dollar-quoted string, expected $",
1145-
);
1161+
"Unterminated dollar-quoted, expected $",
1162+
)
11461163
}
11471164
}
1148-
_ => {
1149-
return self.tokenizer_error(
1150-
chars.location(),
1151-
"Unterminated dollar-quoted, expected $",
1152-
);
1153-
}
11541165
}
11551166
} else {
11561167
return Ok(Token::Placeholder(String::from("$") + &value));
@@ -1906,6 +1917,75 @@ mod tests {
19061917
compare(expected, tokens);
19071918
}
19081919

1920+
#[test]
1921+
fn tokenize_dollar_quoted_string_tagged() {
1922+
let sql = String::from(
1923+
"SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
1924+
);
1925+
let dialect = GenericDialect {};
1926+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
1927+
let expected = vec![
1928+
Token::make_keyword("SELECT"),
1929+
Token::Whitespace(Whitespace::Space),
1930+
Token::DollarQuotedString(DollarQuotedString {
1931+
value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
1932+
tag: Some("tag".into()),
1933+
}),
1934+
];
1935+
compare(expected, tokens);
1936+
}
1937+
1938+
#[test]
1939+
fn tokenize_dollar_quoted_string_tagged_unterminated() {
1940+
let sql = String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$different tag$");
1941+
let dialect = GenericDialect {};
1942+
assert_eq!(
1943+
Tokenizer::new(&dialect, &sql).tokenize(),
1944+
Err(TokenizerError {
1945+
message: "Unterminated dollar-quoted, expected $".into(),
1946+
location: Location {
1947+
line: 1,
1948+
column: 91
1949+
}
1950+
})
1951+
);
1952+
}
1953+
1954+
#[test]
1955+
fn tokenize_dollar_quoted_string_untagged() {
1956+
let sql =
1957+
String::from("SELECT $$within dollar '$' quoted strings have $tags like this$ $$");
1958+
let dialect = GenericDialect {};
1959+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
1960+
let expected = vec![
1961+
Token::make_keyword("SELECT"),
1962+
Token::Whitespace(Whitespace::Space),
1963+
Token::DollarQuotedString(DollarQuotedString {
1964+
value: "within dollar '$' quoted strings have $tags like this$ ".into(),
1965+
tag: None,
1966+
}),
1967+
];
1968+
compare(expected, tokens);
1969+
}
1970+
1971+
#[test]
1972+
fn tokenize_dollar_quoted_string_untagged_unterminated() {
1973+
let sql = String::from(
1974+
"SELECT $$dollar '$' quoted strings have $tags like this$ or like this $different tag$",
1975+
);
1976+
let dialect = GenericDialect {};
1977+
assert_eq!(
1978+
Tokenizer::new(&dialect, &sql).tokenize(),
1979+
Err(TokenizerError {
1980+
message: "Unterminated dollar-quoted string".into(),
1981+
location: Location {
1982+
line: 1,
1983+
column: 86
1984+
}
1985+
})
1986+
);
1987+
}
1988+
19091989
#[test]
19101990
fn tokenize_right_arrow() {
19111991
let sql = String::from("FUNCTION(key=>value)");

0 commit comments

Comments
 (0)