Skip to content

Commit 7867ba3

Browse files
author
Aleksei Piianin
authored
Redshift: Fix parsing for quoted numbered columns (#1576)
1 parent 316bb14 commit 7867ba3

File tree

4 files changed

+200
-36
lines changed

4 files changed

+200
-36
lines changed

src/dialect/mod.rs

+41-12
Original file line numberDiff line numberDiff line change
@@ -128,14 +128,39 @@ pub trait Dialect: Debug + Any {
128128
ch == '"' || ch == '`'
129129
}
130130

131-
/// Return the character used to quote identifiers.
132-
fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
131+
/// Determine if a character may start a nested quoted identifier, i.e. a quoted identifier wrapped in a second, outer pair of quotes.
132+
/// Example: Redshift accepts all of the following quote styles as equivalent:
133+
/// ```sql
134+
/// SELECT 1 AS foo;
135+
/// SELECT 1 AS "foo";
136+
/// SELECT 1 AS [foo];
137+
/// SELECT 1 AS ["foo"];
138+
/// ```
139+
fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
140+
false // default implementation: no character starts a nested quoted identifier
141+
}
142+
143+
/// Only applicable when [`Self::is_nested_delimited_identifier_start`] returns true.
144+
/// If the upcoming characters potentially represent a nested identifier, then this method
145+
/// returns a tuple containing the outer quote style and, if present, the inner (nested) quote style.
146+
///
147+
/// Example (Redshift):
148+
/// ```text
149+
/// `["foo"]` => Some((`[`, Some(`"`)))
150+
/// `[foo]` => Some((`[`, None))
151+
/// `[0]` => None
152+
/// `"foo"` => None
153+
/// ```
154+
fn peek_nested_delimited_identifier_quotes(
155+
&self,
156+
mut _chars: Peekable<Chars<'_>>,
157+
) -> Option<(char, Option<char>)> {
133158
None // default implementation: never reports a nested identifier
134159
}
135160

136-
/// Determine if quoted characters are proper for identifier
137-
fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
138-
true
161+
/// Return the character used to quote identifiers; this default implementation returns `None`.
162+
fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
163+
None
139164
}
140165

141166
/// Determine if a character is a valid start character for an unquoted identifier
@@ -869,6 +894,17 @@ mod tests {
869894
self.0.is_delimited_identifier_start(ch)
870895
}
871896

897+
fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
898+
self.0.is_nested_delimited_identifier_start(ch) // delegate to the wrapped value `self.0`
899+
}
900+
901+
fn peek_nested_delimited_identifier_quotes(
902+
&self,
903+
chars: std::iter::Peekable<std::str::Chars<'_>>,
904+
) -> Option<(char, Option<char>)> {
905+
self.0.peek_nested_delimited_identifier_quotes(chars) // delegate to the wrapped value `self.0`
906+
}
907+
872908
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
873909
self.0.identifier_quote_style(identifier) // delegate to the wrapped value `self.0`
874910
}
@@ -877,13 +913,6 @@ mod tests {
877913
self.0.supports_string_literal_backslash_escape()
878914
}
879915

880-
fn is_proper_identifier_inside_quotes(
881-
&self,
882-
chars: std::iter::Peekable<std::str::Chars<'_>>,
883-
) -> bool {
884-
self.0.is_proper_identifier_inside_quotes(chars)
885-
}
886-
887916
fn supports_filter_during_aggregation(&self) -> bool {
888917
self.0.supports_filter_during_aggregation() // delegate to the wrapped value `self.0`
889918
}

src/dialect/redshift.rs

+39-9
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,51 @@ pub struct RedshiftSqlDialect {}
3232
// in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will
3333
// be a json path
3434
impl Dialect for RedshiftSqlDialect {
35-
fn is_delimited_identifier_start(&self, ch: char) -> bool {
36-
ch == '"' || ch == '['
35+
/// Determine if a character may start a nested quoted identifier; for Redshift only `[` does.
36+
/// Example: Redshift accepts all of the following quote styles as equivalent:
37+
/// ```sql
38+
/// SELECT 1 AS foo;
39+
/// SELECT 1 AS "foo";
40+
/// SELECT 1 AS [foo];
41+
/// SELECT 1 AS ["foo"];
42+
/// ```
43+
fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
44+
ch == '['
3745
}
3846

39-
/// Determine if quoted characters are proper for identifier
40-
/// It's needed to distinguish treating square brackets as quotes from
41-
/// treating them as json path. If there is identifier then we assume
42-
/// there is no json path.
43-
fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable<Chars<'_>>) -> bool {
47+
/// Only applicable when [`Self::is_nested_delimited_identifier_start`] returns true.
48+
/// If the upcoming characters potentially represent a nested identifier, then this method
49+
/// returns a tuple containing the outer quote style and, if present, the inner (nested) quote style.
50+
///
51+
/// Example (Redshift):
52+
/// ```text
53+
/// `["foo"]` => Some((`[`, Some(`"`)))
54+
/// `[foo]` => Some((`[`, None))
55+
/// `[0]` => None
56+
/// `"foo"` => None
57+
/// ```
58+
fn peek_nested_delimited_identifier_quotes(
59+
&self,
60+
mut chars: Peekable<Chars<'_>>,
61+
) -> Option<(char, Option<char>)> {
62+
if chars.peek() != Some(&'[') {
63+
return None;
64+
}
65+
4466
chars.next(); // step past the opening `[`
67+
4568
let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable(); // ignore whitespace after the `[`
69+
4670
if let Some(&ch) = not_white_chars.peek() {
47-
return self.is_identifier_start(ch);
71+
if ch == '"' {
72+
return Some(('[', Some('"'))); // `["...`: bracket wrapping a double-quoted identifier
73+
}
74+
if self.is_identifier_start(ch) {
75+
return Some(('[', None)); // `[foo`: plain bracket-quoted identifier
76+
}
4877
}
49-
false
78+
79+
None
5080
}
5181

5282
fn is_identifier_start(&self, ch: char) -> bool {

src/tokenizer.rs

+67-10
Original file line numberDiff line numberDiff line change
@@ -1075,25 +1075,61 @@ impl<'a> Tokenizer<'a> {
10751075
Ok(Some(Token::DoubleQuotedString(s)))
10761076
}
10771077
// delimited (quoted) identifier
1078+
quote_start if self.dialect.is_delimited_identifier_start(ch) => {
1079+
let word = self.tokenize_quoted_identifier(quote_start, chars)?;
1080+
Ok(Some(Token::make_word(&word, Some(quote_start))))
1081+
}
1082+
// Potentially nested delimited (quoted) identifier
10781083
quote_start
1079-
if self.dialect.is_delimited_identifier_start(ch)
1084+
if self
1085+
.dialect
1086+
.is_nested_delimited_identifier_start(quote_start)
10801087
&& self
10811088
.dialect
1082-
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
1089+
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
1090+
.is_some() =>
10831091
{
1084-
let error_loc = chars.location();
1085-
chars.next(); // consume the opening quote
1092+
let Some((quote_start, nested_quote_start)) = self
1093+
.dialect
1094+
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
1095+
else {
1096+
return self.tokenizer_error(
1097+
chars.location(),
1098+
format!("Expected nested delimiter '{quote_start}' before EOF."),
1099+
);
1100+
};
1101+
1102+
let Some(nested_quote_start) = nested_quote_start else {
1103+
let word = self.tokenize_quoted_identifier(quote_start, chars)?;
1104+
return Ok(Some(Token::make_word(&word, Some(quote_start))));
1105+
};
1106+
1107+
let mut word = vec![];
10861108
let quote_end = Word::matching_end_quote(quote_start);
1087-
let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
1109+
let nested_quote_end = Word::matching_end_quote(nested_quote_start);
1110+
let error_loc = chars.location();
10881111

1089-
if last_char == Some(quote_end) {
1090-
Ok(Some(Token::make_word(&s, Some(quote_start))))
1091-
} else {
1092-
self.tokenizer_error(
1112+
chars.next(); // skip the first delimiter
1113+
peeking_take_while(chars, |ch| ch.is_whitespace());
1114+
if chars.peek() != Some(&nested_quote_start) {
1115+
return self.tokenizer_error(
1116+
error_loc,
1117+
format!("Expected nested delimiter '{nested_quote_start}' before EOF."),
1118+
);
1119+
}
1120+
word.push(nested_quote_start.into());
1121+
// Pass the *opening* nested quote: `tokenize_quoted_identifier` takes `quote_start` and derives the closing quote itself.
word.push(self.tokenize_quoted_identifier(nested_quote_start, chars)?);
1122+
word.push(nested_quote_end.into());
1123+
peeking_take_while(chars, |ch| ch.is_whitespace());
1124+
if chars.peek() != Some(&quote_end) {
1125+
return self.tokenizer_error(
10931126
error_loc,
10941127
format!("Expected close delimiter '{quote_end}' before EOF."),
1095-
)
1128+
);
10961129
}
1130+
chars.next(); // skip close delimiter
1131+
1132+
Ok(Some(Token::make_word(&word.concat(), Some(quote_start))))
10971133
}
10981134
// numbers and period
10991135
'0'..='9' | '.' => {
@@ -1597,6 +1633,27 @@ impl<'a> Tokenizer<'a> {
15971633
s
15981634
}
15991635

1636+
/// Read a quoted identifier that begins with the opening quote `quote_start`.
///
/// Returns the string produced by `parse_quoted_ident`, or a tokenizer error located at
/// the opening quote when the last character read is not the matching closing quote.
1637+
fn tokenize_quoted_identifier(
1638+
&self,
1639+
quote_start: char,
1640+
chars: &mut State,
1641+
) -> Result<String, TokenizerError> {
1642+
let error_loc = chars.location();
1643+
chars.next(); // consume the opening quote
1644+
let quote_end = Word::matching_end_quote(quote_start);
1645+
let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
1646+
1647+
if last_char == Some(quote_end) {
1648+
Ok(s)
1649+
} else {
1650+
self.tokenizer_error(
1651+
error_loc,
1652+
format!("Expected close delimiter '{quote_end}' before EOF."),
1653+
)
1654+
}
1655+
}
1656+
16001657
/// Read a single quoted string, starting with the opening quote.
16011658
fn tokenize_escaped_single_quoted_string(
16021659
&self,

tests/sqlparser_redshift.rs

+53-5
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ fn parse_delimited_identifiers() {
157157
}
158158

159159
redshift().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#);
160+
// A quoted column name starting with a number
161+
redshift().verified_stmt(r#"CREATE TABLE "foo" ("1" INT)"#);
160162
redshift().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#);
161163
//TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#);
162164
}
@@ -203,7 +205,7 @@ fn test_redshift_json_path() {
203205
path: JsonPath {
204206
path: vec![
205207
JsonPathElem::Bracket {
206-
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
208+
key: Expr::Value(number("0"))
207209
},
208210
JsonPathElem::Dot {
209211
key: "o_orderkey".to_string(),
@@ -226,7 +228,7 @@ fn test_redshift_json_path() {
226228
path: JsonPath {
227229
path: vec![
228230
JsonPathElem::Bracket {
229-
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
231+
key: Expr::Value(number("0"))
230232
},
231233
JsonPathElem::Bracket {
232234
key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
@@ -250,7 +252,7 @@ fn test_redshift_json_path() {
250252
path: JsonPath {
251253
path: vec![
252254
JsonPathElem::Bracket {
253-
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
255+
key: Expr::Value(number("0"))
254256
},
255257
JsonPathElem::Bracket {
256258
key: Expr::Value(Value::SingleQuotedString("id".to_owned()))
@@ -260,6 +262,31 @@ fn test_redshift_json_path() {
260262
},
261263
expr_from_projection(only(&select.projection))
262264
);
265+
266+
let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM customer_orders_lineitem"#;
267+
let select = dialects.verified_only_select(sql);
268+
assert_eq!(
269+
&Expr::JsonAccess {
270+
value: Box::new(Expr::CompoundIdentifier(vec![
271+
Ident::new("db1"),
272+
Ident::new("sc1"),
273+
Ident::new("tbl1"),
274+
Ident::new("col1")
275+
])),
276+
path: JsonPath {
277+
path: vec![
278+
JsonPathElem::Bracket {
279+
key: Expr::Value(number("0"))
280+
},
281+
JsonPathElem::Dot {
282+
key: "id".to_string(),
283+
quoted: true,
284+
}
285+
]
286+
}
287+
},
288+
expr_from_projection(only(&select.projection))
289+
);
263290
}
264291

265292
#[test]
@@ -276,7 +303,7 @@ fn test_parse_json_path_from() {
276303
&Some(JsonPath {
277304
path: vec![
278305
JsonPathElem::Bracket {
279-
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
306+
key: Expr::Value(number("0"))
280307
},
281308
JsonPathElem::Dot {
282309
key: "a".to_string(),
@@ -300,7 +327,7 @@ fn test_parse_json_path_from() {
300327
&Some(JsonPath {
301328
path: vec![
302329
JsonPathElem::Bracket {
303-
key: Expr::Value(Value::Number("0".parse().unwrap(), false))
330+
key: Expr::Value(number("0"))
304331
},
305332
JsonPathElem::Dot {
306333
key: "a".to_string(),
@@ -334,3 +361,24 @@ fn test_parse_json_path_from() {
334361
_ => panic!(),
335362
}
336363
}
364+
365+
#[test]
366+
fn test_parse_select_numbered_columns() {
367+
// Double-quoted aliases that start with a digit
368+
redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
369+
redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1abc" FROM a"#);
370+
}
371+
372+
#[test]
373+
fn test_parse_nested_quoted_identifier() {
374+
redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
375+
redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
376+
redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
377+
redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
378+
// whitespace between the brackets and the inner quotes is dropped; whitespace inside the inner quotes is kept
379+
redshift().one_statement_parses_to(r#"SELECT 1 AS [ " 1 " ]"#, r#"SELECT 1 AS [" 1 "]"#);
380+
// invalid query: the inner double quote is never closed
381+
assert!(redshift()
382+
.parse_sql_statements(r#"SELECT 1 AS ["1]"#)
383+
.is_err());
384+
}

0 commit comments

Comments
 (0)