Skip to content

Commit 31e7a44

Browse files
iffyioVedin
authored andcommitted
BigQuery: Fix column identifier reserved keywords list (apache#1678)
1 parent 8fb7a02 commit 31e7a44

File tree

5 files changed

+120
-34
lines changed

5 files changed

+120
-34
lines changed

src/dialect/bigquery.rs

+26
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,28 @@
1616
// under the License.
1717

1818
use crate::dialect::Dialect;
19+
use crate::keywords::Keyword;
20+
use crate::parser::Parser;
21+
22+
/// These keywords are disallowed as column identifiers. Such that
23+
/// `SELECT 5 AS <col> FROM T` is rejected by BigQuery.
24+
const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
25+
Keyword::WITH,
26+
Keyword::SELECT,
27+
Keyword::WHERE,
28+
Keyword::GROUP,
29+
Keyword::HAVING,
30+
Keyword::ORDER,
31+
Keyword::LATERAL,
32+
Keyword::LIMIT,
33+
Keyword::FETCH,
34+
Keyword::UNION,
35+
Keyword::EXCEPT,
36+
Keyword::INTERSECT,
37+
Keyword::FROM,
38+
Keyword::INTO,
39+
Keyword::END,
40+
];
1941

2042
/// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/)
2143
#[derive(Debug, Default)]
@@ -92,4 +114,8 @@ impl Dialect for BigQueryDialect {
92114
fn supports_timestamp_versioning(&self) -> bool {
93115
true
94116
}
117+
118+
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
119+
!RESERVED_FOR_COLUMN_ALIAS.contains(kw)
120+
}
95121
}

src/dialect/mod.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,7 @@ pub trait Dialect: Debug + Any {
804804
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
805805
}
806806

807-
// Returns reserved keywords when looking to parse a [TableFactor].
807+
/// Returns reserved keywords when looking to parse a `TableFactor`.
808808
/// See [Self::supports_from_trailing_commas]
809809
fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] {
810810
keywords::RESERVED_FOR_TABLE_FACTOR
@@ -844,11 +844,17 @@ pub trait Dialect: Debug + Any {
844844
false
845845
}
846846

847+
/// Returns true if the specified keyword should be parsed as a column identifier.
848+
/// See [keywords::RESERVED_FOR_COLUMN_ALIAS]
849+
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
850+
!keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
851+
}
852+
847853
/// Returns true if the specified keyword should be parsed as a select item alias.
848854
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
849855
/// to enable looking ahead if needed.
850-
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
851-
explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
856+
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
857+
explicit || self.is_column_alias(kw, parser)
852858
}
853859

854860
/// Returns true if the specified keyword should be parsed as a table factor alias.

src/parser/mod.rs

+47-18
Original file line numberDiff line numberDiff line change
@@ -3951,7 +3951,7 @@ impl<'a> Parser<'a> {
39513951
self.parse_comma_separated_with_trailing_commas(
39523952
|p| p.parse_select_item(),
39533953
trailing_commas,
3954-
None,
3954+
Self::is_reserved_for_column_alias,
39553955
)
39563956
}
39573957

@@ -3985,30 +3985,42 @@ impl<'a> Parser<'a> {
39853985
self.parse_comma_separated_with_trailing_commas(
39863986
Parser::parse_table_and_joins,
39873987
trailing_commas,
3988-
Some(self.dialect.get_reserved_keywords_for_table_factor()),
3988+
|kw, _parser| {
3989+
self.dialect
3990+
.get_reserved_keywords_for_table_factor()
3991+
.contains(kw)
3992+
},
39893993
)
39903994
}
39913995

39923996
/// Parse the comma of a comma-separated syntax element.
3997+
/// `R` is a predicate that should return true if the next
3998+
/// keyword is a reserved keyword.
39933999
/// Allows for control over trailing commas
4000+
///
39944001
/// Returns true if there is a next element
3995-
fn is_parse_comma_separated_end_with_trailing_commas(
4002+
fn is_parse_comma_separated_end_with_trailing_commas<R>(
39964003
&mut self,
39974004
trailing_commas: bool,
3998-
reserved_keywords: Option<&[Keyword]>,
3999-
) -> bool {
4000-
let reserved_keywords = reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS);
4005+
is_reserved_keyword: &R,
4006+
) -> bool
4007+
where
4008+
R: Fn(&Keyword, &mut Parser) -> bool,
4009+
{
40014010
if !self.consume_token(&Token::Comma) {
40024011
true
40034012
} else if trailing_commas {
4004-
let token = self.peek_token().token;
4005-
match token {
4006-
Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) => true,
4013+
let token = self.next_token().token;
4014+
let is_end = match token {
4015+
Token::Word(ref kw) if is_reserved_keyword(&kw.keyword, self) => true,
40074016
Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
40084017
true
40094018
}
40104019
_ => false,
4011-
}
4020+
};
4021+
self.prev_token();
4022+
4023+
is_end
40124024
} else {
40134025
false
40144026
}
@@ -4017,34 +4029,44 @@ impl<'a> Parser<'a> {
40174029
/// Parse the comma of a comma-separated syntax element.
40184030
/// Returns true if there is a next element
40194031
fn is_parse_comma_separated_end(&mut self) -> bool {
4020-
self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas, None)
4032+
self.is_parse_comma_separated_end_with_trailing_commas(
4033+
self.options.trailing_commas,
4034+
&Self::is_reserved_for_column_alias,
4035+
)
40214036
}
40224037

40234038
/// Parse a comma-separated list of 1+ items accepted by `F`
40244039
pub fn parse_comma_separated<T, F>(&mut self, f: F) -> Result<Vec<T>, ParserError>
40254040
where
40264041
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
40274042
{
4028-
self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas, None)
4043+
self.parse_comma_separated_with_trailing_commas(
4044+
f,
4045+
self.options.trailing_commas,
4046+
Self::is_reserved_for_column_alias,
4047+
)
40294048
}
40304049

4031-
/// Parse a comma-separated list of 1+ items accepted by `F`
4032-
/// Allows for control over trailing commas
4033-
fn parse_comma_separated_with_trailing_commas<T, F>(
4050+
/// Parse a comma-separated list of 1+ items accepted by `F`.
4051+
/// `R` is a predicate that should return true if the next
4052+
/// keyword is a reserved keyword.
4053+
/// Allows for control over trailing commas.
4054+
fn parse_comma_separated_with_trailing_commas<T, F, R>(
40344055
&mut self,
40354056
mut f: F,
40364057
trailing_commas: bool,
4037-
reserved_keywords: Option<&[Keyword]>,
4058+
is_reserved_keyword: R,
40384059
) -> Result<Vec<T>, ParserError>
40394060
where
40404061
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
4062+
R: Fn(&Keyword, &mut Parser) -> bool,
40414063
{
40424064
let mut values = vec![];
40434065
loop {
40444066
values.push(f(self)?);
40454067
if self.is_parse_comma_separated_end_with_trailing_commas(
40464068
trailing_commas,
4047-
reserved_keywords,
4069+
&is_reserved_keyword,
40484070
) {
40494071
break;
40504072
}
@@ -4118,6 +4140,13 @@ impl<'a> Parser<'a> {
41184140
self.parse_comma_separated(f)
41194141
}
41204142

4143+
/// Default implementation of a predicate that returns true if
4144+
/// the specified keyword is reserved for column alias.
4145+
/// See [Dialect::is_column_alias]
4146+
fn is_reserved_for_column_alias(kw: &Keyword, parser: &mut Parser) -> bool {
4147+
!parser.dialect.is_column_alias(kw, parser)
4148+
}
4149+
41214150
/// Run a parser method `f`, reverting back to the current position if unsuccessful.
41224151
/// Returns `None` if `f` returns an error
41234152
pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
@@ -9394,7 +9423,7 @@ impl<'a> Parser<'a> {
93949423
let cols = self.parse_comma_separated_with_trailing_commas(
93959424
Parser::parse_view_column,
93969425
self.dialect.supports_column_definition_trailing_commas(),
9397-
None,
9426+
Self::is_reserved_for_column_alias,
93989427
)?;
93999428
self.expect_token(&Token::RParen)?;
94009429
Ok(cols)

tests/sqlparser_bigquery.rs

+9
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,15 @@ fn parse_raw_literal() {
213213
);
214214
}
215215

216+
#[test]
217+
fn parse_big_query_non_reserved_column_alias() {
218+
let sql = r#"SELECT OFFSET, EXPLAIN, ANALYZE, SORT, TOP, VIEW FROM T"#;
219+
bigquery().verified_stmt(sql);
220+
221+
let sql = r#"SELECT 1 AS OFFSET, 2 AS EXPLAIN, 3 AS ANALYZE FROM T"#;
222+
bigquery().verified_stmt(sql);
223+
}
224+
216225
#[test]
217226
fn parse_delete_statement() {
218227
let sql = "DELETE \"table\" WHERE 1";

tests/sqlparser_common.rs

+29-13
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,13 @@ fn parse_insert_default_values() {
253253

254254
#[test]
255255
fn parse_insert_select_returning() {
256-
verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
257-
let stmt = verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
256+
// Dialects that support `RETURNING` as a column identifier do
257+
// not support this syntax.
258+
let dialects =
259+
all_dialects_where(|d| !d.is_column_alias(&Keyword::RETURNING, &mut Parser::new(d)));
260+
261+
dialects.verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
262+
let stmt = dialects.verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
258263
match stmt {
259264
Statement::Insert(Insert {
260265
returning: Some(ret),
@@ -6993,9 +6998,6 @@ fn parse_union_except_intersect_minus() {
69936998
verified_stmt("SELECT 1 EXCEPT SELECT 2");
69946999
verified_stmt("SELECT 1 EXCEPT ALL SELECT 2");
69957000
verified_stmt("SELECT 1 EXCEPT DISTINCT SELECT 1");
6996-
verified_stmt("SELECT 1 MINUS SELECT 2");
6997-
verified_stmt("SELECT 1 MINUS ALL SELECT 2");
6998-
verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
69997001
verified_stmt("SELECT 1 INTERSECT SELECT 2");
70007002
verified_stmt("SELECT 1 INTERSECT ALL SELECT 2");
70017003
verified_stmt("SELECT 1 INTERSECT DISTINCT SELECT 1");
@@ -7014,6 +7016,13 @@ fn parse_union_except_intersect_minus() {
70147016
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x");
70157017
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x");
70167018
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x");
7019+
7020+
// Dialects that support `MINUS` as column identifier
7021+
// do not support `MINUS` as a set operator.
7022+
let dialects = all_dialects_where(|d| !d.is_column_alias(&Keyword::MINUS, &mut Parser::new(d)));
7023+
dialects.verified_stmt("SELECT 1 MINUS SELECT 2");
7024+
dialects.verified_stmt("SELECT 1 MINUS ALL SELECT 2");
7025+
dialects.verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
70177026
}
70187027

70197028
#[test]
@@ -7690,19 +7699,26 @@ fn parse_invalid_subquery_without_parens() {
76907699

76917700
#[test]
76927701
fn parse_offset() {
7702+
// Dialects that support `OFFSET` as column identifiers
7703+
// don't support this syntax.
7704+
let dialects =
7705+
all_dialects_where(|d| !d.is_column_alias(&Keyword::OFFSET, &mut Parser::new(d)));
7706+
76937707
let expect = Some(Offset {
76947708
value: Expr::Value(number("2")),
76957709
rows: OffsetRows::Rows,
76967710
});
7697-
let ast = verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
7711+
let ast = dialects.verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
76987712
assert_eq!(ast.offset, expect);
7699-
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
7713+
let ast = dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
77007714
assert_eq!(ast.offset, expect);
7701-
let ast = verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
7715+
let ast = dialects.verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
77027716
assert_eq!(ast.offset, expect);
7703-
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
7717+
let ast =
7718+
dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
77047719
assert_eq!(ast.offset, expect);
7705-
let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
7720+
let ast =
7721+
dialects.verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
77067722
assert_eq!(ast.offset, expect);
77077723
match *ast.body {
77087724
SetExpr::Select(s) => match only(s.from).relation {
@@ -7713,23 +7729,23 @@ fn parse_offset() {
77137729
},
77147730
_ => panic!("Test broke"),
77157731
}
7716-
let ast = verified_query("SELECT 'foo' OFFSET 0 ROWS");
7732+
let ast = dialects.verified_query("SELECT 'foo' OFFSET 0 ROWS");
77177733
assert_eq!(
77187734
ast.offset,
77197735
Some(Offset {
77207736
value: Expr::Value(number("0")),
77217737
rows: OffsetRows::Rows,
77227738
})
77237739
);
7724-
let ast = verified_query("SELECT 'foo' OFFSET 1 ROW");
7740+
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1 ROW");
77257741
assert_eq!(
77267742
ast.offset,
77277743
Some(Offset {
77287744
value: Expr::Value(number("1")),
77297745
rows: OffsetRows::Row,
77307746
})
77317747
);
7732-
let ast = verified_query("SELECT 'foo' OFFSET 1");
7748+
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1");
77337749
assert_eq!(
77347750
ast.offset,
77357751
Some(Offset {

0 commit comments

Comments
 (0)