Skip to content

BigQuery: Fix column identifier reserved keywords list #1678

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/dialect/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,28 @@
// under the License.

use crate::dialect::Dialect;
use crate::keywords::Keyword;
use crate::parser::Parser;

/// These keywords are disallowed as column identifiers. Such that
/// `SELECT 5 AS <col> FROM T` is rejected by BigQuery.
const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
Keyword::WITH,
Keyword::SELECT,
Keyword::WHERE,
Keyword::GROUP,
Keyword::HAVING,
Keyword::ORDER,
Keyword::LATERAL,
Keyword::LIMIT,
Keyword::FETCH,
Keyword::UNION,
Keyword::EXCEPT,
Keyword::INTERSECT,
Keyword::FROM,
Keyword::INTO,
Keyword::END,
];

/// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/)
#[derive(Debug, Default)]
Expand Down Expand Up @@ -87,4 +109,8 @@ impl Dialect for BigQueryDialect {
fn supports_timestamp_versioning(&self) -> bool {
true
}

fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
!RESERVED_FOR_COLUMN_ALIAS.contains(kw)
}
}
12 changes: 9 additions & 3 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ pub trait Dialect: Debug + Any {
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
}

// Returns reserved keywords when looking to parse a [TableFactor].
/// Returns reserved keywords when looking to parse a `TableFactor`.
/// See [Self::supports_from_trailing_commas]
fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] {
keywords::RESERVED_FOR_TABLE_FACTOR
Expand Down Expand Up @@ -828,11 +828,17 @@ pub trait Dialect: Debug + Any {
false
}

/// Returns true if the specified keyword should be parsed as a column identifier.
/// See [keywords::RESERVED_FOR_COLUMN_ALIAS]
fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
!keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
}

/// Returns true if the specified keyword should be parsed as a select item alias.
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
/// to enable looking ahead if needed.
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
explicit || self.is_column_alias(kw, parser)
}

/// Returns true if the specified keyword should be parsed as a table factor alias.
Expand Down
65 changes: 47 additions & 18 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3944,7 +3944,7 @@ impl<'a> Parser<'a> {
self.parse_comma_separated_with_trailing_commas(
|p| p.parse_select_item(),
trailing_commas,
None,
Self::is_reserved_for_column_alias,
)
}

Expand Down Expand Up @@ -3978,30 +3978,42 @@ impl<'a> Parser<'a> {
self.parse_comma_separated_with_trailing_commas(
Parser::parse_table_and_joins,
trailing_commas,
Some(self.dialect.get_reserved_keywords_for_table_factor()),
|kw, _parser| {
self.dialect
.get_reserved_keywords_for_table_factor()
.contains(kw)
},
)
}

/// Parse the comma of a comma-separated syntax element.
/// `R` is a predicate that should return true if the next
/// keyword is a reserved keyword.
/// Allows for control over trailing commas
///
/// Returns true if there is a next element
fn is_parse_comma_separated_end_with_trailing_commas(
fn is_parse_comma_separated_end_with_trailing_commas<R>(
&mut self,
trailing_commas: bool,
reserved_keywords: Option<&[Keyword]>,
) -> bool {
let reserved_keywords = reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS);
is_reserved_keyword: &R,
) -> bool
where
R: Fn(&Keyword, &mut Parser) -> bool,
{
if !self.consume_token(&Token::Comma) {
true
} else if trailing_commas {
let token = self.peek_token().token;
match token {
Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) => true,
let token = self.next_token().token;
let is_end = match token {
Token::Word(ref kw) if is_reserved_keyword(&kw.keyword, self) => true,
Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
true
}
_ => false,
}
};
self.prev_token();

is_end
} else {
false
}
Expand All @@ -4010,34 +4022,44 @@ impl<'a> Parser<'a> {
/// Parse the comma of a comma-separated syntax element.
/// Returns true if there is a next element
fn is_parse_comma_separated_end(&mut self) -> bool {
self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas, None)
self.is_parse_comma_separated_end_with_trailing_commas(
self.options.trailing_commas,
&Self::is_reserved_for_column_alias,
)
}

/// Parse a comma-separated list of 1+ items accepted by `F`
pub fn parse_comma_separated<T, F>(&mut self, f: F) -> Result<Vec<T>, ParserError>
where
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
{
self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas, None)
self.parse_comma_separated_with_trailing_commas(
f,
self.options.trailing_commas,
Self::is_reserved_for_column_alias,
)
}

/// Parse a comma-separated list of 1+ items accepted by `F`
/// Allows for control over trailing commas
fn parse_comma_separated_with_trailing_commas<T, F>(
/// Parse a comma-separated list of 1+ items accepted by `F`.
/// `R` is a predicate that should return true if the next
/// keyword is a reserved keyword.
/// Allows for control over trailing commas.
fn parse_comma_separated_with_trailing_commas<T, F, R>(
&mut self,
mut f: F,
trailing_commas: bool,
reserved_keywords: Option<&[Keyword]>,
is_reserved_keyword: R,
) -> Result<Vec<T>, ParserError>
where
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
R: Fn(&Keyword, &mut Parser) -> bool,
{
let mut values = vec![];
loop {
values.push(f(self)?);
if self.is_parse_comma_separated_end_with_trailing_commas(
trailing_commas,
reserved_keywords,
&is_reserved_keyword,
) {
break;
}
Expand Down Expand Up @@ -4111,6 +4133,13 @@ impl<'a> Parser<'a> {
self.parse_comma_separated(f)
}

/// Default implementation of a predicate that returns true if
/// the specified keyword is reserved for column alias.
/// See [Dialect::is_column_alias]
fn is_reserved_for_column_alias(kw: &Keyword, parser: &mut Parser) -> bool {
!parser.dialect.is_column_alias(kw, parser)
}

/// Run a parser method `f`, reverting back to the current position if unsuccessful.
/// Returns `None` if `f` returns an error
pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
Expand Down Expand Up @@ -9329,7 +9358,7 @@ impl<'a> Parser<'a> {
let cols = self.parse_comma_separated_with_trailing_commas(
Parser::parse_view_column,
self.dialect.supports_column_definition_trailing_commas(),
None,
Self::is_reserved_for_column_alias,
)?;
self.expect_token(&Token::RParen)?;
Ok(cols)
Expand Down
9 changes: 9 additions & 0 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,15 @@ fn parse_raw_literal() {
);
}

#[test]
fn parse_big_query_non_reserved_column_alias() {
let sql = r#"SELECT OFFSET, EXPLAIN, ANALYZE, SORT, TOP, VIEW FROM T"#;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤯

bigquery().verified_stmt(sql);

let sql = r#"SELECT 1 AS OFFSET, 2 AS EXPLAIN, 3 AS ANALYZE FROM T"#;
bigquery().verified_stmt(sql);
}

#[test]
fn parse_delete_statement() {
let sql = "DELETE \"table\" WHERE 1";
Expand Down
42 changes: 29 additions & 13 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,13 @@ fn parse_insert_default_values() {

#[test]
fn parse_insert_select_returning() {
verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
let stmt = verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
// Dialects that support `RETURNING` as a column identifier do
// not support this syntax.
let dialects =
all_dialects_where(|d| !d.is_column_alias(&Keyword::RETURNING, &mut Parser::new(d)));

dialects.verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
let stmt = dialects.verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
match stmt {
Statement::Insert(Insert {
returning: Some(ret),
Expand Down Expand Up @@ -6902,9 +6907,6 @@ fn parse_union_except_intersect_minus() {
verified_stmt("SELECT 1 EXCEPT SELECT 2");
verified_stmt("SELECT 1 EXCEPT ALL SELECT 2");
verified_stmt("SELECT 1 EXCEPT DISTINCT SELECT 1");
verified_stmt("SELECT 1 MINUS SELECT 2");
verified_stmt("SELECT 1 MINUS ALL SELECT 2");
verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
verified_stmt("SELECT 1 INTERSECT SELECT 2");
verified_stmt("SELECT 1 INTERSECT ALL SELECT 2");
verified_stmt("SELECT 1 INTERSECT DISTINCT SELECT 1");
Expand All @@ -6923,6 +6925,13 @@ fn parse_union_except_intersect_minus() {
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x");
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x");
verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x");

// Dialects that support `MINUS` as column identifier
// do not support `MINUS` as a set operator.
let dialects = all_dialects_where(|d| !d.is_column_alias(&Keyword::MINUS, &mut Parser::new(d)));
dialects.verified_stmt("SELECT 1 MINUS SELECT 2");
dialects.verified_stmt("SELECT 1 MINUS ALL SELECT 2");
dialects.verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
}

#[test]
Expand Down Expand Up @@ -7599,19 +7608,26 @@ fn parse_invalid_subquery_without_parens() {

#[test]
fn parse_offset() {
// Dialects that support `OFFSET` as column identifiers
// don't support this syntax.
let dialects =
all_dialects_where(|d| !d.is_column_alias(&Keyword::OFFSET, &mut Parser::new(d)));

let expect = Some(Offset {
value: Expr::Value(number("2")),
rows: OffsetRows::Rows,
});
let ast = verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
let ast = dialects.verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
assert_eq!(ast.offset, expect);
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
let ast = dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 ROWS");
assert_eq!(ast.offset, expect);
let ast = verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
let ast = dialects.verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
assert_eq!(ast.offset, expect);
let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
let ast =
dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz OFFSET 2 ROWS");
assert_eq!(ast.offset, expect);
let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
let ast =
dialects.verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 ROWS) OFFSET 2 ROWS");
assert_eq!(ast.offset, expect);
match *ast.body {
SetExpr::Select(s) => match only(s.from).relation {
Expand All @@ -7622,23 +7638,23 @@ fn parse_offset() {
},
_ => panic!("Test broke"),
}
let ast = verified_query("SELECT 'foo' OFFSET 0 ROWS");
let ast = dialects.verified_query("SELECT 'foo' OFFSET 0 ROWS");
assert_eq!(
ast.offset,
Some(Offset {
value: Expr::Value(number("0")),
rows: OffsetRows::Rows,
})
);
let ast = verified_query("SELECT 'foo' OFFSET 1 ROW");
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1 ROW");
assert_eq!(
ast.offset,
Some(Offset {
value: Expr::Value(number("1")),
rows: OffsetRows::Row,
})
);
let ast = verified_query("SELECT 'foo' OFFSET 1");
let ast = dialects.verified_query("SELECT 'foo' OFFSET 1");
assert_eq!(
ast.offset,
Some(Offset {
Expand Down
Loading