Skip to content

Commit b4b5576

Browse files
authored
Add support for Snowflake column aliases that use SQL keywords (apache#1632)
1 parent 4741500 commit b4b5576

File tree

4 files changed

+163
-56
lines changed

4 files changed

+163
-56
lines changed

src/dialect/mod.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,20 @@ pub trait Dialect: Debug + Any {
820820
fn supports_set_stmt_without_operator(&self) -> bool {
821821
false
822822
}
823+
824+
/// Returns true if the specified keyword should be parsed as a select item alias.
825+
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
826+
/// to enable looking ahead if needed.
827+
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
828+
explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
829+
}
830+
831+
/// Returns true if the specified keyword should be parsed as a table factor alias.
832+
/// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
833+
/// to enable looking ahead if needed.
834+
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
835+
explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
836+
}
823837
}
824838

825839
/// This represents the operators for which precedence must be defined

src/dialect/snowflake.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,51 @@ impl Dialect for SnowflakeDialect {
251251
fn supports_partiql(&self) -> bool {
252252
true
253253
}
254+
255+
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
256+
explicit
257+
|| match kw {
258+
// The following keywords can be considered an alias as long as
259+
// they are not followed by other tokens that may change their meaning
260+
// e.g. `SELECT * EXCEPT (col1) FROM tbl`
261+
Keyword::EXCEPT
262+
// e.g. `SELECT 1 LIMIT 5`
263+
| Keyword::LIMIT
264+
// e.g. `SELECT 1 OFFSET 5 ROWS`
265+
| Keyword::OFFSET
266+
// e.g. `INSERT INTO t SELECT 1 RETURNING *`
267+
| Keyword::RETURNING if !matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF) =>
268+
{
269+
false
270+
}
271+
272+
// `FETCH` can be considered an alias as long as it's not followed by `FIRST` or `NEXT`
273+
// which would give it a different meaning, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias
274+
Keyword::FETCH
275+
if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) =>
276+
{
277+
false
278+
}
279+
280+
// Keywords reserved by the Snowflake dialect, which seems to be less restrictive
281+
// than what is listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`. The following
282+
// keywords were tested with this statement: `SELECT 1 <KW>`.
283+
Keyword::FROM
284+
| Keyword::GROUP
285+
| Keyword::HAVING
286+
| Keyword::INTERSECT
287+
| Keyword::INTO
288+
| Keyword::MINUS
289+
| Keyword::ORDER
290+
| Keyword::SELECT
291+
| Keyword::UNION
292+
| Keyword::WHERE
293+
| Keyword::WITH => false,
294+
295+
// Any other word is considered an alias
296+
_ => true,
297+
}
298+
}
254299
}
255300

256301
fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {

src/parser/mod.rs

Lines changed: 75 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8838,38 +8838,76 @@ impl<'a> Parser<'a> {
88388838
Ok(IdentWithAlias { ident, alias })
88398839
}
88408840

8841-
/// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
8842-
/// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
8843-
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
8841+
/// Optionally parses an alias for a select list item
8842+
fn maybe_parse_select_item_alias(&mut self) -> Result<Option<Ident>, ParserError> {
8843+
fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
8844+
parser.dialect.is_select_item_alias(explicit, kw, parser)
8845+
}
8846+
self.parse_optional_alias_inner(None, validator)
8847+
}
8848+
8849+
/// Optionally parses an alias for a table like in `... FROM generate_series(1, 10) AS t (col)`.
8850+
/// In this case, the alias is allowed to optionally name the columns in the table, in
8851+
/// addition to the table itself.
8852+
pub fn maybe_parse_table_alias(&mut self) -> Result<Option<TableAlias>, ParserError> {
8853+
fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
8854+
parser.dialect.is_table_factor_alias(explicit, kw, parser)
8855+
}
8856+
match self.parse_optional_alias_inner(None, validator)? {
8857+
Some(name) => {
8858+
let columns = self.parse_table_alias_column_defs()?;
8859+
Ok(Some(TableAlias { name, columns }))
8860+
}
8861+
None => Ok(None),
8862+
}
8863+
}
8864+
8865+
/// Wrapper for parse_optional_alias_inner, left for backwards-compatibility
8866+
/// but new flows should use the context-specific methods such as `maybe_parse_select_item_alias`
8867+
/// and `maybe_parse_table_alias`.
88448868
pub fn parse_optional_alias(
88458869
&mut self,
88468870
reserved_kwds: &[Keyword],
88478871
) -> Result<Option<Ident>, ParserError> {
8872+
fn validator(_explicit: bool, _kw: &Keyword, _parser: &mut Parser) -> bool {
8873+
false
8874+
}
8875+
self.parse_optional_alias_inner(Some(reserved_kwds), validator)
8876+
}
8877+
8878+
/// Parses an optional alias after a SQL element such as a select list item
8879+
/// or a table name.
8880+
///
8881+
/// This method accepts an optional list of reserved keywords or a function
8882+
/// to call to validate if a keyword should be parsed as an alias, to allow
8883+
/// callers to customize the parsing logic based on their context.
8884+
fn parse_optional_alias_inner<F>(
8885+
&mut self,
8886+
reserved_kwds: Option<&[Keyword]>,
8887+
validator: F,
8888+
) -> Result<Option<Ident>, ParserError>
8889+
where
8890+
F: Fn(bool, &Keyword, &mut Parser) -> bool,
8891+
{
88488892
let after_as = self.parse_keyword(Keyword::AS);
8893+
88498894
let next_token = self.next_token();
88508895
match next_token.token {
8851-
// Accept any identifier after `AS` (though many dialects have restrictions on
8852-
// keywords that may appear here). If there's no `AS`: don't parse keywords,
8853-
// which may start a construct allowed in this position, to be parsed as aliases.
8854-
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
8855-
// not an alias.)
8856-
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
8896+
// A word that follows the `AS` keyword is always accepted as an alias. Without `AS`, a word is
8897+
// accepted here only if a reserved-keyword list was supplied and the word is not in that list.
8898+
Token::Word(w)
8899+
if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) =>
8900+
{
88578901
Ok(Some(w.into_ident(next_token.span)))
88588902
}
8859-
// MSSQL supports single-quoted strings as aliases for columns
8860-
// We accept them as table aliases too, although MSSQL does not.
8861-
//
8862-
// Note, that this conflicts with an obscure rule from the SQL
8863-
// standard, which we don't implement:
8864-
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
8865-
// "[Obscure Rule] SQL allows you to break a long <character
8866-
// string literal> up into two or more smaller <character string
8867-
// literal>s, split by a <separator> that includes a newline
8868-
// character. When it sees such a <literal>, your DBMS will
8869-
// ignore the <separator> and treat the multiple strings as
8870-
// a single <literal>."
8903+
// This pattern allows for customizing the acceptance of words as aliases based on the caller's
8904+
// context, such as to what SQL element this word is a potential alias of (select item alias, table name
8905+
// alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords.
8906+
Token::Word(w) if validator(after_as, &w.keyword, self) => {
8907+
Ok(Some(w.into_ident(next_token.span)))
8908+
}
8909+
// For backwards-compatibility, we accept quoted strings as aliases regardless of the context.
88718910
Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
8872-
// Support for MySql dialect double-quoted string, `AS "HOUR"` for example
88738911
Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
88748912
_ => {
88758913
if after_as {
@@ -8881,23 +8919,6 @@ impl<'a> Parser<'a> {
88818919
}
88828920
}
88838921

8884-
/// Parse `AS identifier` when the AS is describing a table-valued object,
8885-
/// like in `... FROM generate_series(1, 10) AS t (col)`. In this case
8886-
/// the alias is allowed to optionally name the columns in the table, in
8887-
/// addition to the table itself.
8888-
pub fn parse_optional_table_alias(
8889-
&mut self,
8890-
reserved_kwds: &[Keyword],
8891-
) -> Result<Option<TableAlias>, ParserError> {
8892-
match self.parse_optional_alias(reserved_kwds)? {
8893-
Some(name) => {
8894-
let columns = self.parse_table_alias_column_defs()?;
8895-
Ok(Some(TableAlias { name, columns }))
8896-
}
8897-
None => Ok(None),
8898-
}
8899-
}
8900-
89018922
pub fn parse_optional_group_by(&mut self) -> Result<Option<GroupByExpr>, ParserError> {
89028923
if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
89038924
let expressions = if self.parse_keyword(Keyword::ALL) {
@@ -10899,7 +10920,7 @@ impl<'a> Parser<'a> {
1089910920
let name = self.parse_object_name(false)?;
1090010921
self.expect_token(&Token::LParen)?;
1090110922
let args = self.parse_optional_args()?;
10902-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
10923+
let alias = self.maybe_parse_table_alias()?;
1090310924
Ok(TableFactor::Function {
1090410925
lateral: true,
1090510926
name,
@@ -10912,7 +10933,7 @@ impl<'a> Parser<'a> {
1091210933
self.expect_token(&Token::LParen)?;
1091310934
let expr = self.parse_expr()?;
1091410935
self.expect_token(&Token::RParen)?;
10915-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
10936+
let alias = self.maybe_parse_table_alias()?;
1091610937
Ok(TableFactor::TableFunction { expr, alias })
1091710938
} else if self.consume_token(&Token::LParen) {
1091810939
// A left paren introduces either a derived table (i.e., a subquery)
@@ -10961,7 +10982,7 @@ impl<'a> Parser<'a> {
1096110982
#[allow(clippy::if_same_then_else)]
1096210983
if !table_and_joins.joins.is_empty() {
1096310984
self.expect_token(&Token::RParen)?;
10964-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
10985+
let alias = self.maybe_parse_table_alias()?;
1096510986
Ok(TableFactor::NestedJoin {
1096610987
table_with_joins: Box::new(table_and_joins),
1096710988
alias,
@@ -10974,7 +10995,7 @@ impl<'a> Parser<'a> {
1097410995
// (B): `table_and_joins` (what we found inside the parentheses)
1097510996
// is a nested join `(foo JOIN bar)`, not followed by other joins.
1097610997
self.expect_token(&Token::RParen)?;
10977-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
10998+
let alias = self.maybe_parse_table_alias()?;
1097810999
Ok(TableFactor::NestedJoin {
1097911000
table_with_joins: Box::new(table_and_joins),
1098011001
alias,
@@ -10988,9 +11009,7 @@ impl<'a> Parser<'a> {
1098811009
// [AS alias])`) as well.
1098911010
self.expect_token(&Token::RParen)?;
1099011011

10991-
if let Some(outer_alias) =
10992-
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?
10993-
{
11012+
if let Some(outer_alias) = self.maybe_parse_table_alias()? {
1099411013
// Snowflake also allows specifying an alias *after* parens
1099511014
// e.g. `FROM (mytable) AS alias`
1099611015
match &mut table_and_joins.relation {
@@ -11043,7 +11062,7 @@ impl<'a> Parser<'a> {
1104311062
// SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2)
1104411063
// where there are no parentheses around the VALUES clause.
1104511064
let values = SetExpr::Values(self.parse_values(false)?);
11046-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11065+
let alias = self.maybe_parse_table_alias()?;
1104711066
Ok(TableFactor::Derived {
1104811067
lateral: false,
1104911068
subquery: Box::new(Query {
@@ -11069,7 +11088,7 @@ impl<'a> Parser<'a> {
1106911088
self.expect_token(&Token::RParen)?;
1107011089

1107111090
let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]);
11072-
let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) {
11091+
let alias = match self.maybe_parse_table_alias() {
1107311092
Ok(Some(alias)) => Some(alias),
1107411093
Ok(None) => None,
1107511094
Err(e) => return Err(e),
@@ -11106,7 +11125,7 @@ impl<'a> Parser<'a> {
1110611125
let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?;
1110711126
self.expect_token(&Token::RParen)?;
1110811127
self.expect_token(&Token::RParen)?;
11109-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11128+
let alias = self.maybe_parse_table_alias()?;
1111011129
Ok(TableFactor::JsonTable {
1111111130
json_expr,
1111211131
json_path,
@@ -11151,7 +11170,7 @@ impl<'a> Parser<'a> {
1115111170
}
1115211171
}
1115311172

11154-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11173+
let alias = self.maybe_parse_table_alias()?;
1115511174

1115611175
// MSSQL-specific table hints:
1115711176
let mut with_hints = vec![];
@@ -11329,7 +11348,7 @@ impl<'a> Parser<'a> {
1132911348
} else {
1133011349
Vec::new()
1133111350
};
11332-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11351+
let alias = self.maybe_parse_table_alias()?;
1133311352
Ok(TableFactor::OpenJsonTable {
1133411353
json_expr,
1133511354
json_path,
@@ -11428,7 +11447,7 @@ impl<'a> Parser<'a> {
1142811447

1142911448
self.expect_token(&Token::RParen)?;
1143011449

11431-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11450+
let alias = self.maybe_parse_table_alias()?;
1143211451

1143311452
Ok(TableFactor::MatchRecognize {
1143411453
table: Box::new(table),
@@ -11672,7 +11691,7 @@ impl<'a> Parser<'a> {
1167211691
) -> Result<TableFactor, ParserError> {
1167311692
let subquery = self.parse_query()?;
1167411693
self.expect_token(&Token::RParen)?;
11675-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11694+
let alias = self.maybe_parse_table_alias()?;
1167611695
Ok(TableFactor::Derived {
1167711696
lateral: match lateral {
1167811697
Lateral => true,
@@ -11766,7 +11785,7 @@ impl<'a> Parser<'a> {
1176611785
};
1176711786

1176811787
self.expect_token(&Token::RParen)?;
11769-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11788+
let alias = self.maybe_parse_table_alias()?;
1177011789
Ok(TableFactor::Pivot {
1177111790
table: Box::new(table),
1177211791
aggregate_functions,
@@ -11788,7 +11807,7 @@ impl<'a> Parser<'a> {
1178811807
self.expect_keyword_is(Keyword::IN)?;
1178911808
let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
1179011809
self.expect_token(&Token::RParen)?;
11791-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
11810+
let alias = self.maybe_parse_table_alias()?;
1179211811
Ok(TableFactor::Unpivot {
1179311812
table: Box::new(table),
1179411813
value,
@@ -12614,7 +12633,7 @@ impl<'a> Parser<'a> {
1261412633
})
1261512634
}
1261612635
expr => self
12617-
.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)
12636+
.maybe_parse_select_item_alias()
1261812637
.map(|alias| match alias {
1261912638
Some(alias) => SelectItem::ExprWithAlias { expr, alias },
1262012639
None => SelectItem::UnnamedExpr(expr),

tests/sqlparser_snowflake.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3022,3 +3022,32 @@ fn parse_ls_and_rm() {
30223022

30233023
snowflake().verified_stmt(r#"LIST @"STAGE_WITH_QUOTES""#);
30243024
}
3025+
3026+
#[test]
3027+
fn test_sql_keywords_as_select_item_aliases() {
3028+
// Some keywords that should be parsed as an alias
3029+
let unreserved_kws = vec!["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT"];
3030+
for kw in unreserved_kws {
3031+
snowflake()
3032+
.one_statement_parses_to(&format!("SELECT 1 {kw}"), &format!("SELECT 1 AS {kw}"));
3033+
}
3034+
3035+
// Some keywords that should not be parsed as an alias
3036+
let reserved_kws = vec![
3037+
"FROM",
3038+
"GROUP",
3039+
"HAVING",
3040+
"INTERSECT",
3041+
"INTO",
3042+
"ORDER",
3043+
"SELECT",
3044+
"UNION",
3045+
"WHERE",
3046+
"WITH",
3047+
];
3048+
for kw in reserved_kws {
3049+
assert!(snowflake()
3050+
.parse_sql_statements(&format!("SELECT 1 {kw}"))
3051+
.is_err());
3052+
}
3053+
}

0 commit comments

Comments
 (0)