Skip to content

Commit 464b770

Browse files
committed
Use dialects in the parser to support Snowflake's unique parenthesis syntax
Snowflake allows a single table to appear within parentheses. This behaviour differs from other databases and affects how table factors are parsed. To support it we do the following: 1. Add a reference to the dialect in the parser. 2. Add a Snowflake dialect. 3. Add a function to the dialect trait that identifies whether a single table inside parentheses is allowed. 4. When parsing a table factor, allow or deny a single table inside parentheses according to the dialect.
1 parent 9e7e302 commit 464b770

File tree

6 files changed

+352
-48
lines changed

6 files changed

+352
-48
lines changed

src/dialect/mod.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@ pub mod keywords;
1616
mod mssql;
1717
mod mysql;
1818
mod postgresql;
19-
19+
mod snowflake;
2020
use std::fmt::Debug;
2121

2222
pub use self::ansi::AnsiDialect;
2323
pub use self::generic::GenericDialect;
2424
pub use self::mssql::MsSqlDialect;
2525
pub use self::mysql::MySqlDialect;
2626
pub use self::postgresql::PostgreSqlDialect;
27+
pub use self::snowflake::SnowflakeDialect;
2728

2829
pub trait Dialect: Debug {
2930
/// Determine if a character starts a quoted identifier. The default
@@ -38,4 +39,8 @@ pub trait Dialect: Debug {
3839
fn is_identifier_start(&self, ch: char) -> bool;
3940
/// Determine if a character is a valid unquoted identifier character
4041
fn is_identifier_part(&self, ch: char) -> bool;
42+
43+
fn alllow_single_table_in_parenthesis(&self) -> bool {
44+
false
45+
}
4146
}

src/dialect/snowflake.rs

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use crate::dialect::Dialect;
2+
3+
#[derive(Debug, Default)]
4+
pub struct SnowflakeDialect;
5+
6+
impl Dialect for SnowflakeDialect {
7+
//Revisit: currently copied from Genric dialect
8+
fn is_identifier_start(&self, ch: char) -> bool {
9+
(ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_' || ch == '#' || ch == '@'
10+
}
11+
12+
//Revisit: currently copied from Genric dialect
13+
fn is_identifier_part(&self, ch: char) -> bool {
14+
(ch >= 'a' && ch <= 'z')
15+
|| (ch >= 'A' && ch <= 'Z')
16+
|| (ch >= '0' && ch <= '9')
17+
|| ch == '@'
18+
|| ch == '$'
19+
|| ch == '#'
20+
|| ch == '_'
21+
}
22+
23+
fn alllow_single_table_in_parenthesis(&self) -> bool {
24+
true
25+
}
26+
}

src/parser.rs

+129-9
Original file line numberDiff line numberDiff line change
@@ -83,23 +83,28 @@ impl fmt::Display for ParserError {
8383
impl Error for ParserError {}
8484

8585
/// SQL Parser
86-
pub struct Parser {
86+
pub struct Parser<'a> {
8787
tokens: Vec<Token>,
8888
/// The index of the first unprocessed token in `self.tokens`
8989
index: usize,
90+
dialect: &'a dyn Dialect,
9091
}
9192

92-
impl Parser {
93+
impl<'a> Parser<'a> {
9394
/// Parse the specified tokens
94-
pub fn new(tokens: Vec<Token>) -> Self {
95-
Parser { tokens, index: 0 }
95+
pub fn new(tokens: Vec<Token>, dialect: &'a dyn Dialect) -> Self {
96+
Parser {
97+
tokens,
98+
index: 0,
99+
dialect,
100+
}
96101
}
97102

98103
/// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
99104
pub fn parse_sql(dialect: &dyn Dialect, sql: &str) -> Result<Vec<Statement>, ParserError> {
100105
let mut tokenizer = Tokenizer::new(dialect, &sql);
101106
let tokens = tokenizer.tokenize()?;
102-
let mut parser = Parser::new(tokens);
107+
let mut parser = Parser::new(tokens, dialect);
103108
let mut stmts = Vec::new();
104109
let mut expecting_statement_delimiter = false;
105110
debug!("Parsing sql '{}'...", sql);
@@ -950,7 +955,7 @@ impl Parser {
950955
/// Parse a comma-separated list of 1+ items accepted by `F`
951956
pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
952957
where
953-
F: FnMut(&mut Parser) -> Result<T, ParserError>,
958+
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
954959
{
955960
let mut values = vec![];
956961
loop {
@@ -2054,9 +2059,113 @@ impl Parser {
20542059
};
20552060
joins.push(join);
20562061
}
2062+
20572063
Ok(TableWithJoins { relation, joins })
20582064
}
20592065

2066+
fn add_alias_to_single_table_in_parenthesis(
2067+
&self,
2068+
table_and_joins: TableWithJoins,
2069+
consumed_alias: TableAlias,
2070+
) -> Result<TableWithJoins, ParserError> {
2071+
// This function deal with alias after single table in parenthesis
2072+
// aliases not allow in joining between multiple tables (At least in snowflake DB)
2073+
if !table_and_joins.joins.is_empty() {
2074+
return Err(ParserError::ParserError(
2075+
"alias not allowed on multiple table join".to_owned(),
2076+
));
2077+
}
2078+
2079+
match table_and_joins.relation {
2080+
// If the realation is Nested join - we will seep the alias
2081+
// into the nested table, it's resonable as it's based
2082+
// on the assumation that aliasing not allowed on join between
2083+
// 2 diffrent tables - so the alias accutaly belong to the inner table
2084+
TableFactor::NestedJoin(table_and_joins_box) => Ok(TableWithJoins {
2085+
relation: TableFactor::NestedJoin(Box::new(
2086+
self.add_alias_to_single_table_in_parenthesis(
2087+
*table_and_joins_box,
2088+
consumed_alias,
2089+
)?,
2090+
)),
2091+
joins: Vec::new(),
2092+
}),
2093+
// Add the alias to dervied table
2094+
TableFactor::Derived {
2095+
lateral,
2096+
subquery,
2097+
alias,
2098+
} => match alias {
2099+
None => Ok(TableWithJoins {
2100+
relation: TableFactor::Derived {
2101+
lateral,
2102+
subquery,
2103+
alias: Some(consumed_alias),
2104+
},
2105+
joins: Vec::new(),
2106+
}),
2107+
// "Select * from (table1 as alias1) as alias1" - it prohabited
2108+
Some(alias) => Err(ParserError::ParserError(format!(
2109+
"duplicate alias {}",
2110+
alias
2111+
))),
2112+
},
2113+
// Add The alias to the table factor
2114+
TableFactor::Table {
2115+
name,
2116+
alias,
2117+
args,
2118+
with_hints,
2119+
} => match alias {
2120+
None => Ok(TableWithJoins {
2121+
relation: TableFactor::Table {
2122+
name,
2123+
alias: Some(consumed_alias),
2124+
args,
2125+
with_hints,
2126+
},
2127+
joins: Vec::new(),
2128+
}),
2129+
// "Select * from (table1 as alias1) as alias1" - it prohabited
2130+
Some(alias) => Err(ParserError::ParserError(format!(
2131+
"duplicate alias {}",
2132+
alias
2133+
))),
2134+
},
2135+
}
2136+
}
2137+
2138+
fn check_for_alias_after_parenthesis(
2139+
&mut self,
2140+
table_and_joins: TableWithJoins,
2141+
) -> Result<TableWithJoins, ParserError> {
2142+
// Try to parse alias if there is no alias - just return the TableWithJoins as is .
2143+
let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? {
2144+
None => {
2145+
return Ok(table_and_joins);
2146+
}
2147+
Some(alias) => alias,
2148+
};
2149+
2150+
// if we have alias, we attached it to the single table that inside parenthesis
2151+
self.add_alias_to_single_table_in_parenthesis(table_and_joins, alias)
2152+
}
2153+
2154+
fn validate_nested_join(&self, table_and_joins: &TableWithJoins) -> Result<(), ParserError> {
2155+
match table_and_joins.relation {
2156+
TableFactor::NestedJoin { .. } => (),
2157+
_ => {
2158+
if table_and_joins.joins.is_empty() {
2159+
// validate thats indeed join and not dervied
2160+
// or nested table
2161+
self.expected("joined table", self.peek_token())?
2162+
}
2163+
}
2164+
}
2165+
2166+
Ok(())
2167+
}
2168+
20602169
/// A table name or a parenthesized subquery, followed by optional `[AS] alias`
20612170
pub fn parse_table_factor(&mut self) -> Result<TableFactor, ParserError> {
20622171
if self.parse_keyword(Keyword::LATERAL) {
@@ -2100,10 +2209,21 @@ impl Parser {
21002209
// followed by some joins or another level of nesting.
21012210
let table_and_joins = self.parse_table_and_joins()?;
21022211
self.expect_token(&Token::RParen)?;
2212+
21032213
// The SQL spec prohibits derived and bare tables from appearing
2104-
// alone in parentheses. We don't enforce this as some databases
2105-
// (e.g. Snowflake) allow such syntax.
2106-
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
2214+
// alone in parentheses. But as some databases
2215+
// (e.g. Snowflake) allow such syntax - it can be allowed
2216+
// for specific dialects.
2217+
if self.dialect.alllow_single_table_in_parenthesis() {
2218+
// In case of a single derived or bare table in parentheses,
2219+
// the alias could also appear after the parentheses
2220+
let table_and_joins = self.check_for_alias_after_parenthesis(table_and_joins)?;
2221+
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
2222+
} else {
2223+
// Default behaviour
2224+
self.validate_nested_join(&table_and_joins)?;
2225+
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
2226+
}
21072227
} else {
21082228
let name = self.parse_object_name()?;
21092229
// Postgres, MSSQL: table-valued functions:

src/test_utils.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ impl TestedDialects {
5353
self.one_of_identical_results(|dialect| {
5454
let mut tokenizer = Tokenizer::new(dialect, sql);
5555
let tokens = tokenizer.tokenize().unwrap();
56-
f(&mut Parser::new(tokens))
56+
f(&mut Parser::new(tokens, dialect))
5757
})
5858
}
5959

@@ -104,7 +104,9 @@ impl TestedDialects {
104104
/// Ensures that `sql` parses as an expression, and is not modified
105105
/// after a serialization round-trip.
106106
pub fn verified_expr(&self, sql: &str) -> Expr {
107-
let ast = self.run_parser_method(sql, Parser::parse_expr).unwrap();
107+
let ast = self
108+
.run_parser_method(sql, |parser| parser.parse_expr())
109+
.unwrap();
108110
assert_eq!(sql, &ast.to_string(), "round-tripping without changes");
109111
ast
110112
}

tests/sqlparser_common.rs

+10-36
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use matches::assert_matches;
2222

2323
use sqlparser::ast::*;
2424
use sqlparser::dialect::keywords::ALL_KEYWORDS;
25-
use sqlparser::parser::{Parser, ParserError};
25+
use sqlparser::parser::ParserError;
2626
use sqlparser::test_utils::{all_dialects, expr_from_projection, number, only};
2727

2828
#[test]
@@ -147,13 +147,14 @@ fn parse_update() {
147147

148148
#[test]
149149
fn parse_invalid_table_name() {
150-
let ast = all_dialects().run_parser_method("db.public..customer", Parser::parse_object_name);
150+
let ast = all_dialects()
151+
.run_parser_method("db.public..customer", |parser| parser.parse_object_name());
151152
assert!(ast.is_err());
152153
}
153154

154155
#[test]
155156
fn parse_no_table_name() {
156-
let ast = all_dialects().run_parser_method("", Parser::parse_object_name);
157+
let ast = all_dialects().run_parser_method("", |parser| parser.parse_object_name());
157158
assert!(ast.is_err());
158159
}
159160

@@ -2273,19 +2274,12 @@ fn parse_join_nesting() {
22732274
vec![join(nest!(nest!(nest!(table("b"), table("c")))))]
22742275
);
22752276

2276-
// Parenthesized table names are non-standard, but supported in Snowflake SQL
2277-
let sql = "SELECT * FROM (a NATURAL JOIN (b))";
2278-
let select = verified_only_select(sql);
2279-
let from = only(select.from);
2280-
2281-
assert_eq!(from.relation, nest!(table("a"), nest!(table("b"))));
2282-
2283-
// Double parentheses around table names are non-standard, but supported in Snowflake SQL
2284-
let sql = "SELECT * FROM (a NATURAL JOIN ((b)))";
2285-
let select = verified_only_select(sql);
2286-
let from = only(select.from);
2287-
2288-
assert_eq!(from.relation, nest!(table("a"), nest!(nest!(table("b")))));
2277+
// Nesting a subquery in parentheses is non-standard, but supported in Snowflake SQL
2278+
let res = parse_sql_statements("SELECT * FROM ((SELECT 1) AS t)");
2279+
assert_eq!(
2280+
ParserError::ParserError("Expected joined table, found: EOF".to_string()),
2281+
res.unwrap_err()
2282+
);
22892283
}
22902284

22912285
#[test]
@@ -2427,26 +2421,6 @@ fn parse_derived_tables() {
24272421
}],
24282422
}))
24292423
);
2430-
2431-
// Nesting a subquery in parentheses is non-standard, but supported in Snowflake SQL
2432-
let sql = "SELECT * FROM ((SELECT 1) AS t)";
2433-
let select = verified_only_select(sql);
2434-
let from = only(select.from);
2435-
2436-
assert_eq!(
2437-
from.relation,
2438-
TableFactor::NestedJoin(Box::new(TableWithJoins {
2439-
relation: TableFactor::Derived {
2440-
lateral: false,
2441-
subquery: Box::new(verified_query("SELECT 1")),
2442-
alias: Some(TableAlias {
2443-
name: "t".into(),
2444-
columns: vec![],
2445-
})
2446-
},
2447-
joins: Vec::new(),
2448-
}))
2449-
);
24502424
}
24512425

24522426
#[test]

0 commit comments

Comments
 (0)