Skip to content

Commit 4d6ff63

Browse files
authored
snowflake: support for CONNECT BY and data as a json access key (apache#18)
A customer needs these fixes now, so need to merge these directly into our fork. When the upstream PRs land we'll revert this and incorporate that version in the next release merge.
2 parents 3f051d1 + f5825d5 commit 4d6ff63

File tree

6 files changed

+219
-6
lines changed

6 files changed

+219
-6
lines changed

src/ast/mod.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ pub use self::ddl::{
3939
};
4040
pub use self::operator::{BinaryOperator, UnaryOperator};
4141
pub use self::query::{
42-
Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause,
43-
ForJson, ForXml, GroupByExpr, IdentWithAlias, Join, JoinConstraint, JoinOperator,
42+
ConnectBy, Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch,
43+
ForClause, ForJson, ForXml, GroupByExpr, IdentWithAlias, Join, JoinConstraint, JoinOperator,
4444
JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
4545
NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem,
4646
ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator,
@@ -735,6 +735,8 @@ pub enum Expr {
735735
///
736736
/// See <https://docs.snowflake.com/en/sql-reference/constructs/where#joins-in-the-where-clause>.
737737
OuterJoin(Box<Expr>),
738+
/// A reference to the prior level in a CONNECT BY clause.
739+
Prior(Box<Expr>),
738740
}
739741

740742
impl fmt::Display for CastFormat {
@@ -1210,6 +1212,7 @@ impl fmt::Display for Expr {
12101212
Expr::OuterJoin(expr) => {
12111213
write!(f, "{expr} (+)")
12121214
}
1215+
Expr::Prior(expr) => write!(f, "PRIOR {expr}"),
12131216
}
12141217
}
12151218
}

src/ast/query.rs

+33
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ impl fmt::Display for Query {
9292
pub enum SetExpr {
9393
/// Restricted SELECT .. FROM .. HAVING (no ORDER BY or set operations)
9494
Select(Box<Select>),
95+
/// SELECT .. FROM .. STARTING WITH .. CONNECT BY
96+
ConnectBy(ConnectBy),
9597
/// Parenthesized SELECT subquery, which may include more set operations
9698
/// in its body and an optional ORDER BY / LIMIT.
9799
Query(Box<Query>),
@@ -112,6 +114,7 @@ impl fmt::Display for SetExpr {
112114
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113115
match self {
114116
SetExpr::Select(s) => write!(f, "{s}"),
117+
SetExpr::ConnectBy(c) => write!(f, "{c}"),
115118
SetExpr::Query(q) => write!(f, "({q})"),
116119
SetExpr::Values(v) => write!(f, "{v}"),
117120
SetExpr::Insert(v) => write!(f, "{v}"),
@@ -702,6 +705,36 @@ impl fmt::Display for TableWithJoins {
702705
}
703706
}
704707

708+
/// Joins a table to itself to process hierarchical data in the table.
709+
///
710+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/connect-by>.
711+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
712+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
713+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
714+
pub struct ConnectBy {
715+
/// SELECT
716+
pub projection: Vec<SelectItem>,
717+
/// FROM
718+
pub from: Vec<TableWithJoins>,
719+
/// START WITH
720+
pub condition: Expr,
721+
/// CONNECT BY
722+
pub relationships: Vec<Expr>,
723+
}
724+
725+
impl fmt::Display for ConnectBy {
726+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
727+
write!(
728+
f,
729+
"SELECT {projection} FROM {from} START WITH {condition} CONNECT BY {relationships}",
730+
projection = display_comma_separated(&self.projection),
731+
from = display_comma_separated(&self.from),
732+
condition = self.condition,
733+
relationships = display_comma_separated(&self.relationships)
734+
)
735+
}
736+
}
737+
705738
/// A table name or a parenthesized subquery with an optional alias
706739
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
707740
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]

src/keywords.rs

+3
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
797797
Keyword::FOR,
798798
// for MYSQL PARTITION SELECTION
799799
Keyword::PARTITION,
800+
// for Snowflake START WITH .. CONNECT BY
801+
Keyword::START,
802+
Keyword::CONNECT,
800803
];
801804

802805
/// Can't be used as a column alias, so that `SELECT <expr> alias`

src/parser/mod.rs

+77-3
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,22 @@ impl ParserOptions {
256256
}
257257
}
258258

259+
#[derive(Copy, Clone)]
260+
enum ParserState {
261+
/// The default state of the parser.
262+
Normal,
263+
/// The state when parsing a CONNECT BY expression. This allows parsing
264+
/// PRIOR expressions while still allowing prior as an identifier name
265+
/// in other contexts.
266+
ConnectBy,
267+
}
268+
259269
pub struct Parser<'a> {
260270
tokens: Vec<TokenWithLocation>,
261271
/// The index of the first unprocessed token in `self.tokens`
262272
index: usize,
273+
/// The current state of the parser.
274+
state: ParserState,
263275
/// The current dialect to use
264276
dialect: &'a dyn Dialect,
265277
/// Additional options that allow you to mix & match behavior
@@ -290,6 +302,7 @@ impl<'a> Parser<'a> {
290302
Self {
291303
tokens: vec![],
292304
index: 0,
305+
state: ParserState::Normal,
293306
dialect,
294307
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
295308
options: ParserOptions::default(),
@@ -966,6 +979,10 @@ impl<'a> Parser<'a> {
966979
self.prev_token();
967980
self.parse_bigquery_struct_literal()
968981
}
982+
Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
983+
let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?;
984+
Ok(Expr::Prior(Box::new(expr)))
985+
}
969986
// Here `w` is a word, check if it's a part of a multi-part
970987
// identifier, a function call, or a simple identifier:
971988
_ => match self.peek_token().token {
@@ -5894,12 +5911,15 @@ impl<'a> Parser<'a> {
58945911
)?,
58955912
},
58965913
// Case when Snowflake Semi-structured data like key:value
5914+
// FIXME: Find a better way to deal with this than exhaustively
5915+
// listing every keyword allowed here.
58975916
Keyword::NoKeyword
58985917
| Keyword::LOCATION
58995918
| Keyword::TYPE
59005919
| Keyword::DATE
59015920
| Keyword::START
59025921
| Keyword::END
5922+
| Keyword::DATA
59035923
if dialect_of!(self is SnowflakeDialect | GenericDialect) =>
59045924
{
59055925
Ok(Value::UnQuotedString(w.value))
@@ -7154,7 +7174,7 @@ impl<'a> Parser<'a> {
71547174
// We parse the expression using a Pratt parser, as in `parse_expr()`.
71557175
// Start by parsing a restricted SELECT or a `(subquery)`:
71567176
let mut expr = if self.parse_keyword(Keyword::SELECT) {
7157-
SetExpr::Select(Box::new(self.parse_select()?))
7177+
self.parse_select()?
71587178
} else if self.consume_token(&Token::LParen) {
71597179
// CTEs are not allowed here, but the parser currently accepts them
71607180
let subquery = self.parse_query()?;
@@ -7242,7 +7262,7 @@ impl<'a> Parser<'a> {
72427262

72437263
/// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`),
72447264
/// assuming the initial `SELECT` was already consumed
7245-
pub fn parse_select(&mut self) -> Result<Select, ParserError> {
7265+
pub fn parse_select(&mut self) -> Result<SetExpr, ParserError> {
72467266
let value_table_mode =
72477267
if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) {
72487268
if self.parse_keyword(Keyword::VALUE) {
@@ -7294,6 +7314,18 @@ impl<'a> Parser<'a> {
72947314
vec![]
72957315
};
72967316

7317+
if distinct.is_none()
7318+
&& top.is_none()
7319+
&& into.is_none()
7320+
&& !from.is_empty()
7321+
&& self
7322+
.parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT])
7323+
.is_some()
7324+
{
7325+
self.prev_token();
7326+
return Ok(SetExpr::ConnectBy(self.parse_connect_by(projection, from)?));
7327+
}
7328+
72977329
let mut lateral_views = vec![];
72987330
loop {
72997331
if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) {
@@ -7377,7 +7409,7 @@ impl<'a> Parser<'a> {
73777409
None
73787410
};
73797411

7380-
Ok(Select {
7412+
Ok(SetExpr::Select(Box::new(Select {
73817413
distinct,
73827414
top,
73837415
projection,
@@ -7393,6 +7425,48 @@ impl<'a> Parser<'a> {
73937425
named_window: named_windows,
73947426
qualify,
73957427
value_table_mode,
7428+
})))
7429+
}
7430+
7431+
fn with_state<T, F>(&mut self, state: ParserState, mut f: F) -> Result<T, ParserError>
7432+
where
7433+
F: FnMut(&mut Parser) -> Result<T, ParserError>,
7434+
{
7435+
let current_state = self.state;
7436+
self.state = state;
7437+
let res = f(self);
7438+
self.state = current_state;
7439+
res
7440+
}
7441+
7442+
pub fn parse_connect_by(
7443+
&mut self,
7444+
projection: Vec<SelectItem>,
7445+
from: Vec<TableWithJoins>,
7446+
) -> Result<ConnectBy, ParserError> {
7447+
debug_assert!(!from.is_empty());
7448+
7449+
let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) {
7450+
let relationships = self.with_state(ParserState::ConnectBy, |parser| {
7451+
parser.parse_comma_separated(Parser::parse_expr)
7452+
})?;
7453+
self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
7454+
let condition = self.parse_expr()?;
7455+
(condition, relationships)
7456+
} else {
7457+
self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
7458+
let condition = self.parse_expr()?;
7459+
self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?;
7460+
let relationships = self.with_state(ParserState::ConnectBy, |parser| {
7461+
parser.parse_comma_separated(Parser::parse_expr)
7462+
})?;
7463+
(condition, relationships)
7464+
};
7465+
Ok(ConnectBy {
7466+
projection,
7467+
from,
7468+
condition,
7469+
relationships,
73967470
})
73977471
}
73987472

src/test_utils.rs

+10
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,16 @@ impl TestedDialects {
157157
}
158158
}
159159

160+
/// Ensures that `sql` parses as a single [Query], and that
161+
/// re-serializing the parse result matches the given canonical
162+
/// sql string.
163+
pub fn verified_query_with_canonical(&self, query: &str, canonical: &str) -> Query {
164+
match self.one_statement_parses_to(query, canonical) {
165+
Statement::Query(query) => *query,
166+
_ => panic!("Expected Query"),
167+
}
168+
}
169+
160170
/// Ensures that `sql` parses as a single [Select], and that
161171
/// re-serializing the parse result produces the same `sql`
162172
/// string (is not modified after a serialization round-trip).

tests/sqlparser_snowflake.rs

+91-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ fn parse_json_using_colon() {
230230

231231
snowflake().one_statement_parses_to("SELECT a:b::int FROM t", "SELECT CAST(a:b AS INT) FROM t");
232232

233-
let sql = "SELECT a:start, a:end FROM t";
233+
let sql = "SELECT a:start, a:end, a:data FROM t";
234234
let select = snowflake().verified_only_select(sql);
235235
assert_eq!(
236236
vec![
@@ -243,6 +243,11 @@ fn parse_json_using_colon() {
243243
left: Box::new(Expr::Identifier(Ident::new("a"))),
244244
operator: JsonOperator::Colon,
245245
right: Box::new(Expr::Value(Value::UnQuotedString("end".to_string()))),
246+
}),
247+
SelectItem::UnnamedExpr(Expr::JsonAccess {
248+
left: Box::new(Expr::Identifier(Ident::new("a"))),
249+
operator: JsonOperator::Colon,
250+
right: Box::new(Expr::Value(Value::UnQuotedString("data".to_string()))),
246251
})
247252
],
248253
select.projection
@@ -1503,3 +1508,88 @@ fn parse_comma_outer_join() {
15031508
"SELECT t1.c1, t2.c2 FROM t1, t2 WHERE t1.c1 = t2.c2 (+)",
15041509
);
15051510
}
1511+
1512+
#[test]
1513+
fn parse_connect_by() {
1514+
let expect_query = Query {
1515+
with: None,
1516+
body: Box::new(SetExpr::ConnectBy(ConnectBy {
1517+
projection: vec![
1518+
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("employee_id"))),
1519+
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))),
1520+
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))),
1521+
],
1522+
from: vec![TableWithJoins {
1523+
relation: TableFactor::Table {
1524+
name: ObjectName(vec![Ident::new("employees")]),
1525+
alias: None,
1526+
args: None,
1527+
with_hints: vec![],
1528+
version: None,
1529+
partitions: vec![],
1530+
},
1531+
joins: vec![],
1532+
}],
1533+
condition: Expr::BinaryOp {
1534+
left: Box::new(Expr::Identifier(Ident::new("title"))),
1535+
op: BinaryOperator::Eq,
1536+
right: Box::new(Expr::Value(Value::SingleQuotedString(
1537+
"president".to_owned(),
1538+
))),
1539+
},
1540+
relationships: vec![Expr::BinaryOp {
1541+
left: Box::new(Expr::Identifier(Ident::new("manager_id"))),
1542+
op: BinaryOperator::Eq,
1543+
right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new(
1544+
"employee_id",
1545+
))))),
1546+
}],
1547+
})),
1548+
order_by: vec![OrderByExpr {
1549+
expr: Expr::Identifier(Ident::new("employee_id")),
1550+
asc: None,
1551+
nulls_first: None,
1552+
}],
1553+
limit: None,
1554+
limit_by: vec![],
1555+
offset: None,
1556+
fetch: None,
1557+
locks: vec![],
1558+
for_clause: None,
1559+
};
1560+
1561+
let connect_by_1 = concat!(
1562+
"SELECT employee_id, manager_id, title FROM employees ",
1563+
"START WITH title = 'president' ",
1564+
"CONNECT BY manager_id = PRIOR employee_id ",
1565+
"ORDER BY employee_id"
1566+
);
1567+
1568+
assert_eq!(
1569+
snowflake_and_generic().verified_query(connect_by_1),
1570+
expect_query
1571+
);
1572+
1573+
// CONNECT BY can come before START WITH
1574+
let connect_by_2 = concat!(
1575+
"SELECT employee_id, manager_id, title FROM employees ",
1576+
"CONNECT BY manager_id = PRIOR employee_id ",
1577+
"START WITH title = 'president' ",
1578+
"ORDER BY employee_id"
1579+
);
1580+
assert_eq!(
1581+
snowflake_and_generic().verified_query_with_canonical(connect_by_2, connect_by_1),
1582+
expect_query
1583+
);
1584+
1585+
// PRIOR expressions are only valid within a CONNECT BY, and the the token
1586+
// `prior` is valid as an identifier anywhere else.
1587+
assert_eq!(
1588+
snowflake_and_generic()
1589+
.verified_only_select("SELECT prior FROM some_table")
1590+
.projection,
1591+
vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(
1592+
"prior"
1593+
)))]
1594+
);
1595+
}

0 commit comments

Comments
 (0)