Skip to content

Commit 906ad24

Browse files
committed
support for CONNECT BY
1 parent 3f051d1 commit 906ad24

File tree

6 files changed

+210
-5
lines changed

6 files changed

+210
-5
lines changed

src/ast/mod.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ pub use self::ddl::{
3939
};
4040
pub use self::operator::{BinaryOperator, UnaryOperator};
4141
pub use self::query::{
42-
Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, ForClause,
43-
ForJson, ForXml, GroupByExpr, IdentWithAlias, Join, JoinConstraint, JoinOperator,
42+
ConnectBy, Cte, CteAsMaterialized, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch,
43+
ForClause, ForJson, ForXml, GroupByExpr, IdentWithAlias, Join, JoinConstraint, JoinOperator,
4444
JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType,
4545
NamedWindowDefinition, NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem,
4646
ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SetOperator,
@@ -735,6 +735,8 @@ pub enum Expr {
735735
///
736736
/// See <https://docs.snowflake.com/en/sql-reference/constructs/where#joins-in-the-where-clause>.
737737
OuterJoin(Box<Expr>),
738+
/// A reference to the prior level in a CONNECT BY clause.
739+
Prior(Box<Expr>),
738740
}
739741

740742
impl fmt::Display for CastFormat {
@@ -1210,6 +1212,7 @@ impl fmt::Display for Expr {
12101212
Expr::OuterJoin(expr) => {
12111213
write!(f, "{expr} (+)")
12121214
}
1215+
Expr::Prior(expr) => write!(f, "PRIOR {expr}"),
12131216
}
12141217
}
12151218
}

src/ast/query.rs

+33
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ impl fmt::Display for Query {
9292
pub enum SetExpr {
9393
/// Restricted SELECT .. FROM .. HAVING (no ORDER BY or set operations)
9494
Select(Box<Select>),
95+
/// SELECT .. FROM .. START WITH .. CONNECT BY
96+
ConnectBy(ConnectBy),
9597
/// Parenthesized SELECT subquery, which may include more set operations
9698
/// in its body and an optional ORDER BY / LIMIT.
9799
Query(Box<Query>),
@@ -112,6 +114,7 @@ impl fmt::Display for SetExpr {
112114
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113115
match self {
114116
SetExpr::Select(s) => write!(f, "{s}"),
117+
SetExpr::ConnectBy(c) => write!(f, "{c}"),
115118
SetExpr::Query(q) => write!(f, "({q})"),
116119
SetExpr::Values(v) => write!(f, "{v}"),
117120
SetExpr::Insert(v) => write!(f, "{v}"),
@@ -702,6 +705,36 @@ impl fmt::Display for TableWithJoins {
702705
}
703706
}
704707

708+
/// Joins a table to itself to process hierarchical data in the table.
709+
///
/// Rendered by its `Display` impl as
/// `SELECT .. FROM .. START WITH .. CONNECT BY ..`.
///
710+
/// See <https://docs.snowflake.com/en/sql-reference/constructs/connect-by>.
711+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
712+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
713+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
714+
pub struct ConnectBy {
715+
/// Items of the `SELECT` list.
716+
pub projection: Vec<SelectItem>,
717+
/// Tables (with their joins) listed in the `FROM` clause.
718+
pub from: Vec<TableWithJoins>,
719+
/// Expression following `START WITH`.
720+
pub condition: Expr,
721+
/// Comma-separated expressions following `CONNECT BY`.
722+
pub relationships: Vec<Expr>,
723+
}
724+
725+
impl fmt::Display for ConnectBy {
726+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
727+
write!(
728+
f,
729+
"SELECT {projection} FROM {from} START WITH {condition} CONNECT BY {relationships}",
730+
projection = display_comma_separated(&self.projection),
731+
from = display_comma_separated(&self.from),
732+
condition = self.condition,
733+
relationships = display_comma_separated(&self.relationships)
734+
)
735+
}
736+
}
737+
705738
/// A table name or a parenthesized subquery with an optional alias
706739
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
707740
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]

src/keywords.rs

+3
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
797797
Keyword::FOR,
798798
// for MYSQL PARTITION SELECTION
799799
Keyword::PARTITION,
800+
// for Snowflake START WITH .. CONNECT BY
801+
Keyword::START,
802+
Keyword::CONNECT,
800803
];
801804

802805
/// Can't be used as a column alias, so that `SELECT <expr> alias`

src/parser/mod.rs

+74-3
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,22 @@ impl ParserOptions {
256256
}
257257
}
258258

259+
/// Context-dependent parsing mode of the parser.
///
/// `Debug`, `PartialEq` and `Eq` are derived so the state can be printed
/// while debugging and compared with `==`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum ParserState {
    /// The default state of the parser.
    Normal,
    /// The state when parsing a CONNECT BY expression. This allows parsing
    /// PRIOR expressions while still allowing prior as an identifier name
    /// in other contexts.
    ConnectBy,
}
268+
259269
pub struct Parser<'a> {
260270
tokens: Vec<TokenWithLocation>,
261271
/// The index of the first unprocessed token in `self.tokens`
262272
index: usize,
273+
/// The current state of the parser.
274+
state: ParserState,
263275
/// The current dialect to use
264276
dialect: &'a dyn Dialect,
265277
/// Additional options that allow you to mix & match behavior
@@ -290,6 +302,7 @@ impl<'a> Parser<'a> {
290302
Self {
291303
tokens: vec![],
292304
index: 0,
305+
state: ParserState::Normal,
293306
dialect,
294307
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
295308
options: ParserOptions::default(),
@@ -966,6 +979,10 @@ impl<'a> Parser<'a> {
966979
self.prev_token();
967980
self.parse_bigquery_struct_literal()
968981
}
982+
Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
983+
let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?;
984+
Ok(Expr::Prior(Box::new(expr)))
985+
}
969986
// Here `w` is a word, check if it's a part of a multi-part
970987
// identifier, a function call, or a simple identifier:
971988
_ => match self.peek_token().token {
@@ -7154,7 +7171,7 @@ impl<'a> Parser<'a> {
71547171
// We parse the expression using a Pratt parser, as in `parse_expr()`.
71557172
// Start by parsing a restricted SELECT or a `(subquery)`:
71567173
let mut expr = if self.parse_keyword(Keyword::SELECT) {
7157-
SetExpr::Select(Box::new(self.parse_select()?))
7174+
self.parse_select()?
71587175
} else if self.consume_token(&Token::LParen) {
71597176
// CTEs are not allowed here, but the parser currently accepts them
71607177
let subquery = self.parse_query()?;
@@ -7242,7 +7259,7 @@ impl<'a> Parser<'a> {
72427259

72437260
/// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`),
72447261
/// assuming the initial `SELECT` was already consumed
7245-
pub fn parse_select(&mut self) -> Result<Select, ParserError> {
7262+
pub fn parse_select(&mut self) -> Result<SetExpr, ParserError> {
72467263
let value_table_mode =
72477264
if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) {
72487265
if self.parse_keyword(Keyword::VALUE) {
@@ -7294,6 +7311,18 @@ impl<'a> Parser<'a> {
72947311
vec![]
72957312
};
72967313

7314+
if distinct.is_none()
7315+
&& top.is_none()
7316+
&& into.is_none()
7317+
&& !from.is_empty()
7318+
&& self
7319+
.parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT])
7320+
.is_some()
7321+
{
7322+
self.prev_token();
7323+
return Ok(SetExpr::ConnectBy(self.parse_connect_by(projection, from)?));
7324+
}
7325+
72977326
let mut lateral_views = vec![];
72987327
loop {
72997328
if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) {
@@ -7377,7 +7406,7 @@ impl<'a> Parser<'a> {
73777406
None
73787407
};
73797408

7380-
Ok(Select {
7409+
Ok(SetExpr::Select(Box::new(Select {
73817410
distinct,
73827411
top,
73837412
projection,
@@ -7393,6 +7422,48 @@ impl<'a> Parser<'a> {
73937422
named_window: named_windows,
73947423
qualify,
73957424
value_table_mode,
7425+
})))
7426+
}
7427+
7428+
fn with_state<T, F>(&mut self, state: ParserState, mut f: F) -> Result<T, ParserError>
7429+
where
7430+
F: FnMut(&mut Parser) -> Result<T, ParserError>,
7431+
{
7432+
let current_state = self.state;
7433+
self.state = state;
7434+
let res = f(self);
7435+
self.state = current_state;
7436+
res
7437+
}
7438+
7439+
pub fn parse_connect_by(
7440+
&mut self,
7441+
projection: Vec<SelectItem>,
7442+
from: Vec<TableWithJoins>,
7443+
) -> Result<ConnectBy, ParserError> {
7444+
debug_assert!(!from.is_empty());
7445+
7446+
let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) {
7447+
let relationships = self.with_state(ParserState::ConnectBy, |parser| {
7448+
parser.parse_comma_separated(Parser::parse_expr)
7449+
})?;
7450+
self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
7451+
let condition = self.parse_expr()?;
7452+
(condition, relationships)
7453+
} else {
7454+
self.expect_keywords(&[Keyword::START, Keyword::WITH])?;
7455+
let condition = self.parse_expr()?;
7456+
self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?;
7457+
let relationships = self.with_state(ParserState::ConnectBy, |parser| {
7458+
parser.parse_comma_separated(Parser::parse_expr)
7459+
})?;
7460+
(condition, relationships)
7461+
};
7462+
Ok(ConnectBy {
7463+
projection,
7464+
from,
7465+
condition,
7466+
relationships,
73967467
})
73977468
}
73987469

src/test_utils.rs

+10
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,16 @@ impl TestedDialects {
157157
}
158158
}
159159

160+
/// Ensures that `sql` parses as a single [Query], and that
161+
/// re-serializing the parse result matches the given canonical
162+
/// sql string.
163+
pub fn verified_query_with_canonical(&self, query: &str, canonical: &str) -> Query {
164+
match self.one_statement_parses_to(query, canonical) {
165+
Statement::Query(query) => *query,
166+
_ => panic!("Expected Query"),
167+
}
168+
}
169+
160170
/// Ensures that `sql` parses as a single [Select], and that
161171
/// re-serializing the parse result produces the same `sql`
162172
/// string (is not modified after a serialization round-trip).

tests/sqlparser_snowflake.rs

+85
Original file line numberDiff line numberDiff line change
@@ -1503,3 +1503,88 @@ fn parse_comma_outer_join() {
15031503
"SELECT t1.c1, t2.c2 FROM t1, t2 WHERE t1.c1 = t2.c2 (+)",
15041504
);
15051505
}
1506+
1507+
#[test]
fn parse_connect_by() {
    // Small builders to keep the expected AST readable.
    let ident = |name: &str| Expr::Identifier(Ident::new(name));
    let column = |name: &str| SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(name)));

    let employees = TableWithJoins {
        relation: TableFactor::Table {
            name: ObjectName(vec![Ident::new("employees")]),
            alias: None,
            args: None,
            with_hints: vec![],
            version: None,
            partitions: vec![],
        },
        joins: vec![],
    };

    // START WITH title = 'president'
    let start_with = Expr::BinaryOp {
        left: Box::new(ident("title")),
        op: BinaryOperator::Eq,
        right: Box::new(Expr::Value(Value::SingleQuotedString(
            "president".to_owned(),
        ))),
    };

    // CONNECT BY manager_id = PRIOR employee_id
    let manager_is_prior_employee = Expr::BinaryOp {
        left: Box::new(ident("manager_id")),
        op: BinaryOperator::Eq,
        right: Box::new(Expr::Prior(Box::new(ident("employee_id")))),
    };

    let expect_query = Query {
        with: None,
        body: Box::new(SetExpr::ConnectBy(ConnectBy {
            projection: vec![
                column("employee_id"),
                column("manager_id"),
                column("title"),
            ],
            from: vec![employees],
            condition: start_with,
            relationships: vec![manager_is_prior_employee],
        })),
        order_by: vec![OrderByExpr {
            expr: ident("employee_id"),
            asc: None,
            nulls_first: None,
        }],
        limit: None,
        limit_by: vec![],
        offset: None,
        fetch: None,
        locks: vec![],
        for_clause: None,
    };

    let dialects = snowflake_and_generic();

    // Canonical form: START WITH precedes CONNECT BY.
    let start_with_first = concat!(
        "SELECT employee_id, manager_id, title FROM employees ",
        "START WITH title = 'president' ",
        "CONNECT BY manager_id = PRIOR employee_id ",
        "ORDER BY employee_id"
    );
    assert_eq!(dialects.verified_query(start_with_first), expect_query);

    // CONNECT BY can come before START WITH
    let connect_by_first = concat!(
        "SELECT employee_id, manager_id, title FROM employees ",
        "CONNECT BY manager_id = PRIOR employee_id ",
        "START WITH title = 'president' ",
        "ORDER BY employee_id"
    );
    assert_eq!(
        dialects.verified_query_with_canonical(connect_by_first, start_with_first),
        expect_query
    );

    // PRIOR expressions are only valid within a CONNECT BY, and the token
    // `prior` is valid as an identifier anywhere else.
    let prior_as_identifier = dialects
        .verified_only_select("SELECT prior FROM some_table")
        .projection;
    assert_eq!(prior_as_identifier, vec![column("prior")]);
}

0 commit comments

Comments
 (0)