Skip to content

Commit 98a06d6

Browse files
authored
Merge pull request #111 from benesch/join-tweaks
Refine join parsing
2 parents 1998910 + 535505b commit 98a06d6

File tree

3 files changed

+131
-26
lines changed

3 files changed

+131
-26
lines changed

src/sqlast/query.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,10 @@ pub enum TableFactor {
238238
subquery: Box<SQLQuery>,
239239
alias: Option<TableAlias>,
240240
},
241+
/// Represents a parenthesized join expression, such as
242+
/// `(foo <JOIN> bar [ <JOIN> baz ... ])`.
243+
/// The inner `TableWithJoins` can have no joins only if its
244+
/// `relation` is itself a `TableFactor::NestedJoin`.
241245
NestedJoin(Box<TableWithJoins>),
242246
}
243247

src/sqlparser.rs

Lines changed: 84 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ pub enum IsOptional {
4040
}
4141
use IsOptional::*;
4242

43+
pub enum IsLateral {
44+
Lateral,
45+
NotLateral,
46+
}
47+
use IsLateral::*;
48+
4349
impl From<TokenizerError> for ParserError {
4450
fn from(e: TokenizerError) -> Self {
4551
ParserError::TokenizerError(format!("{:?}", e))
@@ -1523,7 +1529,10 @@ impl Parser {
15231529
} else if self.parse_keyword("VALUES") {
15241530
SQLSetExpr::Values(self.parse_values()?)
15251531
} else {
1526-
return self.expected("SELECT or a subquery in the query body", self.peek_token());
1532+
return self.expected(
1533+
"SELECT, VALUES, or a subquery in the query body",
1534+
self.peek_token(),
1535+
);
15271536
};
15281537

15291538
loop {
@@ -1668,30 +1677,65 @@ impl Parser {
16681677

16691678
/// A table name or a parenthesized subquery, followed by optional `[AS] alias`, or a parenthesized nested join
16701679
pub fn parse_table_factor(&mut self) -> Result<TableFactor, ParserError> {
1671-
let lateral = self.parse_keyword("LATERAL");
1680+
if self.parse_keyword("LATERAL") {
1681+
// LATERAL must always be followed by a subquery.
1682+
if !self.consume_token(&Token::LParen) {
1683+
self.expected("subquery after LATERAL", self.peek_token())?;
1684+
}
1685+
return self.parse_derived_table_factor(Lateral);
1686+
}
1687+
16721688
if self.consume_token(&Token::LParen) {
1673-
if self.parse_keyword("SELECT")
1674-
|| self.parse_keyword("WITH")
1675-
|| self.parse_keyword("VALUES")
1676-
{
1677-
self.prev_token();
1678-
let subquery = Box::new(self.parse_query()?);
1679-
self.expect_token(&Token::RParen)?;
1680-
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
1681-
Ok(TableFactor::Derived {
1682-
lateral,
1683-
subquery,
1684-
alias,
1685-
})
1686-
} else if lateral {
1687-
parser_err!("Expected subquery after LATERAL, found nested join".to_string())
1688-
} else {
1689-
let table_reference = self.parse_table_and_joins()?;
1690-
self.expect_token(&Token::RParen)?;
1691-
Ok(TableFactor::NestedJoin(Box::new(table_reference)))
1689+
let index = self.index;
1690+
// A left paren introduces either a derived table (i.e., a subquery)
1691+
// or a nested join. It's nearly impossible to determine ahead of
1692+
// time which it is... so we just try to parse both.
1693+
//
1694+
// Here's an example that demonstrates the complexity:
1695+
// /-------------------------------------------------------\
1696+
// | /-----------------------------------\ |
1697+
// SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) )
1698+
// ^ ^ ^ ^
1699+
// | | | |
1700+
// | | | |
1701+
// | | | (4) belongs to a SQLSetExpr::Query inside the subquery
1702+
// | | (3) starts a derived table (subquery)
1703+
// | (2) starts a nested join
1704+
// (1) an additional set of parens around a nested join
1705+
//
1706+
match self.parse_derived_table_factor(NotLateral) {
1707+
// The recently consumed '(' started a derived table, and we've
1708+
// parsed the subquery, followed by the closing ')', and the
1709+
// alias of the derived table. In the example above this is
1710+
// case (3), and the next token would be `NATURAL`.
1711+
Ok(table_factor) => Ok(table_factor),
1712+
Err(_) => {
1713+
// The '(' we've recently consumed does not start a derived
1714+
// table. For valid input this can happen either when the
1715+
// token following the paren can't start a query (e.g. `foo`
1716+
// in `FROM (foo NATURAL JOIN bar)`), or when the '(' we've
1717+
// consumed is followed by another '(' that starts a
1718+
// derived table, like (3), or another nested join (2).
1719+
//
1720+
// Ignore the error and back up to where we were before.
1721+
// Either we'll be able to parse a valid nested join, or
1722+
// we won't, and we'll return that error instead.
1723+
self.index = index;
1724+
let table_and_joins = self.parse_table_and_joins()?;
1725+
match table_and_joins.relation {
1726+
TableFactor::NestedJoin { .. } => (),
1727+
_ => {
1728+
if table_and_joins.joins.is_empty() {
1729+
// The SQL spec prohibits derived tables and bare
1730+
// tables from appearing alone in parentheses.
1731+
self.expected("joined table", self.peek_token())?
1732+
}
1733+
}
1734+
}
1735+
self.expect_token(&Token::RParen)?;
1736+
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
1737+
}
16921738
}
1693-
} else if lateral {
1694-
self.expected("subquery after LATERAL", self.peek_token())
16951739
} else {
16961740
let name = self.parse_object_name()?;
16971741
// Postgres, MSSQL: table-valued functions:
@@ -1721,6 +1765,23 @@ impl Parser {
17211765
}
17221766
}
17231767

1768+
pub fn parse_derived_table_factor(
1769+
&mut self,
1770+
lateral: IsLateral,
1771+
) -> Result<TableFactor, ParserError> {
1772+
let subquery = Box::new(self.parse_query()?);
1773+
self.expect_token(&Token::RParen)?;
1774+
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
1775+
Ok(TableFactor::Derived {
1776+
lateral: match lateral {
1777+
Lateral => true,
1778+
NotLateral => false,
1779+
},
1780+
subquery,
1781+
alias,
1782+
})
1783+
}
1784+
17241785
fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
17251786
if natural {
17261787
Ok(JoinConstraint::Natural)

tests/sqlparser_common.rs

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1742,6 +1742,12 @@ fn parse_join_nesting() {
17421742
from.joins,
17431743
vec![join(nest!(nest!(nest!(table("b"), table("c")))))]
17441744
);
1745+
1746+
let res = parse_sql_statements("SELECT * FROM (a NATURAL JOIN (b))");
1747+
assert_eq!(
1748+
ParserError::ParserError("Expected joined table, found: )".to_string()),
1749+
res.unwrap_err()
1750+
);
17451751
}
17461752

17471753
#[test]
@@ -1848,6 +1854,38 @@ fn parse_derived_tables() {
18481854
let sql = "SELECT * FROM t NATURAL JOIN (((SELECT 1)))";
18491855
let _ = verified_only_select(sql);
18501856
// TODO: add assertions
1857+
1858+
let sql = "SELECT * FROM (((SELECT 1) UNION (SELECT 2)) AS t1 NATURAL JOIN t2)";
1859+
let select = verified_only_select(sql);
1860+
let from = only(select.from);
1861+
assert_eq!(
1862+
from.relation,
1863+
TableFactor::NestedJoin(Box::new(TableWithJoins {
1864+
relation: TableFactor::Derived {
1865+
lateral: false,
1866+
subquery: Box::new(verified_query("(SELECT 1) UNION (SELECT 2)")),
1867+
alias: Some(TableAlias {
1868+
name: "t1".into(),
1869+
columns: vec![],
1870+
})
1871+
},
1872+
joins: vec![Join {
1873+
relation: TableFactor::Table {
1874+
name: SQLObjectName(vec!["t2".into()]),
1875+
alias: None,
1876+
args: vec![],
1877+
with_hints: vec![],
1878+
},
1879+
join_operator: JoinOperator::Inner(JoinConstraint::Natural),
1880+
}],
1881+
}))
1882+
);
1883+
1884+
let res = parse_sql_statements("SELECT * FROM ((SELECT 1) AS t)");
1885+
assert_eq!(
1886+
ParserError::ParserError("Expected joined table, found: )".to_string()),
1887+
res.unwrap_err()
1888+
);
18511889
}
18521890

18531891
#[test]
@@ -1952,15 +1990,15 @@ fn parse_exists_subquery() {
19521990
let res = parse_sql_statements("SELECT EXISTS (");
19531991
assert_eq!(
19541992
ParserError::ParserError(
1955-
"Expected SELECT or a subquery in the query body, found: EOF".to_string()
1993+
"Expected SELECT, VALUES, or a subquery in the query body, found: EOF".to_string()
19561994
),
19571995
res.unwrap_err(),
19581996
);
19591997

19601998
let res = parse_sql_statements("SELECT EXISTS (NULL)");
19611999
assert_eq!(
19622000
ParserError::ParserError(
1963-
"Expected SELECT or a subquery in the query body, found: NULL".to_string()
2001+
"Expected SELECT, VALUES, or a subquery in the query body, found: NULL".to_string()
19642002
),
19652003
res.unwrap_err(),
19662004
);
@@ -2360,7 +2398,9 @@ fn lateral_derived() {
23602398
let sql = "SELECT * FROM a LEFT JOIN LATERAL (b CROSS JOIN c)";
23612399
let res = parse_sql_statements(sql);
23622400
assert_eq!(
2363-
ParserError::ParserError("Expected subquery after LATERAL, found nested join".to_string()),
2401+
ParserError::ParserError(
2402+
"Expected SELECT, VALUES, or a subquery in the query body, found: b".to_string()
2403+
),
23642404
res.unwrap_err()
23652405
);
23662406
}

0 commit comments

Comments
 (0)