Skip to content

Commit 03efcf6

Browse files
committed
Add parse_comma_separated to simplify the parser
To use the new helper effectively, a few related changes were required:

- Each of the `parse_..._list` functions (`parse_cte_list`, `parse_order_by_expr_list`, `parse_select_list`) was replaced with a version that parses a single element of the list (e.g. `parse_cte`), with their callers now using `self.parse_comma_separated(Parser::parse_<one_element>)?`
- `parse_with_options` now parses the WITH keyword, and a separate `parse_sql_option` function (named after the struct it produces) was added to parse a single k=v option.
- `parse_list_of_ids` is gone, with the '.'-separated parsing moved to `parse_object_name`.

Custom comma-separated parsing is still used in:

- `parse_transaction_modes` (where the comma separator is optional)
- `parse_columns` (allows optional trailing comma, before the closing ')')
1 parent f11d74a commit 03efcf6

File tree

1 file changed

+98
-142
lines changed

1 file changed

+98
-142
lines changed

src/parser.rs

Lines changed: 98 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ impl Parser {
290290
vec![]
291291
};
292292
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
293-
self.parse_order_by_expr_list()?
293+
self.parse_comma_separated(Parser::parse_order_by_expr)?
294294
} else {
295295
vec![]
296296
};
@@ -829,6 +829,21 @@ impl Parser {
829829
}
830830
}
831831

832+
/// Parse a comma-separated list of 1+ items accepted by `F`
833+
pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
834+
where
835+
F: FnMut(&mut Parser) -> Result<T, ParserError>,
836+
{
837+
let mut values = vec![];
838+
loop {
839+
values.push(f(self)?);
840+
if !self.consume_token(&Token::Comma) {
841+
break;
842+
}
843+
}
844+
Ok(values)
845+
}
846+
832847
/// Parse a SQL CREATE statement
833848
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
834849
if self.parse_keyword("TABLE") {
@@ -872,11 +887,7 @@ impl Parser {
872887
// ANSI SQL and Postgres support RECURSIVE here, but we don't support it either.
873888
let name = self.parse_object_name()?;
874889
let columns = self.parse_parenthesized_column_list(Optional)?;
875-
let with_options = if self.parse_keyword("WITH") {
876-
self.parse_with_options()?
877-
} else {
878-
vec![]
879-
};
890+
let with_options = self.parse_with_options()?;
880891
self.expect_keyword("AS")?;
881892
let query = Box::new(self.parse_query()?);
882893
// Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here.
@@ -897,14 +908,10 @@ impl Parser {
897908
} else {
898909
return self.expected("TABLE or VIEW after DROP", self.peek_token());
899910
};
911+
// Many dialects support the non standard `IF EXISTS` clause and allow
912+
// specifying multiple objects to delete in a single statement
900913
let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]);
901-
let mut names = vec![];
902-
loop {
903-
names.push(self.parse_object_name()?);
904-
if !self.consume_token(&Token::Comma) {
905-
break;
906-
}
907-
}
914+
let names = self.parse_comma_separated(Parser::parse_object_name)?;
908915
let cascade = self.parse_keyword("CASCADE");
909916
let restrict = self.parse_keyword("RESTRICT");
910917
if cascade && restrict {
@@ -922,12 +929,7 @@ impl Parser {
922929
let table_name = self.parse_object_name()?;
923930
// parse optional column list (schema)
924931
let (columns, constraints) = self.parse_columns()?;
925-
926-
let with_options = if self.parse_keyword("WITH") {
927-
self.parse_with_options()?
928-
} else {
929-
vec![]
930-
};
932+
let with_options = self.parse_with_options()?;
931933

932934
Ok(Statement::CreateTable {
933935
name: table_name,
@@ -1075,19 +1077,21 @@ impl Parser {
10751077
}
10761078

10771079
pub fn parse_with_options(&mut self) -> Result<Vec<SqlOption>, ParserError> {
1078-
self.expect_token(&Token::LParen)?;
1079-
let mut options = vec![];
1080-
loop {
1081-
let name = self.parse_identifier()?;
1082-
self.expect_token(&Token::Eq)?;
1083-
let value = self.parse_value()?;
1084-
options.push(SqlOption { name, value });
1085-
if !self.consume_token(&Token::Comma) {
1086-
break;
1087-
}
1080+
if self.parse_keyword("WITH") {
1081+
self.expect_token(&Token::LParen)?;
1082+
let options = self.parse_comma_separated(Parser::parse_sql_option)?;
1083+
self.expect_token(&Token::RParen)?;
1084+
Ok(options)
1085+
} else {
1086+
Ok(vec![])
10881087
}
1089-
self.expect_token(&Token::RParen)?;
1090-
Ok(options)
1088+
}
1089+
1090+
pub fn parse_sql_option(&mut self) -> Result<SqlOption, ParserError> {
1091+
let name = self.parse_identifier()?;
1092+
self.expect_token(&Token::Eq)?;
1093+
let value = self.parse_value()?;
1094+
Ok(SqlOption { name, value })
10911095
}
10921096

10931097
pub fn parse_alter(&mut self) -> Result<Statement, ParserError> {
@@ -1333,22 +1337,17 @@ impl Parser {
13331337
}
13341338
}
13351339

1336-
/// Parse one or more identifiers with the specified separator between them
1337-
pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result<Vec<Ident>, ParserError> {
1340+
/// Parse a possibly qualified, possibly quoted identifier, e.g.
1341+
/// `foo` or `myschema."table"`
1342+
pub fn parse_object_name(&mut self) -> Result<ObjectName, ParserError> {
13381343
let mut idents = vec![];
13391344
loop {
13401345
idents.push(self.parse_identifier()?);
1341-
if !self.consume_token(separator) {
1346+
if !self.consume_token(&Token::Period) {
13421347
break;
13431348
}
13441349
}
1345-
Ok(idents)
1346-
}
1347-
1348-
/// Parse a possibly qualified, possibly quoted identifier, e.g.
1349-
/// `foo` or `myschema."table"`
1350-
pub fn parse_object_name(&mut self) -> Result<ObjectName, ParserError> {
1351-
Ok(ObjectName(self.parse_list_of_ids(&Token::Period)?))
1350+
Ok(ObjectName(idents))
13521351
}
13531352

13541353
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
@@ -1365,7 +1364,7 @@ impl Parser {
13651364
optional: IsOptional,
13661365
) -> Result<Vec<Ident>, ParserError> {
13671366
if self.consume_token(&Token::LParen) {
1368-
let cols = self.parse_list_of_ids(&Token::Comma)?;
1367+
let cols = self.parse_comma_separated(Parser::parse_identifier)?;
13691368
self.expect_token(&Token::RParen)?;
13701369
Ok(cols)
13711370
} else if optional == Optional {
@@ -1424,15 +1423,15 @@ impl Parser {
14241423
pub fn parse_query(&mut self) -> Result<Query, ParserError> {
14251424
let ctes = if self.parse_keyword("WITH") {
14261425
// TODO: optional RECURSIVE
1427-
self.parse_cte_list()?
1426+
self.parse_comma_separated(Parser::parse_cte)?
14281427
} else {
14291428
vec![]
14301429
};
14311430

14321431
let body = self.parse_query_body(0)?;
14331432

14341433
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
1435-
self.parse_order_by_expr_list()?
1434+
self.parse_comma_separated(Parser::parse_order_by_expr)?
14361435
} else {
14371436
vec![]
14381437
};
@@ -1465,27 +1464,17 @@ impl Parser {
14651464
})
14661465
}
14671466

1468-
/// Parse one or more (comma-separated) `alias AS (subquery)` CTEs,
1469-
/// assuming the initial `WITH` was already consumed.
1470-
fn parse_cte_list(&mut self) -> Result<Vec<Cte>, ParserError> {
1471-
let mut cte = vec![];
1472-
loop {
1473-
let alias = TableAlias {
1474-
name: self.parse_identifier()?,
1475-
columns: self.parse_parenthesized_column_list(Optional)?,
1476-
};
1477-
self.expect_keyword("AS")?;
1478-
self.expect_token(&Token::LParen)?;
1479-
cte.push(Cte {
1480-
alias,
1481-
query: self.parse_query()?,
1482-
});
1483-
self.expect_token(&Token::RParen)?;
1484-
if !self.consume_token(&Token::Comma) {
1485-
break;
1486-
}
1487-
}
1488-
Ok(cte)
1467+
/// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`)
1468+
fn parse_cte(&mut self) -> Result<Cte, ParserError> {
1469+
let alias = TableAlias {
1470+
name: self.parse_identifier()?,
1471+
columns: self.parse_parenthesized_column_list(Optional)?,
1472+
};
1473+
self.expect_keyword("AS")?;
1474+
self.expect_token(&Token::LParen)?;
1475+
let query = self.parse_query()?;
1476+
self.expect_token(&Token::RParen)?;
1477+
Ok(Cte { alias, query })
14891478
}
14901479

14911480
/// Parse a "query body", which is an expression with roughly the
@@ -1559,22 +1548,18 @@ impl Parser {
15591548
if all && distinct {
15601549
return parser_err!("Cannot specify both ALL and DISTINCT in SELECT");
15611550
}
1562-
let projection = self.parse_select_list()?;
1551+
let projection = self.parse_comma_separated(Parser::parse_select_item)?;
15631552

15641553
// Note that for keywords to be properly handled here, they need to be
15651554
// added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`,
15661555
// otherwise they may be parsed as an alias as part of the `projection`
15671556
// or `from`.
15681557

1569-
let mut from = vec![];
1570-
if self.parse_keyword("FROM") {
1571-
loop {
1572-
from.push(self.parse_table_and_joins()?);
1573-
if !self.consume_token(&Token::Comma) {
1574-
break;
1575-
}
1576-
}
1577-
}
1558+
let from = if self.parse_keyword("FROM") {
1559+
self.parse_comma_separated(Parser::parse_table_and_joins)?
1560+
} else {
1561+
vec![]
1562+
};
15781563

15791564
let selection = if self.parse_keyword("WHERE") {
15801565
Some(self.parse_expr()?)
@@ -1812,16 +1797,7 @@ impl Parser {
18121797
pub fn parse_update(&mut self) -> Result<Statement, ParserError> {
18131798
let table_name = self.parse_object_name()?;
18141799
self.expect_keyword("SET")?;
1815-
let mut assignments = vec![];
1816-
loop {
1817-
let id = self.parse_identifier()?;
1818-
self.expect_token(&Token::Eq)?;
1819-
let value = self.parse_expr()?;
1820-
assignments.push(Assignment { id, value });
1821-
if !self.consume_token(&Token::Comma) {
1822-
break;
1823-
}
1824-
}
1800+
let assignments = self.parse_comma_separated(Parser::parse_assignment)?;
18251801
let selection = if self.parse_keyword("WHERE") {
18261802
Some(self.parse_expr()?)
18271803
} else {
@@ -1834,16 +1810,17 @@ impl Parser {
18341810
})
18351811
}
18361812

1813+
/// Parse a `var = expr` assignment, used in an UPDATE statement
1814+
pub fn parse_assignment(&mut self) -> Result<Assignment, ParserError> {
1815+
let id = self.parse_identifier()?;
1816+
self.expect_token(&Token::Eq)?;
1817+
let value = self.parse_expr()?;
1818+
Ok(Assignment { id, value })
1819+
}
1820+
18371821
/// Parse a comma-delimited list of SQL expressions
18381822
pub fn parse_expr_list(&mut self) -> Result<Vec<Expr>, ParserError> {
1839-
let mut expr_list: Vec<Expr> = vec![];
1840-
loop {
1841-
expr_list.push(self.parse_expr()?);
1842-
if !self.consume_token(&Token::Comma) {
1843-
break;
1844-
}
1845-
}
1846-
Ok(expr_list)
1823+
Ok(self.parse_comma_separated(Parser::parse_expr)?)
18471824
}
18481825

18491826
pub fn parse_optional_args(&mut self) -> Result<Vec<Expr>, ParserError> {
@@ -1857,52 +1834,34 @@ impl Parser {
18571834
}
18581835

18591836
/// Parse a comma-delimited list of projections after SELECT
1860-
pub fn parse_select_list(&mut self) -> Result<Vec<SelectItem>, ParserError> {
1861-
let mut projections: Vec<SelectItem> = vec![];
1862-
loop {
1863-
let expr = self.parse_expr()?;
1864-
if let Expr::Wildcard = expr {
1865-
projections.push(SelectItem::Wildcard);
1866-
} else if let Expr::QualifiedWildcard(prefix) = expr {
1867-
projections.push(SelectItem::QualifiedWildcard(ObjectName(prefix)));
1837+
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
1838+
let expr = self.parse_expr()?;
1839+
if let Expr::Wildcard = expr {
1840+
Ok(SelectItem::Wildcard)
1841+
} else if let Expr::QualifiedWildcard(prefix) = expr {
1842+
Ok(SelectItem::QualifiedWildcard(ObjectName(prefix)))
1843+
} else {
1844+
// `expr` is a regular SQL expression and can be followed by an alias
1845+
if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? {
1846+
Ok(SelectItem::ExprWithAlias { expr, alias })
18681847
} else {
1869-
// `expr` is a regular SQL expression and can be followed by an alias
1870-
if let Some(alias) =
1871-
self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)?
1872-
{
1873-
projections.push(SelectItem::ExprWithAlias { expr, alias });
1874-
} else {
1875-
projections.push(SelectItem::UnnamedExpr(expr));
1876-
}
1877-
}
1878-
1879-
if !self.consume_token(&Token::Comma) {
1880-
break;
1848+
Ok(SelectItem::UnnamedExpr(expr))
18811849
}
18821850
}
1883-
Ok(projections)
18841851
}
18851852

1886-
/// Parse a comma-delimited list of ORDER BY expressions
1887-
pub fn parse_order_by_expr_list(&mut self) -> Result<Vec<OrderByExpr>, ParserError> {
1888-
let mut expr_list: Vec<OrderByExpr> = vec![];
1889-
loop {
1890-
let expr = self.parse_expr()?;
1891-
1892-
let asc = if self.parse_keyword("ASC") {
1893-
Some(true)
1894-
} else if self.parse_keyword("DESC") {
1895-
Some(false)
1896-
} else {
1897-
None
1898-
};
1853+
/// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY)
1854+
pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> {
1855+
let expr = self.parse_expr()?;
18991856

1900-
expr_list.push(OrderByExpr { expr, asc });
1901-
if !self.consume_token(&Token::Comma) {
1902-
break;
1903-
}
1904-
}
1905-
Ok(expr_list)
1857+
let asc = if self.parse_keyword("ASC") {
1858+
Some(true)
1859+
} else if self.parse_keyword("DESC") {
1860+
Some(false)
1861+
} else {
1862+
None
1863+
};
1864+
Ok(OrderByExpr { expr, asc })
19061865
}
19071866

19081867
/// Parse a LIMIT clause
@@ -1950,15 +1909,12 @@ impl Parser {
19501909
}
19511910

19521911
pub fn parse_values(&mut self) -> Result<Values, ParserError> {
1953-
let mut values = vec![];
1954-
loop {
1955-
self.expect_token(&Token::LParen)?;
1956-
values.push(self.parse_expr_list()?);
1957-
self.expect_token(&Token::RParen)?;
1958-
if !self.consume_token(&Token::Comma) {
1959-
break;
1960-
}
1961-
}
1912+
let values = self.parse_comma_separated(|parser| {
1913+
parser.expect_token(&Token::LParen)?;
1914+
let e = parser.parse_expr_list()?;
1915+
parser.expect_token(&Token::RParen)?;
1916+
Ok(e)
1917+
})?;
19621918
Ok(Values(values))
19631919
}
19641920

0 commit comments

Comments
 (0)