From 1b31f0373237cbf1d6e2ec163dc991e30db8f246 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 7 Jul 2019 03:22:30 +0300 Subject: [PATCH 1/8] Change write! to write_str for better rustfmt formatting --- src/ast/mod.rs | 62 +++++++++++++++++---------------------------- src/ast/operator.rs | 52 ++++++++++++++++--------------------- src/ast/query.rs | 14 ++++------ src/ast/value.rs | 20 ++++++--------- 4 files changed, 58 insertions(+), 90 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 33010d2fe..79d780873 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -660,19 +660,15 @@ pub enum FileFormat { impl fmt::Display for FileFormat { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::FileFormat::*; - write!( - f, - "{}", - match self { - TEXTFILE => "TEXTFILE", - SEQUENCEFILE => "SEQUENCEFILE", - ORC => "ORC", - PARQUET => "PARQUET", - AVRO => "AVRO", - RCFILE => "RCFILE", - JSONFILE => "TEXTFILE", - } - ) + f.write_str(match self { + TEXTFILE => "TEXTFILE", + SEQUENCEFILE => "SEQUENCEFILE", + ORC => "ORC", + PARQUET => "PARQUET", + AVRO => "AVRO", + RCFILE => "RCFILE", + JSONFILE => "TEXTFILE", + }) } } @@ -707,14 +703,10 @@ pub enum ObjectType { impl fmt::Display for ObjectType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - match self { - ObjectType::Table => "TABLE", - ObjectType::View => "VIEW", - } - ) + f.write_str(match self { + ObjectType::Table => "TABLE", + ObjectType::View => "VIEW", + }) } } @@ -755,14 +747,10 @@ pub enum TransactionAccessMode { impl fmt::Display for TransactionAccessMode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use TransactionAccessMode::*; - write!( - f, - "{}", - match self { - ReadOnly => "READ ONLY", - ReadWrite => "READ WRITE", - } - ) + f.write_str(match self { + ReadOnly => "READ ONLY", + ReadWrite => "READ WRITE", + }) } } @@ -777,15 +765,11 @@ pub enum TransactionIsolationLevel { impl fmt::Display for TransactionIsolationLevel { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use TransactionIsolationLevel::*; - write!( - f, - "{}", - match self { - ReadUncommitted => "READ UNCOMMITTED", - ReadCommitted => "READ COMMITTED", - RepeatableRead => "REPEATABLE READ", - Serializable => "SERIALIZABLE", - } - ) + f.write_str(match self { + ReadUncommitted => "READ UNCOMMITTED", + ReadCommitted => "READ COMMITTED", + RepeatableRead => "REPEATABLE READ", + Serializable => "SERIALIZABLE", + }) } } diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 747cfd9ef..f2970482c 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -22,15 +22,11 @@ pub enum UnaryOperator { impl fmt::Display for UnaryOperator { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - match self { - UnaryOperator::Plus => "+", - UnaryOperator::Minus => "-", - UnaryOperator::Not => "NOT", - } - ) + f.write_str(match self { + UnaryOperator::Plus => "+", + UnaryOperator::Minus => "-", + UnaryOperator::Not => "NOT", + }) } } @@ -56,26 +52,22 @@ pub enum BinaryOperator { impl fmt::Display for BinaryOperator { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - match self { - BinaryOperator::Plus => "+", - BinaryOperator::Minus => "-", - BinaryOperator::Multiply => "*", - BinaryOperator::Divide => "/", - BinaryOperator::Modulus => "%", - BinaryOperator::Gt => ">", - BinaryOperator::Lt => "<", - BinaryOperator::GtEq => ">=", - BinaryOperator::LtEq => "<=", - BinaryOperator::Eq => "=", - BinaryOperator::NotEq => "<>", - BinaryOperator::And => "AND", - BinaryOperator::Or => "OR", - BinaryOperator::Like => "LIKE", - BinaryOperator::NotLike => "NOT LIKE", - } - ) + f.write_str(match self { + BinaryOperator::Plus => "+", + BinaryOperator::Minus => "-", + BinaryOperator::Multiply => "*", + BinaryOperator::Divide => "/", + BinaryOperator::Modulus => "%", + BinaryOperator::Gt => ">", + BinaryOperator::Lt => "<", + BinaryOperator::GtEq => ">=", + BinaryOperator::LtEq => "<=", + BinaryOperator::Eq => "=", + BinaryOperator::NotEq => "<>", + BinaryOperator::And => "AND", + BinaryOperator::Or => "OR", + BinaryOperator::Like => "LIKE", + BinaryOperator::NotLike => "NOT LIKE", + }) } } diff --git a/src/ast/query.rs b/src/ast/query.rs index e1f2637d7..656f7f14b 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -100,15 +100,11 @@ pub enum SetOperator { impl fmt::Display for SetOperator { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - match self { - SetOperator::Union => "UNION", - SetOperator::Except => "EXCEPT", - SetOperator::Intersect => "INTERSECT", - } - ) + f.write_str(match self { + SetOperator::Union => "UNION", + SetOperator::Except => "EXCEPT", + SetOperator::Intersect => "INTERSECT", + }) } } diff --git a/src/ast/value.rs b/src/ast/value.rs index f0e418a25..b406dfe49 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -128,18 +128,14 @@ pub enum DateTimeField { impl fmt::Display for DateTimeField { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - match self { - DateTimeField::Year => "YEAR", - DateTimeField::Month => "MONTH", - DateTimeField::Day => "DAY", - DateTimeField::Hour => "HOUR", - DateTimeField::Minute => "MINUTE", - DateTimeField::Second => "SECOND", - } - ) + f.write_str(match self { + DateTimeField::Year => "YEAR", + DateTimeField::Month => "MONTH", + DateTimeField::Day => "DAY", + DateTimeField::Hour => "HOUR", + DateTimeField::Minute => "MINUTE", + DateTimeField::Second => "SECOND", + }) } } From ac8ba107e3a69e938ca00c0a24173b68aa18c669 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 7 Jul 2019 03:21:59 +0300 Subject: [PATCH 2/8] Simplify Display implementations introduced in #124 --- src/ast/mod.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 79d780873..b89abda9e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -165,12 +165,9 @@ pub enum Expr { impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Expr::Identifier(s) => write!(f, "{}", s), + Expr::Identifier(s) => f.write_str(s), Expr::Wildcard => f.write_str("*"), - Expr::QualifiedWildcard(q) => { - write!(f, "{}", display_separated(q, "."))?; - f.write_str(".*") - } + Expr::QualifiedWildcard(q) => write!(f, "{}.*", display_separated(q, ".")), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), Expr::IsNull(ast) => write!(f, "{} IS NULL", ast), Expr::IsNotNull(ast) => write!(f, "{} IS NOT NULL", ast), @@ -732,7 +729,7 @@ impl fmt::Display for TransactionMode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use TransactionMode::*; match self { - AccessMode(access_mode) => write!(f, "{}", access_mode.to_string()), + AccessMode(access_mode) => write!(f, "{}", access_mode), IsolationLevel(iso_level) => write!(f, "ISOLATION LEVEL {}", iso_level), } } From 64e7be0c685541f3d37960ec19da777c8e335d41 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 7 Jul 2019 02:23:10 +0300 Subject: [PATCH 3/8] Move ObjectName closer to Ident in mod.rs --- src/ast/mod.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b89abda9e..8067ca725 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -71,6 +71,16 @@ where /// Identifier name, in the originally quoted form (e.g. `"id"`) pub type Ident = String; +/// A name of a table, view, custom type, etc., possibly multi-part, i.e. db.schema.obj +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ObjectName(pub Vec); + +impl fmt::Display for ObjectName { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", display_separated(&self.0, ".")) + } +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -593,16 +603,6 @@ impl fmt::Display for Statement { } } -/// A name of a table, view, custom type, etc., possibly multi-part, i.e. db.schema.obj -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct ObjectName(pub Vec); - -impl fmt::Display for ObjectName { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", display_separated(&self.0, ".")) - } -} - /// SQL assignment `foo = expr` as used in SQLUpdate #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Assignment { From f11d74a64de2c90770c433951492344aec2067a7 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 7 Jul 2019 02:23:24 +0300 Subject: [PATCH 4/8] Update Statement::Drop doc comment --- src/ast/mod.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 8067ca725..e80ccdb16 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -423,11 +423,16 @@ pub enum Statement { name: ObjectName, operation: AlterTableOperation, }, - /// DROP TABLE + /// DROP Drop { + /// The type of the object to drop: TABLE, VIEW, etc. object_type: ObjectType, + /// An optional `IF EXISTS` clause. (Non-standard.) if_exists: bool, + /// One or more objects to drop. (ANSI SQL requires exactly one.) names: Vec, + /// Whether `CASCADE` was specified. This will be `false` when + /// `RESTRICT` or no drop behavior at all was specified. cascade: bool, }, /// `{ BEGIN [ TRANSACTION | WORK ] | START TRANSACTION } ...` From 03efcf6fa6ac2745ae27f6188e93863ae3ea9808 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 9 Jun 2019 05:39:07 +0300 Subject: [PATCH 5/8] Add parse_comma_separated to simplify the parser To use the new helper effectively, a few related changes were required: - Each of the parse_..._list functions (`parse_cte_list`, `parse_order_by_expr_list`, `parse_select_list`) was replaced with a version that parses a single element of the list (e.g. `parse_cte`), with their callers now using `self.parse_comma_separated(Parser::parse_)?` - `parse_with_options` now parses the WITH keyword and a separate `parse_sql_option` function (named after the struct it produces) was added to parse a single k=v option. - `parse_list_of_ids` is gone, with the '.'-separated parsing moved to `parse_object_name`. Custom comma-separated parsing is still used in: - parse_transaction_modes (where the comma separator is optional) - parse_columns (allows optional trailing comma, before the closing ')') --- src/parser.rs | 240 +++++++++++++++++++++----------------------------- 1 file changed, 98 insertions(+), 142 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index b30184ca1..a03386644 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -290,7 +290,7 @@ impl Parser { vec![] }; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { - self.parse_order_by_expr_list()? + self.parse_comma_separated(Parser::parse_order_by_expr)? } else { vec![] }; @@ -829,6 +829,21 @@ impl Parser { } } + /// Parse a comma-separated list of 1+ items accepted by `F` + pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> + where + F: FnMut(&mut Parser) -> Result, + { + let mut values = vec![]; + loop { + values.push(f(self)?); + if !self.consume_token(&Token::Comma) { + break; + } + } + Ok(values) + } + /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { if self.parse_keyword("TABLE") { @@ -872,11 +887,7 @@ impl Parser { // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let with_options = if self.parse_keyword("WITH") { - self.parse_with_options()? - } else { - vec![] - }; + let with_options = self.parse_with_options()?; self.expect_keyword("AS")?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. @@ -897,14 +908,10 @@ impl Parser { } else { return self.expected("TABLE or VIEW after DROP", self.peek_token()); }; + // Many dialects support the non standard `IF EXISTS` clause and allow + // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]); - let mut names = vec![]; - loop { - names.push(self.parse_object_name()?); - if !self.consume_token(&Token::Comma) { - break; - } - } + let names = self.parse_comma_separated(Parser::parse_object_name)?; let cascade = self.parse_keyword("CASCADE"); let restrict = self.parse_keyword("RESTRICT"); if cascade && restrict { @@ -922,12 +929,7 @@ impl Parser { let table_name = self.parse_object_name()?; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; - - let with_options = if self.parse_keyword("WITH") { - self.parse_with_options()? - } else { - vec![] - }; + let with_options = self.parse_with_options()?; Ok(Statement::CreateTable { name: table_name, @@ -1075,19 +1077,21 @@ impl Parser { } pub fn parse_with_options(&mut self) -> Result, ParserError> { - self.expect_token(&Token::LParen)?; - let mut options = vec![]; - loop { - let name = self.parse_identifier()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_value()?; - options.push(SqlOption { name, value }); - if !self.consume_token(&Token::Comma) { - break; - } + if self.parse_keyword("WITH") { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Parser::parse_sql_option)?; + self.expect_token(&Token::RParen)?; + Ok(options) + } else { + Ok(vec![]) } - self.expect_token(&Token::RParen)?; - Ok(options) + } + + pub fn parse_sql_option(&mut self) -> Result { + let name = self.parse_identifier()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_value()?; + Ok(SqlOption { name, value }) } pub fn parse_alter(&mut self) -> Result { @@ -1333,22 +1337,17 @@ impl Parser { } } - /// Parse one or more identifiers with the specified separator between them - pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result, ParserError> { + /// Parse a possibly qualified, possibly quoted identifier, e.g. + /// `foo` or `myschema."table"` + pub fn parse_object_name(&mut self) -> Result { let mut idents = vec![]; loop { idents.push(self.parse_identifier()?); - if !self.consume_token(separator) { + if !self.consume_token(&Token::Period) { break; } } - Ok(idents) - } - - /// Parse a possibly qualified, possibly quoted identifier, e.g. - /// `foo` or `myschema."table"` - pub fn parse_object_name(&mut self) -> Result { - Ok(ObjectName(self.parse_list_of_ids(&Token::Period)?)) + Ok(ObjectName(idents)) } /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) @@ -1365,7 +1364,7 @@ impl Parser { optional: IsOptional, ) -> Result, ParserError> { if self.consume_token(&Token::LParen) { - let cols = self.parse_list_of_ids(&Token::Comma)?; + let cols = self.parse_comma_separated(Parser::parse_identifier)?; self.expect_token(&Token::RParen)?; Ok(cols) } else if optional == Optional { @@ -1424,7 +1423,7 @@ impl Parser { pub fn parse_query(&mut self) -> Result { let ctes = if self.parse_keyword("WITH") { // TODO: optional RECURSIVE - self.parse_cte_list()? + self.parse_comma_separated(Parser::parse_cte)? } else { vec![] }; @@ -1432,7 +1431,7 @@ impl Parser { let body = self.parse_query_body(0)?; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { - self.parse_order_by_expr_list()? + self.parse_comma_separated(Parser::parse_order_by_expr)? } else { vec![] }; @@ -1465,27 +1464,17 @@ impl Parser { }) } - /// Parse one or more (comma-separated) `alias AS (subquery)` CTEs, - /// assuming the initial `WITH` was already consumed. - fn parse_cte_list(&mut self) -> Result, ParserError> { - let mut cte = vec![]; - loop { - let alias = TableAlias { - name: self.parse_identifier()?, - columns: self.parse_parenthesized_column_list(Optional)?, - }; - self.expect_keyword("AS")?; - self.expect_token(&Token::LParen)?; - cte.push(Cte { - alias, - query: self.parse_query()?, - }); - self.expect_token(&Token::RParen)?; - if !self.consume_token(&Token::Comma) { - break; - } - } - Ok(cte) + /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) + fn parse_cte(&mut self) -> Result { + let alias = TableAlias { + name: self.parse_identifier()?, + columns: self.parse_parenthesized_column_list(Optional)?, + }; + self.expect_keyword("AS")?; + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + Ok(Cte { alias, query }) } /// Parse a "query body", which is an expression with roughly the @@ -1559,22 +1548,18 @@ impl Parser { if all && distinct { return parser_err!("Cannot specify both ALL and DISTINCT in SELECT"); } - let projection = self.parse_select_list()?; + let projection = self.parse_comma_separated(Parser::parse_select_item)?; // Note that for keywords to be properly handled here, they need to be // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, // otherwise they may be parsed as an alias as part of the `projection` // or `from`. - let mut from = vec![]; - if self.parse_keyword("FROM") { - loop { - from.push(self.parse_table_and_joins()?); - if !self.consume_token(&Token::Comma) { - break; - } - } - } + let from = if self.parse_keyword("FROM") { + self.parse_comma_separated(Parser::parse_table_and_joins)? + } else { + vec![] + }; let selection = if self.parse_keyword("WHERE") { Some(self.parse_expr()?) @@ -1812,16 +1797,7 @@ impl Parser { pub fn parse_update(&mut self) -> Result { let table_name = self.parse_object_name()?; self.expect_keyword("SET")?; - let mut assignments = vec![]; - loop { - let id = self.parse_identifier()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_expr()?; - assignments.push(Assignment { id, value }); - if !self.consume_token(&Token::Comma) { - break; - } - } + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; let selection = if self.parse_keyword("WHERE") { Some(self.parse_expr()?) } else { @@ -1834,16 +1810,17 @@ impl Parser { }) } + /// Parse a `var = expr` assignment, used in an UPDATE statement + pub fn parse_assignment(&mut self) -> Result { + let id = self.parse_identifier()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + Ok(Assignment { id, value }) + } + /// Parse a comma-delimited list of SQL expressions pub fn parse_expr_list(&mut self) -> Result, ParserError> { - let mut expr_list: Vec = vec![]; - loop { - expr_list.push(self.parse_expr()?); - if !self.consume_token(&Token::Comma) { - break; - } - } - Ok(expr_list) + Ok(self.parse_comma_separated(Parser::parse_expr)?) } pub fn parse_optional_args(&mut self) -> Result, ParserError> { @@ -1857,52 +1834,34 @@ impl Parser { } /// Parse a comma-delimited list of projections after SELECT - pub fn parse_select_list(&mut self) -> Result, ParserError> { - let mut projections: Vec = vec![]; - loop { - let expr = self.parse_expr()?; - if let Expr::Wildcard = expr { - projections.push(SelectItem::Wildcard); - } else if let Expr::QualifiedWildcard(prefix) = expr { - projections.push(SelectItem::QualifiedWildcard(ObjectName(prefix))); + pub fn parse_select_item(&mut self) -> Result { + let expr = self.parse_expr()?; + if let Expr::Wildcard = expr { + Ok(SelectItem::Wildcard) + } else if let Expr::QualifiedWildcard(prefix) = expr { + Ok(SelectItem::QualifiedWildcard(ObjectName(prefix))) + } else { + // `expr` is a regular SQL expression and can be followed by an alias + if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? { + Ok(SelectItem::ExprWithAlias { expr, alias }) } else { - // `expr` is a regular SQL expression and can be followed by an alias - if let Some(alias) = - self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? - { - projections.push(SelectItem::ExprWithAlias { expr, alias }); - } else { - projections.push(SelectItem::UnnamedExpr(expr)); - } - } - - if !self.consume_token(&Token::Comma) { - break; + Ok(SelectItem::UnnamedExpr(expr)) } } - Ok(projections) } - /// Parse a comma-delimited list of ORDER BY expressions - pub fn parse_order_by_expr_list(&mut self) -> Result, ParserError> { - let mut expr_list: Vec = vec![]; - loop { - let expr = self.parse_expr()?; - - let asc = if self.parse_keyword("ASC") { - Some(true) - } else if self.parse_keyword("DESC") { - Some(false) - } else { - None - }; + /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + pub fn parse_order_by_expr(&mut self) -> Result { + let expr = self.parse_expr()?; - expr_list.push(OrderByExpr { expr, asc }); - if !self.consume_token(&Token::Comma) { - break; - } - } - Ok(expr_list) + let asc = if self.parse_keyword("ASC") { + Some(true) + } else if self.parse_keyword("DESC") { + Some(false) + } else { + None + }; + Ok(OrderByExpr { expr, asc }) } /// Parse a LIMIT clause @@ -1950,15 +1909,12 @@ impl Parser { } pub fn parse_values(&mut self) -> Result { - let mut values = vec![]; - loop { - self.expect_token(&Token::LParen)?; - values.push(self.parse_expr_list()?); - self.expect_token(&Token::RParen)?; - if !self.consume_token(&Token::Comma) { - break; - } - } + let values = self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let e = parser.parse_expr_list()?; + parser.expect_token(&Token::RParen)?; + Ok(e) + })?; Ok(Values(values)) } From 9314371d3b728617fef95d06de5e4c7c5d81ddb9 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 7 Jul 2019 04:20:14 +0300 Subject: [PATCH 6/8] Remove parse_expr_list, as it's now trivial --- src/parser.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index a03386644..342142e94 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -285,7 +285,7 @@ impl Parser { self.expect_token(&Token::LParen)?; let partition_by = if self.parse_keywords(vec!["PARTITION", "BY"]) { // a list of possibly-qualified column names - self.parse_expr_list()? + self.parse_comma_separated(Parser::parse_expr)? } else { vec![] }; @@ -597,7 +597,7 @@ impl Parser { } else { Expr::InList { expr: Box::new(expr), - list: self.parse_expr_list()?, + list: self.parse_comma_separated(Parser::parse_expr)?, negated, } }; @@ -1568,7 +1568,7 @@ impl Parser { }; let group_by = if self.parse_keywords(vec!["GROUP", "BY"]) { - self.parse_expr_list()? + self.parse_comma_separated(Parser::parse_expr)? } else { vec![] }; @@ -1734,7 +1734,7 @@ impl Parser { let mut with_hints = vec![]; if self.parse_keyword("WITH") { if self.consume_token(&Token::LParen) { - with_hints = self.parse_expr_list()?; + with_hints = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; } else { // rewind, as WITH may belong to the next statement's CTE @@ -1818,16 +1818,11 @@ impl Parser { Ok(Assignment { id, value }) } - /// Parse a comma-delimited list of SQL expressions - pub fn parse_expr_list(&mut self) -> Result, ParserError> { - Ok(self.parse_comma_separated(Parser::parse_expr)?) - } - pub fn parse_optional_args(&mut self) -> Result, ParserError> { if self.consume_token(&Token::RParen) { Ok(vec![]) } else { - let args = self.parse_expr_list()?; + let args = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; Ok(args) } @@ -1911,9 +1906,9 @@ impl Parser { pub fn parse_values(&mut self) -> Result { let values = self.parse_comma_separated(|parser| { parser.expect_token(&Token::LParen)?; - let e = parser.parse_expr_list()?; + let exprs = parser.parse_comma_separated(Parser::parse_expr)?; parser.expect_token(&Token::RParen)?; - Ok(e) + Ok(exprs) })?; Ok(Values(values)) } From f31636d3397f7156759ee667f3700e878df8f539 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 7 Jul 2019 05:53:53 +0300 Subject: [PATCH 7/8] Simplify parse_window_frame It used to consume the `RParen` closing the encompassing `OVER (`, even when no window frame was parsed, which confused me a bit, even though I wrote it initially. After fixing that, I took the opportunity to reduce nesting and duplication a bit. --- src/parser.rs | 53 +++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 342142e94..01fdf0f37 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -294,7 +294,13 @@ impl Parser { } else { vec![] }; - let window_frame = self.parse_window_frame()?; + let window_frame = if !self.consume_token(&Token::RParen) { + let window_frame = self.parse_window_frame()?; + self.expect_token(&Token::RParen)?; + Some(window_frame) + } else { + None + }; Some(WindowSpec { partition_by, @@ -313,35 +319,24 @@ impl Parser { })) } - pub fn parse_window_frame(&mut self) -> Result, ParserError> { - let window_frame = match self.peek_token() { - Some(Token::Word(w)) => { - let units = w.keyword.parse::()?; - self.next_token(); - if self.parse_keyword("BETWEEN") { - let start_bound = self.parse_window_frame_bound()?; - self.expect_keyword("AND")?; - let end_bound = Some(self.parse_window_frame_bound()?); - Some(WindowFrame { - units, - start_bound, - end_bound, - }) - } else { - let start_bound = self.parse_window_frame_bound()?; - let end_bound = None; - Some(WindowFrame { - units, - start_bound, - end_bound, - }) - } - } - Some(Token::RParen) => None, - unexpected => return self.expected("'ROWS', 'RANGE', 'GROUPS', or ')'", unexpected), + pub fn parse_window_frame(&mut self) -> Result { + let units = match self.next_token() { + Some(Token::Word(w)) => w.keyword.parse::()?, + unexpected => return self.expected("ROWS, RANGE, GROUPS", unexpected), }; - self.expect_token(&Token::RParen)?; - Ok(window_frame) + let (start_bound, end_bound) = if self.parse_keyword("BETWEEN") { + let start_bound = self.parse_window_frame_bound()?; + self.expect_keyword("AND")?; + let end_bound = Some(self.parse_window_frame_bound()?); + (start_bound, end_bound) + } else { + (self.parse_window_frame_bound()?, None) + }; + Ok(WindowFrame { + units, + start_bound, + end_bound, + }) } /// "CURRENT ROW" | ( ( | "UNBOUNDED") ("PRECEDING" | FOLLOWING) ) From 086ba1281cad76a59fbd811ca03a0cb43e899fd2 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 9 Jul 2019 02:37:36 +0300 Subject: [PATCH 8/8] Amend WindowFrame docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The note about WindowFrameBound::Following being only valid "in WindowFrame::end_bound" was both - confusing, as it was based on the ANSI SQL syntax the parser doesn't adhere to -- though it sounded like a promise about the AST one could expect to get from the parser - and incomplete, as the reality is that the bounds validation the SQL engine might want to perform is more complex. For example Postgres documentation says : > Restrictions are that frame_start cannot be UNBOUNDED FOLLOWING, > frame_end cannot be UNBOUNDED PRECEDING, and the frame_end choice > cannot appear earlier in the above list of frame_start and frame_end > options than the frame_start choice does — for example RANGE BETWEEN > CURRENT ROW AND offset PRECEDING is not allowed. But, for example, > ROWS BETWEEN 7 PRECEDING AND 8 PRECEDING is allowed, even though it > would never select any rows. --- src/ast/mod.rs | 11 ++++++++--- src/parser.rs | 5 ++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e80ccdb16..030ebba98 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -292,11 +292,16 @@ impl fmt::Display for WindowSpec { /// Specifies the data processed by a window function, e.g. /// `RANGE UNBOUNDED PRECEDING` or `ROWS BETWEEN 5 PRECEDING AND CURRENT ROW`. +/// +/// Note: The parser does not validate the specified bounds; the caller should +/// reject invalid bounds like `ROWS UNBOUNDED FOLLOWING` before execution. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct WindowFrame { pub units: WindowFrameUnits, pub start_bound: WindowFrameBound, - /// The right bound of the `BETWEEN .. AND` clause. + /// The right bound of the `BETWEEN .. AND` clause. The end bound of `None` + /// indicates the shorthand form (e.g. `ROWS 1 PRECEDING`), which must + /// behave the same as `end_bound = WindowFrameBound::CurrentRow`. pub end_bound: Option, // TBD: EXCLUDE } @@ -334,14 +339,14 @@ impl FromStr for WindowFrameUnits { } } +/// Specifies [WindowFrame]'s `start_bound` and `end_bound` #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum WindowFrameBound { /// `CURRENT ROW` CurrentRow, /// ` PRECEDING` or `UNBOUNDED PRECEDING` Preceding(Option), - /// ` FOLLOWING` or `UNBOUNDED FOLLOWING`. This can only appear in - /// [WindowFrame::end_bound]. + /// ` FOLLOWING` or `UNBOUNDED FOLLOWING`. Following(Option), } diff --git a/src/parser.rs b/src/parser.rs index 01fdf0f37..cdc031b72 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -339,7 +339,7 @@ impl Parser { }) } - /// "CURRENT ROW" | ( ( | "UNBOUNDED") ("PRECEDING" | FOLLOWING) ) + /// Parse `CURRENT ROW` or `{ | UNBOUNDED } { PRECEDING | FOLLOWING }` pub fn parse_window_frame_bound(&mut self) -> Result { if self.parse_keywords(vec!["CURRENT", "ROW"]) { Ok(WindowFrameBound::CurrentRow) @@ -347,8 +347,7 @@ impl Parser { let rows = if self.parse_keyword("UNBOUNDED") { None } else { - let rows = self.parse_literal_uint()?; - Some(rows) + Some(self.parse_literal_uint()?) }; if self.parse_keyword("PRECEDING") { Ok(WindowFrameBound::Preceding(rows))