From a0fa14f1da1369db94c805ca93da1030e46dc6d1 Mon Sep 17 00:00:00 2001 From: koushiro Date: Fri, 3 Sep 2021 11:26:38 +0800 Subject: [PATCH 1/5] Split parser into multiple files Signed-off-by: koushiro --- src/parser.rs | 1368 ++---------------------------------------- src/parser/ddl.rs | 564 +++++++++++++++++ src/parser/dml.rs | 64 ++ src/parser/dql.rs | 673 +++++++++++++++++++++ src/parser/macros.rs | 27 + src/parser/tcl.rs | 68 +++ 6 files changed, 1446 insertions(+), 1318 deletions(-) create mode 100644 src/parser/ddl.rs create mode 100644 src/parser/dml.rs create mode 100644 src/parser/dql.rs create mode 100644 src/parser/macros.rs create mode 100644 src/parser/tcl.rs diff --git a/src/parser.rs b/src/parser.rs index 659f3d1ee..eaaeebc5e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -12,6 +12,18 @@ //! SQL Parser +#[macro_use] +mod macros; + +// Data Definition Language +mod ddl; +// Data Manipulation Language +mod dml; +// Data Query Language +mod dql; +// Transactional Control Language +mod tcl; + #[cfg(not(feature = "std"))] use alloc::{ boxed::Box, @@ -22,49 +34,27 @@ use alloc::{ }; use core::fmt; -use log::debug; - use crate::ast::*; use crate::dialect::keywords::Keyword; use crate::dialect::*; use crate::tokenizer::*; -#[derive(Debug, Clone, PartialEq)] -pub enum ParserError { - TokenizerError(String), - ParserError(String), -} - -// Use `Parser::expected` instead, if possible -macro_rules! parser_err { - ($MSG:expr) => { - Err(ParserError::ParserError($MSG.to_string())) - }; -} - -// Returns a successful result if the optional expression is some -macro_rules! 
return_ok_if_some { - ($e:expr) => {{ - if let Some(v) = $e { - return Ok(v); - } - }}; -} - #[derive(PartialEq)] pub enum IsOptional { Optional, Mandatory, } -use IsOptional::*; - pub enum IsLateral { Lateral, NotLateral, } -use IsLateral::*; +#[derive(Debug, Clone, PartialEq)] +pub enum ParserError { + TokenizerError(String), + ParserError(String), +} impl From for ParserError { fn from(e: TokenizerError) -> Self { @@ -115,7 +105,7 @@ impl<'a> Parser<'a> { let mut parser = Parser::new(tokens, dialect); let mut stmts = Vec::new(); let mut expecting_statement_delimiter = false; - debug!("Parsing sql '{}'...", sql); + log::debug!("Parsing sql '{}'...", sql); loop { // ignore empty statements (between successive statement delimiters) while parser.consume_token(&Token::SemiColon) { @@ -185,6 +175,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `MSCK` statement pub fn parse_msck(&mut self) -> Result { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword(Keyword::TABLE)?; @@ -212,6 +203,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `TRUNCATE` statement pub fn parse_truncate(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; @@ -227,6 +219,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `ANALYZE` statement pub fn parse_analyze(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; @@ -290,12 +283,12 @@ impl<'a> Parser<'a> { /// Parse tokens until the precedence changes pub fn parse_subexpr(&mut self, precedence: u8) -> Result { - debug!("parsing expr"); + log::debug!("parsing expr"); let mut expr = self.parse_prefix()?; - debug!("prefix: {:?}", expr); + log::debug!("prefix: {:?}", expr); loop { let next_precedence = self.get_next_precedence()?; - debug!("next precedence: {:?}", next_precedence); + log::debug!("next precedence: {:?}", next_precedence); if precedence >= next_precedence { break; @@ -306,6 +299,7 @@ impl<'a> Parser<'a> { Ok(expr) } + /// Parse 
a `ASSERT` statement pub fn parse_assert(&mut self) -> Result { let condition = self.parse_expr()?; let message = if self.parse_keyword(Keyword::AS) { @@ -955,7 +949,7 @@ impl<'a> Parser<'a> { pub fn parse_map_access(&mut self, expr: Expr) -> Result { let key = self.parse_literal_string()?; let tok = self.consume_token(&Token::RBracket); - debug!("Tok: {}", tok); + log::debug!("Tok: {}", tok); match expr { e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess { column: Box::new(e), @@ -1016,7 +1010,7 @@ impl<'a> Parser<'a> { /// Get the precedence of the next token pub fn get_next_precedence(&self) -> Result { let token = self.peek_token(); - debug!("get_next_precedence() {:?}", token); + log::debug!("get_next_precedence() {:?}", token); match token { Token::Word(w) if w.keyword == Keyword::OR => Ok(5), Token::Word(w) if w.keyword == Keyword::AND => Ok(10), @@ -1260,7 +1254,7 @@ impl<'a> Parser<'a> { } } - /// Parse a SQL CREATE statement + /// Parse a SQL `CREATE` statement pub fn parse_create(&mut self) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); let temporary = self @@ -1291,99 +1285,6 @@ impl<'a> Parser<'a> { } } - /// SQLite-specific `CREATE VIRTUAL TABLE` - pub fn parse_create_virtual_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name()?; - self.expect_keyword(Keyword::USING)?; - let module_name = self.parse_identifier()?; - // SQLite docs note that module "arguments syntax is sufficiently - // general that the arguments can be made to appear as column - // definitions in a traditional CREATE TABLE statement", but - // we don't implement that. 
- let module_args = self.parse_parenthesized_column_list(Optional)?; - Ok(Statement::CreateVirtualTable { - name: table_name, - if_not_exists, - module_name, - module_args, - }) - } - - pub fn parse_create_schema(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let schema_name = self.parse_object_name()?; - Ok(Statement::CreateSchema { - schema_name, - if_not_exists, - }) - } - - pub fn parse_create_database(&mut self) -> Result { - let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let db_name = self.parse_object_name()?; - let mut location = None; - let mut managed_location = None; - loop { - match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { - Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), - Some(Keyword::MANAGEDLOCATION) => { - managed_location = Some(self.parse_literal_string()?) - } - _ => break, - } - } - Ok(Statement::CreateDatabase { - db_name, - if_not_exists: ine, - location, - managed_location, - }) - } - - pub fn parse_create_external_table( - &mut self, - or_replace: bool, - ) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name()?; - let (columns, constraints) = self.parse_columns()?; - - let hive_distribution = self.parse_hive_distribution()?; - let hive_formats = self.parse_hive_formats()?; - - let file_format = if let Some(ff) = &hive_formats.storage { - match ff { - HiveIOFormat::FileFormat { format } => Some(format.clone()), - _ => None, - } - } else { - None - }; - let location = hive_formats.location.clone(); - let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; - Ok(Statement::CreateTable { - name: table_name, - columns, - constraints, - hive_distribution, - hive_formats: Some(hive_formats), - with_options: vec![], - table_properties, - 
or_replace, - if_not_exists, - external: true, - temporary: false, - file_format, - location, - query: None, - without_rowid: false, - like: None, - }) - } - pub fn parse_file_format(&mut self) -> Result { match self.next_token() { Token::Word(w) => match w.keyword { @@ -1400,27 +1301,7 @@ impl<'a> Parser<'a> { } } - pub fn parse_create_view(&mut self, or_replace: bool) -> Result { - let materialized = self.parse_keyword(Keyword::MATERIALIZED); - self.expect_keyword(Keyword::VIEW)?; - // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). - // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. - let name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; - let with_options = self.parse_options(Keyword::WITH)?; - self.expect_keyword(Keyword::AS)?; - let query = Box::new(self.parse_query()?); - // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. - Ok(Statement::CreateView { - name, - columns, - query, - materialized, - or_replace, - with_options, - }) - } - + /// Parse a `DROP` statement pub fn parse_drop(&mut self) -> Result { let object_type = if self.parse_keyword(Keyword::TABLE) { ObjectType::Table @@ -1452,352 +1333,7 @@ impl<'a> Parser<'a> { }) } - pub fn parse_create_index(&mut self, unique: bool) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let index_name = self.parse_object_name()?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name()?; - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; - self.expect_token(&Token::RParen)?; - Ok(Statement::CreateIndex { - name: index_name, - table_name, - columns, - unique, - if_not_exists, - }) - } - - //TODO: Implement parsing for Skewed and Clustered - pub fn parse_hive_distribution(&mut self) -> Result { - if self.parse_keywords(&[Keyword::PARTITIONED, 
Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { columns }) - } else { - Ok(HiveDistributionStyle::NONE) - } - } - - pub fn parse_hive_formats(&mut self) -> Result { - let mut hive_format = HiveFormat::default(); - loop { - match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) { - Some(Keyword::ROW) => { - hive_format.row_format = Some(self.parse_row_format()?); - } - Some(Keyword::STORED) => { - self.expect_keyword(Keyword::AS)?; - if self.parse_keyword(Keyword::INPUTFORMAT) { - let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::OUTPUTFORMAT)?; - let output_format = self.parse_expr()?; - hive_format.storage = Some(HiveIOFormat::IOF { - input_format, - output_format, - }); - } else { - let format = self.parse_file_format()?; - hive_format.storage = Some(HiveIOFormat::FileFormat { format }); - } - } - Some(Keyword::LOCATION) => { - hive_format.location = Some(self.parse_literal_string()?); - } - None => break, - _ => break, - } - } - - Ok(hive_format) - } - - pub fn parse_row_format(&mut self) -> Result { - self.expect_keyword(Keyword::FORMAT)?; - match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { - Some(Keyword::SERDE) => { - let class = self.parse_literal_string()?; - Ok(HiveRowFormat::SERDE { class }) - } - _ => Ok(HiveRowFormat::DELIMITED), - } - } - - pub fn parse_create_table( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name()?; - let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { - self.parse_object_name().ok() - } else { - None - }; - // parse optional column list (schema) - let (columns, constraints) = self.parse_columns()?; - - // SQLite supports 
`WITHOUT ROWID` at the end of `CREATE TABLE` - let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); - - let hive_distribution = self.parse_hive_distribution()?; - let hive_formats = self.parse_hive_formats()?; - // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_options(Keyword::WITH)?; - let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; - // Parse optional `AS ( query )` - let query = if self.parse_keyword(Keyword::AS) { - Some(Box::new(self.parse_query()?)) - } else { - None - }; - - Ok(Statement::CreateTable { - name: table_name, - temporary, - columns, - constraints, - with_options, - table_properties, - or_replace, - if_not_exists, - hive_distribution, - hive_formats: Some(hive_formats), - external: false, - file_format: None, - location: None, - query, - without_rowid, - like, - }) - } - - fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { - let mut columns = vec![]; - let mut constraints = vec![]; - if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok((columns, constraints)); - } - - loop { - if let Some(constraint) = self.parse_optional_table_constraint()? { - constraints.push(constraint); - } else if let Token::Word(_) = self.peek_token() { - columns.push(self.parse_column_def()?); - } else { - return self.expected("column name or constraint definition", self.peek_token()); - } - let comma = self.consume_token(&Token::Comma); - if self.consume_token(&Token::RParen) { - // allow a trailing comma, even though it's not in standard - break; - } else if !comma { - return self.expected("',' or ')' after column definition", self.peek_token()); - } - } - - Ok((columns, constraints)) - } - - fn parse_column_def(&mut self) -> Result { - let name = self.parse_identifier()?; - let data_type = self.parse_data_type()?; - let collation = if self.parse_keyword(Keyword::COLLATE) { - Some(self.parse_object_name()?) 
- } else { - None - }; - let mut options = vec![]; - loop { - if self.parse_keyword(Keyword::CONSTRAINT) { - let name = Some(self.parse_identifier()?); - if let Some(option) = self.parse_optional_column_option()? { - options.push(ColumnOptionDef { name, option }); - } else { - return self.expected( - "constraint details after CONSTRAINT ", - self.peek_token(), - ); - } - } else if let Some(option) = self.parse_optional_column_option()? { - options.push(ColumnOptionDef { name: None, option }); - } else { - break; - }; - } - Ok(ColumnDef { - name, - data_type, - collation, - options, - }) - } - - pub fn parse_optional_column_option(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { - Ok(Some(ColumnOption::NotNull)) - } else if self.parse_keyword(Keyword::NULL) { - Ok(Some(ColumnOption::Null)) - } else if self.parse_keyword(Keyword::DEFAULT) { - Ok(Some(ColumnOption::Default(self.parse_expr()?))) - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { - Ok(Some(ColumnOption::Unique { is_primary: true })) - } else if self.parse_keyword(Keyword::UNIQUE) { - Ok(Some(ColumnOption::Unique { is_primary: false })) - } else if self.parse_keyword(Keyword::REFERENCES) { - let foreign_table = self.parse_object_name()?; - // PostgreSQL allows omitting the column list and - // uses the primary key column of the foreign table by default - let referred_columns = self.parse_parenthesized_column_list(Optional)?; - let mut on_delete = None; - let mut on_update = None; - loop { - if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { - on_delete = Some(self.parse_referential_action()?); - } else if on_update.is_none() - && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - { - on_update = Some(self.parse_referential_action()?); - } else { - break; - } - } - Ok(Some(ColumnOption::ForeignKey { - foreign_table, - referred_columns, - on_delete, - on_update, - })) - } else if 
self.parse_keyword(Keyword::CHECK) { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Some(ColumnOption::Check(expr))) - } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - // Support AUTO_INCREMENT for MySQL - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTO_INCREMENT"), - ]))) - } else if self.parse_keyword(Keyword::AUTOINCREMENT) - && dialect_of!(self is SQLiteDialect | GenericDialect) - { - // Support AUTOINCREMENT for SQLite - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTOINCREMENT"), - ]))) - } else { - Ok(None) - } - } - - pub fn parse_referential_action(&mut self) -> Result { - if self.parse_keyword(Keyword::RESTRICT) { - Ok(ReferentialAction::Restrict) - } else if self.parse_keyword(Keyword::CASCADE) { - Ok(ReferentialAction::Cascade) - } else if self.parse_keywords(&[Keyword::SET, Keyword::NULL]) { - Ok(ReferentialAction::SetNull) - } else if self.parse_keywords(&[Keyword::NO, Keyword::ACTION]) { - Ok(ReferentialAction::NoAction) - } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { - Ok(ReferentialAction::SetDefault) - } else { - self.expected( - "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", - self.peek_token(), - ) - } - } - - pub fn parse_optional_table_constraint( - &mut self, - ) -> Result, ParserError> { - let name = if self.parse_keyword(Keyword::CONSTRAINT) { - Some(self.parse_identifier()?) 
- } else { - None - }; - match self.next_token() { - Token::Word(w) if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE => { - let is_primary = w.keyword == Keyword::PRIMARY; - if is_primary { - self.expect_keyword(Keyword::KEY)?; - } - let columns = self.parse_parenthesized_column_list(Mandatory)?; - Ok(Some(TableConstraint::Unique { - name, - columns, - is_primary, - })) - } - Token::Word(w) if w.keyword == Keyword::FOREIGN => { - self.expect_keyword(Keyword::KEY)?; - let columns = self.parse_parenthesized_column_list(Mandatory)?; - self.expect_keyword(Keyword::REFERENCES)?; - let foreign_table = self.parse_object_name()?; - let referred_columns = self.parse_parenthesized_column_list(Mandatory)?; - let mut on_delete = None; - let mut on_update = None; - loop { - if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { - on_delete = Some(self.parse_referential_action()?); - } else if on_update.is_none() - && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - { - on_update = Some(self.parse_referential_action()?); - } else { - break; - } - } - Ok(Some(TableConstraint::ForeignKey { - name, - columns, - foreign_table, - referred_columns, - on_delete, - on_update, - })) - } - Token::Word(w) if w.keyword == Keyword::CHECK => { - self.expect_token(&Token::LParen)?; - let expr = Box::new(self.parse_expr()?); - self.expect_token(&Token::RParen)?; - Ok(Some(TableConstraint::Check { name, expr })) - } - unexpected => { - if name.is_some() { - self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", unexpected) - } else { - self.prev_token(); - Ok(None) - } - } - } - } - - pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { - if self.parse_keyword(keyword) { - self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Parser::parse_sql_option)?; - self.expect_token(&Token::RParen)?; - Ok(options) - } else { - Ok(vec![]) - } - } - - pub fn parse_sql_option(&mut self) -> Result { - let name = 
self.parse_identifier()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_value()?; - Ok(SqlOption { name, value }) - } - + /// Parse a `ALTER` statement pub fn parse_alter(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let _ = self.parse_keyword(Keyword::ONLY); @@ -1889,13 +1425,13 @@ impl<'a> Parser<'a> { }) } - /// Parse a copy statement + /// Parse a `COPY` statement pub fn parse_copy(&mut self) -> Result { let table_name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; + let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; self.expect_keywords(&[Keyword::FROM, Keyword::STDIN])?; self.expect_token(&Token::SemiColon)?; - let values = self.parse_tsv(); + let values = self.parse_tab_values(); Ok(Statement::Copy { table_name, columns, @@ -1903,43 +1439,6 @@ impl<'a> Parser<'a> { }) } - /// Parse a tab separated values in - /// COPY payload - fn parse_tsv(&mut self) -> Vec> { - self.parse_tab_value() - } - - fn parse_tab_value(&mut self) -> Vec> { - let mut values = vec![]; - let mut content = String::from(""); - while let Some(t) = self.next_token_no_skip() { - match t { - Token::Whitespace(Whitespace::Tab) => { - values.push(Some(content.to_string())); - content.clear(); - } - Token::Whitespace(Whitespace::Newline) => { - values.push(Some(content.to_string())); - content.clear(); - } - Token::Backslash => { - if self.consume_token(&Token::Period) { - return values; - } - if let Token::Word(w) = self.next_token() { - if w.value == "N" { - values.push(None); - } - } - } - _ => { - content.push_str(&t.to_string()); - } - } - } - values - } - /// Parse a literal value (numbers, strings, date/time, booleans) fn parse_value(&mut self) -> Result { match self.next_token() { @@ -2068,63 +1567,6 @@ impl<'a> Parser<'a> { } } - /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) - /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, - 
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` - pub fn parse_optional_alias( - &mut self, - reserved_kwds: &[Keyword], - ) -> Result, ParserError> { - let after_as = self.parse_keyword(Keyword::AS); - match self.next_token() { - // Accept any identifier after `AS` (though many dialects have restrictions on - // keywords that may appear here). If there's no `AS`: don't parse keywords, - // which may start a construct allowed in this position, to be parsed as aliases. - // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, - // not an alias.) - Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => { - Ok(Some(w.to_ident())) - } - // MSSQL supports single-quoted strings as aliases for columns - // We accept them as table aliases too, although MSSQL does not. - // - // Note, that this conflicts with an obscure rule from the SQL - // standard, which we don't implement: - // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s - // "[Obscure Rule] SQL allows you to break a long up into two or more smaller s, split by a that includes a newline - // character. When it sees such a , your DBMS will - // ignore the and treat the multiple strings as - // a single ." - Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), - not_an_ident => { - if after_as { - return self.expected("an identifier after AS", not_an_ident); - } - self.prev_token(); - Ok(None) // no alias found - } - } - } - - /// Parse `AS identifier` when the AS is describing a table-valued object, - /// like in `... FROM generate_series(1, 10) AS t (col)`. In this case - /// the alias is allowed to optionally name the columns in the table, in - /// addition to the table itself. - pub fn parse_optional_table_alias( - &mut self, - reserved_kwds: &[Keyword], - ) -> Result, ParserError> { - match self.parse_optional_alias(reserved_kwds)? 
{ - Some(name) => { - let columns = self.parse_parenthesized_column_list(Optional)?; - Ok(Some(TableAlias { name, columns })) - } - None => Ok(None), - } - } - /// Parse a possibly qualified, possibly quoted identifier, e.g. /// `foo` or `myschema."table" pub fn parse_object_name(&mut self) -> Result { @@ -2138,19 +1580,6 @@ impl<'a> Parser<'a> { Ok(ObjectName(idents)) } - /// Parse identifiers - pub fn parse_identifiers(&mut self) -> Result, ParserError> { - let mut idents = vec![]; - loop { - match self.next_token() { - Token::Word(w) => idents.push(w.to_ident()), - Token::EOF => break, - _ => {} - } - } - Ok(idents) - } - /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) pub fn parse_identifier(&mut self) -> Result { match self.next_token() { @@ -2169,7 +1598,7 @@ impl<'a> Parser<'a> { let cols = self.parse_comma_separated(Parser::parse_identifier)?; self.expect_token(&Token::RParen)?; Ok(cols) - } else if optional == Optional { + } else if optional == IsOptional::Optional { Ok(vec![]) } else { self.expected("a list of columns in parentheses", self.peek_token()) @@ -2203,6 +1632,7 @@ impl<'a> Parser<'a> { } } + /// Parse a SQL `DELETE` statement pub fn parse_delete(&mut self) -> Result { self.expect_keyword(Keyword::FROM)?; let table_name = self.parse_object_name()?; @@ -2218,6 +1648,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a SQL `EXPLAIN` statement pub fn parse_explain(&mut self) -> Result { let analyze = self.parse_keyword(Keyword::ANALYZE); let verbose = self.parse_keyword(Keyword::VERBOSE); @@ -2231,271 +1662,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a query expression, i.e. a `SELECT` statement optionally - /// preceeded with some `WITH` CTE declarations and optionally followed - /// by `ORDER BY`. Unlike some other parse_... 
methods, this one doesn't - /// expect the initial keyword to be already consumed - pub fn parse_query(&mut self) -> Result { - let with = if self.parse_keyword(Keyword::WITH) { - Some(With { - recursive: self.parse_keyword(Keyword::RECURSIVE), - cte_tables: self.parse_comma_separated(Parser::parse_cte)?, - }) - } else { - None - }; - - if !self.parse_keyword(Keyword::INSERT) { - let body = self.parse_query_body(0)?; - - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()? - } else { - None - }; - - let offset = if self.parse_keyword(Keyword::OFFSET) { - Some(self.parse_offset()?) - } else { - None - }; - - let fetch = if self.parse_keyword(Keyword::FETCH) { - Some(self.parse_fetch()?) - } else { - None - }; - - Ok(Query { - with, - body, - order_by, - limit, - offset, - fetch, - }) - } else { - let insert = self.parse_insert()?; - Ok(Query { - with, - body: SetExpr::Insert(insert), - limit: None, - order_by: vec![], - offset: None, - fetch: None, - }) - } - } - - /// Parse a CTE (`alias [( col1, col2, ... 
)] AS (subquery)`) - fn parse_cte(&mut self) -> Result { - let name = self.parse_identifier()?; - - let mut cte = if self.parse_keyword(Keyword::AS) { - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - let alias = TableAlias { - name, - columns: vec![], - }; - Cte { - alias, - query, - from: None, - } - } else { - let columns = self.parse_parenthesized_column_list(Optional)?; - self.expect_keyword(Keyword::AS)?; - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - let alias = TableAlias { name, columns }; - Cte { - alias, - query, - from: None, - } - }; - if self.parse_keyword(Keyword::FROM) { - cte.from = Some(self.parse_identifier()?); - } - Ok(cte) - } - - /// Parse a "query body", which is an expression with roughly the - /// following grammar: - /// ```text - /// query_body ::= restricted_select | '(' subquery ')' | set_operation - /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] - /// subquery ::= query_body [ order_by_limit ] - /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body - /// ``` - fn parse_query_body(&mut self, precedence: u8) -> Result { - // We parse the expression using a Pratt parser, as in `parse_expr()`. - // Start by parsing a restricted SELECT or a `(subquery)`: - let mut expr = if self.parse_keyword(Keyword::SELECT) { - SetExpr::Select(Box::new(self.parse_select()?)) - } else if self.consume_token(&Token::LParen) { - // CTEs are not allowed here, but the parser currently accepts them - let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; - SetExpr::Query(Box::new(subquery)) - } else if self.parse_keyword(Keyword::VALUES) { - SetExpr::Values(self.parse_values()?) 
- } else { - return self.expected( - "SELECT, VALUES, or a subquery in the query body", - self.peek_token(), - ); - }; - - loop { - // The query can be optionally followed by a set operator: - let op = self.parse_set_operator(&self.peek_token()); - let next_precedence = match op { - // UNION and EXCEPT have the same binding power and evaluate left-to-right - Some(SetOperator::Union) | Some(SetOperator::Except) => 10, - // INTERSECT has higher precedence than UNION/EXCEPT - Some(SetOperator::Intersect) => 20, - // Unexpected token or EOF => stop parsing the query body - None => break, - }; - if precedence >= next_precedence { - break; - } - self.next_token(); // skip past the set operator - expr = SetExpr::SetOperation { - left: Box::new(expr), - op: op.unwrap(), - all: self.parse_keyword(Keyword::ALL), - right: Box::new(self.parse_query_body(next_precedence)?), - }; - } - - Ok(expr) - } - - fn parse_set_operator(&mut self, token: &Token) -> Option { - match token { - Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), - Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), - Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), - _ => None, - } - } - - /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), - /// assuming the initial `SELECT` was already consumed - pub fn parse_select(&mut self) -> Result { - let distinct = self.parse_all_or_distinct()?; - - let top = if self.parse_keyword(Keyword::TOP) { - Some(self.parse_top()?) - } else { - None - }; - - let projection = self.parse_comma_separated(Parser::parse_select_item)?; - - // Note that for keywords to be properly handled here, they need to be - // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, - // otherwise they may be parsed as an alias as part of the `projection` - // or `from`. 
- - let from = if self.parse_keyword(Keyword::FROM) { - self.parse_comma_separated(Parser::parse_table_and_joins)? - } else { - vec![] - }; - let mut lateral_views = vec![]; - loop { - if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { - let outer = self.parse_keyword(Keyword::OUTER); - let lateral_view = self.parse_expr()?; - let lateral_view_name = self.parse_object_name()?; - let lateral_col_alias = self - .parse_comma_separated(|parser| { - parser.parse_optional_alias(&[ - Keyword::WHERE, - Keyword::GROUP, - Keyword::CLUSTER, - Keyword::HAVING, - Keyword::LATERAL, - ]) // This couldn't possibly be a bad idea - })? - .into_iter() - .flatten() - .collect(); - - lateral_views.push(LateralView { - lateral_view, - lateral_view_name, - lateral_col_alias, - outer, - }); - } else { - break; - } - } - - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - - let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let having = if self.parse_keyword(Keyword::HAVING) { - Some(self.parse_expr()?) 
- } else { - None - }; - - Ok(Select { - distinct, - top, - projection, - from, - lateral_views, - selection, - group_by, - cluster_by, - distribute_by, - sort_by, - having, - }) - } - + /// Parse a SQL `SET ` or `SET TRANSACTION` statement pub fn parse_set(&mut self) -> Result { let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); @@ -2532,6 +1699,7 @@ impl<'a> Parser<'a> { } } + /// Parse a SQL `SHOW` statement pub fn parse_show(&mut self) -> Result { if self .parse_one_of_keywords(&[ @@ -2547,301 +1715,11 @@ impl<'a> Parser<'a> { } else if self.parse_one_of_keywords(&[Keyword::CREATE]).is_some() { Ok(self.parse_show_create()?) } else { - Ok(Statement::ShowVariable { - variable: self.parse_identifiers()?, - }) + Ok(self.parse_show_variable()?) } } - fn parse_show_create(&mut self) -> Result { - let obj_type = match self.expect_one_of_keywords(&[ - Keyword::TABLE, - Keyword::TRIGGER, - Keyword::FUNCTION, - Keyword::PROCEDURE, - Keyword::EVENT, - ])? { - Keyword::TABLE => Ok(ShowCreateObject::Table), - Keyword::TRIGGER => Ok(ShowCreateObject::Trigger), - Keyword::FUNCTION => Ok(ShowCreateObject::Function), - Keyword::PROCEDURE => Ok(ShowCreateObject::Procedure), - Keyword::EVENT => Ok(ShowCreateObject::Event), - keyword => Err(ParserError::ParserError(format!( - "Unable to map keyword to ShowCreateObject: {:?}", - keyword - ))), - }?; - - let obj_name = self.parse_object_name()?; - - Ok(Statement::ShowCreate { obj_type, obj_name }) - } - - fn parse_show_columns(&mut self) -> Result { - let extended = self.parse_keyword(Keyword::EXTENDED); - let full = self.parse_keyword(Keyword::FULL); - self.expect_one_of_keywords(&[Keyword::COLUMNS, Keyword::FIELDS])?; - self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; - let table_name = self.parse_object_name()?; - // MySQL also supports FROM here. In other words, MySQL - // allows both FROM FROM and FROM .
, - // while we only support the latter for now. - let filter = self.parse_show_statement_filter()?; - Ok(Statement::ShowColumns { - extended, - full, - table_name, - filter, - }) - } - - fn parse_show_statement_filter(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::LIKE) { - Ok(Some(ShowStatementFilter::Like( - self.parse_literal_string()?, - ))) - } else if self.parse_keyword(Keyword::ILIKE) { - Ok(Some(ShowStatementFilter::ILike( - self.parse_literal_string()?, - ))) - } else if self.parse_keyword(Keyword::WHERE) { - Ok(Some(ShowStatementFilter::Where(self.parse_expr()?))) - } else { - Ok(None) - } - } - - pub fn parse_table_and_joins(&mut self) -> Result { - let relation = self.parse_table_factor()?; - - // Note that for keywords to be properly handled here, they need to be - // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as - // a table alias. - let mut joins = vec![]; - loop { - let join = if self.parse_keyword(Keyword::CROSS) { - let join_operator = if self.parse_keyword(Keyword::JOIN) { - JoinOperator::CrossJoin - } else if self.parse_keyword(Keyword::APPLY) { - // MSSQL extension, similar to CROSS JOIN LATERAL - JoinOperator::CrossApply - } else { - return self.expected("JOIN or APPLY after CROSS", self.peek_token()); - }; - Join { - relation: self.parse_table_factor()?, - join_operator, - } - } else if self.parse_keyword(Keyword::OUTER) { - // MSSQL extension, similar to LEFT JOIN LATERAL .. 
ON 1=1 - self.expect_keyword(Keyword::APPLY)?; - Join { - relation: self.parse_table_factor()?, - join_operator: JoinOperator::OuterApply, - } - } else { - let natural = self.parse_keyword(Keyword::NATURAL); - let peek_keyword = if let Token::Word(w) = self.peek_token() { - w.keyword - } else { - Keyword::NoKeyword - }; - - let join_operator_type = match peek_keyword { - Keyword::INNER | Keyword::JOIN => { - let _ = self.parse_keyword(Keyword::INNER); - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::Inner - } - kw @ Keyword::LEFT | kw @ Keyword::RIGHT | kw @ Keyword::FULL => { - let _ = self.next_token(); - let _ = self.parse_keyword(Keyword::OUTER); - self.expect_keyword(Keyword::JOIN)?; - match kw { - Keyword::LEFT => JoinOperator::LeftOuter, - Keyword::RIGHT => JoinOperator::RightOuter, - Keyword::FULL => JoinOperator::FullOuter, - _ => unreachable!(), - } - } - Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); - } - _ if natural => { - return self.expected("a join type after NATURAL", self.peek_token()); - } - _ => break, - }; - let relation = self.parse_table_factor()?; - let join_constraint = self.parse_join_constraint(natural)?; - Join { - relation, - join_operator: join_operator_type(join_constraint), - } - }; - joins.push(join); - } - Ok(TableWithJoins { relation, joins }) - } - - /// A table name or a parenthesized subquery, followed by optional `[AS] alias` - pub fn parse_table_factor(&mut self) -> Result { - if self.parse_keyword(Keyword::LATERAL) { - // LATERAL must always be followed by a subquery. 
- if !self.consume_token(&Token::LParen) { - self.expected("subquery after LATERAL", self.peek_token())?; - } - self.parse_derived_table_factor(Lateral) - } else if self.parse_keyword(Keyword::TABLE) { - // parse table function (SELECT * FROM TABLE () [ AS ]) - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::TableFunction { expr, alias }) - } else if self.consume_token(&Token::LParen) { - // A left paren introduces either a derived table (i.e., a subquery) - // or a nested join. It's nearly impossible to determine ahead of - // time which it is... so we just try to parse both. - // - // Here's an example that demonstrates the complexity: - // /-------------------------------------------------------\ - // | /-----------------------------------\ | - // SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) ) - // ^ ^ ^ ^ - // | | | | - // | | | | - // | | | (4) belongs to a SetExpr::Query inside the subquery - // | | (3) starts a derived table (subquery) - // | (2) starts a nested join - // (1) an additional set of parens around a nested join - // - - // If the recently consumed '(' starts a derived table, the call to - // `parse_derived_table_factor` below will return success after parsing the - // subquery, followed by the closing ')', and the alias of the derived table. - // In the example above this is case (3). - return_ok_if_some!( - self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral)) - ); - // A parsing error from `parse_derived_table_factor` indicates that the '(' we've - // recently consumed does not start a derived table (cases 1, 2, or 4). - // `maybe_parse` will ignore such an error and rewind to be after the opening '('. - - // Inside the parentheses we expect to find an (A) table factor - // followed by some joins or (B) another level of nesting. 
- let mut table_and_joins = self.parse_table_and_joins()?; - - if !table_and_joins.joins.is_empty() { - self.expect_token(&Token::RParen)?; - Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) // (A) - } else if let TableFactor::NestedJoin(_) = &table_and_joins.relation { - // (B): `table_and_joins` (what we found inside the parentheses) - // is a nested join `(foo JOIN bar)`, not followed by other joins. - self.expect_token(&Token::RParen)?; - Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { - // Dialect-specific behavior: Snowflake diverges from the - // standard and from most of the other implementations by - // allowing extra parentheses not only around a join (B), but - // around lone table names (e.g. `FROM (mytable [AS alias])`) - // and around derived tables (e.g. `FROM ((SELECT ...) - // [AS alias])`) as well. - self.expect_token(&Token::RParen)?; - - if let Some(outer_alias) = - self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? - { - // Snowflake also allows specifying an alias *after* parens - // e.g. `FROM (mytable) AS alias` - match &mut table_and_joins.relation { - TableFactor::Derived { alias, .. } - | TableFactor::Table { alias, .. } - | TableFactor::TableFunction { alias, .. } => { - // but not `FROM (mytable AS alias1) AS alias2`. - if let Some(inner_alias) = alias { - return Err(ParserError::ParserError(format!( - "duplicate alias {}", - inner_alias - ))); - } - // Act as if the alias was specified normally next - // to the table name: `(mytable) AS alias` -> - // `(mytable AS alias)` - alias.replace(outer_alias); - } - TableFactor::NestedJoin(_) => unreachable!(), - }; - } - // Do not store the extra set of parens in the AST - Ok(table_and_joins.relation) - } else { - // The SQL spec prohibits derived tables and bare tables from - // appearing alone in parentheses (e.g. 
`FROM (mytable)`) - self.expected("joined table", self.peek_token()) - } - } else { - let name = self.parse_object_name()?; - // Postgres, MSSQL: table-valued functions: - let args = if self.consume_token(&Token::LParen) { - self.parse_optional_args()? - } else { - vec![] - }; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - // MSSQL-specific table hints: - let mut with_hints = vec![]; - if self.parse_keyword(Keyword::WITH) { - if self.consume_token(&Token::LParen) { - with_hints = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - } else { - // rewind, as WITH may belong to the next statement's CTE - self.prev_token(); - } - }; - Ok(TableFactor::Table { - name, - alias, - args, - with_hints, - }) - } - } - - pub fn parse_derived_table_factor( - &mut self, - lateral: IsLateral, - ) -> Result { - let subquery = Box::new(self.parse_query()?); - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::Derived { - lateral: match lateral { - Lateral => true, - NotLateral => false, - }, - subquery, - alias, - }) - } - - fn parse_join_constraint(&mut self, natural: bool) -> Result { - if natural { - Ok(JoinConstraint::Natural) - } else if self.parse_keyword(Keyword::ON) { - let constraint = self.parse_expr()?; - Ok(JoinConstraint::On(constraint)) - } else if self.parse_keyword(Keyword::USING) { - let columns = self.parse_parenthesized_column_list(Mandatory)?; - Ok(JoinConstraint::Using(columns)) - } else { - Ok(JoinConstraint::None) - //self.expected("ON, or USING after JOIN", self.peek_token()) - } - } - - /// Parse an INSERT statement + /// Parse an `INSERT` statement pub fn parse_insert(&mut self) -> Result { let or = if !dialect_of!(self is SQLiteDialect) { None @@ -2883,7 +1761,7 @@ impl<'a> Parser<'a> { // Hive lets you put table here regardless let table = self.parse_keyword(Keyword::TABLE); let table_name = 
self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; + let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; let partitioned = if self.parse_keyword(Keyword::PARTITION) { self.expect_token(&Token::LParen)?; @@ -2895,7 +1773,7 @@ impl<'a> Parser<'a> { }; // Hive allows you to specify columns after partitions as well if you want. - let after_columns = self.parse_parenthesized_column_list(Optional)?; + let after_columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; let source = Box::new(self.parse_query()?); Ok(Statement::Insert { @@ -2911,6 +1789,7 @@ impl<'a> Parser<'a> { } } + /// Parse an `UPDATE` statement pub fn parse_update(&mut self) -> Result { let table_name = self.parse_object_name()?; self.expect_keyword(Keyword::SET)?; @@ -2927,14 +1806,6 @@ impl<'a> Parser<'a> { }) } - /// Parse a `var = expr` assignment, used in an UPDATE statement - pub fn parse_assignment(&mut self) -> Result { - let id = self.parse_identifier()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_expr()?; - Ok(Assignment { id, value }) - } - fn parse_function_args(&mut self) -> Result { if self.peek_nth_token(1) == Token::RArrow { let name = self.parse_identifier()?; @@ -2958,23 +1829,6 @@ impl<'a> Parser<'a> { } } - /// Parse a comma-delimited list of projections after SELECT - pub fn parse_select_item(&mut self) -> Result { - let expr = self.parse_expr()?; - if let Expr::Wildcard = expr { - Ok(SelectItem::Wildcard) - } else if let Expr::QualifiedWildcard(prefix) = expr { - Ok(SelectItem::QualifiedWildcard(ObjectName(prefix))) - } else { - // `expr` is a regular SQL expression and can be followed by an alias - if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? 
{ - Ok(SelectItem::ExprWithAlias { expr, alias }) - } else { - Ok(SelectItem::UnnamedExpr(expr)) - } - } - } - /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) pub fn parse_order_by_expr(&mut self) -> Result { let expr = self.parse_expr()?; @@ -3002,88 +1856,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a TOP clause, MSSQL equivalent of LIMIT, - /// that follows after SELECT [DISTINCT]. - pub fn parse_top(&mut self) -> Result { - let quantity = if self.consume_token(&Token::LParen) { - let quantity = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(quantity) - } else { - Some(Expr::Value(self.parse_number_value()?)) - }; - - let percent = self.parse_keyword(Keyword::PERCENT); - - let with_ties = self.parse_keywords(&[Keyword::WITH, Keyword::TIES]); - - Ok(Top { - with_ties, - percent, - quantity, - }) - } - - /// Parse a LIMIT clause - pub fn parse_limit(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::ALL) { - Ok(None) - } else { - Ok(Some(Expr::Value(self.parse_number_value()?))) - } - } - - /// Parse an OFFSET clause - pub fn parse_offset(&mut self) -> Result { - let value = Expr::Value(self.parse_number_value()?); - let rows = if self.parse_keyword(Keyword::ROW) { - OffsetRows::Row - } else if self.parse_keyword(Keyword::ROWS) { - OffsetRows::Rows - } else { - OffsetRows::None - }; - Ok(Offset { value, rows }) - } - - /// Parse a FETCH clause - pub fn parse_fetch(&mut self) -> Result { - self.expect_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])?; - let (quantity, percent) = if self - .parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS]) - .is_some() - { - (None, false) - } else { - let quantity = Expr::Value(self.parse_value()?); - let percent = self.parse_keyword(Keyword::PERCENT); - self.expect_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])?; - (Some(quantity), percent) - }; - let with_ties = if self.parse_keyword(Keyword::ONLY) { - false - } else if self.parse_keywords(&[Keyword::WITH, 
Keyword::TIES]) { - true - } else { - return self.expected("one of ONLY or WITH TIES", self.peek_token()); - }; - Ok(Fetch { - with_ties, - percent, - quantity, - }) - } - - pub fn parse_values(&mut self) -> Result { - let values = self.parse_comma_separated(|parser| { - parser.expect_token(&Token::LParen)?; - let exprs = parser.parse_comma_separated(Parser::parse_expr)?; - parser.expect_token(&Token::RParen)?; - Ok(exprs) - })?; - Ok(Values(values)) - } - + /// Parse a SQL `START TRANSACTION` statement pub fn parse_start_transaction(&mut self) -> Result { self.expect_keyword(Keyword::TRANSACTION)?; Ok(Statement::StartTransaction { @@ -3091,6 +1864,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an `BEGIN` statement pub fn parse_begin(&mut self) -> Result { let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); Ok(Statement::StartTransaction { @@ -3098,71 +1872,28 @@ impl<'a> Parser<'a> { }) } - pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { - let mut modes = vec![]; - let mut required = false; - loop { - let mode = if self.parse_keywords(&[Keyword::ISOLATION, Keyword::LEVEL]) { - let iso_level = if self.parse_keywords(&[Keyword::READ, Keyword::UNCOMMITTED]) { - TransactionIsolationLevel::ReadUncommitted - } else if self.parse_keywords(&[Keyword::READ, Keyword::COMMITTED]) { - TransactionIsolationLevel::ReadCommitted - } else if self.parse_keywords(&[Keyword::REPEATABLE, Keyword::READ]) { - TransactionIsolationLevel::RepeatableRead - } else if self.parse_keyword(Keyword::SERIALIZABLE) { - TransactionIsolationLevel::Serializable - } else { - self.expected("isolation level", self.peek_token())? 
- }; - TransactionMode::IsolationLevel(iso_level) - } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { - TransactionMode::AccessMode(TransactionAccessMode::ReadOnly) - } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { - TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) - } else if required { - self.expected("transaction mode", self.peek_token())? - } else { - break; - }; - modes.push(mode); - // ANSI requires a comma after each transaction mode, but - // PostgreSQL, for historical reasons, does not. We follow - // PostgreSQL in making the comma optional, since that is strictly - // more general. - required = self.consume_token(&Token::Comma); - } - Ok(modes) - } - + /// Parse an `COMMIT` statement pub fn parse_commit(&mut self) -> Result { Ok(Statement::Commit { chain: self.parse_commit_rollback_chain()?, }) } + /// Parse an `ROLLBACK` statement pub fn parse_rollback(&mut self) -> Result { Ok(Statement::Rollback { chain: self.parse_commit_rollback_chain()?, }) } - pub fn parse_commit_rollback_chain(&mut self) -> Result { - let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); - if self.parse_keyword(Keyword::AND) { - let chain = !self.parse_keyword(Keyword::NO); - self.expect_keyword(Keyword::CHAIN)?; - Ok(chain) - } else { - Ok(false) - } - } - + /// Parse a SQL `DEALLOCATE` statement fn parse_deallocate(&mut self) -> Result { let prepare = self.parse_keyword(Keyword::PREPARE); let name = self.parse_identifier()?; Ok(Statement::Deallocate { name, prepare }) } + /// Parse a SQL `EXECUTE` statement fn parse_execute(&mut self) -> Result { let name = self.parse_identifier()?; @@ -3175,6 +1906,7 @@ impl<'a> Parser<'a> { Ok(Statement::Execute { name, parameters }) } + /// Parse a SQL `PREPARE` statement fn parse_prepare(&mut self) -> Result { let name = self.parse_identifier()?; diff --git a/src/parser/ddl.rs b/src/parser/ddl.rs new file mode 100644 index 000000000..3a45f2bd3 --- /dev/null +++ 
b/src/parser/ddl.rs @@ -0,0 +1,564 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, format, vec, vec::Vec}; + +use crate::ast::*; +use crate::dialect::keywords::Keyword; +use crate::dialect::*; +use crate::parser::{IsOptional::*, Parser, ParserError}; +use crate::tokenizer::Token; + +impl<'a> Parser<'a> { + /// Parse a SQL `CREATE DATABASE` statement + pub fn parse_create_database(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let db_name = self.parse_object_name()?; + let mut location = None; + let mut managed_location = None; + loop { + match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { + Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), + Some(Keyword::MANAGEDLOCATION) => { + managed_location = Some(self.parse_literal_string()?) 
+ } + _ => break, + } + } + Ok(Statement::CreateDatabase { + db_name, + if_not_exists: ine, + location, + managed_location, + }) + } + + /// Parse a SQL `CREATE TABLE` statement + pub fn parse_create_table( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name()?; + let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { + self.parse_object_name().ok() + } else { + None + }; + // parse optional column list (schema) + let (columns, constraints) = self.parse_columns()?; + + // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` + let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); + + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; + // PostgreSQL supports `WITH ( options )`, before `AS` + let with_options = self.parse_options(Keyword::WITH)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + // Parse optional `AS ( query )` + let query = if self.parse_keyword(Keyword::AS) { + Some(Box::new(self.parse_query()?)) + } else { + None + }; + + Ok(Statement::CreateTable { + name: table_name, + temporary, + columns, + constraints, + with_options, + table_properties, + or_replace, + if_not_exists, + hive_distribution, + hive_formats: Some(hive_formats), + external: false, + file_format: None, + location: None, + query, + without_rowid, + like, + }) + } + + /// Parse a SQL `CREATE EXTERNAL TABLE` statement + pub fn parse_create_external_table( + &mut self, + or_replace: bool, + ) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name()?; + let (columns, constraints) = self.parse_columns()?; + + let hive_distribution = self.parse_hive_distribution()?; + let 
hive_formats = self.parse_hive_formats()?; + + let file_format = if let Some(ff) = &hive_formats.storage { + match ff { + HiveIOFormat::FileFormat { format } => Some(format.clone()), + _ => None, + } + } else { + None + }; + let location = hive_formats.location.clone(); + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + Ok(Statement::CreateTable { + name: table_name, + columns, + constraints, + hive_distribution, + hive_formats: Some(hive_formats), + with_options: vec![], + table_properties, + or_replace, + if_not_exists, + external: true, + temporary: false, + file_format, + location, + query: None, + without_rowid: false, + like: None, + }) + } + + /// SQLite-specific `CREATE VIRTUAL TABLE` statement + pub fn parse_create_virtual_table(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name()?; + self.expect_keyword(Keyword::USING)?; + let module_name = self.parse_identifier()?; + // SQLite docs note that module "arguments syntax is sufficiently + // general that the arguments can be made to appear as column + // definitions in a traditional CREATE TABLE statement", but + // we don't implement that. + let module_args = self.parse_parenthesized_column_list(Optional)?; + Ok(Statement::CreateVirtualTable { + name: table_name, + if_not_exists, + module_name, + module_args, + }) + } + + /// Parse a SQL `CREATE MATERIALIZED` or `CREATE VIEW` statement + pub fn parse_create_view(&mut self, or_replace: bool) -> Result { + let materialized = self.parse_keyword(Keyword::MATERIALIZED); + self.expect_keyword(Keyword::VIEW)?; + // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). + // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. 
+ let name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(Optional)?; + let with_options = self.parse_options(Keyword::WITH)?; + self.expect_keyword(Keyword::AS)?; + let query = Box::new(self.parse_query()?); + // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. + Ok(Statement::CreateView { + name, + columns, + query, + materialized, + or_replace, + with_options, + }) + } + + fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { + let mut columns = vec![]; + let mut constraints = vec![]; + if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { + return Ok((columns, constraints)); + } + + loop { + if let Some(constraint) = self.parse_optional_table_constraint()? { + constraints.push(constraint); + } else if let Token::Word(_) = self.peek_token() { + columns.push(self.parse_column_def()?); + } else { + return self.expected("column name or constraint definition", self.peek_token()); + } + let comma = self.consume_token(&Token::Comma); + if self.consume_token(&Token::RParen) { + // allow a trailing comma, even though it's not in standard + break; + } else if !comma { + return self.expected("',' or ')' after column definition", self.peek_token()); + } + } + + Ok((columns, constraints)) + } + + pub(super) fn parse_column_def(&mut self) -> Result { + let name = self.parse_identifier()?; + let data_type = self.parse_data_type()?; + let collation = if self.parse_keyword(Keyword::COLLATE) { + Some(self.parse_object_name()?) + } else { + None + }; + let mut options = vec![]; + loop { + if self.parse_keyword(Keyword::CONSTRAINT) { + let name = Some(self.parse_identifier()?); + if let Some(option) = self.parse_optional_column_option()? { + options.push(ColumnOptionDef { name, option }); + } else { + return self.expected( + "constraint details after CONSTRAINT ", + self.peek_token(), + ); + } + } else if let Some(option) = self.parse_optional_column_option()? 
{ + options.push(ColumnOptionDef { name: None, option }); + } else { + break; + }; + } + Ok(ColumnDef { + name, + data_type, + collation, + options, + }) + } + + pub fn parse_optional_column_option(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { + Ok(Some(ColumnOption::NotNull)) + } else if self.parse_keyword(Keyword::NULL) { + Ok(Some(ColumnOption::Null)) + } else if self.parse_keyword(Keyword::DEFAULT) { + Ok(Some(ColumnOption::Default(self.parse_expr()?))) + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { + Ok(Some(ColumnOption::Unique { is_primary: true })) + } else if self.parse_keyword(Keyword::UNIQUE) { + Ok(Some(ColumnOption::Unique { is_primary: false })) + } else if self.parse_keyword(Keyword::REFERENCES) { + let foreign_table = self.parse_object_name()?; + // PostgreSQL allows omitting the column list and + // uses the primary key column of the foreign table by default + let referred_columns = self.parse_parenthesized_column_list(Optional)?; + let mut on_delete = None; + let mut on_update = None; + loop { + if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { + on_delete = Some(self.parse_referential_action()?); + } else if on_update.is_none() + && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + { + on_update = Some(self.parse_referential_action()?); + } else { + break; + } + } + Ok(Some(ColumnOption::ForeignKey { + foreign_table, + referred_columns, + on_delete, + on_update, + })) + } else if self.parse_keyword(Keyword::CHECK) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Some(ColumnOption::Check(expr))) + } else if self.parse_keyword(Keyword::AUTO_INCREMENT) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + // Support AUTO_INCREMENT for MySQL + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("AUTO_INCREMENT"), + ]))) + } else if 
self.parse_keyword(Keyword::AUTOINCREMENT) + && dialect_of!(self is SQLiteDialect | GenericDialect) + { + // Support AUTOINCREMENT for SQLite + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("AUTOINCREMENT"), + ]))) + } else { + Ok(None) + } + } + + pub fn parse_optional_table_constraint( + &mut self, + ) -> Result, ParserError> { + let name = if self.parse_keyword(Keyword::CONSTRAINT) { + Some(self.parse_identifier()?) + } else { + None + }; + match self.next_token() { + Token::Word(w) if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE => { + let is_primary = w.keyword == Keyword::PRIMARY; + if is_primary { + self.expect_keyword(Keyword::KEY)?; + } + let columns = self.parse_parenthesized_column_list(Mandatory)?; + Ok(Some(TableConstraint::Unique { + name, + columns, + is_primary, + })) + } + Token::Word(w) if w.keyword == Keyword::FOREIGN => { + self.expect_keyword(Keyword::KEY)?; + let columns = self.parse_parenthesized_column_list(Mandatory)?; + self.expect_keyword(Keyword::REFERENCES)?; + let foreign_table = self.parse_object_name()?; + let referred_columns = self.parse_parenthesized_column_list(Mandatory)?; + let mut on_delete = None; + let mut on_update = None; + loop { + if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { + on_delete = Some(self.parse_referential_action()?); + } else if on_update.is_none() + && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + { + on_update = Some(self.parse_referential_action()?); + } else { + break; + } + } + Ok(Some(TableConstraint::ForeignKey { + name, + columns, + foreign_table, + referred_columns, + on_delete, + on_update, + })) + } + Token::Word(w) if w.keyword == Keyword::CHECK => { + self.expect_token(&Token::LParen)?; + let expr = Box::new(self.parse_expr()?); + self.expect_token(&Token::RParen)?; + Ok(Some(TableConstraint::Check { name, expr })) + } + unexpected => { + if name.is_some() { + self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", 
unexpected) + } else { + self.prev_token(); + Ok(None) + } + } + } + } + + pub fn parse_referential_action(&mut self) -> Result { + if self.parse_keyword(Keyword::RESTRICT) { + Ok(ReferentialAction::Restrict) + } else if self.parse_keyword(Keyword::CASCADE) { + Ok(ReferentialAction::Cascade) + } else if self.parse_keywords(&[Keyword::SET, Keyword::NULL]) { + Ok(ReferentialAction::SetNull) + } else if self.parse_keywords(&[Keyword::NO, Keyword::ACTION]) { + Ok(ReferentialAction::NoAction) + } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { + Ok(ReferentialAction::SetDefault) + } else { + self.expected( + "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", + self.peek_token(), + ) + } + } + + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { + if self.parse_keyword(keyword) { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Parser::parse_sql_option)?; + self.expect_token(&Token::RParen)?; + Ok(options) + } else { + Ok(vec![]) + } + } + + pub fn parse_sql_option(&mut self) -> Result { + let name = self.parse_identifier()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_value()?; + Ok(SqlOption { name, value }) + } + + /// Parse a SQL `CREATE INDEX` statement + pub fn parse_create_index(&mut self, unique: bool) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let index_name = self.parse_object_name()?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name()?; + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Statement::CreateIndex { + name: index_name, + table_name, + columns, + unique, + if_not_exists, + }) + } + + /// Parse a SQL `CREATE SCHEMA` statement + pub fn parse_create_schema(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, 
Keyword::EXISTS]); + let schema_name = self.parse_object_name()?; + Ok(Statement::CreateSchema { + schema_name, + if_not_exists, + }) + } + + //TODO: Implement parsing for Skewed and Clustered + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { columns }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + loop { + match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + } + None => break, + _ => break, + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => Ok(HiveRowFormat::DELIMITED), + } + } + + pub(super) fn parse_show_create(&mut self) -> Result { + let obj_type = match self.expect_one_of_keywords(&[ + Keyword::TABLE, + Keyword::TRIGGER, + Keyword::FUNCTION, + Keyword::PROCEDURE, + 
Keyword::EVENT, + ])? { + Keyword::TABLE => Ok(ShowCreateObject::Table), + Keyword::TRIGGER => Ok(ShowCreateObject::Trigger), + Keyword::FUNCTION => Ok(ShowCreateObject::Function), + Keyword::PROCEDURE => Ok(ShowCreateObject::Procedure), + Keyword::EVENT => Ok(ShowCreateObject::Event), + keyword => Err(ParserError::ParserError(format!( + "Unable to map keyword to ShowCreateObject: {:?}", + keyword + ))), + }?; + + let obj_name = self.parse_object_name()?; + + Ok(Statement::ShowCreate { obj_type, obj_name }) + } + + pub(super) fn parse_show_columns(&mut self) -> Result { + let extended = self.parse_keyword(Keyword::EXTENDED); + let full = self.parse_keyword(Keyword::FULL); + self.expect_one_of_keywords(&[Keyword::COLUMNS, Keyword::FIELDS])?; + self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; + let table_name = self.parse_object_name()?; + // MySQL also supports FROM here. In other words, MySQL + // allows both FROM
<table> FROM <database> and FROM <database>.<table>
, + // while we only support the latter for now. + let filter = self.parse_show_statement_filter()?; + Ok(Statement::ShowColumns { + extended, + full, + table_name, + filter, + }) + } + + fn parse_show_statement_filter(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::LIKE) { + Ok(Some(ShowStatementFilter::Like( + self.parse_literal_string()?, + ))) + } else if self.parse_keyword(Keyword::ILIKE) { + Ok(Some(ShowStatementFilter::ILike( + self.parse_literal_string()?, + ))) + } else if self.parse_keyword(Keyword::WHERE) { + Ok(Some(ShowStatementFilter::Where(self.parse_expr()?))) + } else { + Ok(None) + } + } + + pub(super) fn parse_show_variable(&mut self) -> Result { + Ok(Statement::ShowVariable { + variable: self.parse_identifiers()?, + }) + } + + pub fn parse_identifiers(&mut self) -> Result, ParserError> { + let mut idents = vec![]; + loop { + match self.next_token() { + Token::Word(w) => idents.push(w.to_ident()), + Token::EOF => break, + _ => {} + } + } + Ok(idents) + } +} diff --git a/src/parser/dml.rs b/src/parser/dml.rs new file mode 100644 index 000000000..35581d9c7 --- /dev/null +++ b/src/parser/dml.rs @@ -0,0 +1,64 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[cfg(not(feature = "std"))] +use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::ast::*; +use crate::parser::{Parser, ParserError}; +use crate::tokenizer::{Token, Whitespace}; + +impl<'a> Parser<'a> { + // Parse a `var = expr` assignment, used in an `UPDATE` statement + pub fn parse_assignment(&mut self) -> Result { + let id = self.parse_identifier()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + Ok(Assignment { id, value }) + } + + // Parse a tab separated values in COPY payload, used in a `COPY` statement + pub(super) fn parse_tab_values(&mut self) -> Vec> { + let mut values = vec![]; + let mut content = String::from(""); + while let Some(t) = self.next_token_no_skip() { + match t { + Token::Whitespace(Whitespace::Tab) => { + values.push(Some(content.to_string())); + content.clear(); + } + Token::Whitespace(Whitespace::Newline) => { + values.push(Some(content.to_string())); + content.clear(); + } + Token::Backslash => { + if self.consume_token(&Token::Period) { + return values; + } + if let Token::Word(w) = self.next_token() { + if w.value == "N" { + values.push(None); + } + } + } + _ => { + content.push_str(&t.to_string()); + } + } + } + values + } +} diff --git a/src/parser/dql.rs b/src/parser/dql.rs new file mode 100644 index 000000000..119dd4687 --- /dev/null +++ b/src/parser/dql.rs @@ -0,0 +1,673 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, format, vec}; + +use crate::ast::*; +use crate::dialect::keywords::Keyword; +use crate::dialect::*; +use crate::parser::{IsLateral, IsOptional, Parser, ParserError}; +use crate::tokenizer::Token; + +impl<'a> Parser<'a> { + /// Parse a query expression, i.e. a `SELECT` statement optionally + /// preceded with some `WITH` CTE declarations and optionally followed + /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't + /// expect the initial keyword to be already consumed + pub fn parse_query(&mut self) -> Result { + let with = if self.parse_keyword(Keyword::WITH) { + Some(With { + recursive: self.parse_keyword(Keyword::RECURSIVE), + cte_tables: self.parse_comma_separated(Parser::parse_cte)?, + }) + } else { + None + }; + + if !self.parse_keyword(Keyword::INSERT) { + let body = self.parse_query_body(0)?; + + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; + + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_offset()?) + } else { + None + }; + + let fetch = if self.parse_keyword(Keyword::FETCH) { + Some(self.parse_fetch()?) + } else { + None + }; + + Ok(Query { + with, + body, + order_by, + limit, + offset, + fetch, + }) + } else { + let insert = self.parse_insert()?; + Ok(Query { + with, + body: SetExpr::Insert(insert), + limit: None, + order_by: vec![], + offset: None, + fetch: None, + }) + } + } + + /// Parse a CTE (`alias [( col1, col2, ... 
)] AS (subquery)`) + pub(super) fn parse_cte(&mut self) -> Result { + let name = self.parse_identifier()?; + + let mut cte = if self.parse_keyword(Keyword::AS) { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = TableAlias { + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + } + } else { + let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; + self.expect_keyword(Keyword::AS)?; + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = TableAlias { name, columns }; + Cte { + alias, + query, + from: None, + } + }; + if self.parse_keyword(Keyword::FROM) { + cte.from = Some(self.parse_identifier()?); + } + Ok(cte) + } + + /// Parse a "query body", which is an expression with roughly the + /// following grammar: + /// ```text + /// query_body ::= restricted_select | '(' subquery ')' | set_operation + /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] + /// subquery ::= query_body [ order_by_limit ] + /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body + /// ``` + pub(super) fn parse_query_body(&mut self, precedence: u8) -> Result { + // We parse the expression using a Pratt parser, as in `parse_expr()`. + // Start by parsing a restricted SELECT or a `(subquery)`: + let mut expr = if self.parse_keyword(Keyword::SELECT) { + SetExpr::Select(Box::new(self.parse_select()?)) + } else if self.consume_token(&Token::LParen) { + // CTEs are not allowed here, but the parser currently accepts them + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + SetExpr::Query(Box::new(subquery)) + } else if self.parse_keyword(Keyword::VALUES) { + SetExpr::Values(self.parse_values()?) 
+ } else { + return self.expected( + "SELECT, VALUES, or a subquery in the query body", + self.peek_token(), + ); + }; + + loop { + // The query can be optionally followed by a set operator: + let op = self.parse_set_operator(&self.peek_token()); + let next_precedence = match op { + // UNION and EXCEPT have the same binding power and evaluate left-to-right + Some(SetOperator::Union) | Some(SetOperator::Except) => 10, + // INTERSECT has higher precedence than UNION/EXCEPT + Some(SetOperator::Intersect) => 20, + // Unexpected token or EOF => stop parsing the query body + None => break, + }; + if precedence >= next_precedence { + break; + } + self.next_token(); // skip past the set operator + expr = SetExpr::SetOperation { + left: Box::new(expr), + op: op.unwrap(), + all: self.parse_keyword(Keyword::ALL), + right: Box::new(self.parse_query_body(next_precedence)?), + }; + } + + Ok(expr) + } + + fn parse_set_operator(&mut self, token: &Token) -> Option { + match token { + Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), + Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), + Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), + _ => None, + } + } + + /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), + /// assuming the initial `SELECT` was already consumed + pub fn parse_select(&mut self) -> Result { + let distinct = self.parse_all_or_distinct()?; + + let top = if self.parse_keyword(Keyword::TOP) { + Some(self.parse_top()?) + } else { + None + }; + + let projection = self.parse_comma_separated(Parser::parse_select_item)?; + + // Note that for keywords to be properly handled here, they need to be + // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, + // otherwise they may be parsed as an alias as part of the `projection` + // or `from`. 
+ + let from = if self.parse_keyword(Keyword::FROM) { + self.parse_comma_separated(Parser::parse_table_and_joins)? + } else { + vec![] + }; + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name()?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + Keyword::LATERAL, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .flatten() + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + outer, + }); + } else { + break; + } + } + + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + + let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let having = if self.parse_keyword(Keyword::HAVING) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + Ok(Select { + distinct, + top, + projection, + from, + lateral_views, + selection, + group_by, + cluster_by, + distribute_by, + sort_by, + having, + }) + } + + /// Parse a comma-delimited list of projections after SELECT + pub fn parse_select_item(&mut self) -> Result { + let expr = self.parse_expr()?; + if let Expr::Wildcard = expr { + Ok(SelectItem::Wildcard) + } else if let Expr::QualifiedWildcard(prefix) = expr { + Ok(SelectItem::QualifiedWildcard(ObjectName(prefix))) + } else { + // `expr` is a regular SQL expression and can be followed by an alias + if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? { + Ok(SelectItem::ExprWithAlias { expr, alias }) + } else { + Ok(SelectItem::UnnamedExpr(expr)) + } + } + } + + pub fn parse_table_and_joins(&mut self) -> Result { + let relation = self.parse_table_factor()?; + + // Note that for keywords to be properly handled here, they need to be + // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as + // a table alias. + let mut joins = vec![]; + loop { + let join = if self.parse_keyword(Keyword::CROSS) { + let join_operator = if self.parse_keyword(Keyword::JOIN) { + JoinOperator::CrossJoin + } else if self.parse_keyword(Keyword::APPLY) { + // MSSQL extension, similar to CROSS JOIN LATERAL + JoinOperator::CrossApply + } else { + return self.expected("JOIN or APPLY after CROSS", self.peek_token()); + }; + Join { + relation: self.parse_table_factor()?, + join_operator, + } + } else if self.parse_keyword(Keyword::OUTER) { + // MSSQL extension, similar to LEFT JOIN LATERAL .. 
ON 1=1 + self.expect_keyword(Keyword::APPLY)?; + Join { + relation: self.parse_table_factor()?, + join_operator: JoinOperator::OuterApply, + } + } else { + let natural = self.parse_keyword(Keyword::NATURAL); + let peek_keyword = if let Token::Word(w) = self.peek_token() { + w.keyword + } else { + Keyword::NoKeyword + }; + + let join_operator_type = match peek_keyword { + Keyword::INNER | Keyword::JOIN => { + let _ = self.parse_keyword(Keyword::INNER); + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::Inner + } + kw @ Keyword::LEFT | kw @ Keyword::RIGHT | kw @ Keyword::FULL => { + let _ = self.next_token(); + let _ = self.parse_keyword(Keyword::OUTER); + self.expect_keyword(Keyword::JOIN)?; + match kw { + Keyword::LEFT => JoinOperator::LeftOuter, + Keyword::RIGHT => JoinOperator::RightOuter, + Keyword::FULL => JoinOperator::FullOuter, + _ => unreachable!(), + } + } + Keyword::OUTER => { + return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); + } + _ if natural => { + return self.expected("a join type after NATURAL", self.peek_token()); + } + _ => break, + }; + let relation = self.parse_table_factor()?; + let join_constraint = self.parse_join_constraint(natural)?; + Join { + relation, + join_operator: join_operator_type(join_constraint), + } + }; + joins.push(join); + } + Ok(TableWithJoins { relation, joins }) + } + + /// A table name or a parenthesized subquery, followed by optional `[AS] alias` + pub fn parse_table_factor(&mut self) -> Result { + if self.parse_keyword(Keyword::LATERAL) { + // LATERAL must always be followed by a subquery. 
+ if !self.consume_token(&Token::LParen) { + self.expected("subquery after LATERAL", self.peek_token())?; + } + self.parse_derived_table_factor(IsLateral::Lateral) + } else if self.parse_keyword(Keyword::TABLE) { + // parse table function (SELECT * FROM TABLE () [ AS ]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::TableFunction { expr, alias }) + } else if self.consume_token(&Token::LParen) { + // A left paren introduces either a derived table (i.e., a subquery) + // or a nested join. It's nearly impossible to determine ahead of + // time which it is... so we just try to parse both. + // + // Here's an example that demonstrates the complexity: + // /-------------------------------------------------------\ + // | /-----------------------------------\ | + // SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) ) + // ^ ^ ^ ^ + // | | | | + // | | | | + // | | | (4) belongs to a SetExpr::Query inside the subquery + // | | (3) starts a derived table (subquery) + // | (2) starts a nested join + // (1) an additional set of parens around a nested join + // + + // If the recently consumed '(' starts a derived table, the call to + // `parse_derived_table_factor` below will return success after parsing the + // subquery, followed by the closing ')', and the alias of the derived table. + // In the example above this is case (3). + return_ok_if_some!( + self.maybe_parse(|parser| parser.parse_derived_table_factor(IsLateral::NotLateral)) + ); + // A parsing error from `parse_derived_table_factor` indicates that the '(' we've + // recently consumed does not start a derived table (cases 1, 2, or 4). + // `maybe_parse` will ignore such an error and rewind to be after the opening '('. 
+ + // Inside the parentheses we expect to find an (A) table factor + // followed by some joins or (B) another level of nesting. + let mut table_and_joins = self.parse_table_and_joins()?; + + if !table_and_joins.joins.is_empty() { + self.expect_token(&Token::RParen)?; + Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) // (A) + } else if let TableFactor::NestedJoin(_) = &table_and_joins.relation { + // (B): `table_and_joins` (what we found inside the parentheses) + // is a nested join `(foo JOIN bar)`, not followed by other joins. + self.expect_token(&Token::RParen)?; + Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) + } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { + // Dialect-specific behavior: Snowflake diverges from the + // standard and from most of the other implementations by + // allowing extra parentheses not only around a join (B), but + // around lone table names (e.g. `FROM (mytable [AS alias])`) + // and around derived tables (e.g. `FROM ((SELECT ...) + // [AS alias])`) as well. + self.expect_token(&Token::RParen)?; + + if let Some(outer_alias) = + self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? + { + // Snowflake also allows specifying an alias *after* parens + // e.g. `FROM (mytable) AS alias` + match &mut table_and_joins.relation { + TableFactor::Derived { alias, .. } + | TableFactor::Table { alias, .. } + | TableFactor::TableFunction { alias, .. } => { + // but not `FROM (mytable AS alias1) AS alias2`. 
+ if let Some(inner_alias) = alias { + return Err(ParserError::ParserError(format!( + "duplicate alias {}", + inner_alias + ))); + } + // Act as if the alias was specified normally next + // to the table name: `(mytable) AS alias` -> + // `(mytable AS alias)` + alias.replace(outer_alias); + } + TableFactor::NestedJoin(_) => unreachable!(), + }; + } + // Do not store the extra set of parens in the AST + Ok(table_and_joins.relation) + } else { + // The SQL spec prohibits derived tables and bare tables from + // appearing alone in parentheses (e.g. `FROM (mytable)`) + self.expected("joined table", self.peek_token()) + } + } else { + let name = self.parse_object_name()?; + // Postgres, MSSQL: table-valued functions: + let args = if self.consume_token(&Token::LParen) { + self.parse_optional_args()? + } else { + vec![] + }; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + // MSSQL-specific table hints: + let mut with_hints = vec![]; + if self.parse_keyword(Keyword::WITH) { + if self.consume_token(&Token::LParen) { + with_hints = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + } else { + // rewind, as WITH may belong to the next statement's CTE + self.prev_token(); + } + }; + Ok(TableFactor::Table { + name, + alias, + args, + with_hints, + }) + } + } + + pub fn parse_derived_table_factor( + &mut self, + lateral: IsLateral, + ) -> Result { + let subquery = Box::new(self.parse_query()?); + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Derived { + lateral: match lateral { + IsLateral::Lateral => true, + IsLateral::NotLateral => false, + }, + subquery, + alias, + }) + } + + /// Parse `AS identifier` when the AS is describing a table-valued object, + /// like in `... FROM generate_series(1, 10) AS t (col)`. 
In this case + /// the alias is allowed to optionally name the columns in the table, in + /// addition to the table itself. + pub fn parse_optional_table_alias( + &mut self, + reserved_kwds: &[Keyword], + ) -> Result, ParserError> { + match self.parse_optional_alias(reserved_kwds)? { + Some(name) => { + let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; + Ok(Some(TableAlias { name, columns })) + } + None => Ok(None), + } + } + + /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) + /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, + /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` + pub fn parse_optional_alias( + &mut self, + reserved_kwds: &[Keyword], + ) -> Result, ParserError> { + let after_as = self.parse_keyword(Keyword::AS); + match self.next_token() { + // Accept any identifier after `AS` (though many dialects have restrictions on + // keywords that may appear here). If there's no `AS`: don't parse keywords, + // which may start a construct allowed in this position, to be parsed as aliases. + // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, + // not an alias.) + Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => { + Ok(Some(w.to_ident())) + } + // MSSQL supports single-quoted strings as aliases for columns + // We accept them as table aliases too, although MSSQL does not. + // + // Note, that this conflicts with an obscure rule from the SQL + // standard, which we don't implement: + // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s + // "[Obscure Rule] SQL allows you to break a long up into two or more smaller s, split by a that includes a newline + // character. When it sees such a , your DBMS will + // ignore the and treat the multiple strings as + // a single ." 
+ Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), + not_an_ident => { + if after_as { + return self.expected("an identifier after AS", not_an_ident); + } + self.prev_token(); + Ok(None) // no alias found + } + } + } + + fn parse_join_constraint(&mut self, natural: bool) -> Result { + if natural { + Ok(JoinConstraint::Natural) + } else if self.parse_keyword(Keyword::ON) { + let constraint = self.parse_expr()?; + Ok(JoinConstraint::On(constraint)) + } else if self.parse_keyword(Keyword::USING) { + let columns = self.parse_parenthesized_column_list(IsOptional::Mandatory)?; + Ok(JoinConstraint::Using(columns)) + } else { + Ok(JoinConstraint::None) + //self.expected("ON, or USING after JOIN", self.peek_token()) + } + } + + pub fn parse_values(&mut self) -> Result { + let values = self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let exprs = parser.parse_comma_separated(Parser::parse_expr)?; + parser.expect_token(&Token::RParen)?; + Ok(exprs) + })?; + Ok(Values(values)) + } + + /// Parse a `TOP` clause, MSSQL equivalent of `LIMIT`, + /// that follows after `SELECT [DISTINCT]`. 
+ pub fn parse_top(&mut self) -> Result { + let quantity = if self.consume_token(&Token::LParen) { + let quantity = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(quantity) + } else { + Some(Expr::Value(self.parse_number_value()?)) + }; + + let percent = self.parse_keyword(Keyword::PERCENT); + + let with_ties = self.parse_keywords(&[Keyword::WITH, Keyword::TIES]); + + Ok(Top { + with_ties, + percent, + quantity, + }) + } + + /// Parse a `LIMIT` clause + pub fn parse_limit(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::ALL) { + Ok(None) + } else { + Ok(Some(Expr::Value(self.parse_number_value()?))) + } + } + + /// Parse an `OFFSET` clause + pub fn parse_offset(&mut self) -> Result { + let value = Expr::Value(self.parse_number_value()?); + let rows = if self.parse_keyword(Keyword::ROW) { + OffsetRows::Row + } else if self.parse_keyword(Keyword::ROWS) { + OffsetRows::Rows + } else { + OffsetRows::None + }; + Ok(Offset { value, rows }) + } + + /// Parse a `FETCH` clause + pub fn parse_fetch(&mut self) -> Result { + self.expect_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])?; + let (quantity, percent) = if self + .parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS]) + .is_some() + { + (None, false) + } else { + let quantity = Expr::Value(self.parse_value()?); + let percent = self.parse_keyword(Keyword::PERCENT); + self.expect_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])?; + (Some(quantity), percent) + }; + let with_ties = if self.parse_keyword(Keyword::ONLY) { + false + } else if self.parse_keywords(&[Keyword::WITH, Keyword::TIES]) { + true + } else { + return self.expected("one of ONLY or WITH TIES", self.peek_token()); + }; + Ok(Fetch { + with_ties, + percent, + quantity, + }) + } +} diff --git a/src/parser/macros.rs b/src/parser/macros.rs new file mode 100644 index 000000000..8f8625a8b --- /dev/null +++ b/src/parser/macros.rs @@ -0,0 +1,27 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you 
may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Use `Parser::expected` instead, if possible +macro_rules! parser_err { + ($MSG:expr) => { + Err(ParserError::ParserError($MSG.to_string())) + }; +} + +// Returns a successful result if the optional expression is some +macro_rules! return_ok_if_some { + ($e:expr) => {{ + if let Some(v) = $e { + return Ok(v); + } + }}; +} diff --git a/src/parser/tcl.rs b/src/parser/tcl.rs new file mode 100644 index 000000000..ff05c5423 --- /dev/null +++ b/src/parser/tcl.rs @@ -0,0 +1,68 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[cfg(not(feature = "std"))] +use alloc::{vec, vec::Vec}; + +use crate::ast::*; +use crate::dialect::keywords::Keyword; +use crate::parser::{Parser, ParserError}; +use crate::tokenizer::Token; + +impl<'a> Parser<'a> { + pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { + let mut modes = vec![]; + let mut required = false; + loop { + let mode = if self.parse_keywords(&[Keyword::ISOLATION, Keyword::LEVEL]) { + let iso_level = if self.parse_keywords(&[Keyword::READ, Keyword::UNCOMMITTED]) { + TransactionIsolationLevel::ReadUncommitted + } else if self.parse_keywords(&[Keyword::READ, Keyword::COMMITTED]) { + TransactionIsolationLevel::ReadCommitted + } else if self.parse_keywords(&[Keyword::REPEATABLE, Keyword::READ]) { + TransactionIsolationLevel::RepeatableRead + } else if self.parse_keyword(Keyword::SERIALIZABLE) { + TransactionIsolationLevel::Serializable + } else { + self.expected("isolation level", self.peek_token())? + }; + TransactionMode::IsolationLevel(iso_level) + } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { + TransactionMode::AccessMode(TransactionAccessMode::ReadOnly) + } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { + TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) + } else if required { + self.expected("transaction mode", self.peek_token())? + } else { + break; + }; + modes.push(mode); + // ANSI requires a comma after each transaction mode, but + // PostgreSQL, for historical reasons, does not. We follow + // PostgreSQL in making the comma optional, since that is strictly + // more general. 
+ required = self.consume_token(&Token::Comma); + } + Ok(modes) + } + + pub fn parse_commit_rollback_chain(&mut self) -> Result { + let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); + if self.parse_keyword(Keyword::AND) { + let chain = !self.parse_keyword(Keyword::NO); + self.expect_keyword(Keyword::CHAIN)?; + Ok(chain) + } else { + Ok(false) + } + } +} From cff6450d961ed31a07265cacc4ad92b323596995 Mon Sep 17 00:00:00 2001 From: koushiro Date: Fri, 3 Sep 2021 11:27:05 +0800 Subject: [PATCH 2/5] Move parse.rs into parser/mod.rs Signed-off-by: koushiro --- src/{parser.rs => parser/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{parser.rs => parser/mod.rs} (100%) diff --git a/src/parser.rs b/src/parser/mod.rs similarity index 100% rename from src/parser.rs rename to src/parser/mod.rs From 06b62005cf6a53babe85d2534844ac74bca99e76 Mon Sep 17 00:00:00 2001 From: koushiro Date: Fri, 3 Sep 2021 11:30:07 +0800 Subject: [PATCH 3/5] change visibility of some methods Signed-off-by: koushiro --- src/parser/ddl.rs | 114 +++++++++++++++++++++++----------------------- src/parser/mod.rs | 6 +-- 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/src/parser/ddl.rs b/src/parser/ddl.rs index 3a45f2bd3..7dd68b337 100644 --- a/src/parser/ddl.rs +++ b/src/parser/ddl.rs @@ -430,62 +430,6 @@ impl<'a> Parser<'a> { }) } - //TODO: Implement parsing for Skewed and Clustered - pub fn parse_hive_distribution(&mut self) -> Result { - if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { columns }) - } else { - Ok(HiveDistributionStyle::NONE) - } - } - - pub fn parse_hive_formats(&mut self) -> Result { - let mut hive_format = HiveFormat::default(); - loop { - match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, 
Keyword::LOCATION]) { - Some(Keyword::ROW) => { - hive_format.row_format = Some(self.parse_row_format()?); - } - Some(Keyword::STORED) => { - self.expect_keyword(Keyword::AS)?; - if self.parse_keyword(Keyword::INPUTFORMAT) { - let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::OUTPUTFORMAT)?; - let output_format = self.parse_expr()?; - hive_format.storage = Some(HiveIOFormat::IOF { - input_format, - output_format, - }); - } else { - let format = self.parse_file_format()?; - hive_format.storage = Some(HiveIOFormat::FileFormat { format }); - } - } - Some(Keyword::LOCATION) => { - hive_format.location = Some(self.parse_literal_string()?); - } - None => break, - _ => break, - } - } - - Ok(hive_format) - } - - pub fn parse_row_format(&mut self) -> Result { - self.expect_keyword(Keyword::FORMAT)?; - match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { - Some(Keyword::SERDE) => { - let class = self.parse_literal_string()?; - Ok(HiveRowFormat::SERDE { class }) - } - _ => Ok(HiveRowFormat::DELIMITED), - } - } - pub(super) fn parse_show_create(&mut self) -> Result { let obj_type = match self.expect_one_of_keywords(&[ Keyword::TABLE, @@ -550,7 +494,7 @@ impl<'a> Parser<'a> { }) } - pub fn parse_identifiers(&mut self) -> Result, ParserError> { + fn parse_identifiers(&mut self) -> Result, ParserError> { let mut idents = vec![]; loop { match self.next_token() { @@ -561,4 +505,60 @@ impl<'a> Parser<'a> { } Ok(idents) } + + //TODO: Implement parsing for Skewed and Clustered + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { columns }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + 
loop { + match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + } + None => break, + _ => break, + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => Ok(HiveRowFormat::DELIMITED), + } + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eaaeebc5e..b7789037a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1887,14 +1887,14 @@ impl<'a> Parser<'a> { } /// Parse a SQL `DEALLOCATE` statement - fn parse_deallocate(&mut self) -> Result { + pub fn parse_deallocate(&mut self) -> Result { let prepare = self.parse_keyword(Keyword::PREPARE); let name = self.parse_identifier()?; Ok(Statement::Deallocate { name, prepare }) } /// Parse a SQL `EXECUTE` statement - fn parse_execute(&mut self) -> Result { + pub fn parse_execute(&mut self) -> Result { let name = self.parse_identifier()?; let mut parameters = vec![]; @@ -1907,7 +1907,7 @@ impl<'a> Parser<'a> { } /// Parse a SQL `PREPARE` statement - fn parse_prepare(&mut self) -> Result { + pub fn parse_prepare(&mut self) -> Result { let name = self.parse_identifier()?; let mut data_types = 
vec![]; From 4bf522beca123550ca50d0e2e1db7a10b677312e Mon Sep 17 00:00:00 2001 From: koushiro Date: Fri, 3 Sep 2021 11:48:40 +0800 Subject: [PATCH 4/5] Differentiate methods Signed-off-by: koushiro --- src/parser/mod.rs | 2442 +++++++++++++++++++++++---------------------- 1 file changed, 1235 insertions(+), 1207 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b7789037a..586cc8d90 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -131,23 +131,28 @@ impl<'a> Parser<'a> { pub fn parse_statement(&mut self) -> Result { match self.next_token() { Token::Word(w) => match w.keyword { - Keyword::EXPLAIN => Ok(self.parse_explain()?), Keyword::ANALYZE => Ok(self.parse_analyze()?), - Keyword::SELECT | Keyword::WITH | Keyword::VALUES => { - self.prev_token(); - Ok(Statement::Query(Box::new(self.parse_query()?))) - } - Keyword::TRUNCATE => Ok(self.parse_truncate()?), Keyword::MSCK => Ok(self.parse_msck()?), + Keyword::TRUNCATE => Ok(self.parse_truncate()?), + + Keyword::ALTER => Ok(self.parse_alter()?), Keyword::CREATE => Ok(self.parse_create()?), + Keyword::EXPLAIN => Ok(self.parse_explain()?), Keyword::DROP => Ok(self.parse_drop()?), + Keyword::SET => Ok(self.parse_set()?), + Keyword::SHOW => Ok(self.parse_show()?), + + Keyword::ASSERT => Ok(self.parse_assert()?), + Keyword::COPY => Ok(self.parse_copy()?), + Keyword::DELETE => Ok(self.parse_delete()?), Keyword::INSERT => Ok(self.parse_insert()?), + Keyword::SELECT | Keyword::WITH | Keyword::VALUES => { + self.prev_token(); + Ok(Statement::Query(Box::new(self.parse_query()?))) + } Keyword::UPDATE => Ok(self.parse_update()?), - Keyword::ALTER => Ok(self.parse_alter()?), - Keyword::COPY => Ok(self.parse_copy()?), - Keyword::SET => Ok(self.parse_set()?), - Keyword::SHOW => Ok(self.parse_show()?), + Keyword::START => Ok(self.parse_start_transaction()?), // `BEGIN` is a nonstandard but common alias for the // standard `START TRANSACTION` statement. 
It is supported @@ -155,12 +160,13 @@ impl<'a> Parser<'a> { Keyword::BEGIN => Ok(self.parse_begin()?), Keyword::COMMIT => Ok(self.parse_commit()?), Keyword::ROLLBACK => Ok(self.parse_rollback()?), - Keyword::ASSERT => Ok(self.parse_assert()?), + // `PREPARE`, `EXECUTE` and `DEALLOCATE` are Postgres-specific // syntaxes. They are used for Postgres prepared statement. - Keyword::DEALLOCATE => Ok(self.parse_deallocate()?), - Keyword::EXECUTE => Ok(self.parse_execute()?), Keyword::PREPARE => Ok(self.parse_prepare()?), + Keyword::EXECUTE => Ok(self.parse_execute()?), + Keyword::DEALLOCATE => Ok(self.parse_deallocate()?), + Keyword::REPLACE if dialect_of!(self is SQLiteDialect ) => { self.prev_token(); Ok(self.parse_insert()?) @@ -175,51 +181,11 @@ impl<'a> Parser<'a> { } } - /// Parse a `MSCK` statement - pub fn parse_msck(&mut self) -> Result { - let repair = self.parse_keyword(Keyword::REPAIR); - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name()?; - let partition_action = self - .maybe_parse(|parser| { - let pa = match parser.parse_one_of_keywords(&[ - Keyword::ADD, - Keyword::DROP, - Keyword::SYNC, - ]) { - Some(Keyword::ADD) => Some(AddDropSync::ADD), - Some(Keyword::DROP) => Some(AddDropSync::DROP), - Some(Keyword::SYNC) => Some(AddDropSync::SYNC), - _ => None, - }; - parser.expect_keyword(Keyword::PARTITIONS)?; - Ok(pa) - }) - .unwrap_or_default(); - Ok(Statement::Msck { - repair, - table_name, - partition_action, - }) - } - - /// Parse a `TRUNCATE` statement - pub fn parse_truncate(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name()?; - let mut partitions = None; - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - } - Ok(Statement::Truncate { - table_name, - partitions, - }) - } + // 
============================================================================================ + // Statement parsing methods + // ============================================================================================ - /// Parse a `ANALYZE` statement + /// Parse a SQL `ANALYZE` statement pub fn parse_analyze(&mut self) -> Result { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name()?; @@ -276,574 +242,701 @@ impl<'a> Parser<'a> { }) } - /// Parse a new expression - pub fn parse_expr(&mut self) -> Result { - self.parse_subexpr(0) + /// Parse a SQL `MSCK` statement + pub fn parse_msck(&mut self) -> Result { + let repair = self.parse_keyword(Keyword::REPAIR); + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let partition_action = self + .maybe_parse(|parser| { + let pa = match parser.parse_one_of_keywords(&[ + Keyword::ADD, + Keyword::DROP, + Keyword::SYNC, + ]) { + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), + _ => None, + }; + parser.expect_keyword(Keyword::PARTITIONS)?; + Ok(pa) + }) + .unwrap_or_default(); + Ok(Statement::Msck { + repair, + table_name, + partition_action, + }) } - /// Parse tokens until the precedence changes - pub fn parse_subexpr(&mut self, precedence: u8) -> Result { - log::debug!("parsing expr"); - let mut expr = self.parse_prefix()?; - log::debug!("prefix: {:?}", expr); - loop { - let next_precedence = self.get_next_precedence()?; - log::debug!("next precedence: {:?}", next_precedence); - - if precedence >= next_precedence { - break; - } - - expr = self.parse_infix(expr, next_precedence)?; + /// Parse a SQL `TRUNCATE` statement + pub fn parse_truncate(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut partitions = None; + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + 
partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; } - Ok(expr) + Ok(Statement::Truncate { + table_name, + partitions, + }) } - /// Parse a `ASSERT` statement - pub fn parse_assert(&mut self) -> Result { - let condition = self.parse_expr()?; - let message = if self.parse_keyword(Keyword::AS) { - Some(self.parse_expr()?) + /// Parse a SQL `ALTER` statement + pub fn parse_alter(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let _ = self.parse_keyword(Keyword::ONLY); + let table_name = self.parse_object_name()?; + let operation = if self.parse_keyword(Keyword::ADD) { + if let Some(constraint) = self.parse_optional_table_constraint()? { + AlterTableOperation::AddConstraint(constraint) + } else { + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions: partitions, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let column_def = self.parse_column_def()?; + AlterTableOperation::AddColumn { column_def } + } + } + } else if self.parse_keyword(Keyword::RENAME) { + if self.parse_keyword(Keyword::TO) { + let table_name = self.parse_object_name()?; + AlterTableOperation::RenameTable { table_name } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let old_column_name = self.parse_identifier()?; + self.expect_keyword(Keyword::TO)?; + let new_column_name = self.parse_identifier()?; + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, + } + } + } else if self.parse_keyword(Keyword::DROP) { + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + self.expect_token(&Token::LParen)?; + let partitions = 
self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, + } + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, + } } else { - None + return self.expected( + "ADD, RENAME, PARTITION or DROP after ALTER TABLE", + self.peek_token(), + ); }; + Ok(Statement::AlterTable { + name: table_name, + operation, + }) + } - Ok(Statement::Assert { condition, message }) + /// Parse a SQL `CREATE` statement + pub fn parse_create(&mut self) -> Result { + let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); + if self.parse_keyword(Keyword::TABLE) { + self.parse_create_table(or_replace, temporary) + } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { + self.prev_token(); + 
self.parse_create_view(or_replace) + } else if self.parse_keyword(Keyword::EXTERNAL) { + self.parse_create_external_table(or_replace) + } else if or_replace { + self.expected( + "[EXTERNAL] TABLE or [MATERIALIZED] VIEW after CREATE OR REPLACE", + self.peek_token(), + ) + } else if self.parse_keyword(Keyword::INDEX) { + self.parse_create_index(false) + } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { + self.parse_create_index(true) + } else if self.parse_keyword(Keyword::VIRTUAL) { + self.parse_create_virtual_table() + } else if self.parse_keyword(Keyword::SCHEMA) { + self.parse_create_schema() + } else { + self.expected("an object type after CREATE", self.peek_token()) + } } - /// Parse an expression prefix - pub fn parse_prefix(&mut self) -> Result { - // PostgreSQL allows any string literal to be preceded by a type name, indicating that the - // string literal represents a literal of that type. Some examples: - // - // DATE '2020-05-20' - // TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54' - // BOOL 'true' - // - // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating - // matters is the fact that INTERVAL string literals may optionally be followed by special - // keywords, e.g.: - // - // INTERVAL '7' DAY - // - // Note also that naively `SELECT date` looks like a syntax error because the `date` type - // name is not followed by a string literal, but in fact in PostgreSQL it is a valid - // expression that should parse as the column name "date". - return_ok_if_some!(self.maybe_parse(|parser| { - match parser.parse_data_type()? { - DataType::Interval => parser.parse_literal_interval(), - // PosgreSQL allows almost any identifier to be used as custom data type name, - // and we support that in `parse_data_type()`. 
But unlike Postgres we don't - // have a list of globally reserved keywords (since they vary across dialects), - // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type - // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of - // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the - // `type 'string'` syntax for the custom data types at all. - DataType::Custom(..) => parser_err!("dummy"), - data_type => Ok(Expr::TypedString { - data_type, - value: parser.parse_literal_string()?, - }), - } - })); + /// Parse a SQL `EXPLAIN` statement + pub fn parse_explain(&mut self) -> Result { + let analyze = self.parse_keyword(Keyword::ANALYZE); + let verbose = self.parse_keyword(Keyword::VERBOSE); - let expr = match self.next_token() { - Token::Word(w) => match w.keyword { - Keyword::TRUE | Keyword::FALSE | Keyword::NULL => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Keyword::CASE => self.parse_case_expr(), - Keyword::CAST => self.parse_cast_expr(), - Keyword::TRY_CAST => self.parse_try_cast_expr(), - Keyword::EXISTS => self.parse_exists_expr(), - Keyword::EXTRACT => self.parse_extract_expr(), - Keyword::SUBSTRING => self.parse_substring_expr(), - Keyword::TRIM => self.parse_trim_expr(), - Keyword::INTERVAL => self.parse_literal_interval(), - Keyword::LISTAGG => self.parse_listagg_expr(), - Keyword::NOT => Ok(Expr::UnaryOp { - op: UnaryOperator::Not, - expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), - }), - // Here `w` is a word, check if it's a part of a multi-part - // identifier, a function call, or a simple identifier: - _ => match self.peek_token() { - Token::LParen | Token::Period => { - let mut id_parts: Vec = vec![w.to_ident()]; - let mut ends_with_wildcard = false; - while self.consume_token(&Token::Period) { - match self.next_token() { - Token::Word(w) => id_parts.push(w.to_ident()), - Token::Mult => { - ends_with_wildcard = true; - break; - } - unexpected => { - 
return self - .expected("an identifier or a '*' after '.'", unexpected); - } - } - } - if ends_with_wildcard { - Ok(Expr::QualifiedWildcard(id_parts)) - } else if self.consume_token(&Token::LParen) { - self.prev_token(); - self.parse_function(ObjectName(id_parts)) - } else { - Ok(Expr::CompoundIdentifier(id_parts)) - } - } - _ => Ok(Expr::Identifier(w.to_ident())), - }, - }, // End of Token::Word - Token::Mult => Ok(Expr::Wildcard), - tok @ Token::Minus | tok @ Token::Plus => { - let op = if tok == Token::Plus { - UnaryOperator::Plus - } else { - UnaryOperator::Minus - }; - Ok(Expr::UnaryOp { - op, - expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), - }) - } - tok @ Token::DoubleExclamationMark - | tok @ Token::PGSquareRoot - | tok @ Token::PGCubeRoot - | tok @ Token::AtSign - | tok @ Token::Tilde - if dialect_of!(self is PostgreSqlDialect) => - { - let op = match tok { - Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial, - Token::PGSquareRoot => UnaryOperator::PGSquareRoot, - Token::PGCubeRoot => UnaryOperator::PGCubeRoot, - Token::AtSign => UnaryOperator::PGAbs, - Token::Tilde => UnaryOperator::PGBitwiseNot, - _ => unreachable!(), - }; - Ok(Expr::UnaryOp { - op, - expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), - }) - } - Token::Number(_, _) - | Token::SingleQuotedString(_) - | Token::NationalStringLiteral(_) - | Token::HexStringLiteral(_) => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } + let statement = Box::new(self.parse_statement()?); - Token::LParen => { - let expr = - if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { - self.prev_token(); - Expr::Subquery(Box::new(self.parse_query()?)) - } else { - Expr::Nested(Box::new(self.parse_expr()?)) - }; - self.expect_token(&Token::RParen)?; - Ok(expr) - } - unexpected => self.expected("an expression:", unexpected), - }?; + Ok(Statement::Explain { + analyze, + verbose, + statement, + }) + } - if self.parse_keyword(Keyword::COLLATE) 
{ - Ok(Expr::Collate { - expr: Box::new(expr), - collation: self.parse_object_name()?, + /// Parse a SQL `DROP` statement + pub fn parse_drop(&mut self) -> Result { + let object_type = if self.parse_keyword(Keyword::TABLE) { + ObjectType::Table + } else if self.parse_keyword(Keyword::VIEW) { + ObjectType::View + } else if self.parse_keyword(Keyword::INDEX) { + ObjectType::Index + } else if self.parse_keyword(Keyword::SCHEMA) { + ObjectType::Schema + } else { + return self.expected("TABLE, VIEW, INDEX or SCHEMA after DROP", self.peek_token()); + }; + // Many dialects support the non standard `IF EXISTS` clause and allow + // specifying multiple objects to delete in a single statement + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let names = self.parse_comma_separated(Parser::parse_object_name)?; + let cascade = self.parse_keyword(Keyword::CASCADE); + let restrict = self.parse_keyword(Keyword::RESTRICT); + let purge = self.parse_keyword(Keyword::PURGE); + if cascade && restrict { + return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); + } + Ok(Statement::Drop { + object_type, + if_exists, + names, + cascade, + purge, + }) + } + + /// Parse a SQL `SET ` or `SET TRANSACTION` statement + pub fn parse_set(&mut self) -> Result { + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; + } + let variable = self.parse_identifier()?; + if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + let mut values = vec![]; + loop { + let token = self.peek_token(); + let value = match (self.parse_value(), token) { + (Ok(value), _) => SetVariableValue::Literal(value), + (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), + (Err(_), unexpected) => self.expected("variable value", unexpected)?, + }; + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + 
return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variable, + value: values, + }); + } + } else if variable.value == "TRANSACTION" && modifier.is_none() { + Ok(Statement::SetTransaction { + modes: self.parse_transaction_modes()?, }) } else { - Ok(expr) + self.expected("equals sign or TO", self.peek_token()) } } - pub fn parse_function(&mut self, name: ObjectName) -> Result { - self.expect_token(&Token::LParen)?; - let distinct = self.parse_all_or_distinct()?; - let args = self.parse_optional_args()?; - let over = if self.parse_keyword(Keyword::OVER) { - // TBD: support window names (`OVER mywin`) in place of inline specification - self.expect_token(&Token::LParen)?; - let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - // a list of possibly-qualified column names - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - let window_frame = if !self.consume_token(&Token::RParen) { - let window_frame = self.parse_window_frame()?; - self.expect_token(&Token::RParen)?; - Some(window_frame) - } else { - None - }; + /// Parse a SQL `SHOW` statement + pub fn parse_show(&mut self) -> Result { + if self + .parse_one_of_keywords(&[ + Keyword::EXTENDED, + Keyword::FULL, + Keyword::COLUMNS, + Keyword::FIELDS, + ]) + .is_some() + { + self.prev_token(); + Ok(self.parse_show_columns()?) + } else if self.parse_one_of_keywords(&[Keyword::CREATE]).is_some() { + Ok(self.parse_show_create()?) + } else { + Ok(self.parse_show_variable()?) 
+ } + } - Some(WindowSpec { - partition_by, - order_by, - window_frame, - }) + /// Parse a SQL `ASSERT` statement + pub fn parse_assert(&mut self) -> Result { + let condition = self.parse_expr()?; + let message = if self.parse_keyword(Keyword::AS) { + Some(self.parse_expr()?) } else { None }; - Ok(Expr::Function(Function { - name, - args, - over, - distinct, - })) + Ok(Statement::Assert { condition, message }) } - pub fn parse_window_frame_units(&mut self) -> Result { - match self.next_token() { - Token::Word(w) => match w.keyword { - Keyword::ROWS => Ok(WindowFrameUnits::Rows), - Keyword::RANGE => Ok(WindowFrameUnits::Range), - Keyword::GROUPS => Ok(WindowFrameUnits::Groups), - _ => self.expected("ROWS, RANGE, GROUPS", Token::Word(w))?, - }, - unexpected => self.expected("ROWS, RANGE, GROUPS", unexpected), - } + /// Parse a SQL `COPY` statement + pub fn parse_copy(&mut self) -> Result { + let table_name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; + self.expect_keywords(&[Keyword::FROM, Keyword::STDIN])?; + self.expect_token(&Token::SemiColon)?; + let values = self.parse_tab_values(); + Ok(Statement::Copy { + table_name, + columns, + values, + }) } - pub fn parse_window_frame(&mut self) -> Result { - let units = self.parse_window_frame_units()?; - let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { - let start_bound = self.parse_window_frame_bound()?; - self.expect_keyword(Keyword::AND)?; - let end_bound = Some(self.parse_window_frame_bound()?); - (start_bound, end_bound) + /// Parse a `DELETE` statement + pub fn parse_delete(&mut self) -> Result { + self.expect_keyword(Keyword::FROM)?; + let table_name = self.parse_object_name()?; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) 
} else { - (self.parse_window_frame_bound()?, None) + None }; - Ok(WindowFrame { - units, - start_bound, - end_bound, + + Ok(Statement::Delete { + table_name, + selection, }) } - /// Parse `CURRENT ROW` or `{ | UNBOUNDED } { PRECEDING | FOLLOWING }` - pub fn parse_window_frame_bound(&mut self) -> Result { - if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) { - Ok(WindowFrameBound::CurrentRow) + /// Parse an `INSERT` statement + pub fn parse_insert(&mut self) -> Result { + let or = if !dialect_of!(self is SQLiteDialect) { + None + } else if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { + Some(SqliteOnConflict::Replace) + } else if self.parse_keywords(&[Keyword::OR, Keyword::ROLLBACK]) { + Some(SqliteOnConflict::Rollback) + } else if self.parse_keywords(&[Keyword::OR, Keyword::ABORT]) { + Some(SqliteOnConflict::Abort) + } else if self.parse_keywords(&[Keyword::OR, Keyword::FAIL]) { + Some(SqliteOnConflict::Fail) + } else if self.parse_keywords(&[Keyword::OR, Keyword::IGNORE]) { + Some(SqliteOnConflict::Ignore) + } else if self.parse_keyword(Keyword::REPLACE) { + Some(SqliteOnConflict::Replace) } else { - let rows = if self.parse_keyword(Keyword::UNBOUNDED) { - None + None + }; + let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; + let overwrite = action == Keyword::OVERWRITE; + let local = self.parse_keyword(Keyword::LOCAL); + + if self.parse_keyword(Keyword::DIRECTORY) { + let path = self.parse_literal_string()?; + let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { + Some(self.parse_file_format()?) } else { - Some(self.parse_literal_uint()?) 
+ None }; - if self.parse_keyword(Keyword::PRECEDING) { - Ok(WindowFrameBound::Preceding(rows)) - } else if self.parse_keyword(Keyword::FOLLOWING) { - Ok(WindowFrameBound::Following(rows)) + let source = Box::new(self.parse_query()?); + Ok(Statement::Directory { + local, + path, + overwrite, + file_format, + source, + }) + } else { + // Hive lets you put table here regardless + let table = self.parse_keyword(Keyword::TABLE); + let table_name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; + + let partitioned = if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let r = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + r } else { - self.expected("PRECEDING or FOLLOWING", self.peek_token()) - } + None + }; + + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; + + let source = Box::new(self.parse_query()?); + Ok(Statement::Insert { + or, + table_name, + overwrite, + partitioned, + columns, + after_columns, + source, + table, + }) } } - pub fn parse_case_expr(&mut self) -> Result { - let mut operand = None; - if !self.parse_keyword(Keyword::WHEN) { - operand = Some(Box::new(self.parse_expr()?)); - self.expect_keyword(Keyword::WHEN)?; - } - let mut conditions = vec![]; - let mut results = vec![]; - loop { - conditions.push(self.parse_expr()?); - self.expect_keyword(Keyword::THEN)?; - results.push(self.parse_expr()?); - if !self.parse_keyword(Keyword::WHEN) { - break; - } - } - let else_result = if self.parse_keyword(Keyword::ELSE) { - Some(Box::new(self.parse_expr()?)) + /// Parse an `UPDATE` statement + pub fn parse_update(&mut self) -> Result { + let table_name = self.parse_object_name()?; + self.expect_keyword(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let selection = if 
self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) } else { None }; - self.expect_keyword(Keyword::END)?; - Ok(Expr::Case { - operand, - conditions, - results, - else_result, + Ok(Statement::Update { + table_name, + assignments, + selection, }) } - /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` - pub fn parse_cast_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Cast { - expr: Box::new(expr), - data_type, + /// Parse a SQL `START TRANSACTION` statement + pub fn parse_start_transaction(&mut self) -> Result { + self.expect_keyword(Keyword::TRANSACTION)?; + Ok(Statement::StartTransaction { + modes: self.parse_transaction_modes()?, }) } - /// Parse a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)` - pub fn parse_try_cast_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::TryCast { - expr: Box::new(expr), - data_type, + /// Parse a SQL `BEGIN` statement + pub fn parse_begin(&mut self) -> Result { + let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); + Ok(Statement::StartTransaction { + modes: self.parse_transaction_modes()?, }) } - /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. 
- pub fn parse_exists_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let exists_node = Expr::Exists(Box::new(self.parse_query()?)); - self.expect_token(&Token::RParen)?; - Ok(exists_node) - } - - pub fn parse_extract_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let field = self.parse_date_time_field()?; - self.expect_keyword(Keyword::FROM)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Extract { - field, - expr: Box::new(expr), + /// Parse a SQL `COMMIT` statement + pub fn parse_commit(&mut self) -> Result { + Ok(Statement::Commit { + chain: self.parse_commit_rollback_chain()?, }) } - pub fn parse_substring_expr(&mut self) -> Result { - // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - let mut from_expr = None; - let mut to_expr = None; - if self.parse_keyword(Keyword::FROM) { - from_expr = Some(self.parse_expr()?); - } - if self.parse_keyword(Keyword::FOR) { - to_expr = Some(self.parse_expr()?); - } - self.expect_token(&Token::RParen)?; - - Ok(Expr::Substring { - expr: Box::new(expr), - substring_from: from_expr.map(Box::new), - substring_for: to_expr.map(Box::new), + /// Parse a SQL `ROLLBACK` statement + pub fn parse_rollback(&mut self) -> Result { + Ok(Statement::Rollback { + chain: self.parse_commit_rollback_chain()?, }) } - /// TRIM (WHERE 'text' FROM 'text')\ - /// TRIM ('text') - pub fn parse_trim_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let mut where_expr = None; - if let Token::Word(word) = self.peek_token() { - if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING] - .iter() - .any(|d| word.keyword == *d) - { - let trim_where = self.parse_trim_where()?; - let sub_expr = self.parse_expr()?; - self.expect_keyword(Keyword::FROM)?; - where_expr = Some((trim_where, Box::new(sub_expr))); - } + /// Parse a SQL `PREPARE` statement + pub fn parse_prepare(&mut self) -> Result { + let name = 
self.parse_identifier()?; + + let mut data_types = vec![]; + if self.consume_token(&Token::LParen) { + data_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; } - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Trim { - expr: Box::new(expr), - trim_where: where_expr, + self.expect_keyword(Keyword::AS)?; + let statement = Box::new(self.parse_statement()?); + Ok(Statement::Prepare { + name, + data_types, + statement, }) } - pub fn parse_trim_where(&mut self) -> Result { - match self.next_token() { - Token::Word(w) => match w.keyword { - Keyword::BOTH => Ok(TrimWhereField::Both), - Keyword::LEADING => Ok(TrimWhereField::Leading), - Keyword::TRAILING => Ok(TrimWhereField::Trailing), - _ => self.expected("trim_where field", Token::Word(w))?, - }, - unexpected => self.expected("trim_where field", unexpected), - } - } + /// Parse a SQL `EXECUTE` statement + pub fn parse_execute(&mut self) -> Result { + let name = self.parse_identifier()?; - /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. - pub fn parse_listagg_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let distinct = self.parse_all_or_distinct()?; - let expr = Box::new(self.parse_expr()?); - // While ANSI SQL would would require the separator, Redshift makes this optional. Here we - // choose to make the separator optional as this provides the more general implementation. 
- let separator = if self.consume_token(&Token::Comma) { - Some(Box::new(self.parse_expr()?)) - } else { - None - }; - let on_overflow = if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) { - if self.parse_keyword(Keyword::ERROR) { - Some(ListAggOnOverflow::Error) - } else { - self.expect_keyword(Keyword::TRUNCATE)?; - let filler = match self.peek_token() { - Token::Word(w) - if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT => - { - None - } - Token::SingleQuotedString(_) - | Token::NationalStringLiteral(_) - | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), - unexpected => { - self.expected("either filler, WITH, or WITHOUT in LISTAGG", unexpected)? - } - }; - let with_count = self.parse_keyword(Keyword::WITH); - if !with_count && !self.parse_keyword(Keyword::WITHOUT) { - self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; - } - self.expect_keyword(Keyword::COUNT)?; - Some(ListAggOnOverflow::Truncate { filler, with_count }) - } - } else { - None - }; - self.expect_token(&Token::RParen)?; - // Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the - // more general implementation. - let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { - self.expect_token(&Token::LParen)?; - self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; - let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?; - self.expect_token(&Token::RParen)?; - order_by_expr - } else { - vec![] - }; - Ok(Expr::ListAgg(ListAgg { - distinct, - expr, - separator, - on_overflow, - within_group, - })) + let mut parameters = vec![]; + if self.consume_token(&Token::LParen) { + parameters = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + } + + Ok(Statement::Execute { name, parameters }) } - // This function parses date/time fields for both the EXTRACT function-like - // operator and interval qualifiers. 
EXTRACT supports a wider set of - // date/time fields than interval qualifiers, so this function may need to - // be split in two. - pub fn parse_date_time_field(&mut self) -> Result { - match self.next_token() { - Token::Word(w) => match w.keyword { - Keyword::YEAR => Ok(DateTimeField::Year), - Keyword::MONTH => Ok(DateTimeField::Month), - Keyword::DAY => Ok(DateTimeField::Day), - Keyword::HOUR => Ok(DateTimeField::Hour), - Keyword::MINUTE => Ok(DateTimeField::Minute), - Keyword::SECOND => Ok(DateTimeField::Second), - _ => self.expected("date/time field", Token::Word(w))?, - }, - unexpected => self.expected("date/time field", unexpected), - } + /// Parse a SQL `DEALLOCATE` statement + pub fn parse_deallocate(&mut self) -> Result { + let prepare = self.parse_keyword(Keyword::PREPARE); + let name = self.parse_identifier()?; + Ok(Statement::Deallocate { name, prepare }) } - /// Parse an INTERVAL literal. - /// - /// Some syntactically valid intervals: - /// - /// 1. `INTERVAL '1' DAY` - /// 2. `INTERVAL '1-1' YEAR TO MONTH` - /// 3. `INTERVAL '1' SECOND` - /// 4. `INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5)` - /// 5. `INTERVAL '1.1' SECOND (2, 2)` - /// 6. `INTERVAL '1:1' HOUR (5) TO MINUTE (5)` - /// - /// Note that we do not currently attempt to parse the quoted value. - pub fn parse_literal_interval(&mut self) -> Result { - // The SQL standard allows an optional sign before the value string, but - // it is not clear if any implementations support that syntax, so we - // don't currently try to parse it. (The sign can instead be included - // inside the value string.) + // ============================================================================================ + // Expression parsing methods + // ============================================================================================ - // The first token in an interval is a string literal which specifies - // the duration of the interval. 
- let value = self.parse_literal_string()?; + /// Parse a new expression + pub fn parse_expr(&mut self) -> Result { + self.parse_subexpr(0) + } - // Following the string literal is a qualifier which indicates the units - // of the duration specified in the string literal. + /// Parse tokens until the precedence changes + pub fn parse_subexpr(&mut self, precedence: u8) -> Result { + log::debug!("parsing expr"); + let mut expr = self.parse_prefix()?; + log::debug!("prefix: {:?}", expr); + loop { + let next_precedence = self.get_next_precedence()?; + log::debug!("next precedence: {:?}", next_precedence); + + if precedence >= next_precedence { + break; + } + + expr = self.parse_infix(expr, next_precedence)?; + } + Ok(expr) + } + + /// Parse an expression prefix + pub fn parse_prefix(&mut self) -> Result { + // PostgreSQL allows any string literal to be preceded by a type name, indicating that the + // string literal represents a literal of that type. Some examples: // - // Note that PostgreSQL allows omitting the qualifier, so we provide - // this more general implemenation. - let leading_field = match self.peek_token() { - Token::Word(kw) - if [ - Keyword::YEAR, - Keyword::MONTH, - Keyword::DAY, - Keyword::HOUR, - Keyword::MINUTE, - Keyword::SECOND, - ] - .iter() - .any(|d| kw.keyword == *d) => + // DATE '2020-05-20' + // TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54' + // BOOL 'true' + // + // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating + // matters is the fact that INTERVAL string literals may optionally be followed by special + // keywords, e.g.: + // + // INTERVAL '7' DAY + // + // Note also that naively `SELECT date` looks like a syntax error because the `date` type + // name is not followed by a string literal, but in fact in PostgreSQL it is a valid + // expression that should parse as the column name "date". + return_ok_if_some!(self.maybe_parse(|parser| { + match parser.parse_data_type()? 
{ + DataType::Interval => parser.parse_literal_interval(), + // PosgreSQL allows almost any identifier to be used as custom data type name, + // and we support that in `parse_data_type()`. But unlike Postgres we don't + // have a list of globally reserved keywords (since they vary across dialects), + // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type + // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of + // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the + // `type 'string'` syntax for the custom data types at all. + DataType::Custom(..) => parser_err!("dummy"), + data_type => Ok(Expr::TypedString { + data_type, + value: parser.parse_literal_string()?, + }), + } + })); + + let expr = match self.next_token() { + Token::Word(w) => match w.keyword { + Keyword::TRUE | Keyword::FALSE | Keyword::NULL => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Keyword::CASE => self.parse_case_expr(), + Keyword::CAST => self.parse_cast_expr(), + Keyword::TRY_CAST => self.parse_try_cast_expr(), + Keyword::EXISTS => self.parse_exists_expr(), + Keyword::EXTRACT => self.parse_extract_expr(), + Keyword::SUBSTRING => self.parse_substring_expr(), + Keyword::TRIM => self.parse_trim_expr(), + Keyword::INTERVAL => self.parse_literal_interval(), + Keyword::LISTAGG => self.parse_listagg_expr(), + Keyword::NOT => Ok(Expr::UnaryOp { + op: UnaryOperator::Not, + expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), + }), + // Here `w` is a word, check if it's a part of a multi-part + // identifier, a function call, or a simple identifier: + _ => match self.peek_token() { + Token::LParen | Token::Period => { + let mut id_parts: Vec = vec![w.to_ident()]; + let mut ends_with_wildcard = false; + while self.consume_token(&Token::Period) { + match self.next_token() { + Token::Word(w) => id_parts.push(w.to_ident()), + Token::Mult => { + ends_with_wildcard = true; + break; + } + unexpected => { + 
return self + .expected("an identifier or a '*' after '.'", unexpected); + } + } + } + if ends_with_wildcard { + Ok(Expr::QualifiedWildcard(id_parts)) + } else if self.consume_token(&Token::LParen) { + self.prev_token(); + self.parse_function(ObjectName(id_parts)) + } else { + Ok(Expr::CompoundIdentifier(id_parts)) + } + } + _ => Ok(Expr::Identifier(w.to_ident())), + }, + }, // End of Token::Word + Token::Mult => Ok(Expr::Wildcard), + tok @ Token::Minus | tok @ Token::Plus => { + let op = if tok == Token::Plus { + UnaryOperator::Plus + } else { + UnaryOperator::Minus + }; + Ok(Expr::UnaryOp { + op, + expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), + }) + } + tok @ Token::DoubleExclamationMark + | tok @ Token::PGSquareRoot + | tok @ Token::PGCubeRoot + | tok @ Token::AtSign + | tok @ Token::Tilde + if dialect_of!(self is PostgreSqlDialect) => { - Some(self.parse_date_time_field()?) + let op = match tok { + Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial, + Token::PGSquareRoot => UnaryOperator::PGSquareRoot, + Token::PGCubeRoot => UnaryOperator::PGCubeRoot, + Token::AtSign => UnaryOperator::PGAbs, + Token::Tilde => UnaryOperator::PGBitwiseNot, + _ => unreachable!(), + }; + Ok(Expr::UnaryOp { + op, + expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), + }) + } + Token::Number(_, _) + | Token::SingleQuotedString(_) + | Token::NationalStringLiteral(_) + | Token::HexStringLiteral(_) => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) } - _ => None, - }; - let (leading_precision, last_field, fsec_precision) = - if leading_field == Some(DateTimeField::Second) { - // SQL mandates special syntax for `SECOND TO SECOND` literals. 
- // Instead of - // `SECOND [()] TO SECOND[()]` - // one must use the special format: - // `SECOND [( [ , ] )]` - let last_field = None; - let (leading_precision, fsec_precision) = self.parse_optional_precision_scale()?; - (leading_precision, last_field, fsec_precision) - } else { - let leading_precision = self.parse_optional_precision()?; - if self.parse_keyword(Keyword::TO) { - let last_field = Some(self.parse_date_time_field()?); - let fsec_precision = if last_field == Some(DateTimeField::Second) { - self.parse_optional_precision()? + Token::LParen => { + let expr = + if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { + self.prev_token(); + Expr::Subquery(Box::new(self.parse_query()?)) } else { - None + Expr::Nested(Box::new(self.parse_expr()?)) }; - (leading_precision, last_field, fsec_precision) - } else { - (leading_precision, None, None) - } - }; + self.expect_token(&Token::RParen)?; + Ok(expr) + } + unexpected => self.expected("an expression:", unexpected), + }?; + + if self.parse_keyword(Keyword::COLLATE) { + Ok(Expr::Collate { + expr: Box::new(expr), + collation: self.parse_object_name()?, + }) + } else { + Ok(expr) + } + } + + const UNARY_NOT_PREC: u8 = 15; + const BETWEEN_PREC: u8 = 20; + const PLUS_MINUS_PREC: u8 = 30; - Ok(Expr::Value(Value::Interval { - value, - leading_field, - leading_precision, - last_field, - fractional_seconds_precision: fsec_precision, - })) + /// Get the precedence of the next token + pub fn get_next_precedence(&self) -> Result { + let token = self.peek_token(); + log::debug!("get_next_precedence() {:?}", token); + match token { + Token::Word(w) if w.keyword == Keyword::OR => Ok(5), + Token::Word(w) if w.keyword == Keyword::AND => Ok(10), + Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1) { + // The precedence of NOT varies depending on keyword that + // follows it. If it is followed by IN, BETWEEN, or LIKE, + // it takes on the precedence of those tokens. 
Otherwise it + // is not an infix operator, and therefore has zero + // precedence. + Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC), + _ => Ok(0), + }, + Token::Word(w) if w.keyword == Keyword::IS => Ok(17), + Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC), + Token::Eq + | Token::Lt + | Token::LtEq + | Token::Neq + | Token::Gt + | Token::GtEq + | Token::DoubleEq + | Token::Tilde + | Token::TildeAsterisk + | Token::ExclamationMarkTilde + | Token::ExclamationMarkTildeAsterisk + | Token::Spaceship => Ok(20), + Token::Pipe => Ok(21), + Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), + Token::Ampersand => Ok(23), + Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), + Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40), + Token::DoubleColon => Ok(50), + Token::ExclamationMark => Ok(50), + Token::LBracket | Token::RBracket => Ok(10), + _ => Ok(0), + } } /// Parse an operator following an expression @@ -1003,242 +1096,417 @@ impl<'a> Parser<'a> { }) } - const UNARY_NOT_PREC: u8 = 15; - const BETWEEN_PREC: u8 = 20; - const PLUS_MINUS_PREC: u8 = 30; + pub fn parse_function(&mut self, name: ObjectName) -> Result { + self.expect_token(&Token::LParen)?; + let distinct = self.parse_all_or_distinct()?; + let args = self.parse_optional_args()?; + let over = if self.parse_keyword(Keyword::OVER) { + // TBD: support window names (`OVER mywin`) in place of inline specification + self.expect_token(&Token::LParen)?; + let partition_by = if 
self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { + // a list of possibly-qualified column names + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + let window_frame = if !self.consume_token(&Token::RParen) { + let window_frame = self.parse_window_frame()?; + self.expect_token(&Token::RParen)?; + Some(window_frame) + } else { + None + }; - /// Get the precedence of the next token - pub fn get_next_precedence(&self) -> Result { - let token = self.peek_token(); - log::debug!("get_next_precedence() {:?}", token); - match token { - Token::Word(w) if w.keyword == Keyword::OR => Ok(5), - Token::Word(w) if w.keyword == Keyword::AND => Ok(10), - Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1) { - // The precedence of NOT varies depending on keyword that - // follows it. If it is followed by IN, BETWEEN, or LIKE, - // it takes on the precedence of those tokens. Otherwise it - // is not an infix operator, and therefore has zero - // precedence. 
- Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC), - _ => Ok(0), - }, - Token::Word(w) if w.keyword == Keyword::IS => Ok(17), - Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC), - Token::Eq - | Token::Lt - | Token::LtEq - | Token::Neq - | Token::Gt - | Token::GtEq - | Token::DoubleEq - | Token::Tilde - | Token::TildeAsterisk - | Token::ExclamationMarkTilde - | Token::ExclamationMarkTildeAsterisk - | Token::Spaceship => Ok(20), - Token::Pipe => Ok(21), - Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), - Token::Ampersand => Ok(23), - Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC), - Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40), - Token::DoubleColon => Ok(50), - Token::ExclamationMark => Ok(50), - Token::LBracket | Token::RBracket => Ok(10), - _ => Ok(0), + Some(WindowSpec { + partition_by, + order_by, + window_frame, + }) + } else { + None + }; + + Ok(Expr::Function(Function { + name, + args, + over, + distinct, + })) + } + + pub fn parse_optional_args(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::RParen) { + Ok(vec![]) + } else { + let args = self.parse_comma_separated(Parser::parse_function_args)?; + self.expect_token(&Token::RParen)?; + Ok(args) } } - /// Return the first non-whitespace token that has not yet been processed - /// (or None if reached end-of-file) - pub fn peek_token(&self) -> Token { - self.peek_nth_token(0) + fn parse_function_args(&mut self) -> Result { + if 
self.peek_nth_token(1) == Token::RArrow { + let name = self.parse_identifier()?; + + self.expect_token(&Token::RArrow)?; + let arg = self.parse_expr()?; + + Ok(FunctionArg::Named { name, arg }) + } else { + Ok(FunctionArg::Unnamed(self.parse_expr()?)) + } } - /// Return nth non-whitespace token that has not yet been processed - pub fn peek_nth_token(&self, mut n: usize) -> Token { - let mut index = self.index; - loop { - index += 1; - match self.tokens.get(index - 1) { - Some(Token::Whitespace(_)) => continue, - non_whitespace => { - if n == 0 { - return non_whitespace.cloned().unwrap_or(Token::EOF); - } - n -= 1; - } + pub fn parse_window_frame(&mut self) -> Result { + let units = self.parse_window_frame_units()?; + let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { + let start_bound = self.parse_window_frame_bound()?; + self.expect_keyword(Keyword::AND)?; + let end_bound = Some(self.parse_window_frame_bound()?); + (start_bound, end_bound) + } else { + (self.parse_window_frame_bound()?, None) + }; + Ok(WindowFrame { + units, + start_bound, + end_bound, + }) + } + + pub fn parse_window_frame_units(&mut self) -> Result { + match self.next_token() { + Token::Word(w) => match w.keyword { + Keyword::ROWS => Ok(WindowFrameUnits::Rows), + Keyword::RANGE => Ok(WindowFrameUnits::Range), + Keyword::GROUPS => Ok(WindowFrameUnits::Groups), + _ => self.expected("ROWS, RANGE, GROUPS", Token::Word(w))?, + }, + unexpected => self.expected("ROWS, RANGE, GROUPS", unexpected), + } + } + + /// Parse `CURRENT ROW` or `{ | UNBOUNDED } { PRECEDING | FOLLOWING }` + pub fn parse_window_frame_bound(&mut self) -> Result { + if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) { + Ok(WindowFrameBound::CurrentRow) + } else { + let rows = if self.parse_keyword(Keyword::UNBOUNDED) { + None + } else { + Some(self.parse_literal_uint()?) 
+ }; + if self.parse_keyword(Keyword::PRECEDING) { + Ok(WindowFrameBound::Preceding(rows)) + } else if self.parse_keyword(Keyword::FOLLOWING) { + Ok(WindowFrameBound::Following(rows)) + } else { + self.expected("PRECEDING or FOLLOWING", self.peek_token()) } } } - /// Return the first non-whitespace token that has not yet been processed - /// (or None if reached end-of-file) and mark it as processed. OK to call - /// repeatedly after reaching EOF. - pub fn next_token(&mut self) -> Token { + /// Parse a SQL `CASE` expression e.g. `CASE WHEN text IS NULL THEN 0 ELSE 1 END` + pub fn parse_case_expr(&mut self) -> Result { + let mut operand = None; + if !self.parse_keyword(Keyword::WHEN) { + operand = Some(Box::new(self.parse_expr()?)); + self.expect_keyword(Keyword::WHEN)?; + } + let mut conditions = vec![]; + let mut results = vec![]; loop { - self.index += 1; - match self.tokens.get(self.index - 1) { - Some(Token::Whitespace(_)) => continue, - token => return token.cloned().unwrap_or(Token::EOF), + conditions.push(self.parse_expr()?); + self.expect_keyword(Keyword::THEN)?; + results.push(self.parse_expr()?); + if !self.parse_keyword(Keyword::WHEN) { + break; } } + let else_result = if self.parse_keyword(Keyword::ELSE) { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + self.expect_keyword(Keyword::END)?; + Ok(Expr::Case { + operand, + conditions, + results, + else_result, + }) } - /// Return the first unprocessed token, possibly whitespace. - pub fn next_token_no_skip(&mut self) -> Option<&Token> { - self.index += 1; - self.tokens.get(self.index - 1) + /// Parse a SQL `CAST` function e.g. `CAST(expr AS FLOAT)` + pub fn parse_cast_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Cast { + expr: Box::new(expr), + data_type, + }) } - /// Push back the last one non-whitespace token. 
Must be called after - /// `next_token()`, otherwise might panic. OK to call after - /// `next_token()` indicates an EOF. - pub fn prev_token(&mut self) { - loop { - assert!(self.index > 0); - self.index -= 1; - if let Some(Token::Whitespace(_)) = self.tokens.get(self.index) { - continue; - } - return; - } + /// Parse a SQL `TRY_CAST` function e.g. `TRY_CAST(expr AS FLOAT)` + pub fn parse_try_cast_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::TryCast { + expr: Box::new(expr), + data_type, + }) } - /// Report unexpected token - fn expected(&self, expected: &str, found: Token) -> Result { - parser_err!(format!("Expected {}, found: {}", expected, found)) + /// Parse a SQL `EXISTS` expression e.g. `WHERE EXISTS(SELECT ...)` + pub fn parse_exists_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let exists_node = Expr::Exists(Box::new(self.parse_query()?)); + self.expect_token(&Token::RParen)?; + Ok(exists_node) } - /// Look for an expected keyword and consume it if it exists - #[must_use] - pub fn parse_keyword(&mut self, expected: Keyword) -> bool { - match self.peek_token() { - Token::Word(w) if expected == w.keyword => { - self.next_token(); - true - } - _ => false, - } + /// Parse a SQL `EXTRACT` expression e.g. 
`EXTRACT(YEAR FROM ...)` + pub fn parse_extract_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let field = self.parse_date_time_field()?; + self.expect_keyword(Keyword::FROM)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Extract { + field, + expr: Box::new(expr), + }) } - /// Look for an expected sequence of keywords and consume them if they exist - #[must_use] - pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { - let index = self.index; - for &keyword in keywords { - if !self.parse_keyword(keyword) { - // println!("parse_keywords aborting .. did not find {:?}", keyword); - // reset index and return immediately - self.index = index; - return false; - } + /// Parse a SQL `SUBSTRING` expression e.g. `SUBSTRING('text', 1, 2)` + pub fn parse_substring_expr(&mut self) -> Result { + // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + let mut from_expr = None; + let mut to_expr = None; + if self.parse_keyword(Keyword::FROM) { + from_expr = Some(self.parse_expr()?); } - true + if self.parse_keyword(Keyword::FOR) { + to_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; + + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: to_expr.map(Box::new), + }) } - /// Look for one of the given keywords and return the one that matches. - #[must_use] - pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { - match self.peek_token() { - Token::Word(w) => { - keywords - .iter() - .find(|keyword| **keyword == w.keyword) - .map(|keyword| { - self.next_token(); - *keyword - }) + /// Parse a SQL `TRIM` expression e.g. 
`TRIM('text')` + pub fn parse_trim_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let mut where_expr = None; + if let Token::Word(word) = self.peek_token() { + if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING] + .iter() + .any(|d| word.keyword == *d) + { + let trim_where = self.parse_trim_where()?; + let sub_expr = self.parse_expr()?; + self.expect_keyword(Keyword::FROM)?; + where_expr = Some((trim_where, Box::new(sub_expr))); } - _ => None, } + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where: where_expr, + }) } - /// Bail out if the current token is not one of the expected keywords, or consume it if it is - pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result { - if let Some(keyword) = self.parse_one_of_keywords(keywords) { - Ok(keyword) - } else { - let keywords: Vec = keywords.iter().map(|x| format!("{:?}", x)).collect(); - self.expected( - &format!("one of {}", keywords.join(" or ")), - self.peek_token(), - ) + fn parse_trim_where(&mut self) -> Result { + match self.next_token() { + Token::Word(w) => match w.keyword { + Keyword::BOTH => Ok(TrimWhereField::Both), + Keyword::LEADING => Ok(TrimWhereField::Leading), + Keyword::TRAILING => Ok(TrimWhereField::Trailing), + _ => self.expected("trim_where field", Token::Word(w))?, + }, + unexpected => self.expected("trim_where field", unexpected), } } - /// Bail out if the current token is not an expected keyword, or consume it if it is - pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> { - if self.parse_keyword(expected) { - Ok(()) + /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`. 
+ pub fn parse_listagg_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let distinct = self.parse_all_or_distinct()?; + let expr = Box::new(self.parse_expr()?); + // While ANSI SQL would would require the separator, Redshift makes this optional. Here we + // choose to make the separator optional as this provides the more general implementation. + let separator = if self.consume_token(&Token::Comma) { + Some(Box::new(self.parse_expr()?)) } else { - self.expected(format!("{:?}", &expected).as_str(), self.peek_token()) - } + None + }; + let on_overflow = if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) { + if self.parse_keyword(Keyword::ERROR) { + Some(ListAggOnOverflow::Error) + } else { + self.expect_keyword(Keyword::TRUNCATE)?; + let filler = match self.peek_token() { + Token::Word(w) + if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT => + { + None + } + Token::SingleQuotedString(_) + | Token::NationalStringLiteral(_) + | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), + unexpected => { + self.expected("either filler, WITH, or WITHOUT in LISTAGG", unexpected)? + } + }; + let with_count = self.parse_keyword(Keyword::WITH); + if !with_count && !self.parse_keyword(Keyword::WITHOUT) { + self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; + } + self.expect_keyword(Keyword::COUNT)?; + Some(ListAggOnOverflow::Truncate { filler, with_count }) + } + } else { + None + }; + self.expect_token(&Token::RParen)?; + // Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the + // more general implementation. 
+ let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { + self.expect_token(&Token::LParen)?; + self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; + let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + order_by_expr + } else { + vec![] + }; + Ok(Expr::ListAgg(ListAgg { + distinct, + expr, + separator, + on_overflow, + within_group, + })) } - /// Bail out if the following tokens are not the expected sequence of - /// keywords, or consume them if they are. - pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { - for &kw in expected { - self.expect_keyword(kw)?; - } - Ok(()) - } + /// Parse an INTERVAL literal. + /// + /// Some syntactically valid intervals: + /// + /// 1. `INTERVAL '1' DAY` + /// 2. `INTERVAL '1-1' YEAR TO MONTH` + /// 3. `INTERVAL '1' SECOND` + /// 4. `INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5)` + /// 5. `INTERVAL '1.1' SECOND (2, 2)` + /// 6. `INTERVAL '1:1' HOUR (5) TO MINUTE (5)` + /// + /// Note that we do not currently attempt to parse the quoted value. + pub fn parse_literal_interval(&mut self) -> Result { + // The SQL standard allows an optional sign before the value string, but + // it is not clear if any implementations support that syntax, so we + // don't currently try to parse it. (The sign can instead be included + // inside the value string.) + + // The first token in an interval is a string literal which specifies + // the duration of the interval. + let value = self.parse_literal_string()?; + + // Following the string literal is a qualifier which indicates the units + // of the duration specified in the string literal. + // + // Note that PostgreSQL allows omitting the qualifier, so we provide + // this more general implemenation. 
+ let leading_field = match self.peek_token() { + Token::Word(kw) + if [ + Keyword::YEAR, + Keyword::MONTH, + Keyword::DAY, + Keyword::HOUR, + Keyword::MINUTE, + Keyword::SECOND, + ] + .iter() + .any(|d| kw.keyword == *d) => + { + Some(self.parse_date_time_field()?) + } + _ => None, + }; - /// Consume the next token if it matches the expected token, otherwise return false - #[must_use] - pub fn consume_token(&mut self, expected: &Token) -> bool { - if self.peek_token() == *expected { - self.next_token(); - true - } else { - false - } - } + let (leading_precision, last_field, fsec_precision) = + if leading_field == Some(DateTimeField::Second) { + // SQL mandates special syntax for `SECOND TO SECOND` literals. + // Instead of + // `SECOND [()] TO SECOND[()]` + // one must use the special format: + // `SECOND [( [ , ] )]` + let last_field = None; + let (leading_precision, fsec_precision) = self.parse_optional_precision_scale()?; + (leading_precision, last_field, fsec_precision) + } else { + let leading_precision = self.parse_optional_precision()?; + if self.parse_keyword(Keyword::TO) { + let last_field = Some(self.parse_date_time_field()?); + let fsec_precision = if last_field == Some(DateTimeField::Second) { + self.parse_optional_precision()? 
+ } else { + None + }; + (leading_precision, last_field, fsec_precision) + } else { + (leading_precision, None, None) + } + }; - /// Bail out if the current token is not an expected keyword, or consume it if it is - pub fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> { - if self.consume_token(expected) { - Ok(()) - } else { - self.expected(&expected.to_string(), self.peek_token()) - } + Ok(Expr::Value(Value::Interval { + value, + leading_field, + leading_precision, + last_field, + fractional_seconds_precision: fsec_precision, + })) } - /// Parse a comma-separated list of 1+ items accepted by `F` - pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - let mut values = vec![]; - loop { - values.push(f(self)?); - if !self.consume_token(&Token::Comma) { - break; - } - } - Ok(values) - } + // ============================================================================================ + // Parsing helper methods + // ============================================================================================ - /// Run a parser method `f`, reverting back to the current position - /// if unsuccessful. - #[must_use] - fn maybe_parse(&mut self, mut f: F) -> Option - where - F: FnMut(&mut Parser) -> Result, - { - let index = self.index; - if let Ok(t) = f(self) { - Some(t) - } else { - self.index = index; - None + // This function parses date/time fields for both the EXTRACT function-like + // operator and interval qualifiers. EXTRACT supports a wider set of + // date/time fields than interval qualifiers, so this function may need to + // be split in two. 
+ pub fn parse_date_time_field(&mut self) -> Result { + match self.next_token() { + Token::Word(w) => match w.keyword { + Keyword::YEAR => Ok(DateTimeField::Year), + Keyword::MONTH => Ok(DateTimeField::Month), + Keyword::DAY => Ok(DateTimeField::Day), + Keyword::HOUR => Ok(DateTimeField::Hour), + Keyword::MINUTE => Ok(DateTimeField::Minute), + Keyword::SECOND => Ok(DateTimeField::Second), + _ => self.expected("date/time field", Token::Word(w))?, + }, + unexpected => self.expected("date/time field", unexpected), } } @@ -1254,189 +1522,47 @@ impl<'a> Parser<'a> { } } - /// Parse a SQL `CREATE` statement - pub fn parse_create(&mut self) -> Result { - let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); - let temporary = self - .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) - .is_some(); - if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace, temporary) - } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { - self.prev_token(); - self.parse_create_view(or_replace) - } else if self.parse_keyword(Keyword::EXTERNAL) { - self.parse_create_external_table(or_replace) - } else if or_replace { - self.expected( - "[EXTERNAL] TABLE or [MATERIALIZED] VIEW after CREATE OR REPLACE", - self.peek_token(), - ) - } else if self.parse_keyword(Keyword::INDEX) { - self.parse_create_index(false) - } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { - self.parse_create_index(true) - } else if self.parse_keyword(Keyword::VIRTUAL) { - self.parse_create_virtual_table() - } else if self.parse_keyword(Keyword::SCHEMA) { - self.parse_create_schema() - } else { - self.expected("an object type after CREATE", self.peek_token()) - } - } - - pub fn parse_file_format(&mut self) -> Result { - match self.next_token() { - Token::Word(w) => match w.keyword { - Keyword::AVRO => Ok(FileFormat::AVRO), - Keyword::JSONFILE => Ok(FileFormat::JSONFILE), - Keyword::ORC => Ok(FileFormat::ORC), - 
Keyword::PARQUET => Ok(FileFormat::PARQUET), - Keyword::RCFILE => Ok(FileFormat::RCFILE), - Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE), - Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE), - _ => self.expected("fileformat", Token::Word(w)), - }, - unexpected => self.expected("fileformat", unexpected), - } - } - - /// Parse a `DROP` statement - pub fn parse_drop(&mut self) -> Result { - let object_type = if self.parse_keyword(Keyword::TABLE) { - ObjectType::Table - } else if self.parse_keyword(Keyword::VIEW) { - ObjectType::View - } else if self.parse_keyword(Keyword::INDEX) { - ObjectType::Index - } else if self.parse_keyword(Keyword::SCHEMA) { - ObjectType::Schema - } else { - return self.expected("TABLE, VIEW, INDEX or SCHEMA after DROP", self.peek_token()); - }; - // Many dialects support the non standard `IF EXISTS` clause and allow - // specifying multiple objects to delete in a single statement - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let names = self.parse_comma_separated(Parser::parse_object_name)?; - let cascade = self.parse_keyword(Keyword::CASCADE); - let restrict = self.parse_keyword(Keyword::RESTRICT); - let purge = self.parse_keyword(Keyword::PURGE); - if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); - } - Ok(Statement::Drop { - object_type, - if_exists, - names, - cascade, - purge, - }) - } + /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + pub fn parse_order_by_expr(&mut self) -> Result { + let expr = self.parse_expr()?; - /// Parse a `ALTER` statement - pub fn parse_alter(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let _ = self.parse_keyword(Keyword::ONLY); - let table_name = self.parse_object_name()?; - let operation = if self.parse_keyword(Keyword::ADD) { - if let Some(constraint) = self.parse_optional_table_constraint()? 
{ - AlterTableOperation::AddConstraint(constraint) - } else { - let if_not_exists = - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::AddPartitions { - if_not_exists, - new_partitions: partitions, - } - } else { - let _ = self.parse_keyword(Keyword::COLUMN); - let column_def = self.parse_column_def()?; - AlterTableOperation::AddColumn { column_def } - } - } - } else if self.parse_keyword(Keyword::RENAME) { - if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_object_name()?; - AlterTableOperation::RenameTable { table_name } - } else { - let _ = self.parse_keyword(Keyword::COLUMN); - let old_column_name = self.parse_identifier()?; - self.expect_keyword(Keyword::TO)?; - let new_column_name = self.parse_identifier()?; - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - } - } - } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: true, - } - } else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: false, - } - } else { - let _ = self.parse_keyword(Keyword::COLUMN); - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, - } - } - } 
else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let before = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::RENAME)?; - self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; - self.expect_token(&Token::LParen)?; - let renames = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::RenamePartitions { - old_partitions: before, - new_partitions: renames, - } + let asc = if self.parse_keyword(Keyword::ASC) { + Some(true) + } else if self.parse_keyword(Keyword::DESC) { + Some(false) } else { - return self.expected( - "ADD, RENAME, PARTITION or DROP after ALTER TABLE", - self.peek_token(), - ); + None }; - Ok(Statement::AlterTable { - name: table_name, - operation, + + let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) { + Some(true) + } else if self.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) { + Some(false) + } else { + None + }; + + Ok(OrderByExpr { + expr, + asc, + nulls_first, }) } - /// Parse a `COPY` statement - pub fn parse_copy(&mut self) -> Result { - let table_name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; - self.expect_keywords(&[Keyword::FROM, Keyword::STDIN])?; - self.expect_token(&Token::SemiColon)?; - let values = self.parse_tab_values(); - Ok(Statement::Copy { - table_name, - columns, - values, - }) + pub fn parse_file_format(&mut self) -> Result { + match self.next_token() { + Token::Word(w) => match w.keyword { + Keyword::AVRO => Ok(FileFormat::AVRO), + Keyword::JSONFILE => Ok(FileFormat::JSONFILE), + Keyword::ORC => Ok(FileFormat::ORC), + Keyword::PARQUET => Ok(FileFormat::PARQUET), + Keyword::RCFILE => Ok(FileFormat::RCFILE), + Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE), + Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE), + _ => self.expected("fileformat", Token::Word(w)), + }, + 
unexpected => self.expected("fileformat", unexpected), + } } /// Parse a literal value (numbers, strings, date/time, booleans) @@ -1567,44 +1693,6 @@ impl<'a> Parser<'a> { } } - /// Parse a possibly qualified, possibly quoted identifier, e.g. - /// `foo` or `myschema."table" - pub fn parse_object_name(&mut self) -> Result { - let mut idents = vec![]; - loop { - idents.push(self.parse_identifier()?); - if !self.consume_token(&Token::Period) { - break; - } - } - Ok(ObjectName(idents)) - } - - /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) - pub fn parse_identifier(&mut self) -> Result { - match self.next_token() { - Token::Word(w) => Ok(w.to_ident()), - Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), - unexpected => self.expected("identifier", unexpected), - } - } - - /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers - pub fn parse_parenthesized_column_list( - &mut self, - optional: IsOptional, - ) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - let cols = self.parse_comma_separated(Parser::parse_identifier)?; - self.expect_token(&Token::RParen)?; - Ok(cols) - } else if optional == IsOptional::Optional { - Ok(vec![]) - } else { - self.expected("a list of columns in parentheses", self.peek_token()) - } - } - pub fn parse_optional_precision(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { let n = self.parse_literal_uint()?; @@ -1632,297 +1720,237 @@ impl<'a> Parser<'a> { } } - /// Parse a SQL `DELETE` statement - pub fn parse_delete(&mut self) -> Result { - self.expect_keyword(Keyword::FROM)?; - let table_name = self.parse_object_name()?; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) 
- } else { - None - }; - - Ok(Statement::Delete { - table_name, - selection, - }) - } - - /// Parse a SQL `EXPLAIN` statement - pub fn parse_explain(&mut self) -> Result { - let analyze = self.parse_keyword(Keyword::ANALYZE); - let verbose = self.parse_keyword(Keyword::VERBOSE); - - let statement = Box::new(self.parse_statement()?); - - Ok(Statement::Explain { - analyze, - verbose, - statement, - }) - } - - /// Parse a SQL `SET ` or `SET TRANSACTION` statement - pub fn parse_set(&mut self) -> Result { - let modifier = - self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); - if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; - } - let variable = self.parse_identifier()?; - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - let mut values = vec![]; - loop { - let token = self.peek_token(); - let value = match (self.parse_value(), token) { - (Ok(value), _) => SetVariableValue::Literal(value), - (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), - (Err(_), unexpected) => self.expected("variable value", unexpected)?, - }; - values.push(value); - if self.consume_token(&Token::Comma) { - continue; - } - return Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - hivevar: Some(Keyword::HIVEVAR) == modifier, - variable, - value: values, - }); + /// Parse a possibly qualified, possibly quoted identifier, e.g. 
+ /// `foo` or `myschema."table" + pub fn parse_object_name(&mut self) -> Result { + let mut idents = vec![]; + loop { + idents.push(self.parse_identifier()?); + if !self.consume_token(&Token::Period) { + break; } - } else if variable.value == "TRANSACTION" && modifier.is_none() { - Ok(Statement::SetTransaction { - modes: self.parse_transaction_modes()?, - }) - } else { - self.expected("equals sign or TO", self.peek_token()) } + Ok(ObjectName(idents)) } - /// Parse a SQL `SHOW` statement - pub fn parse_show(&mut self) -> Result { - if self - .parse_one_of_keywords(&[ - Keyword::EXTENDED, - Keyword::FULL, - Keyword::COLUMNS, - Keyword::FIELDS, - ]) - .is_some() - { - self.prev_token(); - Ok(self.parse_show_columns()?) - } else if self.parse_one_of_keywords(&[Keyword::CREATE]).is_some() { - Ok(self.parse_show_create()?) - } else { - Ok(self.parse_show_variable()?) + /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) + pub fn parse_identifier(&mut self) -> Result { + match self.next_token() { + Token::Word(w) => Ok(w.to_ident()), + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), + unexpected => self.expected("identifier", unexpected), } } - /// Parse an `INSERT` statement - pub fn parse_insert(&mut self) -> Result { - let or = if !dialect_of!(self is SQLiteDialect) { - None - } else if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { - Some(SqliteOnConflict::Replace) - } else if self.parse_keywords(&[Keyword::OR, Keyword::ROLLBACK]) { - Some(SqliteOnConflict::Rollback) - } else if self.parse_keywords(&[Keyword::OR, Keyword::ABORT]) { - Some(SqliteOnConflict::Abort) - } else if self.parse_keywords(&[Keyword::OR, Keyword::FAIL]) { - Some(SqliteOnConflict::Fail) - } else if self.parse_keywords(&[Keyword::OR, Keyword::IGNORE]) { - Some(SqliteOnConflict::Ignore) - } else if self.parse_keyword(Keyword::REPLACE) { - Some(SqliteOnConflict::Replace) - } else { - None - }; - let action = 
self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; - let overwrite = action == Keyword::OVERWRITE; - let local = self.parse_keyword(Keyword::LOCAL); - - if self.parse_keyword(Keyword::DIRECTORY) { - let path = self.parse_literal_string()?; - let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { - Some(self.parse_file_format()?) - } else { - None - }; - let source = Box::new(self.parse_query()?); - Ok(Statement::Directory { - local, - path, - overwrite, - file_format, - source, - }) - } else { - // Hive lets you put table here regardless - let table = self.parse_keyword(Keyword::TABLE); - let table_name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; - - let partitioned = if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let r = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - r - } else { - None - }; - - // Hive allows you to specify columns after partitions as well if you want. 
- let after_columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; - - let source = Box::new(self.parse_query()?); - Ok(Statement::Insert { - or, - table_name, - overwrite, - partitioned, - columns, - after_columns, - source, - table, - }) + /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers + pub fn parse_parenthesized_column_list( + &mut self, + optional: IsOptional, + ) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + let cols = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } else if optional == IsOptional::Optional { + Ok(vec![]) + } else { + self.expected("a list of columns in parentheses", self.peek_token()) } } - /// Parse an `UPDATE` statement - pub fn parse_update(&mut self) -> Result { - let table_name = self.parse_object_name()?; - self.expect_keyword(Keyword::SET)?; - let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) + /// Parse a comma-separated list of 1+ items accepted by `F` + pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result, + { + let mut values = vec![]; + loop { + values.push(f(self)?); + if !self.consume_token(&Token::Comma) { + break; + } + } + Ok(values) + } + + /// Run a parser method `f`, reverting back to the current position + /// if unsuccessful. 
+ #[must_use] + fn maybe_parse(&mut self, mut f: F) -> Option + where + F: FnMut(&mut Parser) -> Result, + { + let index = self.index; + if let Ok(t) = f(self) { + Some(t) } else { + self.index = index; None - }; - Ok(Statement::Update { - table_name, - assignments, - selection, - }) + } } - fn parse_function_args(&mut self) -> Result { - if self.peek_nth_token(1) == Token::RArrow { - let name = self.parse_identifier()?; - - self.expect_token(&Token::RArrow)?; - let arg = self.parse_expr()?; + // ============================================================================================ + // Token (Keyword) helper methods + // ============================================================================================ - Ok(FunctionArg::Named { name, arg }) - } else { - Ok(FunctionArg::Unnamed(self.parse_expr()?)) + /// Look for an expected keyword and consume it if it exists + #[must_use] + pub fn parse_keyword(&mut self, expected: Keyword) -> bool { + match self.peek_token() { + Token::Word(w) if expected == w.keyword => { + self.next_token(); + true + } + _ => false, } } - pub fn parse_optional_args(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::RParen) { - Ok(vec![]) - } else { - let args = self.parse_comma_separated(Parser::parse_function_args)?; - self.expect_token(&Token::RParen)?; - Ok(args) + /// Look for an expected sequence of keywords and consume them if they exist + #[must_use] + pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { + let index = self.index; + for &keyword in keywords { + if !self.parse_keyword(keyword) { + // println!("parse_keywords aborting .. did not find {:?}", keyword); + // reset index and return immediately + self.index = index; + return false; + } } + true } - /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) - pub fn parse_order_by_expr(&mut self) -> Result { - let expr = self.parse_expr()?; + /// Look for one of the given keywords and return the one that matches. 
+ #[must_use] + pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { + match self.peek_token() { + Token::Word(w) => { + keywords + .iter() + .find(|keyword| **keyword == w.keyword) + .map(|keyword| { + self.next_token(); + *keyword + }) + } + _ => None, + } + } - let asc = if self.parse_keyword(Keyword::ASC) { - Some(true) - } else if self.parse_keyword(Keyword::DESC) { - Some(false) + /// Bail out if the current token is not one of the expected keywords, or consume it if it is + pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result { + if let Some(keyword) = self.parse_one_of_keywords(keywords) { + Ok(keyword) } else { - None - }; + let keywords: Vec = keywords.iter().map(|x| format!("{:?}", x)).collect(); + self.expected( + &format!("one of {}", keywords.join(" or ")), + self.peek_token(), + ) + } + } - let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) { - Some(true) - } else if self.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) { - Some(false) + /// Bail out if the current token is not an expected keyword, or consume it if it is + pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> { + if self.parse_keyword(expected) { + Ok(()) } else { - None - }; - - Ok(OrderByExpr { - expr, - asc, - nulls_first, - }) + self.expected(format!("{:?}", &expected).as_str(), self.peek_token()) + } } - /// Parse a SQL `START TRANSACTION` statement - pub fn parse_start_transaction(&mut self) -> Result { - self.expect_keyword(Keyword::TRANSACTION)?; - Ok(Statement::StartTransaction { - modes: self.parse_transaction_modes()?, - }) + /// Bail out if the following tokens are not the expected sequence of + /// keywords, or consume them if they are. 
+ pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { + for &kw in expected { + self.expect_keyword(kw)?; + } + Ok(()) } - /// Parse an `BEGIN` statement - pub fn parse_begin(&mut self) -> Result { - let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); - Ok(Statement::StartTransaction { - modes: self.parse_transaction_modes()?, - }) - } + // ============================================================================================ + // Token helper methods + // ============================================================================================ - /// Parse an `COMMIT` statement - pub fn parse_commit(&mut self) -> Result { - Ok(Statement::Commit { - chain: self.parse_commit_rollback_chain()?, - }) + /// Consume the next token if it matches the expected token, otherwise return false + #[must_use] + pub fn consume_token(&mut self, expected: &Token) -> bool { + if self.peek_token() == *expected { + self.next_token(); + true + } else { + false + } } - /// Parse an `ROLLBACK` statement - pub fn parse_rollback(&mut self) -> Result { - Ok(Statement::Rollback { - chain: self.parse_commit_rollback_chain()?, - }) + /// Bail out if the current token is not an expected keyword, or consume it if it is + pub fn expect_token(&mut self, expected: &Token) -> Result<(), ParserError> { + if self.consume_token(expected) { + Ok(()) + } else { + self.expected(&expected.to_string(), self.peek_token()) + } } - /// Parse a SQL `DEALLOCATE` statement - pub fn parse_deallocate(&mut self) -> Result { - let prepare = self.parse_keyword(Keyword::PREPARE); - let name = self.parse_identifier()?; - Ok(Statement::Deallocate { name, prepare }) + /// Return the first non-whitespace token that has not yet been processed + /// (or None if reached end-of-file) + pub fn peek_token(&self) -> Token { + self.peek_nth_token(0) } - /// Parse a SQL `EXECUTE` statement - pub fn parse_execute(&mut self) -> Result { - let name = 
self.parse_identifier()?; - - let mut parameters = vec![]; - if self.consume_token(&Token::LParen) { - parameters = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; + /// Return nth non-whitespace token that has not yet been processed + pub fn peek_nth_token(&self, mut n: usize) -> Token { + let mut index = self.index; + loop { + index += 1; + match self.tokens.get(index - 1) { + Some(Token::Whitespace(_)) => continue, + non_whitespace => { + if n == 0 { + return non_whitespace.cloned().unwrap_or(Token::EOF); + } + n -= 1; + } + } } + } - Ok(Statement::Execute { name, parameters }) + /// Return the first non-whitespace token that has not yet been processed + /// (or None if reached end-of-file) and mark it as processed. OK to call + /// repeatedly after reaching EOF. + pub fn next_token(&mut self) -> Token { + loop { + self.index += 1; + match self.tokens.get(self.index - 1) { + Some(Token::Whitespace(_)) => continue, + token => return token.cloned().unwrap_or(Token::EOF), + } + } } - /// Parse a SQL `PREPARE` statement - pub fn parse_prepare(&mut self) -> Result { - let name = self.parse_identifier()?; + /// Return the first unprocessed token, possibly whitespace. + pub fn next_token_no_skip(&mut self) -> Option<&Token> { + self.index += 1; + self.tokens.get(self.index - 1) + } - let mut data_types = vec![]; - if self.consume_token(&Token::LParen) { - data_types = self.parse_comma_separated(Parser::parse_data_type)?; - self.expect_token(&Token::RParen)?; + /// Push back the last one non-whitespace token. Must be called after + /// `next_token()`, otherwise might panic. OK to call after + /// `next_token()` indicates an EOF. 
+ pub fn prev_token(&mut self) { + loop { + assert!(self.index > 0); + self.index -= 1; + if let Some(Token::Whitespace(_)) = self.tokens.get(self.index) { + continue; + } + return; } + } - self.expect_keyword(Keyword::AS)?; - let statement = Box::new(self.parse_statement()?); - Ok(Statement::Prepare { - name, - data_types, - statement, - }) + /// Report unexpected token + fn expected(&self, expected: &str, found: Token) -> Result { + parser_err!(format!("Expected {}, found: {}", expected, found)) } } From 628c66c4d98888565032db814a19b239a8ce1c9c Mon Sep 17 00:00:00 2001 From: koushiro Date: Thu, 9 Sep 2021 18:19:30 +0800 Subject: [PATCH 5/5] Some nits Signed-off-by: koushiro --- src/parser/ddl.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/parser/ddl.rs b/src/parser/ddl.rs index 7dd68b337..3c6815e98 100644 --- a/src/parser/ddl.rs +++ b/src/parser/ddl.rs @@ -16,7 +16,7 @@ use alloc::{boxed::Box, format, vec, vec::Vec}; use crate::ast::*; use crate::dialect::keywords::Keyword; use crate::dialect::*; -use crate::parser::{IsOptional::*, Parser, ParserError}; +use crate::parser::{IsOptional, Parser, ParserError}; use crate::tokenizer::Token; impl<'a> Parser<'a> { @@ -148,7 +148,7 @@ impl<'a> Parser<'a> { // general that the arguments can be made to appear as column // definitions in a traditional CREATE TABLE statement", but // we don't implement that. - let module_args = self.parse_parenthesized_column_list(Optional)?; + let module_args = self.parse_parenthesized_column_list(IsOptional::Optional)?; Ok(Statement::CreateVirtualTable { name: table_name, if_not_exists, @@ -164,7 +164,7 @@ impl<'a> Parser<'a> { // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. 
let name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; + let columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; let with_options = self.parse_options(Keyword::WITH)?; self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); @@ -255,7 +255,7 @@ impl<'a> Parser<'a> { let foreign_table = self.parse_object_name()?; // PostgreSQL allows omitting the column list and // uses the primary key column of the foreign table by default - let referred_columns = self.parse_parenthesized_column_list(Optional)?; + let referred_columns = self.parse_parenthesized_column_list(IsOptional::Optional)?; let mut on_delete = None; let mut on_update = None; loop { @@ -313,7 +313,7 @@ impl<'a> Parser<'a> { if is_primary { self.expect_keyword(Keyword::KEY)?; } - let columns = self.parse_parenthesized_column_list(Mandatory)?; + let columns = self.parse_parenthesized_column_list(IsOptional::Mandatory)?; Ok(Some(TableConstraint::Unique { name, columns, @@ -322,10 +322,11 @@ impl<'a> Parser<'a> { } Token::Word(w) if w.keyword == Keyword::FOREIGN => { self.expect_keyword(Keyword::KEY)?; - let columns = self.parse_parenthesized_column_list(Mandatory)?; + let columns = self.parse_parenthesized_column_list(IsOptional::Mandatory)?; self.expect_keyword(Keyword::REFERENCES)?; let foreign_table = self.parse_object_name()?; - let referred_columns = self.parse_parenthesized_column_list(Mandatory)?; + let referred_columns = + self.parse_parenthesized_column_list(IsOptional::Mandatory)?; let mut on_delete = None; let mut on_update = None; loop {