diff --git a/src/lib.rs b/src/lib.rs
index f04ae07a9..7a0816182 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -44,6 +44,7 @@
 pub mod ast;
 pub mod dialect;
 pub mod keywords;
 pub mod parser;
+pub mod span;
 pub mod tokenizer;
 #[doc(hidden)]
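A note before the parser changes: the new `span` module itself is not included in this diff, so its exact definitions are an assumption. A minimal sketch that is consistent with how the parser code below uses it (`Span::new()`, `s.expand(v)`, `s.end().unwrap_or(0)`, `(end..end).into()`, and a `Spanned` trait) might look like this — field names and semantics are inferred, not confirmed:

    // src/span.rs -- hypothetical sketch, not part of this diff.
    use core::ops::Range;

    /// A half-open byte range into the original SQL text. The default
    /// (0..0) acts as an "unknown location" placeholder; the handling of
    /// empty spans is simplified here.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
    pub struct Span {
        pub start: usize,
        pub end: usize,
    }

    impl Span {
        /// Placeholder span, used when building tokens only for comparison.
        pub fn new() -> Self {
            Span::default()
        }

        /// Grow this span so it also covers `item`.
        pub fn expand<T: Spanned>(&mut self, item: &T) {
            let other = item.span();
            self.start = self.start.min(other.start);
            self.end = self.end.max(other.end);
        }

        /// End offset, if the span is non-empty.
        pub fn end(&self) -> Option<usize> {
            (self.end > self.start).then(|| self.end)
        }
    }

    impl From<Range<usize>> for Span {
        fn from(r: Range<usize>) -> Self {
            Span { start: r.start, end: r.end }
        }
    }

    /// Anything that can report its source location.
    pub trait Spanned {
        fn span(&self) -> Span;
    }

With something like that in place, `Parser::eof` further down can derive a zero-width span just past the last token, and every error variant can report a location via `Spanned::span`.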
diff --git a/src/parser.rs b/src/parser.rs
index 6d917f027..b0bf11eec 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -24,21 +24,24 @@
 use core::fmt;
 
 use log::debug;
 
-use crate::ast::*;
-use crate::dialect::*;
 use crate::keywords::{self, Keyword};
 use crate::tokenizer::*;
+use crate::{ast::*, span::Span};
+use crate::{dialect::*, span::Spanned};
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum ParserError {
-    TokenizerError(String),
-    ParserError(String),
+    TokenizerError { message: String, span: Span },
+    ParserError { message: String, span: Span },
 }
 
 // Use `Parser::expected` instead, if possible
 macro_rules! parser_err {
-    ($MSG:expr) => {
-        Err(ParserError::ParserError($MSG.to_string()))
+    ($MSG:expr, $SPAN: expr) => {
+        Err(ParserError::ParserError {
+            message: $MSG.to_string(),
+            span: $SPAN,
+        })
     };
 }
@@ -84,7 +87,10 @@ impl From<Expr> for FunctionArgExpr {
 
 impl From<TokenizerError> for ParserError {
     fn from(e: TokenizerError) -> Self {
-        ParserError::TokenizerError(e.to_string())
+        ParserError::TokenizerError {
+            message: e.to_string(),
+            span: e.span,
+        }
     }
 }
@@ -94,13 +100,22 @@ impl fmt::Display for ParserError {
             f,
             "sql parser error: {}",
             match self {
-                ParserError::TokenizerError(s) => s,
-                ParserError::ParserError(s) => s,
+                ParserError::TokenizerError { message, .. } => message,
+                ParserError::ParserError { message, .. } => message,
             }
         )
     }
 }
 
+impl Spanned for ParserError {
+    fn span(&self) -> Span {
+        match self {
+            ParserError::TokenizerError { span, .. } => *span,
+            ParserError::ParserError { span, .. } => *span,
+        }
+    }
+}
+
 #[cfg(feature = "std")]
 impl std::error::Error for ParserError {}
@@ -131,11 +146,11 @@ impl<'a> Parser<'a> {
         debug!("Parsing sql '{}'...", sql);
         loop {
             // ignore empty statements (between successive statement delimiters)
-            while parser.consume_token(&Token::SemiColon) {
+            while parser.consume_token(&Token::SemiColon { span: Span::new() }) {
                 expecting_statement_delimiter = false;
             }
 
-            if parser.peek_token() == Token::EOF {
+            if let Token::EOF { .. } = parser.peek_token() {
                 break;
             }
             if expecting_statement_delimiter {
@@ -153,7 +168,7 @@ impl<'a> Parser<'a> {
     /// stopping before the statement separator, if any.
     pub fn parse_statement(&mut self) -> Result<Statement, ParserError> {
         match self.next_token() {
-            Token::Word(w) => match w.keyword {
+            Token::Word { value: w, span } => match w.keyword {
                 Keyword::DESCRIBE => Ok(self.parse_explain(true)?),
                 Keyword::EXPLAIN => Ok(self.parse_explain(false)?),
                 Keyword::ANALYZE => Ok(self.parse_analyze()?),
@@ -194,9 +209,9 @@ impl<'a> Parser<'a> {
                 Keyword::COMMENT if dialect_of!(self is PostgreSqlDialect) => {
                     Ok(self.parse_comment()?)
                 }
-                _ => self.expected("an SQL statement", Token::Word(w)),
+                _ => self.expected("an SQL statement", Token::Word { value: w, span }),
             },
-            Token::LParen => {
+            Token::LParen { .. } => {
                 self.prev_token();
                 Ok(Statement::Query(Box::new(self.parse_query()?)))
             }
@@ -236,9 +251,9 @@
         let table_name = self.parse_object_name()?;
         let mut partitions = None;
         if self.parse_keyword(Keyword::PARTITION) {
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             partitions = Some(self.parse_comma_separated(Parser::parse_expr)?);
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
         }
         Ok(Statement::Truncate {
             table_name,
@@ -264,9 +279,9 @@
             Keyword::COMPUTE,
         ]) {
             Some(Keyword::PARTITION) => {
-                self.expect_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 partitions = Some(self.parse_comma_separated(Parser::parse_expr)?);
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
             }
             Some(Keyword::NOSCAN) => noscan = true,
             Some(Keyword::FOR) => {
@@ -307,13 +322,15 @@
         let index = self.index;
 
         match self.next_token() {
-            Token::Word(w) if self.peek_token() == Token::Period => {
+            Token::Word { value: w, .. }
+                if self.peek_token() == Token::Period { span: Span::new() } =>
+            {
                 let mut id_parts: Vec<Ident> = vec![w.to_ident()];
-                while self.consume_token(&Token::Period) {
+                while self.consume_token(&Token::Period { span: Span::new() }) {
                     match self.next_token() {
-                        Token::Word(w) => id_parts.push(w.to_ident()),
-                        Token::Mul => {
+                        Token::Word { value: w, .. } => id_parts.push(w.to_ident()),
+                        Token::Mul { .. } => {
                             return Ok(WildcardExpr::QualifiedWildcard(ObjectName(id_parts)));
                         }
                        unexpected => {
@@ -322,7 +339,7 @@
                     }
                 }
             }
-            Token::Mul => {
+            Token::Mul { .. } => {
                 return Ok(WildcardExpr::Wildcard);
             }
             _ => (),
@@ -394,7 +411,7 @@
             // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
             // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
             // `type 'string'` syntax for the custom data types at all.
-            DataType::Custom(..) => parser_err!("dummy"),
+            DataType::Custom(..) => parser_err!("dummy", Span::new()),
             data_type => Ok(Expr::TypedString {
                 data_type,
                 value: parser.parse_literal_string()?,
@@ -403,7 +420,7 @@
         let expr = match self.next_token() {
-            Token::Word(w) => match w.keyword {
+            Token::Word { value: w, .. } => match w.keyword {
                 Keyword::TRUE | Keyword::FALSE | Keyword::NULL => {
                     self.prev_token();
                     Ok(Expr::Value(self.parse_value()?))
@@ -424,11 +441,11 @@
                 // Here `w` is a word, check if it's a part of a multi-part
                 // identifier, a function call, or a simple identifier:
                 _ => match self.peek_token() {
-                    Token::LParen | Token::Period => {
+                    Token::LParen { .. } | Token::Period { .. } => {
                         let mut id_parts: Vec<Ident> = vec![w.to_ident()];
-                        while self.consume_token(&Token::Period) {
+                        while self.consume_token(&Token::Period { span: Span::new() }) {
                             match self.next_token() {
-                                Token::Word(w) => id_parts.push(w.to_ident()),
+                                Token::Word { value: w, .. } => id_parts.push(w.to_ident()),
                                 unexpected => {
                                     return self
                                         .expected("an identifier or a '*' after '.'", unexpected);
@@ -436,7 +453,7 @@
                             }
                         }
 
-                        if self.consume_token(&Token::LParen) {
+                        if self.consume_token(&Token::LParen { span: Span::new() }) {
                             self.prev_token();
                             self.parse_function(ObjectName(id_parts))
                         } else {
@@ -446,8 +463,8 @@
                     _ => Ok(Expr::Identifier(w.to_ident())),
                 },
            }, // End of Token::Word
-            tok @ Token::Minus | tok @ Token::Plus => {
-                let op = if tok == Token::Plus {
+            tok @ Token::Minus { .. } | tok @ Token::Plus { .. } => {
+                let op = if let Token::Plus { .. } = tok {
                     UnaryOperator::Plus
                 } else {
                     UnaryOperator::Minus
@@ -457,19 +474,19 @@
                     expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
                 })
             }
-            tok @ Token::DoubleExclamationMark
-            | tok @ Token::PGSquareRoot
-            | tok @ Token::PGCubeRoot
-            | tok @ Token::AtSign
-            | tok @ Token::Tilde
+            tok @ Token::DoubleExclamationMark { .. }
+            | tok @ Token::PGSquareRoot { .. }
+            | tok @ Token::PGCubeRoot { .. }
+            | tok @ Token::AtSign { .. }
+            | tok @ Token::Tilde { .. }
                 if dialect_of!(self is PostgreSqlDialect) =>
             {
                 let op = match tok {
-                    Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial,
-                    Token::PGSquareRoot => UnaryOperator::PGSquareRoot,
-                    Token::PGCubeRoot => UnaryOperator::PGCubeRoot,
-                    Token::AtSign => UnaryOperator::PGAbs,
-                    Token::Tilde => UnaryOperator::PGBitwiseNot,
+                    Token::DoubleExclamationMark { .. } => UnaryOperator::PGPrefixFactorial,
+                    Token::PGSquareRoot { .. } => UnaryOperator::PGSquareRoot,
+                    Token::PGCubeRoot { .. } => UnaryOperator::PGCubeRoot,
+                    Token::AtSign { .. } => UnaryOperator::PGAbs,
+                    Token::Tilde { .. } => UnaryOperator::PGBitwiseNot,
                     _ => unreachable!(),
                 };
                 Ok(Expr::UnaryOp {
@@ -477,15 +494,15 @@
                     expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
                 })
             }
-            Token::Number(_, _)
-            | Token::SingleQuotedString(_)
-            | Token::NationalStringLiteral(_)
-            | Token::HexStringLiteral(_) => {
+            Token::Number { .. }
+            | Token::SingleQuotedString { .. }
+            | Token::NationalStringLiteral { .. }
+            | Token::HexStringLiteral { .. } => {
                 self.prev_token();
                 Ok(Expr::Value(self.parse_value()?))
             }
-            Token::LParen => {
+            Token::LParen { .. } => {
                 let expr =
                     if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
                         self.prev_token();
@@ -493,7 +510,7 @@
                     } else {
                         Expr::Nested(Box::new(self.parse_expr()?))
                     };
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 Ok(expr)
             }
             unexpected => self.expected("an expression:", unexpected),
@@ -510,12 +527,12 @@
     }
 
     pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let distinct = self.parse_all_or_distinct()?;
         let args = self.parse_optional_args()?;
         let over = if self.parse_keyword(Keyword::OVER) {
             // TBD: support window names (`OVER mywin`) in place of inline specification
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) {
                 // a list of possibly-qualified column names
                 self.parse_comma_separated(Parser::parse_expr)?
@@ -527,9 +544,9 @@
             } else {
                 vec![]
             };
-            let window_frame = if !self.consume_token(&Token::RParen) {
+            let window_frame = if !self.consume_token(&Token::RParen { span: Span::new() }) {
                 let window_frame = self.parse_window_frame()?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 Some(window_frame)
             } else {
                 None
@@ -554,11 +571,11 @@
 
     pub fn parse_window_frame_units(&mut self) -> Result<WindowFrameUnits, ParserError> {
         match self.next_token() {
-            Token::Word(w) => match w.keyword {
+            Token::Word { value, span } => match value.keyword {
                 Keyword::ROWS => Ok(WindowFrameUnits::Rows),
                 Keyword::RANGE => Ok(WindowFrameUnits::Range),
                 Keyword::GROUPS => Ok(WindowFrameUnits::Groups),
-                _ => self.expected("ROWS, RANGE, GROUPS", Token::Word(w))?,
+                _ => self.expected("ROWS, RANGE, GROUPS", Token::Word { value, span })?,
             },
             unexpected => self.expected("ROWS, RANGE, GROUPS", unexpected),
         }
@@ -606,19 +623,19 @@
     fn parse_group_by_expr(&mut self) -> Result<Expr, ParserError> {
         if dialect_of!(self is PostgreSqlDialect) {
             if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) {
-                self.expect_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 Ok(Expr::GroupingSets(result))
             } else if self.parse_keyword(Keyword::CUBE) {
-                self.expect_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 Ok(Expr::Cube(result))
             } else if self.parse_keyword(Keyword::ROLLUP) {
-                self.expect_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 Ok(Expr::Rollup(result))
             } else {
                 self.parse_expr()
@@ -638,25 +655,27 @@
         allow_empty: bool,
     ) -> Result<Vec<Expr>, ParserError> {
         if lift_singleton {
-            if self.consume_token(&Token::LParen) {
-                let result = if allow_empty && self.consume_token(&Token::RParen) {
-                    vec![]
-                } else {
-                    let result = self.parse_comma_separated(Parser::parse_expr)?;
-                    self.expect_token(&Token::RParen)?;
-                    result
-                };
+            if self.consume_token(&Token::LParen { span: Span::new() }) {
+                let result =
+                    if allow_empty && self.consume_token(&Token::RParen { span: Span::new() }) {
+                        vec![]
+                    } else {
+                        let result = self.parse_comma_separated(Parser::parse_expr)?;
+                        self.expect_token(&Token::RParen { span: Span::new() })?;
+                        result
+                    };
                 Ok(result)
             } else {
                 Ok(vec![self.parse_expr()?])
             }
         } else {
-            self.expect_token(&Token::LParen)?;
-            let result = if allow_empty && self.consume_token(&Token::RParen) {
+            self.expect_token(&Token::LParen { span: Span::new() })?;
+            let result = if allow_empty && self.consume_token(&Token::RParen { span: Span::new() })
+            {
                 vec![]
             } else {
                 let result = self.parse_comma_separated(Parser::parse_expr)?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 result
             };
             Ok(result)
@@ -695,11 +714,11 @@
 
     /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)`
     pub fn parse_cast_expr(&mut self) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let expr = self.parse_expr()?;
         self.expect_keyword(Keyword::AS)?;
         let data_type = self.parse_data_type()?;
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         Ok(Expr::Cast {
             expr: Box::new(expr),
             data_type,
@@ -708,11 +727,11 @@
 
     /// Parse a SQL TRY_CAST function e.g. `TRY_CAST(expr AS FLOAT)`
     pub fn parse_try_cast_expr(&mut self) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let expr = self.parse_expr()?;
         self.expect_keyword(Keyword::AS)?;
         let data_type = self.parse_data_type()?;
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         Ok(Expr::TryCast {
             expr: Box::new(expr),
             data_type,
@@ -721,18 +740,18 @@
 
     /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`.
     pub fn parse_exists_expr(&mut self) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let exists_node = Expr::Exists(Box::new(self.parse_query()?));
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         Ok(exists_node)
     }
 
     pub fn parse_extract_expr(&mut self) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let field = self.parse_date_time_field()?;
         self.expect_keyword(Keyword::FROM)?;
         let expr = self.parse_expr()?;
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         Ok(Expr::Extract {
             field,
             expr: Box::new(expr),
@@ -741,18 +760,22 @@
 
     pub fn parse_substring_expr(&mut self) -> Result<Expr, ParserError> {
         // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3])
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let expr = self.parse_expr()?;
         let mut from_expr = None;
-        if self.parse_keyword(Keyword::FROM) || self.consume_token(&Token::Comma) {
+        if self.parse_keyword(Keyword::FROM)
+            || self.consume_token(&Token::Comma { span: Span::new() })
+        {
             from_expr = Some(self.parse_expr()?);
         }
 
         let mut to_expr = None;
-        if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) {
+        if self.parse_keyword(Keyword::FOR)
+            || self.consume_token(&Token::Comma { span: Span::new() })
+        {
             to_expr = Some(self.parse_expr()?);
         }
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
 
         Ok(Expr::Substring {
             expr: Box::new(expr),
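A pattern worth calling out before the remaining hunks: tokens are now compared against freshly built variants carrying `Span::new()` placeholders (e.g. `consume_token(&Token::Comma { span: Span::new() })`, or the `== Token::Period { span: Span::new() }` guard above). That can only match tokens carrying real source spans if `Token` equality ignores the span — either via a hand-written `PartialEq` on `Token`, or a `PartialEq` on `Span` that treats all spans as equal. The tokenizer side is not part of this excerpt, so this is an assumption; a sketch of the first option:

    // Hypothetical sketch -- tokenizer.rs is not shown in this diff.
    // For placeholder-span comparisons to work, `eq` must never look
    // at the `span` field.
    impl PartialEq for Token {
        fn eq(&self, other: &Self) -> bool {
            match (self, other) {
                (Token::Comma { .. }, Token::Comma { .. }) => true,
                (Token::Word { value: a, .. }, Token::Word { value: b, .. }) => a == b,
                // ...one arm per variant, never comparing `span`...
                _ => false,
            }
        }
    }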
@@ -764,9 +787,9 @@
     /// TRIM (WHERE 'text' FROM 'text')\
     /// TRIM ('text')
     pub fn parse_trim_expr(&mut self) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let mut where_expr = None;
-        if let Token::Word(word) = self.peek_token() {
+        if let Token::Word { value: word, .. } = self.peek_token() {
             if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING]
                 .iter()
                 .any(|d| word.keyword == *d)
@@ -778,7 +801,7 @@
             }
         }
         let expr = self.parse_expr()?;
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
 
         Ok(Expr::Trim {
             expr: Box::new(expr),
@@ -788,11 +811,11 @@
 
     pub fn parse_trim_where(&mut self) -> Result<TrimWhereField, ParserError> {
         match self.next_token() {
-            Token::Word(w) => match w.keyword {
+            Token::Word { value, span } => match value.keyword {
                 Keyword::BOTH => Ok(TrimWhereField::Both),
                 Keyword::LEADING => Ok(TrimWhereField::Leading),
                 Keyword::TRAILING => Ok(TrimWhereField::Trailing),
-                _ => self.expected("trim_where field", Token::Word(w))?,
+                _ => self.expected("trim_where field", Token::Word { value, span })?,
             },
             unexpected => self.expected("trim_where field", unexpected),
         }
@@ -800,12 +823,12 @@
 
     /// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
     pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let distinct = self.parse_all_or_distinct()?;
         let expr = Box::new(self.parse_expr()?);
         // While ANSI SQL would require the separator, Redshift makes this optional. Here we
         // choose to make the separator optional as this provides the more general implementation.
-        let separator = if self.consume_token(&Token::Comma) {
+        let separator = if self.consume_token(&Token::Comma { span: Span::new() }) {
             Some(Box::new(self.parse_expr()?))
         } else {
             None
@@ -816,14 +839,14 @@
         } else {
             self.expect_keyword(Keyword::TRUNCATE)?;
             let filler = match self.peek_token() {
-                Token::Word(w)
+                Token::Word { value: w, .. }
                     if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT =>
                 {
                     None
                 }
-                Token::SingleQuotedString(_)
-                | Token::NationalStringLiteral(_)
-                | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
+                Token::SingleQuotedString { .. }
+                | Token::NationalStringLiteral { .. }
+                | Token::HexStringLiteral { .. } => Some(Box::new(self.parse_expr()?)),
                 unexpected => {
                     self.expected("either filler, WITH, or WITHOUT in LISTAGG", unexpected)?
                 }
@@ -838,14 +861,14 @@
         } else {
             None
         };
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         // Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the
         // more general implementation.
         let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) {
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?;
             let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             order_by_expr
         } else {
             vec![]
@@ -865,14 +888,14 @@
     // be split in two.
     pub fn parse_date_time_field(&mut self) -> Result<DateTimeField, ParserError> {
         match self.next_token() {
-            Token::Word(w) => match w.keyword {
+            Token::Word { value, span } => match value.keyword {
                 Keyword::YEAR => Ok(DateTimeField::Year),
                 Keyword::MONTH => Ok(DateTimeField::Month),
                 Keyword::DAY => Ok(DateTimeField::Day),
                 Keyword::HOUR => Ok(DateTimeField::Hour),
                 Keyword::MINUTE => Ok(DateTimeField::Minute),
                 Keyword::SECOND => Ok(DateTimeField::Second),
-                _ => self.expected("date/time field", Token::Word(w))?,
+                _ => self.expected("date/time field", Token::Word { value, span })?,
             },
             unexpected => self.expected("date/time field", unexpected),
         }
@@ -906,7 +929,7 @@
         // Note that PostgreSQL allows omitting the qualifier, so we provide
         // this more general implementation.
         let leading_field = match self.peek_token() {
-            Token::Word(kw)
+            Token::Word { value: kw, .. }
                 if [
                     Keyword::YEAR,
                     Keyword::MONTH,
@@ -961,37 +984,37 @@
     pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
         let tok = self.next_token();
         let regular_binary_operator = match &tok {
-            Token::Spaceship => Some(BinaryOperator::Spaceship),
-            Token::DoubleEq => Some(BinaryOperator::Eq),
-            Token::Eq => Some(BinaryOperator::Eq),
-            Token::Neq => Some(BinaryOperator::NotEq),
-            Token::Gt => Some(BinaryOperator::Gt),
-            Token::GtEq => Some(BinaryOperator::GtEq),
-            Token::Lt => Some(BinaryOperator::Lt),
-            Token::LtEq => Some(BinaryOperator::LtEq),
-            Token::Plus => Some(BinaryOperator::Plus),
-            Token::Minus => Some(BinaryOperator::Minus),
-            Token::Mul => Some(BinaryOperator::Multiply),
-            Token::Mod => Some(BinaryOperator::Modulo),
-            Token::StringConcat => Some(BinaryOperator::StringConcat),
-            Token::Pipe => Some(BinaryOperator::BitwiseOr),
-            Token::Caret => Some(BinaryOperator::BitwiseXor),
-            Token::Ampersand => Some(BinaryOperator::BitwiseAnd),
-            Token::Div => Some(BinaryOperator::Divide),
-            Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect) => {
+            Token::Spaceship { .. } => Some(BinaryOperator::Spaceship),
+            Token::DoubleEq { .. } => Some(BinaryOperator::Eq),
+            Token::Eq { .. } => Some(BinaryOperator::Eq),
+            Token::Neq { .. } => Some(BinaryOperator::NotEq),
+            Token::Gt { .. } => Some(BinaryOperator::Gt),
+            Token::GtEq { .. } => Some(BinaryOperator::GtEq),
+            Token::Lt { .. } => Some(BinaryOperator::Lt),
+            Token::LtEq { .. } => Some(BinaryOperator::LtEq),
+            Token::Plus { .. } => Some(BinaryOperator::Plus),
+            Token::Minus { .. } => Some(BinaryOperator::Minus),
+            Token::Mul { .. } => Some(BinaryOperator::Multiply),
+            Token::Mod { .. } => Some(BinaryOperator::Modulo),
+            Token::StringConcat { .. } => Some(BinaryOperator::StringConcat),
+            Token::Pipe { .. } => Some(BinaryOperator::BitwiseOr),
+            Token::Caret { .. } => Some(BinaryOperator::BitwiseXor),
+            Token::Ampersand { .. } => Some(BinaryOperator::BitwiseAnd),
+            Token::Div { .. } => Some(BinaryOperator::Divide),
+            Token::ShiftLeft { .. } if dialect_of!(self is PostgreSqlDialect) => {
                 Some(BinaryOperator::PGBitwiseShiftLeft)
             }
-            Token::ShiftRight if dialect_of!(self is PostgreSqlDialect) => {
+            Token::ShiftRight { .. } if dialect_of!(self is PostgreSqlDialect) => {
                 Some(BinaryOperator::PGBitwiseShiftRight)
             }
-            Token::Sharp if dialect_of!(self is PostgreSqlDialect) => {
+            Token::Sharp { .. } if dialect_of!(self is PostgreSqlDialect) => {
                 Some(BinaryOperator::PGBitwiseXor)
             }
-            Token::Tilde => Some(BinaryOperator::PGRegexMatch),
-            Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch),
-            Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch),
-            Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch),
-            Token::Word(w) => match w.keyword {
+            Token::Tilde { .. } => Some(BinaryOperator::PGRegexMatch),
+            Token::TildeAsterisk { .. } => Some(BinaryOperator::PGRegexIMatch),
+            Token::ExclamationMarkTilde { .. } => Some(BinaryOperator::PGRegexNotMatch),
+            Token::ExclamationMarkTildeAsterisk { .. } => Some(BinaryOperator::PGRegexNotIMatch),
+            Token::Word { value: w, .. } => match w.keyword {
                 Keyword::AND => Some(BinaryOperator::And),
                 Keyword::OR => Some(BinaryOperator::Or),
                 Keyword::LIKE => Some(BinaryOperator::Like),
@@ -1012,13 +1035,14 @@
         };
 
         if let Some(op) = regular_binary_operator {
-            Ok(Expr::BinaryOp {
+            return Ok(Expr::BinaryOp {
                 left: Box::new(expr),
                 op,
                 right: Box::new(self.parse_subexpr(precedence)?),
-            })
-        } else if let Token::Word(w) = &tok {
-            match w.keyword {
+            });
+        }
+        match tok {
+            Token::Word { value, span } => match value.keyword {
                 Keyword::IS => {
                     if self.parse_keyword(Keyword::NULL) {
                         Ok(Expr::IsNull(Box::new(expr)))
@@ -1050,32 +1074,40 @@
                     }
                 }
                 // Can only happen if `get_next_precedence` got out of sync with this function
-                _ => parser_err!(format!("No infix parser for token {:?}", tok)),
-            }
-        } else if Token::DoubleColon == tok {
-            self.parse_pg_cast(expr)
-        } else if Token::ExclamationMark == tok {
+                _ => parser_err!(
+                    format!(
+                        "No infix parser for token '{}'",
+                        Token::Word { value, span },
+                    ),
+                    span
+                ),
+            },
+            Token::DoubleColon { .. } => self.parse_pg_cast(expr),
+            Token::ExclamationMark { .. } =>
            // PostgreSQL factorial operation
-            Ok(Expr::UnaryOp {
-                op: UnaryOperator::PGPostfixFactorial,
-                expr: Box::new(expr),
-            })
-        } else if Token::LBracket == tok {
-            self.parse_map_access(expr)
-        } else {
+            {
+                Ok(Expr::UnaryOp {
+                    op: UnaryOperator::PGPostfixFactorial,
+                    expr: Box::new(expr),
+                })
+            }
+            Token::LBracket { .. } => self.parse_map_access(expr),
+            _ =>
            // Can only happen if `get_next_precedence` got out of sync with this function
-            parser_err!(format!("No infix parser for token {:?}", tok))
+            {
+                parser_err!(format!("No infix parser for token '{}'", tok), tok.span())
+            }
         }
     }
 
     pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
         let key = self.parse_map_key()?;
-        let tok = self.consume_token(&Token::RBracket);
+        let tok = self.consume_token(&Token::RBracket { span: Span::new() });
         debug!("Tok: {}", tok);
         let mut key_parts: Vec<Expr> = vec![key];
-        while self.consume_token(&Token::LBracket) {
+        while self.consume_token(&Token::LBracket { span: Span::new() }) {
             let key = self.parse_map_key()?;
-            let tok = self.consume_token(&Token::RBracket);
+            let tok = self.consume_token(&Token::RBracket { span: Span::new() });
             debug!("Tok: {}", tok);
             key_parts.push(key);
         }
@@ -1090,7 +1122,7 @@
 
     /// Parses the parens following the `[ NOT ] IN` operator
     pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let in_op = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) {
             self.prev_token();
             Expr::InSubquery {
@@ -1105,7 +1137,7 @@
                 negated,
             }
         };
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         Ok(in_op)
     }
@@ -1141,46 +1173,60 @@
         let token = self.peek_token();
         debug!("get_next_precedence() {:?}", token);
         match token {
-            Token::Word(w) if w.keyword == Keyword::OR => Ok(5),
-            Token::Word(w) if w.keyword == Keyword::AND => Ok(10),
-            Token::Word(w) if w.keyword == Keyword::XOR => Ok(24),
-            Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1) {
+            Token::Word { value: w, .. } if w.keyword == Keyword::OR => Ok(5),
+            Token::Word { value: w, .. } if w.keyword == Keyword::AND => Ok(10),
+            Token::Word { value: w, .. } if w.keyword == Keyword::XOR => Ok(24),
+            Token::Word { value: w, .. } if w.keyword == Keyword::NOT => match self
+                .peek_nth_token(1)
+            {
                 // The precedence of NOT varies depending on keyword that
                 // follows it. If it is followed by IN, BETWEEN, or LIKE,
                 // it takes on the precedence of those tokens. Otherwise it
                 // is not an infix operator, and therefore has zero
                 // precedence.
-                Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
-                Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
-                Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
-                Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC),
+                Token::Word { value: w, .. } if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
+                Token::Word { value: w, .. } if w.keyword == Keyword::BETWEEN => {
+                    Ok(Self::BETWEEN_PREC)
+                }
+                Token::Word { value: w, .. } if w.keyword == Keyword::LIKE => {
+                    Ok(Self::BETWEEN_PREC)
+                }
+                Token::Word { value: w, .. } if w.keyword == Keyword::ILIKE => {
+                    Ok(Self::BETWEEN_PREC)
+                }
                 _ => Ok(0),
             },
-            Token::Word(w) if w.keyword == Keyword::IS => Ok(17),
-            Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
-            Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
-            Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
-            Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC),
-            Token::Eq
-            | Token::Lt
-            | Token::LtEq
-            | Token::Neq
-            | Token::Gt
-            | Token::GtEq
-            | Token::DoubleEq
-            | Token::Tilde
-            | Token::TildeAsterisk
-            | Token::ExclamationMarkTilde
-            | Token::ExclamationMarkTildeAsterisk
-            | Token::Spaceship => Ok(20),
-            Token::Pipe => Ok(21),
-            Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
-            Token::Ampersand => Ok(23),
-            Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
-            Token::Mul | Token::Div | Token::Mod | Token::StringConcat => Ok(40),
-            Token::DoubleColon => Ok(50),
-            Token::ExclamationMark => Ok(50),
-            Token::LBracket | Token::RBracket => Ok(10),
+            Token::Word { value: w, .. } if w.keyword == Keyword::IS => Ok(17),
+            Token::Word { value: w, .. } if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
+            Token::Word { value: w, .. } if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
+            Token::Word { value: w, .. } if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
+            Token::Word { value: w, .. } if w.keyword == Keyword::ILIKE => Ok(Self::BETWEEN_PREC),
+            Token::Eq { .. }
+            | Token::Lt { .. }
+            | Token::LtEq { .. }
+            | Token::Neq { .. }
+            | Token::Gt { .. }
+            | Token::GtEq { .. }
+            | Token::DoubleEq { .. }
+            | Token::Tilde { .. }
+            | Token::TildeAsterisk { .. }
+            | Token::ExclamationMarkTilde { .. }
+            | Token::ExclamationMarkTildeAsterisk { .. }
+            | Token::Spaceship { .. } => Ok(20),
+            Token::Pipe { .. } => Ok(21),
+            Token::Caret { .. }
+            | Token::Sharp { .. }
+            | Token::ShiftRight { .. }
+            | Token::ShiftLeft { .. } => Ok(22),
+            Token::Ampersand { .. } => Ok(23),
+            Token::Plus { .. } | Token::Minus { .. } => Ok(Self::PLUS_MINUS_PREC),
+            Token::Mul { .. }
+            | Token::Div { .. }
+            | Token::Mod { .. }
+            | Token::StringConcat { .. } => Ok(40),
+            Token::DoubleColon { .. } => Ok(50),
+            Token::ExclamationMark { .. } => Ok(50),
+            Token::LBracket { .. } | Token::RBracket { .. } => Ok(10),
             _ => Ok(0),
         }
     }
@@ -1191,16 +1237,27 @@
         self.peek_nth_token(0)
     }
 
+    pub fn eof(&self) -> Token {
+        let mut s = Span::new();
+        if let Some(v) = self.tokens.last() {
+            s.expand(v);
+        }
+        let end = s.end().unwrap_or(0);
+        Token::EOF {
+            span: (end..end).into(),
+        }
+    }
+
     /// Return nth non-whitespace token that has not yet been processed
     pub fn peek_nth_token(&self, mut n: usize) -> Token {
         let mut index = self.index;
         loop {
            index += 1;
             match self.tokens.get(index - 1) {
-                Some(Token::Whitespace(_)) => continue,
+                Some(Token::Whitespace { .. }) => continue,
                 non_whitespace => {
                     if n == 0 {
-                        return non_whitespace.cloned().unwrap_or(Token::EOF);
+                        return non_whitespace.cloned().unwrap_or_else(|| self.eof());
                     }
                     n -= 1;
                 }
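Two things about the hunk above. First, `unwrap_or(Token::EOF)` becomes `unwrap_or_else(|| self.eof())`: the EOF token is no longer a unit variant but must be computed (it folds the last token's span into a zero-width span just past the input), so it is built lazily. Second, since `ParserError` now implements `Spanned`, callers can point diagnostics at the offending token. A caller-side sketch — the `parse_sql` call matches the crate's existing API, but the `Span` fields follow the hypothetical module sketched earlier and are an assumption:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;
    use sqlparser::span::Spanned;

    fn report(sql: &str) {
        if let Err(e) = Parser::parse_sql(&GenericDialect {}, sql) {
            let span = e.span();
            eprintln!("{}", sql);
            // draw a caret line under the offending byte range
            // (assumes single-line SQL and byte offsets)
            eprintln!(
                "{}{} {}",
                " ".repeat(span.start),
                "^".repeat((span.end - span.start).max(1)),
                e
            );
        }
    }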
@@ -1215,8 +1272,8 @@
         loop {
             self.index += 1;
             match self.tokens.get(self.index - 1) {
-                Some(Token::Whitespace(_)) => continue,
-                token => return token.cloned().unwrap_or(Token::EOF),
+                Some(Token::Whitespace { .. }) => continue,
+                token => return token.cloned().unwrap_or_else(|| self.eof()),
             }
         }
     }
@@ -1234,7 +1291,7 @@
         loop {
             assert!(self.index > 0);
             self.index -= 1;
-            if let Some(Token::Whitespace(_)) = self.tokens.get(self.index) {
+            if let Some(Token::Whitespace { .. }) = self.tokens.get(self.index) {
                 continue;
             }
             return;
@@ -1243,14 +1300,17 @@
 
     /// Report unexpected token
     fn expected<T>(&self, expected: &str, found: Token) -> Result<T, ParserError> {
-        parser_err!(format!("Expected {}, found: {}", expected, found))
+        parser_err!(
+            format!("Expected {}, found: {}", expected, found),
+            found.span()
+        )
     }
 
     /// Look for an expected keyword and consume it if it exists
     #[must_use]
     pub fn parse_keyword(&mut self, expected: Keyword) -> bool {
         match self.peek_token() {
-            Token::Word(w) if expected == w.keyword => {
+            Token::Word { value: w, .. } if expected == w.keyword => {
                 self.next_token();
                 true
             }
@@ -1277,15 +1337,13 @@
     #[must_use]
     pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option<Keyword> {
         match self.peek_token() {
-            Token::Word(w) => {
-                keywords
-                    .iter()
-                    .find(|keyword| **keyword == w.keyword)
-                    .map(|keyword| {
-                        self.next_token();
-                        *keyword
-                    })
-            }
+            Token::Word { value: w, .. } => keywords
+                .iter()
+                .find(|keyword| **keyword == w.keyword)
+                .map(|keyword| {
+                    self.next_token();
+                    *keyword
+                }),
             _ => None,
         }
     }
@@ -1349,7 +1407,7 @@
         let mut values = vec![];
         loop {
             values.push(f(self)?);
-            if !self.consume_token(&Token::Comma) {
+            if !self.consume_token(&Token::Comma { span: Span::new() }) {
                 break;
             }
         }
@@ -1378,7 +1436,10 @@
         let all = self.parse_keyword(Keyword::ALL);
         let distinct = self.parse_keyword(Keyword::DISTINCT);
         if all && distinct {
-            return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
+            return parser_err!(
+                "Cannot specify both ALL and DISTINCT".to_string(),
+                self.peek_token().span()
+            );
         } else {
             Ok(distinct)
         }
@@ -1510,7 +1571,7 @@
 
     pub fn parse_file_format(&mut self) -> Result<FileFormat, ParserError> {
         match self.next_token() {
-            Token::Word(w) => match w.keyword {
+            Token::Word { value, span } => match value.keyword {
                 Keyword::AVRO => Ok(FileFormat::AVRO),
                 Keyword::JSONFILE => Ok(FileFormat::JSONFILE),
                 Keyword::ORC => Ok(FileFormat::ORC),
@@ -1518,7 +1579,7 @@
                 Keyword::RCFILE => Ok(FileFormat::RCFILE),
                 Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE),
                 Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE),
-                _ => self.expected("fileformat", Token::Word(w)),
+                _ => self.expected("fileformat", Token::Word { value, span }),
             },
             unexpected => self.expected("fileformat", unexpected),
         }
@@ -1563,10 +1624,14 @@
         let names = self.parse_comma_separated(Parser::parse_object_name)?;
         let cascade = self.parse_keyword(Keyword::CASCADE);
         let restrict = self.parse_keyword(Keyword::RESTRICT);
-        let purge = self.parse_keyword(Keyword::PURGE);
         if cascade && restrict {
-            return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP");
+            self.prev_token();
+            return parser_err!(
+                "Cannot specify both CASCADE and RESTRICT in DROP",
+                self.peek_token().span()
+            );
         }
+        let purge = self.parse_keyword(Keyword::PURGE);
         Ok(Statement::Drop {
             object_type,
             if_exists,
@@ -1581,9 +1646,9 @@
         let index_name = self.parse_object_name()?;
         self.expect_keyword(Keyword::ON)?;
         let table_name = self.parse_object_name()?;
-        self.expect_token(&Token::LParen)?;
+        self.expect_token(&Token::LParen { span: Span::new() })?;
         let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?;
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         Ok(Statement::CreateIndex {
             name: index_name,
             table_name,
@@ -1596,9 +1661,9 @@
     //TODO: Implement parsing for Skewed and Clustered
     pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
         if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             let columns = self.parse_comma_separated(Parser::parse_column_def)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             Ok(HiveDistributionStyle::PARTITIONED { columns })
         } else {
             Ok(HiveDistributionStyle::NONE)
@@ -1702,20 +1767,22 @@
     fn parse_columns(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> {
         let mut columns = vec![];
         let mut constraints = vec![];
-        if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) {
+        if !self.consume_token(&Token::LParen { span: Span::new() })
+            || self.consume_token(&Token::RParen { span: Span::new() })
+        {
             return Ok((columns, constraints));
         }
 
         loop {
             if let Some(constraint) = self.parse_optional_table_constraint()? {
                 constraints.push(constraint);
-            } else if let Token::Word(_) = self.peek_token() {
+            } else if let Token::Word { .. } = self.peek_token() {
                 columns.push(self.parse_column_def()?);
             } else {
                 return self.expected("column name or constraint definition", self.peek_token());
            }
-            let comma = self.consume_token(&Token::Comma);
-            if self.consume_token(&Token::RParen) {
+            let comma = self.consume_token(&Token::Comma { span: Span::new() });
+            if self.consume_token(&Token::RParen { span: Span::new() }) {
                 // allow a trailing comma, even though it's not in standard
                 break;
             } else if !comma {
@@ -1796,9 +1863,9 @@
                 on_update,
             }))
         } else if self.parse_keyword(Keyword::CHECK) {
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             let expr = self.parse_expr()?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             Ok(Some(ColumnOption::Check(expr)))
         } else if self.parse_keyword(Keyword::AUTO_INCREMENT)
             && dialect_of!(self is MySqlDialect | GenericDialect)
@@ -1847,7 +1914,9 @@
             None
         };
         match self.next_token() {
-            Token::Word(w) if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE => {
+            Token::Word { value: w, .. }
+                if w.keyword == Keyword::PRIMARY || w.keyword == Keyword::UNIQUE =>
+            {
                 let is_primary = w.keyword == Keyword::PRIMARY;
                 if is_primary {
                     self.expect_keyword(Keyword::KEY)?;
@@ -1859,7 +1928,7 @@
                     is_primary,
                 }))
             }
-            Token::Word(w) if w.keyword == Keyword::FOREIGN => {
+            Token::Word { value: w, .. } if w.keyword == Keyword::FOREIGN => {
                 self.expect_keyword(Keyword::KEY)?;
                 let columns = self.parse_parenthesized_column_list(Mandatory)?;
                 self.expect_keyword(Keyword::REFERENCES)?;
@@ -1887,10 +1956,10 @@
                     on_update,
                 }))
             }
-            Token::Word(w) if w.keyword == Keyword::CHECK => {
-                self.expect_token(&Token::LParen)?;
+            Token::Word { value: w, .. } if w.keyword == Keyword::CHECK => {
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 let expr = Box::new(self.parse_expr()?);
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 Ok(Some(TableConstraint::Check { name, expr }))
             }
             unexpected => {
@@ -1906,9 +1975,9 @@
 
     pub fn parse_options(&mut self, keyword: Keyword) -> Result<Vec<SqlOption>, ParserError> {
         if self.parse_keyword(keyword) {
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             let options = self.parse_comma_separated(Parser::parse_sql_option)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             Ok(options)
         } else {
             Ok(vec![])
@@ -1917,7 +1986,7 @@
 
     pub fn parse_sql_option(&mut self) -> Result<SqlOption, ParserError> {
         let name = self.parse_identifier()?;
-        self.expect_token(&Token::Eq)?;
+        self.expect_token(&Token::Eq { span: Span::new() })?;
         let value = self.parse_value()?;
         Ok(SqlOption { name, value })
     }
@@ -1933,9 +2002,9 @@
             let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
 
             if self.parse_keyword(Keyword::PARTITION) {
-                self.expect_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 let partitions = self.parse_comma_separated(Parser::parse_expr)?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 AlterTableOperation::AddPartitions {
                     if_not_exists,
                     new_partitions: partitions,
@@ -1967,17 +2036,17 @@
             }
         } else if self.parse_keyword(Keyword::DROP) {
             if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) {
-                self.expect_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 let partitions = self.parse_comma_separated(Parser::parse_expr)?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 AlterTableOperation::DropPartitions {
                     partitions,
                     if_exists: true,
                 }
             } else if self.parse_keyword(Keyword::PARTITION) {
-                self.expect_token(&Token::LParen)?;
+                self.expect_token(&Token::LParen { span: Span::new() })?;
                 let partitions = self.parse_comma_separated(Parser::parse_expr)?;
-                self.expect_token(&Token::RParen)?;
+                self.expect_token(&Token::RParen { span: Span::new() })?;
                 AlterTableOperation::DropPartitions {
                     partitions,
                     if_exists: false,
@@ -1994,14 +2063,14 @@
                 }
             }
         } else if self.parse_keyword(Keyword::PARTITION) {
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             let before = self.parse_comma_separated(Parser::parse_expr)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             self.expect_keyword(Keyword::RENAME)?;
             self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?;
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             let renames = self.parse_comma_separated(Parser::parse_expr)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             AlterTableOperation::RenamePartitions {
                 old_partitions: before,
                 new_partitions: renames,
@@ -2071,7 +2140,7 @@
         let table_name = self.parse_object_name()?;
         let columns = self.parse_parenthesized_column_list(Optional)?;
         self.expect_keywords(&[Keyword::FROM, Keyword::STDIN])?;
-        self.expect_token(&Token::SemiColon)?;
+        self.expect_token(&Token::SemiColon { span: Span::new() })?;
         let values = self.parse_tsv();
         Ok(Statement::Copy {
             table_name,
@@ -2091,19 +2160,25 @@
         let mut content = String::from("");
         while let Some(t) = self.next_token_no_skip() {
             match t {
-                Token::Whitespace(Whitespace::Tab) => {
+                Token::Whitespace {
+                    value: Whitespace::Tab,
+                    ..
+                } => {
                     values.push(Some(content.to_string()));
                     content.clear();
                 }
-                Token::Whitespace(Whitespace::Newline) => {
+                Token::Whitespace {
+                    value: Whitespace::Newline,
+                    ..
+                } => {
                     values.push(Some(content.to_string()));
                     content.clear();
                 }
-                Token::Backslash => {
-                    if self.consume_token(&Token::Period) {
+                Token::Backslash { .. } => {
+                    if self.consume_token(&Token::Period { span: Span::new() }) {
                         return values;
                     }
-                    if let Token::Word(w) = self.next_token() {
+                    if let Token::Word { value: w, .. } = self.next_token() {
                         if w.value == "N" {
                             values.push(None);
                         }
@@ -2120,27 +2195,40 @@
     /// Parse a literal value (numbers, strings, date/time, booleans)
     fn parse_value(&mut self) -> Result<Value, ParserError> {
         match self.next_token() {
-            Token::Word(w) => match w.keyword {
+            Token::Word { value, span } => match value.keyword {
                 Keyword::TRUE => Ok(Value::Boolean(true)),
                 Keyword::FALSE => Ok(Value::Boolean(false)),
                 Keyword::NULL => Ok(Value::Null),
-                Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style {
-                    Some('"') => Ok(Value::DoubleQuotedString(w.value)),
-                    Some('\'') => Ok(Value::SingleQuotedString(w.value)),
-                    _ => self.expected("A value?", Token::Word(w))?,
+                Keyword::NoKeyword if value.quote_style.is_some() => match value.quote_style {
+                    Some('"') => Ok(Value::DoubleQuotedString(value.value)),
+                    Some('\'') => Ok(Value::SingleQuotedString(value.value)),
+                    _ => self.expected("A value?", Token::Word { value, span })?,
                 },
-                _ => self.expected("a concrete value", Token::Word(w)),
+                _ => self.expected("a concrete value", Token::Word { value, span }),
             },
             // The call to n.parse() returns a bigdecimal when the
             // bigdecimal feature is enabled, and is otherwise a no-op
             // (i.e., it returns the input string).
-            Token::Number(ref n, l) => match n.parse() {
-                Ok(n) => Ok(Value::Number(n, l)),
-                Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)),
+            Token::Number {
+                ref value,
+                long,
+                span,
+            } => match value.parse() {
+                Ok(n) => Ok(Value::Number(n, long)),
+                Err(e) => parser_err!(
+                    format!("Could not parse '{}' as number: {}", value, e),
+                    span
+                ),
             },
-            Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
-            Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
-            Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
+            Token::SingleQuotedString { ref value, .. } => {
+                Ok(Value::SingleQuotedString(value.to_string()))
+            }
+            Token::NationalStringLiteral { ref value, .. } => {
+                Ok(Value::NationalStringLiteral(value.to_string()))
+            }
+            Token::HexStringLiteral { ref value, .. } => {
+                Ok(Value::HexStringLiteral(value.to_string()))
+            }
            unexpected => self.expected("a value", unexpected),
         }
     }
@@ -2158,9 +2246,12 @@
     /// Parse an unsigned literal integer/long
     pub fn parse_literal_uint(&mut self) -> Result<u64, ParserError> {
         match self.next_token() {
-            Token::Number(s, _) => s.parse::<u64>().map_err(|e| {
-                ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e))
-            }),
+            Token::Number { value, span, .. } => {
+                value.parse::<u64>().map_err(|e| ParserError::ParserError {
+                    message: format!("Could not parse '{}' as u64: {}", value, e),
+                    span,
+                })
+            }
             unexpected => self.expected("literal int", unexpected),
         }
     }
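The hunk above threads the literal's own span into the numeric-conversion error. A hypothetical test sketch of the intended behavior (names and exact offsets are assumptions; they depend on tokenizer changes not shown in this excerpt):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::{Parser, ParserError};

    #[test]
    fn literal_uint_error_points_at_the_literal() {
        // VARCHAR's precision goes through parse_optional_precision ->
        // parse_literal_uint, so an out-of-range value triggers the error.
        let sql = "CREATE TABLE t (x VARCHAR(99999999999999999999))";
        let err = Parser::parse_sql(&GenericDialect {}, sql).unwrap_err();
        match err {
            ParserError::ParserError { message, span } => {
                assert!(message.starts_with("Could not parse"));
                // the span should cover the offending literal itself
                assert_eq!(&sql[span.start..span.end], "99999999999999999999");
            }
            other => panic!("unexpected error kind: {:?}", other),
        }
    }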
- Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), + Token::SingleQuotedString { value, .. } => Ok(Some(Ident::with_quote('\'', value))), not_an_ident => { if after_as { return self.expected("an identifier after AS", not_an_ident); @@ -2326,7 +2427,7 @@ impl<'a> Parser<'a> { let mut idents = vec![]; loop { idents.push(self.parse_identifier()?); - if !self.consume_token(&Token::Period) { + if !self.consume_token(&Token::Period { span: Span::new() }) { break; } } @@ -2338,14 +2439,14 @@ impl<'a> Parser<'a> { let mut idents = vec![]; loop { match self.peek_token() { - Token::Word(w) => { + Token::Word { value: w, .. } => { if w.keyword != Keyword::NoKeyword { break; } idents.push(w.to_ident()); } - Token::EOF | Token::Eq => break, + Token::EOF { .. } | Token::Eq { .. } => break, _ => {} } @@ -2360,10 +2461,10 @@ impl<'a> Parser<'a> { let mut idents = vec![]; loop { match self.next_token() { - Token::Word(w) => { + Token::Word { value: w, .. } => { idents.push(w.to_ident()); } - Token::EOF => break, + Token::EOF { .. } => break, _ => {} } } @@ -2374,8 +2475,8 @@ impl<'a> Parser<'a> { /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) pub fn parse_identifier(&mut self) -> Result { match self.next_token() { - Token::Word(w) => Ok(w.to_ident()), - Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), + Token::Word { value: w, .. } => Ok(w.to_ident()), + Token::SingleQuotedString { value: s, .. } => Ok(Ident::with_quote('\'', s)), unexpected => self.expected("identifier", unexpected), } } @@ -2385,9 +2486,9 @@ impl<'a> Parser<'a> { &mut self, optional: IsOptional, ) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { + if self.consume_token(&Token::LParen { span: Span::new() }) { let cols = self.parse_comma_separated(Parser::parse_identifier)?; - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; Ok(cols) } else if optional == Optional { Ok(vec![]) @@ -2397,9 +2498,9 @@ impl<'a> Parser<'a> { } pub fn parse_optional_precision(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { + if self.consume_token(&Token::LParen { span: Span::new() }) { let n = self.parse_literal_uint()?; - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; Ok(Some(n)) } else { Ok(None) @@ -2409,14 +2510,14 @@ impl<'a> Parser<'a> { pub fn parse_optional_precision_scale( &mut self, ) -> Result<(Option, Option), ParserError> { - if self.consume_token(&Token::LParen) { + if self.consume_token(&Token::LParen { span: Span::new() }) { let n = self.parse_literal_uint()?; - let scale = if self.consume_token(&Token::Comma) { + let scale = if self.consume_token(&Token::Comma { span: Span::new() }) { Some(self.parse_literal_uint()?) 
} else { None }; - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; Ok((Some(n), scale)) } else { Ok((None, None)) @@ -2527,9 +2628,9 @@ impl<'a> Parser<'a> { let name = self.parse_identifier()?; let mut cte = if self.parse_keyword(Keyword::AS) { - self.expect_token(&Token::LParen)?; + self.expect_token(&Token::LParen { span: Span::new() })?; let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; let alias = TableAlias { name, columns: vec![], @@ -2542,9 +2643,9 @@ impl<'a> Parser<'a> { } else { let columns = self.parse_parenthesized_column_list(Optional)?; self.expect_keyword(Keyword::AS)?; - self.expect_token(&Token::LParen)?; + self.expect_token(&Token::LParen { span: Span::new() })?; let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; let alias = TableAlias { name, columns }; Cte { alias, @@ -2571,10 +2672,10 @@ impl<'a> Parser<'a> { // Start by parsing a restricted SELECT or a `(subquery)`: let mut expr = if self.parse_keyword(Keyword::SELECT) { SetExpr::Select(Box::new(self.parse_select()?)) - } else if self.consume_token(&Token::LParen) { + } else if self.consume_token(&Token::LParen { span: Span::new() }) { // CTEs are not allowed here, but the parser currently accepts them let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; SetExpr::Query(Box::new(subquery)) } else if self.parse_keyword(Keyword::VALUES) { SetExpr::Values(self.parse_values()?) @@ -2613,9 +2714,13 @@ impl<'a> Parser<'a> { fn parse_set_operator(&mut self, token: &Token) -> Option { match token { - Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), - Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), - Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), + Token::Word { value: w, .. } if w.keyword == Keyword::UNION => Some(SetOperator::Union), + Token::Word { value: w, .. } if w.keyword == Keyword::EXCEPT => { + Some(SetOperator::Except) + } + Token::Word { value: w, .. } if w.keyword == Keyword::INTERSECT => { + Some(SetOperator::Intersect) + } _ => None, } } @@ -2729,20 +2834,22 @@ impl<'a> Parser<'a> { let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; + self.expect_token(&Token::Colon { span: Span::new() })?; } let variable = self.parse_identifier()?; - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + if self.consume_token(&Token::Eq { span: Span::new() }) || self.parse_keyword(Keyword::TO) { let mut values = vec![]; loop { let token = self.peek_token(); let value = match (self.parse_value(), token) { (Ok(value), _) => SetVariableValue::Literal(value), - (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), + (Err(_), Token::Word { value: ident, .. 
}) => { + SetVariableValue::Ident(ident.to_ident()) + } (Err(_), unexpected) => self.expected("variable value", unexpected)?, }; values.push(value); - if self.consume_token(&Token::Comma) { + if self.consume_token(&Token::Comma { span: Span::new() }) { continue; } return Ok(Statement::SetVariable { @@ -2812,10 +2919,10 @@ impl<'a> Parser<'a> { Keyword::FUNCTION => Ok(ShowCreateObject::Function), Keyword::PROCEDURE => Ok(ShowCreateObject::Procedure), Keyword::EVENT => Ok(ShowCreateObject::Event), - keyword => Err(ParserError::ParserError(format!( - "Unable to map keyword to ShowCreateObject: {:?}", - keyword - ))), + keyword => Err(ParserError::ParserError { + message: format!("Unable to map keyword to ShowCreateObject: {:?}", keyword), + span: self.peek_token().span(), + }), }?; let obj_name = self.parse_object_name()?; @@ -2887,7 +2994,7 @@ impl<'a> Parser<'a> { } } else { let natural = self.parse_keyword(Keyword::NATURAL); - let peek_keyword = if let Token::Word(w) = self.peek_token() { + let peek_keyword = if let Token::Word { value: w, .. } = self.peek_token() { w.keyword } else { Keyword::NoKeyword @@ -2934,18 +3041,18 @@ impl<'a> Parser<'a> { pub fn parse_table_factor(&mut self) -> Result { if self.parse_keyword(Keyword::LATERAL) { // LATERAL must always be followed by a subquery. - if !self.consume_token(&Token::LParen) { + if !self.consume_token(&Token::LParen { span: Span::new() }) { self.expected("subquery after LATERAL", self.peek_token())?; } self.parse_derived_table_factor(Lateral) } else if self.parse_keyword(Keyword::TABLE) { // parse table function (SELECT * FROM TABLE () [ AS ]) - self.expect_token(&Token::LParen)?; + self.expect_token(&Token::LParen { span: Span::new() })?; let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::TableFunction { expr, alias }) - } else if self.consume_token(&Token::LParen) { + } else if self.consume_token(&Token::LParen { span: Span::new() }) { // A left paren introduces either a derived table (i.e., a subquery) // or a nested join. It's nearly impossible to determine ahead of // time which it is... so we just try to parse both. @@ -2980,12 +3087,12 @@ impl<'a> Parser<'a> { #[allow(clippy::if_same_then_else)] if !table_and_joins.joins.is_empty() { - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) // (A) } else if let TableFactor::NestedJoin(_) = &table_and_joins.relation { // (B): `table_and_joins` (what we found inside the parentheses) // is a nested join `(foo JOIN bar)`, not followed by other joins. - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { // Dialect-specific behavior: Snowflake diverges from the @@ -2994,7 +3101,7 @@ impl<'a> Parser<'a> { // around lone table names (e.g. `FROM (mytable [AS alias])`) // and around derived tables (e.g. `FROM ((SELECT ...) // [AS alias])`) as well. - self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen { span: Span::new() })?; if let Some(outer_alias) = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? @@ -3007,10 +3114,10 @@ impl<'a> Parser<'a> { | TableFactor::TableFunction { alias, .. 
                     | TableFactor::TableFunction { alias, .. } => {
                         // but not `FROM (mytable AS alias1) AS alias2`.
                         if let Some(inner_alias) = alias {
-                            return Err(ParserError::ParserError(format!(
-                                "duplicate alias {}",
-                                inner_alias
-                            )));
+                            return Err(ParserError::ParserError {
+                                message: format!("duplicate alias {}", inner_alias),
+                                span: self.peek_token().span(),
+                            });
                         }
                         // Act as if the alias was specified normally next
                         // to the table name: `(mytable) AS alias` ->
@@ -3030,7 +3137,7 @@
         } else {
             let name = self.parse_object_name()?;
             // Postgres, MSSQL: table-valued functions:
-            let args = if self.consume_token(&Token::LParen) {
+            let args = if self.consume_token(&Token::LParen { span: Span::new() }) {
                 self.parse_optional_args()?
             } else {
                 vec![]
@@ -3039,9 +3146,9 @@
             // MSSQL-specific table hints:
             let mut with_hints = vec![];
             if self.parse_keyword(Keyword::WITH) {
-                if self.consume_token(&Token::LParen) {
+                if self.consume_token(&Token::LParen { span: Span::new() }) {
                     with_hints = self.parse_comma_separated(Parser::parse_expr)?;
-                    self.expect_token(&Token::RParen)?;
+                    self.expect_token(&Token::RParen { span: Span::new() })?;
                 } else {
                     // rewind, as WITH may belong to the next statement's CTE
                     self.prev_token();
@@ -3061,7 +3168,7 @@
         lateral: IsLateral,
     ) -> Result<TableFactor, ParserError> {
         let subquery = Box::new(self.parse_query()?);
-        self.expect_token(&Token::RParen)?;
+        self.expect_token(&Token::RParen { span: Span::new() })?;
         let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
         Ok(TableFactor::Derived {
             lateral: match lateral {
@@ -3218,7 +3325,11 @@
         let cascade = self.parse_keyword(Keyword::CASCADE);
         let restrict = self.parse_keyword(Keyword::RESTRICT);
         if cascade && restrict {
-            return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE");
+            self.prev_token();
+            return parser_err!(
+                "Cannot specify both CASCADE and RESTRICT in REVOKE",
+                self.peek_token().span()
+            );
         }

         Ok(Statement::Revoke {
@@ -3275,9 +3386,9 @@
         let columns = self.parse_parenthesized_column_list(Optional)?;

         let partitioned = if self.parse_keyword(Keyword::PARTITION) {
-            self.expect_token(&Token::LParen)?;
+            self.expect_token(&Token::LParen { span: Span::new() })?;
             let r = Some(self.parse_comma_separated(Parser::parse_expr)?);
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             r
         } else {
             None
@@ -3331,16 +3442,16 @@
     /// Parse a `var = expr` assignment, used in an UPDATE statement
     pub fn parse_assignment(&mut self) -> Result<Assignment, ParserError> {
         let id = self.parse_identifiers_non_keywords()?;
-        self.expect_token(&Token::Eq)?;
+        self.expect_token(&Token::Eq { span: Span::new() })?;
         let value = self.parse_expr()?;
         Ok(Assignment { id, value })
     }

     fn parse_function_args(&mut self) -> Result<FunctionArg, ParserError> {
-        if self.peek_nth_token(1) == Token::RArrow {
+        if matches!(self.peek_nth_token(1), Token::RArrow { .. }) {
             let name = self.parse_identifier()?;

-            self.expect_token(&Token::RArrow)?;
+            self.expect_token(&Token::RArrow { span: Span::new() })?;
             let arg = self.parse_wildcard_expr()?.into();

             Ok(FunctionArg::Named { name, arg })
@@ -3350,11 +3461,11 @@
     }

     pub fn parse_optional_args(&mut self) -> Result<Vec<FunctionArg>, ParserError> {
-        if self.consume_token(&Token::RParen) {
+        if self.consume_token(&Token::RParen { span: Span::new() }) {
             Ok(vec![])
         } else {
             let args = self.parse_comma_separated(Parser::parse_function_args)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             Ok(args)
         }
     }
@@ -3403,9 +3514,9 @@
     /// Parse a TOP clause, MSSQL equivalent of LIMIT,
     /// that follows after SELECT [DISTINCT].
     pub fn parse_top(&mut self) -> Result<Top, ParserError> {
-        let quantity = if self.consume_token(&Token::LParen) {
+        let quantity = if self.consume_token(&Token::LParen { span: Span::new() }) {
             let quantity = self.parse_expr()?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
             Some(quantity)
         } else {
             Some(Expr::Value(self.parse_number_value()?))
@@ -3474,9 +3585,9 @@

     pub fn parse_values(&mut self) -> Result<Values, ParserError> {
         let values = self.parse_comma_separated(|parser| {
-            parser.expect_token(&Token::LParen)?;
+            parser.expect_token(&Token::LParen { span: Span::new() })?;
             let exprs = parser.parse_comma_separated(Parser::parse_expr)?;
-            parser.expect_token(&Token::RParen)?;
+            parser.expect_token(&Token::RParen { span: Span::new() })?;
             Ok(exprs)
         })?;
         Ok(Values(values))
@@ -3527,7 +3638,7 @@
             // PostgreSQL, for historical reasons, does not. We follow
             // PostgreSQL in making the comma optional, since that is strictly
             // more general.
-            required = self.consume_token(&Token::Comma);
+            required = self.consume_token(&Token::Comma { span: Span::new() });
         }
         Ok(modes)
     }
@@ -3565,9 +3676,9 @@
         let name = self.parse_identifier()?;

         let mut parameters = vec![];
-        if self.consume_token(&Token::LParen) {
+        if self.consume_token(&Token::LParen { span: Span::new() }) {
             parameters = self.parse_comma_separated(Parser::parse_expr)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
         }

         Ok(Statement::Execute { name, parameters })
@@ -3577,9 +3688,9 @@
         let name = self.parse_identifier()?;

         let mut data_types = vec![];
-        if self.consume_token(&Token::LParen) {
+        if self.consume_token(&Token::LParen { span: Span::new() }) {
             data_types = self.parse_comma_separated(Parser::parse_data_type)?;
-            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::RParen { span: Span::new() })?;
         }

         self.expect_keyword(Keyword::AS)?;
@@ -3596,11 +3707,11 @@
         let token = self.next_token();
         let (object_type, object_name) = match token {
-            Token::Word(w) if w.keyword == Keyword::COLUMN => {
+            Token::Word { value: w, .. } if w.keyword == Keyword::COLUMN => {
                 let object_name = self.parse_object_name()?;
                 (CommentObject::Column, object_name)
             }
-            Token::Word(w) if w.keyword == Keyword::TABLE => {
+            Token::Word { value: w, .. } if w.keyword == Keyword::TABLE => {
                 let object_name = self.parse_object_name()?;
                 (CommentObject::Table, object_name)
             }
@@ -3647,11 +3758,11 @@ mod tests {
             parser.prev_token();
             assert_eq!(parser.peek_token(), Token::make_word("version", None));
             assert_eq!(parser.next_token(), Token::make_word("version", None));
-            assert_eq!(parser.peek_token(), Token::EOF);
+            assert_eq!(parser.peek_token(), Token::EOF { span: Span::new() });
             parser.prev_token();
             assert_eq!(parser.next_token(), Token::make_word("version", None));
-            assert_eq!(parser.next_token(), Token::EOF);
-            assert_eq!(parser.next_token(), Token::EOF);
+            assert_eq!(parser.next_token(), Token::EOF { span: Span::new() });
+            assert_eq!(parser.next_token(), Token::EOF { span: Span::new() });
             parser.prev_token();
         });
     }
diff --git a/src/span.rs b/src/span.rs
new file mode 100644
index 000000000..fa0baeb66
--- /dev/null
+++ b/src/span.rs
@@ -0,0 +1,142 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#[cfg(not(feature = "std"))]
+use alloc::{boxed::Box, vec::Vec};
+
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+
+/// A byte span within the parsed string
+#[derive(Debug, Eq, Clone, Copy)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub enum Span {
+    Unset,
+    Set { start: usize, end: usize },
+}
+
+/// All spans are equal
+impl PartialEq for Span {
+    fn eq(&self, _: &Self) -> bool {
+        true
+    }
+}
+
+/// All spans hash to the same value
+impl core::hash::Hash for Span {
+    fn hash<H: core::hash::Hasher>(&self, _: &mut H) {}
+}
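Note on the two impls above: every `Span` compares equal and hashes to nothing, so attaching location data to `Token` (and anything built from it) cannot change the behavior of existing `PartialEq`/`Hash`-based code or tests. A minimal sketch of the consequence, assuming this patch is applied and using the `From<Range<usize>>` impl defined further down in this file:

```rust
use sqlparser::span::Span;

fn main() {
    // Two spans at different locations still compare equal...
    assert_eq!(Span::from(0..6), Span::from(40..46));
    // ...so the actual byte range must be read out explicitly.
    assert_ne!(Span::from(0..6).range(), Span::from(40..46).range());
}
```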
+
+impl Span {
+    pub fn new() -> Self {
+        Span::Unset
+    }
+
+    pub fn expanded(&self, item: &impl Spanned) -> Span {
+        match self {
+            Span::Unset => item.span(),
+            Span::Set { start: s1, end: e1 } => match item.span() {
+                Span::Unset => *self,
+                Span::Set { start: s2, end: e2 } => {
+                    (usize::min(*s1, s2)..usize::max(*e1, e2)).into()
+                }
+            },
+        }
+    }
+
+    pub fn expand(&mut self, item: &impl Spanned) {
+        *self = self.expanded(item);
+    }
+
+    pub fn start(&self) -> Option<usize> {
+        match self {
+            Span::Unset => None,
+            Span::Set { start, .. } => Some(*start),
+        }
+    }
+
+    pub fn end(&self) -> Option<usize> {
+        match self {
+            Span::Unset => None,
+            Span::Set { end, .. } => Some(*end),
+        }
+    }
+
+    pub fn range(&self) -> Option<core::ops::Range<usize>> {
+        match self {
+            Span::Unset => None,
+            Span::Set { start, end } => Some(*start..*end),
+        }
+    }
+}
+
+impl Default for Span {
+    fn default() -> Self {
+        Span::Unset
+    }
+}
+
+impl core::convert::From<core::ops::Range<usize>> for Span {
+    fn from(r: core::ops::Range<usize>) -> Self {
+        Self::Set {
+            start: r.start,
+            end: r.end,
+        }
+    }
+}
+
+pub struct UnsetSpanError;
+
+impl core::convert::TryInto<core::ops::Range<usize>> for Span {
+    type Error = UnsetSpanError;
+
+    fn try_into(self) -> Result<core::ops::Range<usize>, Self::Error> {
+        match self {
+            Span::Unset => Err(UnsetSpanError),
+            Span::Set { start, end } => Ok(start..end),
+        }
+    }
+}
+
+pub trait Spanned {
+    fn span(&self) -> Span;
+}
+
+impl Spanned for Span {
+    fn span(&self) -> Span {
+        *self
+    }
+}
+
+impl<T: Spanned> Spanned for Option<T> {
+    fn span(&self) -> Span {
+        match self {
+            Some(v) => v.span(),
+            None => Default::default(),
+        }
+    }
+}
+
+impl<T: Spanned> Spanned for Vec<T> {
+    fn span(&self) -> Span {
+        let mut ans = Span::new();
+        for v in self {
+            ans.expand(v);
+        }
+        ans
+    }
+}
+
+impl<T: Spanned> Spanned for Box<T> {
+    fn span(&self) -> Span {
+        self.as_ref().span()
+    }
+}
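The `Spanned` trait plus the blanket impls for `Option`, `Vec`, and `Box` let callers fold the spans of child nodes into one enclosing span; `expanded` takes the union of two byte ranges and treats `Unset` as the identity. A small usage sketch under the same assumption:

```rust
use sqlparser::span::{Span, Spanned};

fn main() {
    // `Vec<T: Spanned>` folds its elements' spans together...
    let parts = vec![Span::from(7..11), Span::from(0..6)];
    assert_eq!(parts.span().range(), Some(0..11));

    // ...and `expand` grows an accumulator, starting from `Unset`.
    let mut acc = Span::new();
    acc.expand(&parts);
    assert_eq!(acc.range(), Some(0..11));
}
```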
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 296bcc64b..57db06755 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -24,176 +24,244 @@ use alloc::{
     vec,
     vec::Vec,
 };
+
 use core::fmt;
 use core::iter::Peekable;
-use core::str::Chars;
+use core::str::CharIndices;

 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};

 use crate::dialect::SnowflakeDialect;
-use crate::dialect::{Dialect, MySqlDialect};
 use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
+use crate::{dialect::Dialect, dialect::MySqlDialect, span::Spanned};
+
+use crate::span::Span;

 /// SQL Token enumeration
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum Token {
     /// An end-of-file marker, not a real token
-    EOF,
+    EOF { span: Span },
     /// A keyword (like SELECT) or an optionally quoted SQL identifier
-    Word(Word),
+    Word { value: Word, span: Span },
     /// An unsigned numeric literal
-    Number(String, bool),
+    Number {
+        value: String,
+        long: bool,
+        span: Span,
+    },
     /// A character that could not be tokenized
-    Char(char),
+    Char { value: char, span: Span },
     /// Single quoted string: i.e: 'string'
-    SingleQuotedString(String),
+    SingleQuotedString { value: String, span: Span },
     /// "National" string literal: i.e: N'string'
-    NationalStringLiteral(String),
+    NationalStringLiteral { value: String, span: Span },
     /// Hexadecimal string literal: i.e.: X'deadbeef'
-    HexStringLiteral(String),
+    HexStringLiteral { value: String, span: Span },
     /// Comma
-    Comma,
+    Comma { span: Span },
     /// Whitespace (space, tab, etc)
-    Whitespace(Whitespace),
+    Whitespace { value: Whitespace, span: Span },
     /// Double equals sign `==`
-    DoubleEq,
+    DoubleEq { span: Span },
     /// Equality operator `=`
-    Eq,
+    Eq { span: Span },
     /// Not Equals operator `<>` (or `!=` in some dialects)
-    Neq,
+    Neq { span: Span },
     /// Less Than operator `<`
-    Lt,
+    Lt { span: Span },
     /// Greater Than operator `>`
-    Gt,
+    Gt { span: Span },
     /// Less Than Or Equals operator `<=`
-    LtEq,
+    LtEq { span: Span },
     /// Greater Than Or Equals operator `>=`
-    GtEq,
+    GtEq { span: Span },
     /// Spaceship operator <=>
-    Spaceship,
+    Spaceship { span: Span },
     /// Plus operator `+`
-    Plus,
+    Plus { span: Span },
     /// Minus operator `-`
-    Minus,
+    Minus { span: Span },
     /// Multiplication operator `*`
-    Mul,
+    Mul { span: Span },
     /// Division operator `/`
-    Div,
+    Div { span: Span },
     /// Modulo Operator `%`
-    Mod,
+    Mod { span: Span },
     /// String concatenation `||`
-    StringConcat,
+    StringConcat { span: Span },
     /// Left parenthesis `(`
-    LParen,
+    LParen { span: Span },
     /// Right parenthesis `)`
-    RParen,
+    RParen { span: Span },
     /// Period (used for compound identifiers or projections into nested types)
-    Period,
+    Period { span: Span },
     /// Colon `:`
-    Colon,
+    Colon { span: Span },
     /// DoubleColon `::` (used for casting in postgresql)
-    DoubleColon,
+    DoubleColon { span: Span },
     /// SemiColon `;` used as separator for COPY and payload
-    SemiColon,
+    SemiColon { span: Span },
     /// Backslash `\` used in terminating the COPY payload with `\.`
-    Backslash,
+    Backslash { span: Span },
     /// Left bracket `[`
-    LBracket,
+    LBracket { span: Span },
     /// Right bracket `]`
-    RBracket,
+    RBracket { span: Span },
     /// Ampersand `&`
-    Ampersand,
+    Ampersand { span: Span },
     /// Pipe `|`
-    Pipe,
+    Pipe { span: Span },
     /// Caret `^`
-    Caret,
+    Caret { span: Span },
     /// Left brace `{`
-    LBrace,
+    LBrace { span: Span },
     /// Right brace `}`
-    RBrace,
+    RBrace { span: Span },
     /// Right Arrow `=>`
-    RArrow,
+    RArrow { span: Span },
     /// Sharp `#` used for PostgreSQL Bitwise XOR operator
-    Sharp,
+    Sharp { span: Span },
     /// Tilde `~` used for PostgreSQL Bitwise NOT operator or case sensitive match regular expression operator
-    Tilde,
+    Tilde { span: Span },
     /// `~*` , a case insensitive match regular expression operator in PostgreSQL
-    TildeAsterisk,
+    TildeAsterisk { span: Span },
     /// `!~` , a case sensitive not match regular expression operator in PostgreSQL
-    ExclamationMarkTilde,
+    ExclamationMarkTilde { span: Span },
     /// `!~*` , a case insensitive not match regular expression operator in PostgreSQL
-    ExclamationMarkTildeAsterisk,
+    ExclamationMarkTildeAsterisk { span: Span },
     /// `<<`, a bitwise shift left operator in PostgreSQL
-    ShiftLeft,
+    ShiftLeft { span: Span },
     /// `>>`, a bitwise shift right operator in PostgreSQL
-    ShiftRight,
+    ShiftRight { span: Span },
     /// Exclamation Mark `!` used for PostgreSQL factorial operator
-    ExclamationMark,
+    ExclamationMark { span: Span },
     /// Double Exclamation Mark `!!` used for PostgreSQL prefix factorial operator
-    DoubleExclamationMark,
+    DoubleExclamationMark { span: Span },
     /// AtSign `@` used for PostgreSQL abs operator
-    AtSign,
+    AtSign { span: Span },
     /// `|/`, a square root math operator in PostgreSQL
-    PGSquareRoot,
+    PGSquareRoot { span: Span },
     /// `||/` , a cube root math operator in PostgreSQL
-    PGCubeRoot,
+    PGCubeRoot { span: Span },
+}
+
+impl Spanned for Token {
+    fn span(&self) -> Span {
+        match self {
+            Token::EOF { span, .. } => *span,
+            Token::Word { span, .. } => *span,
+            Token::Number { span, .. } => *span,
+            Token::Char { span, .. } => *span,
+            Token::SingleQuotedString { span, .. } => *span,
+            Token::NationalStringLiteral { span, .. } => *span,
+            Token::HexStringLiteral { span, .. } => *span,
+            Token::Comma { span, .. } => *span,
+            Token::Whitespace { span, .. } => *span,
+            Token::DoubleEq { span, .. } => *span,
+            Token::Eq { span, .. } => *span,
+            Token::Neq { span, .. } => *span,
+            Token::Lt { span, .. } => *span,
+            Token::Gt { span, .. } => *span,
+            Token::LtEq { span, .. } => *span,
+            Token::GtEq { span, .. } => *span,
+            Token::Spaceship { span, .. } => *span,
+            Token::Plus { span, .. } => *span,
+            Token::Minus { span, .. } => *span,
+            Token::Mul { span, .. } => *span,
+            Token::Div { span, .. } => *span,
+            Token::Mod { span, .. } => *span,
+            Token::StringConcat { span, .. } => *span,
+            Token::LParen { span, .. } => *span,
+            Token::RParen { span, .. } => *span,
+            Token::Period { span, .. } => *span,
+            Token::Colon { span, .. } => *span,
+            Token::DoubleColon { span, .. } => *span,
+            Token::SemiColon { span, .. } => *span,
+            Token::Backslash { span, .. } => *span,
+            Token::LBracket { span, .. } => *span,
+            Token::RBracket { span, .. } => *span,
+            Token::Ampersand { span, .. } => *span,
+            Token::Pipe { span, .. } => *span,
+            Token::Caret { span, .. } => *span,
+            Token::LBrace { span, .. } => *span,
+            Token::RBrace { span, .. } => *span,
+            Token::RArrow { span, .. } => *span,
+            Token::Sharp { span, .. } => *span,
+            Token::Tilde { span, .. } => *span,
+            Token::TildeAsterisk { span, .. } => *span,
+            Token::ExclamationMarkTilde { span, .. } => *span,
+            Token::ExclamationMarkTildeAsterisk { span, .. } => *span,
+            Token::ShiftLeft { span, .. } => *span,
+            Token::ShiftRight { span, .. } => *span,
+            Token::ExclamationMark { span, .. } => *span,
+            Token::DoubleExclamationMark { span, .. } => *span,
+            Token::AtSign { span, .. } => *span,
+            Token::PGSquareRoot { span, .. } => *span,
+            Token::PGCubeRoot { span, .. } => *span,
+        }
+    }
+}
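Every variant is now a struct variant, so downstream matches use `{ .. }` (or `{ value, .. }`) rather than tuple patterns, and the `Spanned` impl above is the one place that knows where each `span` field lives. A hypothetical caller in the new style (`describe` is not part of the crate):

```rust
use sqlparser::span::Spanned;
use sqlparser::tokenizer::Token;

// Sketch only: shows the struct-variant matching the patch requires.
fn describe(token: &Token) -> String {
    match token {
        Token::Word { value, .. } => format!("word `{}`", value.value),
        Token::Comma { .. } => format!("comma at {:?}", token.span().range()),
        other => format!("token {} at {:?}", other, other.span().range()),
    }
}
```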
+
 impl fmt::Display for Token {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
-            Token::EOF => f.write_str("EOF"),
-            Token::Word(ref w) => write!(f, "{}", w),
-            Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }),
-            Token::Char(ref c) => write!(f, "{}", c),
-            Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
-            Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
-            Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
-            Token::Comma => f.write_str(","),
-            Token::Whitespace(ws) => write!(f, "{}", ws),
-            Token::DoubleEq => f.write_str("=="),
-            Token::Spaceship => f.write_str("<=>"),
-            Token::Eq => f.write_str("="),
-            Token::Neq => f.write_str("<>"),
-            Token::Lt => f.write_str("<"),
-            Token::Gt => f.write_str(">"),
-            Token::LtEq => f.write_str("<="),
-            Token::GtEq => f.write_str(">="),
-            Token::Plus => f.write_str("+"),
-            Token::Minus => f.write_str("-"),
-            Token::Mul => f.write_str("*"),
-            Token::Div => f.write_str("/"),
-            Token::StringConcat => f.write_str("||"),
-            Token::Mod => f.write_str("%"),
-            Token::LParen => f.write_str("("),
-            Token::RParen => f.write_str(")"),
-            Token::Period => f.write_str("."),
-            Token::Colon => f.write_str(":"),
-            Token::DoubleColon => f.write_str("::"),
-            Token::SemiColon => f.write_str(";"),
-            Token::Backslash => f.write_str("\\"),
-            Token::LBracket => f.write_str("["),
-            Token::RBracket => f.write_str("]"),
-            Token::Ampersand => f.write_str("&"),
-            Token::Caret => f.write_str("^"),
-            Token::Pipe => f.write_str("|"),
-            Token::LBrace => f.write_str("{"),
-            Token::RBrace => f.write_str("}"),
-            Token::RArrow => f.write_str("=>"),
-            Token::Sharp => f.write_str("#"),
-            Token::ExclamationMark => f.write_str("!"),
-            Token::DoubleExclamationMark => f.write_str("!!"),
-            Token::Tilde => f.write_str("~"),
-            Token::TildeAsterisk => f.write_str("~*"),
-            Token::ExclamationMarkTilde => f.write_str("!~"),
-            Token::ExclamationMarkTildeAsterisk => f.write_str("!~*"),
-            Token::AtSign => f.write_str("@"),
-            Token::ShiftLeft => f.write_str("<<"),
-            Token::ShiftRight => f.write_str(">>"),
-            Token::PGSquareRoot => f.write_str("|/"),
-            Token::PGCubeRoot => f.write_str("||/"),
+            Token::EOF { .. } => f.write_str("EOF"),
+            Token::Word { ref value, .. } => write!(f, "{}", value),
+            Token::Number {
+                ref value,
+                ref long,
+                ..
+            } => write!(f, "{}{long}", value, long = if *long { "L" } else { "" }),
+            Token::Char { ref value, .. } => write!(f, "{}", value),
+            Token::SingleQuotedString { ref value, .. } => write!(f, "'{}'", value),
+            Token::NationalStringLiteral { ref value, .. } => write!(f, "N'{}'", value),
+            Token::HexStringLiteral { ref value, .. } => write!(f, "X'{}'", value),
+            Token::Comma { .. } => f.write_str(","),
+            Token::Whitespace { value, .. } => write!(f, "{}", value),
+            Token::DoubleEq { .. } => f.write_str("=="),
+            Token::Spaceship { .. } => f.write_str("<=>"),
+            Token::Eq { .. } => f.write_str("="),
+            Token::Neq { .. } => f.write_str("<>"),
+            Token::Lt { .. } => f.write_str("<"),
+            Token::Gt { .. } => f.write_str(">"),
+            Token::LtEq { .. } => f.write_str("<="),
+            Token::GtEq { .. } => f.write_str(">="),
+            Token::Plus { .. } => f.write_str("+"),
+            Token::Minus { .. } => f.write_str("-"),
+            Token::Mul { .. } => f.write_str("*"),
+            Token::Div { .. } => f.write_str("/"),
+            Token::StringConcat { .. } => f.write_str("||"),
+            Token::Mod { .. } => f.write_str("%"),
+            Token::LParen { .. } => f.write_str("("),
+            Token::RParen { .. } => f.write_str(")"),
+            Token::Period { .. } => f.write_str("."),
+            Token::Colon { .. } => f.write_str(":"),
+            Token::DoubleColon { .. } => f.write_str("::"),
+            Token::SemiColon { .. } => f.write_str(";"),
+            Token::Backslash { .. } => f.write_str("\\"),
+            Token::LBracket { .. } => f.write_str("["),
+            Token::RBracket { .. } => f.write_str("]"),
+            Token::Ampersand { .. } => f.write_str("&"),
+            Token::Caret { .. } => f.write_str("^"),
+            Token::Pipe { .. } => f.write_str("|"),
+            Token::LBrace { .. } => f.write_str("{"),
+            Token::RBrace { .. } => f.write_str("}"),
+            Token::RArrow { .. } => f.write_str("=>"),
+            Token::Sharp { .. } => f.write_str("#"),
+            Token::ExclamationMark { .. } => f.write_str("!"),
+            Token::DoubleExclamationMark { .. } => f.write_str("!!"),
+            Token::Tilde { .. } => f.write_str("~"),
+            Token::TildeAsterisk { .. } => f.write_str("~*"),
+            Token::ExclamationMarkTilde { .. } => f.write_str("!~"),
+            Token::ExclamationMarkTildeAsterisk { .. } => f.write_str("!~*"),
+            Token::AtSign { .. } => f.write_str("@"),
+            Token::ShiftLeft { .. } => f.write_str("<<"),
+            Token::ShiftRight { .. } => f.write_str(">>"),
+            Token::PGSquareRoot { .. } => f.write_str("|/"),
+            Token::PGCubeRoot { .. } => f.write_str("||/"),
         }
     }
 }
@@ -205,16 +273,19 @@ impl Token {
     pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
         let word_uppercase = word.to_uppercase();
-        Token::Word(Word {
-            value: word.to_string(),
-            quote_style,
-            keyword: if quote_style == None {
-                let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str());
-                keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x])
-            } else {
-                Keyword::NoKeyword
+        Token::Word {
+            value: Word {
+                value: word.to_string(),
+                quote_style,
+                keyword: if quote_style == None {
+                    let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str());
+                    keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x])
+                } else {
+                    Keyword::NoKeyword
+                },
             },
-        })
+            span: Default::default(),
+        }
     }
 }
@@ -285,6 +356,7 @@ pub struct TokenizerError {
     pub message: String,
     pub line: u64,
     pub col: u64,
+    pub span: Span,
 }

 impl fmt::Display for TokenizerError {
@@ -306,6 +378,7 @@ pub struct Tokenizer<'a> {
     query: &'a str,
     line: u64,
     col: u64,
+    start: usize,
 }

 impl<'a> Tokenizer<'a> {
@@ -316,58 +389,259 @@
             query,
             line: 1,
             col: 1,
+            start: 0,
         }
     }

     /// Tokenize the statement and produce a vector of tokens
     pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
-        let mut peekable = self.query.chars().peekable();
+        let mut peekable = self.query.char_indices().peekable();

-        let mut tokens: Vec<Token> = vec![];
+        let mut tokens = vec![];

-        while let Some(token) = self.next_token(&mut peekable)? {
+        self.start = peekable
+            .peek()
+            .map(|(start, _)| *start)
+            .unwrap_or(self.query.len());
+        loop {
+            let mut token = match self.next_token(&mut peekable)? {
+                None => break,
+                Some(token) => token,
+            };
+            let end = peekable
+                .peek()
+                .map(|(start, _)| *start)
+                .unwrap_or(self.query.len());
+            let s: Span = (self.start..end).into();
+            match &mut token {
+                Token::EOF { span } => {
+                    *span = s;
+                }
+                Token::Word { span, .. } => {
+                    *span = s;
+                }
+                Token::Number { span, .. } => {
+                    *span = s;
+                }
+                Token::Char { span, .. } => {
+                    *span = s;
+                }
+                Token::SingleQuotedString { span, .. } => {
+                    *span = s;
+                }
+                Token::NationalStringLiteral { span, .. } => {
+                    *span = s;
+                }
+                Token::HexStringLiteral { span, .. } => {
+                    *span = s;
+                }
+                Token::Comma { span } => {
+                    *span = s;
+                }
+                Token::Whitespace { span, .. } => {
+                    *span = s;
+                }
+                Token::DoubleEq { span } => {
+                    *span = s;
+                }
+                Token::Eq { span } => {
+                    *span = s;
+                }
+                Token::Neq { span } => {
+                    *span = s;
+                }
+                Token::Lt { span } => {
+                    *span = s;
+                }
+                Token::Gt { span } => {
+                    *span = s;
+                }
+                Token::LtEq { span } => {
+                    *span = s;
+                }
+                Token::GtEq { span } => {
+                    *span = s;
+                }
+                Token::Spaceship { span } => {
+                    *span = s;
+                }
+                Token::Plus { span } => {
+                    *span = s;
+                }
+                Token::Minus { span } => {
+                    *span = s;
+                }
+                Token::Mul { span } => {
+                    *span = s;
+                }
+                Token::Div { span } => {
+                    *span = s;
+                }
+                Token::Mod { span } => {
+                    *span = s;
+                }
+                Token::StringConcat { span } => {
+                    *span = s;
+                }
+                Token::LParen { span } => {
+                    *span = s;
+                }
+                Token::RParen { span } => {
+                    *span = s;
+                }
+                Token::Period { span } => {
+                    *span = s;
+                }
+                Token::Colon { span } => {
+                    *span = s;
+                }
+                Token::DoubleColon { span } => {
+                    *span = s;
+                }
+                Token::SemiColon { span } => {
+                    *span = s;
+                }
+                Token::Backslash { span } => {
+                    *span = s;
+                }
+                Token::LBracket { span } => {
+                    *span = s;
+                }
+                Token::RBracket { span } => {
+                    *span = s;
+                }
+                Token::Ampersand { span } => {
+                    *span = s;
+                }
+                Token::Pipe { span } => {
+                    *span = s;
+                }
+                Token::Caret { span } => {
+                    *span = s;
+                }
+                Token::LBrace { span } => {
+                    *span = s;
+                }
+                Token::RBrace { span } => {
+                    *span = s;
+                }
+                Token::RArrow { span } => {
+                    *span = s;
+                }
+                Token::Sharp { span } => {
+                    *span = s;
+                }
+                Token::Tilde { span } => {
+                    *span = s;
+                }
+                Token::TildeAsterisk { span } => {
+                    *span = s;
+                }
+                Token::ExclamationMarkTilde { span } => {
+                    *span = s;
+                }
+                Token::ExclamationMarkTildeAsterisk { span } => {
+                    *span = s;
+                }
+                Token::ShiftLeft { span } => {
+                    *span = s;
+                }
+                Token::ShiftRight { span } => {
+                    *span = s;
+                }
+                Token::ExclamationMark { span } => {
+                    *span = s;
+                }
+                Token::DoubleExclamationMark { span } => {
+                    *span = s;
+                }
+                Token::AtSign { span } => {
+                    *span = s;
+                }
+                Token::PGSquareRoot { span } => {
+                    *span = s;
+                }
+                Token::PGCubeRoot { span } => {
+                    *span = s;
+                }
+            }
             match &token {
-                Token::Whitespace(Whitespace::Newline) => {
+                Token::Whitespace {
+                    value: Whitespace::Newline,
+                    ..
+                } => {
                     self.line += 1;
                     self.col = 1;
                 }
-
-                Token::Whitespace(Whitespace::Tab) => self.col += 4,
-                Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64,
-                Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
-                Token::Number(s, _) => self.col += s.len() as u64,
-                Token::SingleQuotedString(s) => self.col += s.len() as u64,
+                Token::Whitespace {
+                    value: Whitespace::Tab,
+                    ..
+                } => self.col += 4,
+                Token::Word { value, .. } if value.quote_style == None => {
+                    self.col += value.value.len() as u64
+                }
+                Token::Word { value, .. } if value.quote_style != None => {
+                    self.col += value.value.len() as u64 + 2
+                }
+                Token::Number { value, .. } => self.col += value.len() as u64,
+                Token::SingleQuotedString { value, .. } => self.col += value.len() as u64,
                 _ => self.col += 1,
             }

             tokens.push(token);
+            self.start = end;
         }
         Ok(tokens)
     }
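Since the tokenizer now walks `CharIndices`, `self.start` and `end` are byte offsets into the original query, so a token's span can slice the source string directly, even across multi-byte characters. A sketch of reading spans back out, assuming this patch is applied:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::span::Spanned;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let sql = "SELECT café";
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, sql);
    for token in tokenizer.tokenize().unwrap() {
        if let Some(r) = token.span().range() {
            // Spans are byte ranges, so indexing the query is valid here.
            println!("{:?} -> {:?}", &sql[r.clone()], r);
        }
    }
}
```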

     /// Get the next token or return None
-    fn next_token(&self, chars: &mut Peekable<Chars<'_>>) -> Result<Option<Token>, TokenizerError> {
+    fn next_token(
+        &self,
+        chars: &mut Peekable<CharIndices<'_>>,
+    ) -> Result<Option<Token>, TokenizerError> {
         //println!("next_token: {:?}", chars.peek());
+        let span = Span::new();
         match chars.peek() {
-            Some(&ch) => match ch {
-                ' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
-                '\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
-                '\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
+            Some(&ch) => match ch.1 {
+                ' ' => self.consume_and_return(
+                    chars,
+                    Token::Whitespace {
+                        value: Whitespace::Space,
+                        span,
+                    },
+                ),
+                '\t' => self.consume_and_return(
+                    chars,
+                    Token::Whitespace {
+                        value: Whitespace::Tab,
+                        span,
+                    },
+                ),
+                '\n' => self.consume_and_return(
+                    chars,
+                    Token::Whitespace {
+                        value: Whitespace::Newline,
+                        span,
+                    },
+                ),
                 '\r' => {
                     // Emit a single Whitespace::Newline token for \r and \r\n
                     chars.next();
-                    if let Some('\n') = chars.peek() {
+                    if let Some((_, '\n')) = chars.peek() {
                         chars.next();
                     }
-                    Ok(Some(Token::Whitespace(Whitespace::Newline)))
+                    Ok(Some(Token::Whitespace {
+                        value: Whitespace::Newline,
+                        span,
+                    }))
                 }
                 'N' => {
                     chars.next(); // consume, to check the next char
                     match chars.peek() {
-                        Some('\'') => {
+                        Some((_, '\'')) => {
                             // N'...' - a <national string literal>
-                            let s = self.tokenize_single_quoted_string(chars)?;
-                            Ok(Some(Token::NationalStringLiteral(s)))
+                            let value = self.tokenize_single_quoted_string(chars)?;
+                            Ok(Some(Token::NationalStringLiteral { value, span }))
                         }
                         _ => {
                             // regular identifier starting with an "N"
@@ -381,10 +655,10 @@
                 x @ 'x' | x @ 'X' => {
                     chars.next(); // consume, to check the next char
                     match chars.peek() {
-                        Some('\'') => {
+                        Some((_, '\'')) => {
                             // X'...' - a <hex string literal>
-                            let s = self.tokenize_single_quoted_string(chars)?;
-                            Ok(Some(Token::HexStringLiteral(s)))
+                            let value = self.tokenize_single_quoted_string(chars)?;
+                            Ok(Some(Token::HexStringLiteral { value, span }))
                         }
                         _ => {
                             // regular identifier starting with an "X"
@@ -399,222 +673,256 @@
                     let s = self.tokenize_word(ch, chars);

                     if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
-                        let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| {
-                            matches!(ch, '0'..='9' | '.')
-                        });
+                        let mut value =
+                            peeking_take_while(&mut s.char_indices().peekable(), |ch| {
+                                matches!(ch, '0'..='9' | '.')
+                            });
                         let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.'));
-                        s += s2.as_str();
-                        return Ok(Some(Token::Number(s, false)));
+                        value += s2.as_str();
+                        return Ok(Some(Token::Number {
+                            value,
+                            long: false,
+                            span,
+                        }));
                     }
                     Ok(Some(Token::make_word(&s, None)))
                 }
                 // string
                 '\'' => {
-                    let s = self.tokenize_single_quoted_string(chars)?;
-
-                    Ok(Some(Token::SingleQuotedString(s)))
+                    let value = self.tokenize_single_quoted_string(chars)?;
+                    Ok(Some(Token::SingleQuotedString { value, span }))
                 }
                 // delimited (quoted) identifier
                 quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
                     chars.next(); // consume the opening quote
                     let quote_end = Word::matching_end_quote(quote_start);
                     let s = peeking_take_while(chars, |ch| ch != quote_end);
-                    if chars.next() == Some(quote_end) {
+                    if chars.next().map(|(_, c)| c) == Some(quote_end) {
                         Ok(Some(Token::make_word(&s, Some(quote_start))))
                     } else {
-                        self.tokenizer_error(format!(
-                            "Expected close delimiter '{}' before EOF.",
-                            quote_end
-                        ))
+                        self.tokenizer_error(
+                            self.start..self.query.len(),
+                            format!("Expected close delimiter '{}' before EOF.", quote_end),
+                        )
                     }
                 }
                 // numbers and period
                 '0'..='9' | '.' => {
-                    let mut s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));
+                    let mut value = peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));

                     // match binary literal that starts with 0x
-                    if s == "0" && chars.peek() == Some(&'x') {
+                    if value == "0" && chars.peek().map(|(_, c)| c) == Some(&'x') {
                         chars.next();
-                        let s2 = peeking_take_while(
+                        let value = peeking_take_while(
                             chars,
                             |ch| matches!(ch, '0'..='9' | 'A'..='F' | 'a'..='f'),
                         );
-                        return Ok(Some(Token::HexStringLiteral(s2)));
+                        return Ok(Some(Token::HexStringLiteral { value, span }));
                     }

                     // match one period
-                    if let Some('.') = chars.peek() {
-                        s.push('.');
+                    if let Some((_, '.')) = chars.peek() {
+                        value.push('.');
                         chars.next();
                     }
-                    s += &peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));
+                    value += &peeking_take_while(chars, |ch| matches!(ch, '0'..='9'));

                     // No number -> Token::Period
-                    if s == "." {
-                        return Ok(Some(Token::Period));
+                    if value == "." {
+                        return Ok(Some(Token::Period { span }));
                     }

-                    let long = if chars.peek() == Some(&'L') {
+                    let long = if let Some((_, 'L')) = chars.peek() {
                         chars.next();
                         true
                     } else {
                         false
                     };
-                    Ok(Some(Token::Number(s, long)))
+                    Ok(Some(Token::Number { value, long, span }))
                 }
                 // punctuation
-                '(' => self.consume_and_return(chars, Token::LParen),
-                ')' => self.consume_and_return(chars, Token::RParen),
-                ',' => self.consume_and_return(chars, Token::Comma),
+                '(' => self.consume_and_return(chars, Token::LParen { span }),
+                ')' => self.consume_and_return(chars, Token::RParen { span }),
+                ',' => self.consume_and_return(chars, Token::Comma { span }),
                 // operators
                 '-' => {
                     chars.next(); // consume the '-'
                     match chars.peek() {
-                        Some('-') => {
+                        Some((_, '-')) => {
                             chars.next(); // consume the second '-', starting a single-line comment
                             let comment = self.tokenize_single_line_comment(chars);
-                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
-                                prefix: "--".to_owned(),
-                                comment,
-                            })))
+                            Ok(Some(Token::Whitespace {
+                                value: Whitespace::SingleLineComment {
+                                    prefix: "--".to_owned(),
+                                    comment,
+                                },
+                                span,
+                            }))
                         }
                         // a regular '-' operator
-                        _ => Ok(Some(Token::Minus)),
+                        _ => Ok(Some(Token::Minus { span })),
                     }
                 }
                 '/' => {
                     chars.next(); // consume the '/'
                     match chars.peek() {
-                        Some('*') => {
+                        Some((_, '*')) => {
                             chars.next(); // consume the '*', starting a multi-line comment
                             self.tokenize_multiline_comment(chars)
                         }
-                        Some('/') if dialect_of!(self is SnowflakeDialect) => {
+                        Some((_, '/')) if dialect_of!(self is SnowflakeDialect) => {
                             chars.next(); // consume the second '/', starting a snowflake single-line comment
                             let comment = self.tokenize_single_line_comment(chars);
-                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
-                                prefix: "//".to_owned(),
-                                comment,
-                            })))
+                            Ok(Some(Token::Whitespace {
+                                value: Whitespace::SingleLineComment {
+                                    prefix: "//".to_owned(),
+                                    comment,
+                                },
+                                span,
+                            }))
                         }
                         // a regular '/' operator
-                        _ => Ok(Some(Token::Div)),
+                        _ => Ok(Some(Token::Div { span })),
                     }
                 }
-                '+' => self.consume_and_return(chars, Token::Plus),
-                '*' => self.consume_and_return(chars, Token::Mul),
-                '%' => self.consume_and_return(chars, Token::Mod),
+                '+' => self.consume_and_return(chars, Token::Plus { span }),
+                '*' => self.consume_and_return(chars, Token::Mul { span }),
+                '%' => self.consume_and_return(chars, Token::Mod { span }),
                 '|' => {
                     chars.next(); // consume the '|'
                     match chars.peek() {
-                        Some('/') => self.consume_and_return(chars, Token::PGSquareRoot),
-                        Some('|') => {
+                        Some((_, '/')) => {
+                            self.consume_and_return(chars, Token::PGSquareRoot { span })
+                        }
+                        Some((_, '|')) => {
                             chars.next(); // consume the second '|'
                             match chars.peek() {
-                                Some('/') => self.consume_and_return(chars, Token::PGCubeRoot),
-                                _ => Ok(Some(Token::StringConcat)),
+                                Some((_, '/')) => {
+                                    self.consume_and_return(chars, Token::PGCubeRoot { span })
+                                }
+                                _ => Ok(Some(Token::StringConcat { span })),
                             }
                         }
                         // Bitshift '|' operator
-                        _ => Ok(Some(Token::Pipe)),
+                        _ => Ok(Some(Token::Pipe { span })),
                     }
                 }
                 '=' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('>') => self.consume_and_return(chars, Token::RArrow),
-                        _ => Ok(Some(Token::Eq)),
+                        Some((_, '>')) => self.consume_and_return(chars, Token::RArrow { span }),
+                        _ => Ok(Some(Token::Eq { span })),
                     }
                 }
                 '!' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('=') => self.consume_and_return(chars, Token::Neq),
-                        Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
-                        Some('~') => {
+                        Some((_, '=')) => self.consume_and_return(chars, Token::Neq { span }),
+                        Some((_, '!')) => {
+                            self.consume_and_return(chars, Token::DoubleExclamationMark { span })
+                        }
+                        Some((_, '~')) => {
                             chars.next();
                             match chars.peek() {
-                                Some('*') => self
-                                    .consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
-                                _ => Ok(Some(Token::ExclamationMarkTilde)),
+                                Some((_, '*')) => self.consume_and_return(
+                                    chars,
+                                    Token::ExclamationMarkTildeAsterisk { span },
+                                ),
+                                _ => Ok(Some(Token::ExclamationMarkTilde { span })),
                             }
                         }
-                        _ => Ok(Some(Token::ExclamationMark)),
+                        _ => Ok(Some(Token::ExclamationMark { span })),
                     }
                 }
                 '<' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('=') => {
+                        Some((_, '=')) => {
                             chars.next();
                             match chars.peek() {
-                                Some('>') => self.consume_and_return(chars, Token::Spaceship),
-                                _ => Ok(Some(Token::LtEq)),
+                                Some((_, '>')) => {
+                                    self.consume_and_return(chars, Token::Spaceship { span })
+                                }
+                                _ => Ok(Some(Token::LtEq { span })),
                             }
                         }
-                        Some('>') => self.consume_and_return(chars, Token::Neq),
-                        Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
-                        _ => Ok(Some(Token::Lt)),
+                        Some((_, '>')) => self.consume_and_return(chars, Token::Neq { span }),
+                        Some((_, '<')) => self.consume_and_return(chars, Token::ShiftLeft { span }),
+                        _ => Ok(Some(Token::Lt { span })),
                     }
                 }
                 '>' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('=') => self.consume_and_return(chars, Token::GtEq),
-                        Some('>') => self.consume_and_return(chars, Token::ShiftRight),
-                        _ => Ok(Some(Token::Gt)),
+                        Some((_, '=')) => self.consume_and_return(chars, Token::GtEq { span }),
+                        Some((_, '>')) => {
+                            self.consume_and_return(chars, Token::ShiftRight { span })
+                        }
+                        _ => Ok(Some(Token::Gt { span })),
                     }
                 }
                 ':' => {
                     chars.next();
                     match chars.peek() {
-                        Some(':') => self.consume_and_return(chars, Token::DoubleColon),
-                        _ => Ok(Some(Token::Colon)),
+                        Some((_, ':')) => {
+                            self.consume_and_return(chars, Token::DoubleColon { span })
+                        }
+                        _ => Ok(Some(Token::Colon { span })),
                     }
                 }
-                ';' => self.consume_and_return(chars, Token::SemiColon),
-                '\\' => self.consume_and_return(chars, Token::Backslash),
-                '[' => self.consume_and_return(chars, Token::LBracket),
-                ']' => self.consume_and_return(chars, Token::RBracket),
-                '&' => self.consume_and_return(chars, Token::Ampersand),
-                '^' => self.consume_and_return(chars, Token::Caret),
-                '{' => self.consume_and_return(chars, Token::LBrace),
-                '}' => self.consume_and_return(chars, Token::RBrace),
+                ';' => self.consume_and_return(chars, Token::SemiColon { span }),
+                '\\' => self.consume_and_return(chars, Token::Backslash { span }),
+                '[' => self.consume_and_return(chars, Token::LBracket { span }),
+                ']' => self.consume_and_return(chars, Token::RBracket { span }),
+                '&' => self.consume_and_return(chars, Token::Ampersand { span }),
+                '^' => self.consume_and_return(chars, Token::Caret { span }),
+                '{' => self.consume_and_return(chars, Token::LBrace { span }),
+                '}' => self.consume_and_return(chars, Token::RBrace { span }),
                 '#' if dialect_of!(self is SnowflakeDialect) => {
                     chars.next(); // consume the '#', starting a snowflake single-line comment
                     let comment = self.tokenize_single_line_comment(chars);
-                    Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
-                        prefix: "#".to_owned(),
-                        comment,
-                    })))
+                    Ok(Some(Token::Whitespace {
+                        value: Whitespace::SingleLineComment {
+                            prefix: "#".to_owned(),
+                            comment,
+                        },
+                        span,
+                    }))
                 }
                 '~' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some('*') => self.consume_and_return(chars, Token::TildeAsterisk),
-                        _ => Ok(Some(Token::Tilde)),
+                        Some((_, '*')) => {
+                            self.consume_and_return(chars, Token::TildeAsterisk { span })
+                        }
+                        _ => Ok(Some(Token::Tilde { span })),
                     }
                 }
-                '#' => self.consume_and_return(chars, Token::Sharp),
-                '@' => self.consume_and_return(chars, Token::AtSign),
-                other => self.consume_and_return(chars, Token::Char(other)),
+                '#' => self.consume_and_return(chars, Token::Sharp { span }),
+                '@' => self.consume_and_return(chars, Token::AtSign { span }),
+                other => self.consume_and_return(chars, Token::Char { value: other, span }),
             },
             None => Ok(None),
         }
     }

-    fn tokenizer_error<R>(&self, message: impl Into<String>) -> Result<R, TokenizerError> {
+    fn tokenizer_error<R>(
+        &self,
+        span: impl Into<Span>,
+        message: impl Into<String>,
+    ) -> Result<R, TokenizerError> {
         Err(TokenizerError {
             message: message.into(),
+            span: span.into(),
             col: self.col,
             line: self.line,
         })
     }

     // Consume characters until newline
-    fn tokenize_single_line_comment(&self, chars: &mut Peekable<Chars<'_>>) -> String {
+    fn tokenize_single_line_comment(&self, chars: &mut Peekable<CharIndices<'_>>) -> String {
         let mut comment = peeking_take_while(chars, |ch| ch != '\n');
-        if let Some(ch) = chars.next() {
+        if let Some((_, ch)) = chars.next() {
             assert_eq!(ch, '\n');
             comment.push(ch);
         }
@@ -622,7 +930,7 @@
     }

     /// Tokenize an identifier or keyword, after the first char is already consumed.
-    fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
+    fn tokenize_word(&self, first_char: char, chars: &mut Peekable<CharIndices<'_>>) -> String {
         let mut s = first_char.to_string();
         s.push_str(&peeking_take_while(chars, |ch| {
             self.dialect.is_identifier_part(ch)
@@ -633,7 +941,7 @@

     /// Read a single quoted string, starting with the opening quote.
     fn tokenize_single_quoted_string(
         &self,
-        chars: &mut Peekable<Chars<'_>>,
+        chars: &mut Peekable<CharIndices<'_>>,
     ) -> Result<String, TokenizerError> {
         let mut s = String::new();
         chars.next(); // consume the opening quote
@@ -642,48 +950,52 @@
         let mut is_escaped = false;
         while let Some(&ch) = chars.peek() {
             match ch {
-                '\'' => {
+                (_, '\'') => {
                     chars.next(); // consume
                     if is_escaped {
-                        s.push(ch);
+                        s.push('\'');
                         is_escaped = false;
-                    } else if chars.peek().map(|c| *c == '\'').unwrap_or(false) {
-                        s.push(ch);
+                    } else if chars.peek().map(|c| c.1 == '\'').unwrap_or(false) {
+                        s.push('\'');
                         chars.next();
                     } else {
                         return Ok(s);
                     }
                 }
-                '\\' => {
+                (_, '\\') => {
                     if dialect_of!(self is MySqlDialect) {
                         is_escaped = !is_escaped;
                     } else {
-                        s.push(ch);
+                        s.push('\\');
                     }
                     chars.next();
                 }
-                _ => {
+                (_, ch) => {
                     chars.next(); // consume
                     s.push(ch);
                 }
             }
         }
-        self.tokenizer_error("Unterminated string literal")
+        let end = chars.peek().map(|(i, _)| *i).unwrap_or(self.query.len());
+        self.tokenizer_error(self.start..end, "Unterminated string literal")
     }

     fn tokenize_multiline_comment(
         &self,
-        chars: &mut Peekable<Chars<'_>>,
+        chars: &mut Peekable<CharIndices<'_>>,
     ) -> Result<Option<Token>, TokenizerError> {
         let mut s = String::new();
         let mut maybe_closing_comment = false;
         // TODO: deal with nested comments
         loop {
             match chars.next() {
-                Some(ch) => {
+                Some((_, ch)) => {
                     if maybe_closing_comment {
                         if ch == '/' {
-                            break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
+                            break Ok(Some(Token::Whitespace {
+                                value: Whitespace::MultiLineComment(s),
+                                span: Span::default(),
+                            }));
                         } else {
                             s.push('*');
                         }
@@ -693,7 +1005,12 @@
                         s.push(ch);
                     }
                 }
-                None => break self.tokenizer_error("Unexpected EOF while in a multi-line comment"),
+                None => {
+                    break self.tokenizer_error(
+                        self.start..self.query.len(),
+                        "Unexpected EOF while in a multi-line comment",
+                    )
+                }
             }
         }
     }
@@ -701,7 +1018,7 @@
     #[allow(clippy::unnecessary_wraps)]
     fn consume_and_return(
         &self,
-        chars: &mut Peekable<Chars<'_>>,
+        chars: &mut Peekable<CharIndices<'_>>,
         t: Token,
     ) -> Result<Option<Token>, TokenizerError> {
         chars.next();
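With the new `span` argument threaded through `tokenizer_error`, lexer failures now report the byte range of the offending fragment in addition to line and column. A hedged sketch of observing that on an unterminated literal; the query is assumed to be the same `select 'foo` that the updated test below pins to span `7..11`:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let sql = "select 'foo"; // the quote opens at byte 7 and never closes
    let dialect = GenericDialect {};
    let mut tokenizer = Tokenizer::new(&dialect, sql);
    let err = tokenizer.tokenize().unwrap_err();
    assert_eq!(err.span.range(), Some(7..11));
}
```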
@@ -713,11 +1030,11 @@
 /// Return the characters read as String, and keep the first non-matching
 /// char available as `chars.next()`.
 fn peeking_take_while(
-    chars: &mut Peekable<Chars<'_>>,
+    chars: &mut Peekable<CharIndices<'_>>,
     mut predicate: impl FnMut(char) -> bool,
 ) -> String {
     let mut s = String::new();
-    while let Some(&ch) = chars.peek() {
+    while let Some(ch) = chars.peek().map(|(_, ch)| *ch) {
         if predicate(ch) {
             chars.next(); // consume
             s.push(ch);
@@ -739,6 +1056,7 @@ mod tests {
             message: "test".into(),
             line: 1,
             col: 1,
+            span: Span::new(),
         };
         #[cfg(feature = "std")]
         {
@@ -757,8 +1075,15 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Number(String::from("1"), false),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Number {
+                value: String::from("1"),
+                long: false,
+                span: Span::new(),
+            },
         ];

         compare(expected, tokens);
@@ -773,8 +1098,15 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Number(String::from(".1"), false),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Number {
+                value: String::from(".1"),
+                long: false,
+                span: Span::new(),
+            },
         ];

         compare(expected, tokens);
@@ -789,11 +1121,18 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("sqrt", None),
-            Token::LParen,
-            Token::Number(String::from("1"), false),
-            Token::RParen,
+            Token::LParen { span: Span::new() },
+            Token::Number {
+                value: String::from("1"),
+                long: false,
+                span: Span::new(),
+            },
+            Token::RParen { span: Span::new() },
         ];

         compare(expected, tokens);
@@ -808,12 +1147,27 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::SingleQuotedString(String::from("a")),
-            Token::Whitespace(Whitespace::Space),
-            Token::StringConcat,
-            Token::Whitespace(Whitespace::Space),
-            Token::SingleQuotedString(String::from("b")),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::SingleQuotedString {
+                value: String::from("a"),
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::StringConcat { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::SingleQuotedString {
+                value: String::from("b"),
+                span: Span::new(),
+            },
         ];

         compare(expected, tokens);
@@ -827,15 +1181,30 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("one", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::Pipe,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Pipe { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("two", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::Caret,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Caret { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("three", None),
         ];
         compare(expected, tokens);
@@ -851,32 +1220,68 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("true"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("XOR"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("true"),
-            Token::Comma,
-            Token::Whitespace(Whitespace::Space),
+            Token::Comma { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("false"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("XOR"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("false"),
-            Token::Comma,
-            Token::Whitespace(Whitespace::Space),
+            Token::Comma { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("true"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("XOR"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("false"),
-            Token::Comma,
-            Token::Whitespace(Whitespace::Space),
+            Token::Comma { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("false"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("XOR"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("true"),
         ];
         compare(expected, tokens);
@@ -891,24 +1296,59 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Mul,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Mul { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("FROM"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("customer", None),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("WHERE"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("id", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::Eq,
-            Token::Whitespace(Whitespace::Space),
-            Token::Number(String::from("1"), false),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Eq { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Number {
+                value: String::from("1"),
+                long: false,
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("LIMIT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Number(String::from("5"), false),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Number {
+                value: String::from("5"),
+                long: false,
+                span: Span::new(),
+            },
         ];

         compare(expected, tokens);
@@ -923,22 +1363,50 @@ mod tests {
         let expected = vec![
             Token::make_keyword("EXPLAIN"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Mul,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Mul { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("FROM"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("customer", None),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("WHERE"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("id", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::Eq,
-            Token::Whitespace(Whitespace::Space),
-            Token::Number(String::from("1"), false),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Eq { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Number {
+                value: String::from("1"),
+                long: false,
+                span: Span::new(),
+            },
         ];

         compare(expected, tokens);
@@ -953,24 +1421,55 @@ mod tests {

         let expected = vec![
             Token::make_keyword("EXPLAIN"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("ANALYZE"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Mul,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Mul { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("FROM"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("customer", None),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("WHERE"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("id", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::Eq,
-            Token::Whitespace(Whitespace::Space),
-            Token::Number(String::from("1"), false),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Eq { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Number {
+                value: String::from("1"),
+                long: false,
+                span: Span::new(),
+            },
         ];

         compare(expected, tokens);
@@ -985,20 +1484,44 @@ mod tests {

         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Mul,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Mul { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("FROM"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("customer", None),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("WHERE"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("salary", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::Neq,
-            Token::Whitespace(Whitespace::Space),
-            Token::SingleQuotedString(String::from("Not Provided")),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Neq { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::SingleQuotedString {
+                value: String::from("Not Provided"),
+                span: Span::new(),
+            },
         ];

         compare(expected, tokens);
@@ -1013,12 +1536,30 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         // println!("tokens: {:#?}", tokens);
         let expected = vec![
-            Token::Whitespace(Whitespace::Newline),
-            Token::Char('م'),
-            Token::Char('ص'),
-            Token::Char('ط'),
-            Token::Char('ف'),
-            Token::Char('ى'),
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'م',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ص',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ط',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ف',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ى',
+                span: Span::new(),
+            },
             Token::make_word("h", None),
         ];
         compare(expected, tokens);
@@ -1031,7 +1572,10 @@ mod tests {
         let dialect = GenericDialect {};
         let mut tokenizer = Tokenizer::new(&dialect, &sql);
         let tokens = tokenizer.tokenize().unwrap();
-        let expected = vec![Token::SingleQuotedString("foo\r\nbar\nbaz".to_string())];
+        let expected = vec![Token::SingleQuotedString {
+            value: "foo\r\nbar\nbaz".to_string(),
+            span: Span::new(),
+        }];
         compare(expected, tokens);
     }
@@ -1046,7 +1590,8 @@ mod tests {
             Err(TokenizerError {
                 message: "Unterminated string literal".to_string(),
                 line: 1,
-                col: 8
+                col: 8,
+                span: (7..11).into()
             })
         );
     }
@@ -1060,21 +1605,54 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         // println!("tokens: {:#?}", tokens);
         let expected = vec![
-            Token::Whitespace(Whitespace::Newline),
-            Token::Whitespace(Whitespace::Newline),
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Mul,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Mul { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("FROM"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("table"),
-            Token::Whitespace(Whitespace::Tab),
-            Token::Char('م'),
-            Token::Char('ص'),
-            Token::Char('ط'),
-            Token::Char('ف'),
-            Token::Char('ى'),
+            Token::Whitespace {
+                value: Whitespace::Tab,
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'م',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ص',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ط',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ف',
+                span: Span::new(),
+            },
+            Token::Char {
+                value: 'ى',
+                span: Span::new(),
+            },
             Token::make_word("h", None),
         ];
         compare(expected, tokens);
@@ -1088,11 +1666,11 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
             Token::make_word("FUNCTION", None),
-            Token::LParen,
+            Token::LParen { span: Span::new() },
             Token::make_word("key", None),
-            Token::RArrow,
+            Token::RArrow { span: Span::new() },
             Token::make_word("value", None),
-            Token::RParen,
+            Token::RParen { span: Span::new() },
         ];
         compare(expected, tokens);
     }
@@ -1106,9 +1684,15 @@ mod tests {

         let expected = vec![
             Token::make_word("a", None),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("IS"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("NULL"),
         ];

         compare(expected, tokens);
@@ -1123,12 +1707,23 @@ mod tests {
         let mut tokenizer = Tokenizer::new(&dialect, &sql);
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
-            Token::Number("0".to_string(), false),
-            Token::Whitespace(Whitespace::SingleLineComment {
-                prefix: "--".to_string(),
-                comment: "this is a comment\n".to_string(),
-            }),
-            Token::Number("1".to_string(), false),
+            Token::Number {
+                value: "0".to_string(),
+                long: false,
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::SingleLineComment {
+                    prefix: "--".to_string(),
+                    comment: "this is a comment\n".to_string(),
+                },
+                span: Span::new(),
+            },
+            Token::Number {
+                value: "1".to_string(),
+                long: false,
+                span: Span::new(),
+            },
         ];
         compare(expected, tokens);
     }
@@ -1140,10 +1735,13 @@ mod tests {
         let dialect = GenericDialect {};
         let mut tokenizer = Tokenizer::new(&dialect, &sql);
         let tokens = tokenizer.tokenize().unwrap();
-        let expected = vec![Token::Whitespace(Whitespace::SingleLineComment {
-            prefix: "--".to_string(),
-            comment: "this is a comment".to_string(),
-        })];
+        let expected = vec![Token::Whitespace {
+            value: Whitespace::SingleLineComment {
+                prefix: "--".to_string(),
+                comment: "this is a comment".to_string(),
+            },
+            span: Span::new(),
+        }];
         compare(expected, tokens);
     }
@@ -1155,11 +1753,20 @@ mod tests {
         let mut tokenizer = Tokenizer::new(&dialect, &sql);
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
-            Token::Number("0".to_string(), false),
-            Token::Whitespace(Whitespace::MultiLineComment(
-                "multi-line\n* /comment".to_string(),
-            )),
-            Token::Number("1".to_string(), false),
+            Token::Number {
+                value: "0".to_string(),
+                long: false,
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::MultiLineComment("multi-line\n* /comment".to_string()),
+                span: Span::new(),
+            },
+            Token::Number {
+                value: "1".to_string(),
+                long: false,
+                span: Span::new(),
+            },
         ];
         compare(expected, tokens);
     }
@@ -1172,9 +1779,18 @@ mod tests {
         let mut tokenizer = Tokenizer::new(&dialect, &sql);
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
-            Token::Whitespace(Whitespace::Newline),
-            Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())),
-            Token::Whitespace(Whitespace::Newline),
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::MultiLineComment("* Comment *".to_string()),
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
         ];
         compare(expected, tokens);
     }
@@ -1190,7 +1806,8 @@ mod tests {
             Err(TokenizerError {
                 message: "Expected close delimiter '\"' before EOF.".to_string(),
                 line: 1,
-                col: 1
+                col: 1,
+                span: (0..4).into(),
            })
        );
    }
@@ -1204,13 +1821,25 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
             Token::make_word("line1", None),
-            Token::Whitespace(Whitespace::Newline),
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
             Token::make_word("line2", None),
-            Token::Whitespace(Whitespace::Newline),
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
             Token::make_word("line3", None),
-            Token::Whitespace(Whitespace::Newline),
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
             Token::make_word("line4", None),
-            Token::Whitespace(Whitespace::Newline),
+            Token::Whitespace {
+                value: Whitespace::Newline,
+                span: Span::new(),
+            },
         ];
         compare(expected, tokens);
     }
@@ -1223,15 +1852,34 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("TOP"),
-            Token::Whitespace(Whitespace::Space),
-            Token::Number(String::from("5"), false),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Number {
+                value: String::from("5"),
+                long: false,
+                span: Span::new(),
+            },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("bar", Some('[')),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_keyword("FROM"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("foo", None),
         ];
         compare(expected, tokens);
@@ -1245,33 +1893,81 @@ mod tests {
         let tokens = tokenizer.tokenize().unwrap();
         let expected = vec![
             Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("col", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::Tilde,
-            Token::Whitespace(Whitespace::Space),
-            Token::SingleQuotedString("^a".into()),
-            Token::Comma,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::Tilde { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::SingleQuotedString {
+                value: "^a".into(),
+                span: Span::new(),
+            },
+            Token::Comma { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("col", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::TildeAsterisk,
-            Token::Whitespace(Whitespace::Space),
-            Token::SingleQuotedString("^a".into()),
-            Token::Comma,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::TildeAsterisk { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::SingleQuotedString {
+                value: "^a".into(),
+                span: Span::new(),
+            },
+            Token::Comma { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("col", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::ExclamationMarkTilde,
-            Token::Whitespace(Whitespace::Space),
-            Token::SingleQuotedString("^a".into()),
-            Token::Comma,
-            Token::Whitespace(Whitespace::Space),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::ExclamationMarkTilde { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::SingleQuotedString {
+                value: "^a".into(),
+                span: Span::new(),
+            },
+            Token::Comma { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
             Token::make_word("col", None),
-            Token::Whitespace(Whitespace::Space),
-            Token::ExclamationMarkTildeAsterisk,
-            Token::Whitespace(Whitespace::Space),
-            Token::SingleQuotedString("^a".into()),
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::ExclamationMarkTildeAsterisk { span: Span::new() },
+            Token::Whitespace {
+                value: Whitespace::Space,
+                span: Span::new(),
+            },
+            Token::SingleQuotedString {
+                value: "^a".into(),
+                span: Span::new(),
+            },
         ];
         compare(expected, tokens);
     }
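Usage note (illustrative, not part of the patch): with the token enum's variants now carrying a `span` field, callers can recover source positions by pattern matching. A minimal sketch, assuming only what the hunks above show (`Tokenizer::new`/`tokenize`, struct-variant tokens, and a `Span` that can be copied out of a match); `first_word_span` is a hypothetical helper, not an API of the crate:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::span::Span;
    use sqlparser::tokenizer::{Token, Tokenizer};

    // Hypothetical helper: return the span of the first Word token, if any.
    fn first_word_span(sql: &str) -> Option<Span> {
        let dialect = GenericDialect {};
        let mut tokenizer = Tokenizer::new(&dialect, sql);
        let tokens = tokenizer.tokenize().ok()?;
        tokens.iter().find_map(|token| match token {
            // Every variant carries its own `span` field after this change.
            Token::Word { span, .. } => Some(*span),
            _ => None,
        })
    }
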
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 07a0db524..297d8fbcb 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -25,6 +25,7 @@
 use sqlparser::ast::*;
 use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect};
 use sqlparser::keywords::ALL_KEYWORDS;
 use sqlparser::parser::{Parser, ParserError};
+use sqlparser::span::Span;
 use test_utils::{
     all_dialects, expr_from_projection, join, number, only, table, table_alias, TestedDialects,
 };
@@ -94,7 +95,10 @@ fn parse_insert_invalid() {
     let sql = "INSERT public.customer (id, name, active) VALUES (1, 2, 3)";
     let res = parse_sql_statements(sql);
     assert_eq!(
-        ParserError::ParserError("Expected one of INTO or OVERWRITE, found: public".to_string()),
+        ParserError::ParserError {
+            message: "Expected one of INTO or OVERWRITE, found: public".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -174,14 +178,20 @@ fn parse_update() {
     let sql = "UPDATE t WHERE 1";
     let res = parse_sql_statements(sql);
     assert_eq!(
-        ParserError::ParserError("Expected SET, found: WHERE".to_string()),
+        ParserError::ParserError {
+            message: "Expected SET, found: WHERE".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );

     let sql = "UPDATE t SET a = 1 extrabadstuff";
     let res = parse_sql_statements(sql);
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: extrabadstuff".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: extrabadstuff".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -336,7 +346,10 @@ fn parse_select_all() {
 fn parse_select_all_distinct() {
     let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer");
     assert_eq!(
-        ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
+        ParserError::ParserError {
+            message: "Cannot specify both ALL and DISTINCT".to_string(),
+            span: Span::new()
+        },
         result.unwrap_err(),
     );
 }
@@ -367,7 +380,10 @@ fn parse_select_wildcard() {
     let sql = "SELECT * + * FROM foo;";
     let result = parse_sql_statements(sql);
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: +".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: +".to_string(),
+            span: Span::new()
+        },
         result.unwrap_err(),
     );
 }
@@ -407,13 +423,19 @@ fn parse_column_aliases() {
 fn test_eof_after_as() {
     let res = parse_sql_statements("SELECT foo AS");
     assert_eq!(
-        ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()),
+        ParserError::ParserError {
+            message: "Expected an identifier after AS, found: EOF".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );

     let res = parse_sql_statements("SELECT 1 FROM foo AS");
     assert_eq!(
-        ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()),
+        ParserError::ParserError {
+            message: "Expected an identifier after AS, found: EOF".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -422,7 +444,10 @@ fn test_eof_after_as() {
 fn test_no_infix_error() {
     let res = Parser::parse_sql(&GenericDialect {}, "ASSERT-URA<<");
     assert_eq!(
-        ParserError::ParserError("No infix parser for token ShiftLeft".to_string()),
+        ParserError::ParserError {
+            message: "No infix parser for token '<<'".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -467,7 +492,10 @@ fn parse_select_count_distinct() {
     let sql = "SELECT COUNT(ALL DISTINCT + x) FROM customer";
     let res = parse_sql_statements(sql);
     assert_eq!(
-        ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
+        ParserError::ParserError {
+            message: "Cannot specify both ALL and DISTINCT".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -483,7 +511,10 @@ fn parse_not() {
 fn parse_invalid_infix_not() {
     let res = parse_sql_statements("SELECT c FROM t WHERE c NOT (");
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: NOT".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: NOT".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err(),
     );
 }
@@ -1279,7 +1310,10 @@ fn parse_extract() {
     let res = parse_sql_statements("SELECT EXTRACT(MILLISECOND FROM d)");
     assert_eq!(
-        ParserError::ParserError("Expected date/time field, found: MILLISECOND".to_string()),
+        ParserError::ParserError {
+            message: "Expected date/time field, found: MILLISECOND".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -2035,7 +2069,7 @@ fn parse_alter_table_alter_column_type() {
         &format!("{} ALTER COLUMN is_active TYPE TEXT", alter_stmt),
     );
     assert_eq!(
-        ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()),
+        ParserError::ParserError{message: "Expected SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string(), span: Span::new()},
         res.unwrap_err()
     );
@@ -2047,7 +2081,10 @@
         ),
     );
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: USING".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: USING".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -2056,15 +2093,19 @@ fn parse_bad_constraint() {
     let res = parse_sql_statements("ALTER TABLE tab ADD");
     assert_eq!(
-        ParserError::ParserError("Expected identifier, found: EOF".to_string()),
+        ParserError::ParserError {
+            message: "Expected identifier, found: EOF".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );

     let res = parse_sql_statements("CREATE TABLE tab (foo int,");
     assert_eq!(
-        ParserError::ParserError(
-            "Expected column name or constraint definition, found: EOF".to_string()
-        ),
+        ParserError::ParserError {
+            message: "Expected column name or constraint definition, found: EOF".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -2362,13 +2403,19 @@ fn parse_literal_interval() {
     let result = parse_sql_statements("SELECT INTERVAL '1' SECOND TO SECOND");
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: SECOND".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: SECOND".to_string(),
+            span: Span::new()
+        },
         result.unwrap_err(),
     );

     let result = parse_sql_statements("SELECT INTERVAL '10' HOUR (1) TO HOUR (2)");
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: (".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: (".to_string(),
+            span: Span::new()
+        },
         result.unwrap_err(),
     );
@@ -2427,13 +2474,19 @@ fn parse_table_function() {
     let res = parse_sql_statements("SELECT * FROM TABLE '1' AS a");
     assert_eq!(
-        ParserError::ParserError("Expected (, found: \'1\'".to_string()),
+        ParserError::ParserError {
+            message: "Expected (, found: \'1\'".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );

     let res = parse_sql_statements("SELECT * FROM TABLE (FUN(a) AS a");
     assert_eq!(
-        ParserError::ParserError("Expected ), found: AS".to_string()),
+        ParserError::ParserError {
+            message: "Expected ), found: AS".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -2796,7 +2849,10 @@ fn parse_natural_join() {
     let sql = "SELECT * FROM t1 natural";
     assert_eq!(
-        ParserError::ParserError("Expected a join type after NATURAL, found: EOF".to_string()),
+        ParserError::ParserError {
+            message: "Expected a join type after NATURAL, found: EOF".to_string(),
+            span: Span::new()
+        },
         parse_sql_statements(sql).unwrap_err(),
     );
 }
@@ -2862,7 +2918,10 @@ fn parse_join_syntax_variants() {
     let res = parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1");
     assert_eq!(
-        ParserError::ParserError("Expected APPLY, found: JOIN".to_string()),
+        ParserError::ParserError {
+            message: "Expected APPLY, found: JOIN".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -3062,7 +3121,10 @@ fn parse_multiple_statements() {
     // Check that forgetting the semicolon results in an error:
     let res = parse_sql_statements(&(sql1.to_owned() + " " + sql2_kw + sql2_rest));
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: ".to_string() + sql2_kw),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: ".to_string() + sql2_kw,
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -3134,7 +3196,10 @@ fn parse_trim() {
     one_statement_parses_to("SELECT TRIM(' foo ')", "SELECT TRIM(' foo ')");

     assert_eq!(
-        ParserError::ParserError("Expected ), found: 'xyz'".to_owned()),
+        ParserError::ParserError {
+            message: "Expected ), found: 'xyz'".to_owned(),
+            span: Span::new()
+        },
         parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err()
     );
 }
@@ -3164,17 +3229,21 @@ fn parse_exists_subquery() {
     let res = parse_sql_statements("SELECT EXISTS (");
     assert_eq!(
-        ParserError::ParserError(
-            "Expected SELECT, VALUES, or a subquery in the query body, found: EOF".to_string()
-        ),
+        ParserError::ParserError {
+            message: "Expected SELECT, VALUES, or a subquery in the query body, found: EOF"
+                .to_string(),
+            span: Span::new()
+        },
         res.unwrap_err(),
     );

     let res = parse_sql_statements("SELECT EXISTS (NULL)");
     assert_eq!(
-        ParserError::ParserError(
-            "Expected SELECT, VALUES, or a subquery in the query body, found: NULL".to_string()
-        ),
+        ParserError::ParserError {
+            message: "Expected SELECT, VALUES, or a subquery in the query body, found: NULL"
+                .to_string(),
+            span: Span::new()
+        },
         res.unwrap_err(),
     );
 }
@@ -3364,13 +3433,19 @@ fn parse_drop_table() {
     let sql = "DROP TABLE";
     assert_eq!(
-        ParserError::ParserError("Expected identifier, found: EOF".to_string()),
+        ParserError::ParserError {
+            message: "Expected identifier, found: EOF".to_string(),
+            span: Span::new()
+        },
         parse_sql_statements(sql).unwrap_err(),
     );

     let sql = "DROP TABLE IF EXISTS foo, bar CASCADE RESTRICT";
     assert_eq!(
-        ParserError::ParserError("Cannot specify both CASCADE and RESTRICT in DROP".to_string()),
+        ParserError::ParserError {
+            message: "Cannot specify both CASCADE and RESTRICT in DROP".to_string(),
+            span: Span::new()
+        },
         parse_sql_statements(sql).unwrap_err(),
     );
 }
@@ -3396,7 +3471,10 @@ fn parse_drop_view() {
 fn parse_invalid_subquery_without_parens() {
     let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz");
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: 1".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: 1".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -3610,18 +3688,21 @@ fn lateral_derived() {
     let sql = "SELECT * FROM customer LEFT JOIN LATERAL generate_series(1, customer.id)";
     let res = parse_sql_statements(sql);
     assert_eq!(
-        ParserError::ParserError(
-            "Expected subquery after LATERAL, found: generate_series".to_string()
-        ),
+        ParserError::ParserError {
+            message: "Expected subquery after LATERAL, found: generate_series".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );

     let sql = "SELECT * FROM a LEFT JOIN LATERAL (b CROSS JOIN c)";
     let res = parse_sql_statements(sql);
     assert_eq!(
-        ParserError::ParserError(
-            "Expected SELECT, VALUES, or a subquery in the query body, found: b".to_string()
-        ),
+        ParserError::ParserError {
+            message: "Expected SELECT, VALUES, or a subquery in the query body, found: b"
+                .to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
@@ -3680,19 +3761,28 @@ fn parse_start_transaction() {
     let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD");
     assert_eq!(
-        ParserError::ParserError("Expected isolation level, found: BAD".to_string()),
+        ParserError::ParserError {
+            message: "Expected isolation level, found: BAD".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );

     let res = parse_sql_statements("START TRANSACTION BAD");
     assert_eq!(
-        ParserError::ParserError("Expected end of statement, found: BAD".to_string()),
+        ParserError::ParserError {
+            message: "Expected end of statement, found: BAD".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );

     let res = parse_sql_statements("START TRANSACTION READ ONLY,");
     assert_eq!(
-        ParserError::ParserError("Expected transaction mode, found: EOF".to_string()),
+        ParserError::ParserError {
+            message: "Expected transaction mode, found: EOF".to_string(),
+            span: Span::new()
+        },
         res.unwrap_err()
     );
 }
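Usage note (illustrative, not part of the patch): the parser tests above compare against `Span::new()` placeholders, which suggests the span value may not participate in equality, though src/span.rs is not shown in this diff. A minimal sketch of how a caller might surface the span carried by a failed parse; `report` is a hypothetical helper, and `Span` is printed with `{:?}` since its field layout is not shown here:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;
    use sqlparser::span::Spanned;

    // Hypothetical helper: print a parse failure together with its span.
    fn report(sql: &str) {
        match Parser::parse_sql(&GenericDialect {}, sql) {
            Ok(statements) => println!("parsed {} statement(s)", statements.len()),
            // `e` displays as "sql parser error: ..."; `e.span()` comes from
            // the `Spanned` impl this patch adds to `ParserError`.
            Err(e) => eprintln!("{} at {:?}", e, e.span()),
        }
    }
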
in DROP".to_string()), + ParserError::ParserError { + message: "Cannot specify both CASCADE and RESTRICT in DROP".to_string(), + span: Span::new() + }, parse_sql_statements(sql).unwrap_err(), ); } @@ -3396,7 +3471,10 @@ fn parse_drop_view() { fn parse_invalid_subquery_without_parens() { let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: 1".to_string()), + ParserError::ParserError { + message: "Expected end of statement, found: 1".to_string(), + span: Span::new() + }, res.unwrap_err() ); } @@ -3610,18 +3688,21 @@ fn lateral_derived() { let sql = "SELECT * FROM customer LEFT JOIN LATERAL generate_series(1, customer.id)"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError( - "Expected subquery after LATERAL, found: generate_series".to_string() - ), + ParserError::ParserError { + message: "Expected subquery after LATERAL, found: generate_series".to_string(), + span: Span::new() + }, res.unwrap_err() ); let sql = "SELECT * FROM a LEFT JOIN LATERAL (b CROSS JOIN c)"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: b".to_string() - ), + ParserError::ParserError { + message: "Expected SELECT, VALUES, or a subquery in the query body, found: b" + .to_string(), + span: Span::new() + }, res.unwrap_err() ); } @@ -3680,19 +3761,28 @@ fn parse_start_transaction() { let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD"); assert_eq!( - ParserError::ParserError("Expected isolation level, found: BAD".to_string()), + ParserError::ParserError { + message: "Expected isolation level, found: BAD".to_string(), + span: Span::new() + }, res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION BAD"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: BAD".to_string()), + ParserError::ParserError { + message: "Expected end of statement, found: BAD".to_string(), + span: Span::new() + }, res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION READ ONLY,"); assert_eq!( - ParserError::ParserError("Expected transaction mode, found: EOF".to_string()), + ParserError::ParserError { + message: "Expected transaction mode, found: EOF".to_string(), + span: Span::new() + }, res.unwrap_err() ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 60d9c1cb4..922cdac57 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -16,6 +16,7 @@ #[macro_use] mod test_utils; +use sqlparser::span::Span; use test_utils::*; #[cfg(feature = "bigdecimal")] @@ -327,25 +328,37 @@ fn parse_create_table_if_not_exists() { fn parse_bad_if_not_exists() { let res = pg().parse_sql_statements("CREATE TABLE NOT EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + ParserError::ParserError { + message: "Expected end of statement, found: EXISTS".to_string(), + span: Span::new() + }, res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + ParserError::ParserError { + message: "Expected end of statement, found: EXISTS".to_string(), + span: Span::new() + }, res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: 
uk_cities".to_string()), + ParserError::ParserError { + message: "Expected end of statement, found: uk_cities".to_string(), + span: Span::new() + }, res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF NOT uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: NOT".to_string()), + ParserError::ParserError { + message: "Expected end of statement, found: NOT".to_string(), + span: Span::new() + }, res.unwrap_err() ); } @@ -468,23 +481,26 @@ fn parse_set() { assert_eq!( pg_and_generic().parse_sql_statements("SET"), - Err(ParserError::ParserError( - "Expected identifier, found: EOF".to_string() - )), + Err(ParserError::ParserError { + message: "Expected identifier, found: EOF".to_string(), + span: Span::new() + }), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a b"), - Err(ParserError::ParserError( - "Expected equals sign or TO, found: b".to_string() - )), + Err(ParserError::ParserError { + message: "Expected equals sign or TO, found: b".to_string(), + span: Span::new() + }), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a ="), - Err(ParserError::ParserError( - "Expected variable value, found: EOF".to_string() - )), + Err(ParserError::ParserError { + message: "Expected variable value, found: EOF".to_string(), + span: Span::new() + }), ); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index c08632a15..b97cf0ddd 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -16,6 +16,7 @@ #[macro_use] mod test_utils; +use sqlparser::span::Span; use test_utils::*; use sqlparser::ast::*; @@ -43,12 +44,18 @@ fn test_snowflake_single_line_tokenize() { let expected = vec![ Token::make_keyword("CREATE"), - Token::Whitespace(Whitespace::Space), + Token::Whitespace { + value: Whitespace::Space, + span: Span::new(), + }, Token::make_keyword("TABLE"), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "#".to_string(), - comment: " this is a comment \n".to_string(), - }), + Token::Whitespace { + value: Whitespace::SingleLineComment { + prefix: "#".to_string(), + comment: " this is a comment \n".to_string(), + }, + span: Span::new(), + }, Token::make_word("table_1", None), ]; @@ -60,12 +67,18 @@ fn test_snowflake_single_line_tokenize() { let expected = vec![ Token::make_keyword("CREATE"), - Token::Whitespace(Whitespace::Space), + Token::Whitespace { + value: Whitespace::Space, + span: Span::new(), + }, Token::make_keyword("TABLE"), - Token::Whitespace(Whitespace::SingleLineComment { - prefix: "//".to_string(), - comment: " this is a comment \n".to_string(), - }), + Token::Whitespace { + value: Whitespace::SingleLineComment { + prefix: "//".to_string(), + comment: " this is a comment \n".to_string(), + }, + span: Span::new(), + }, Token::make_word("table_1", None), ]; @@ -133,13 +146,19 @@ fn test_single_table_in_parenthesis_with_alias() { let res = snowflake_and_generic().parse_sql_statements("SELECT * FROM (a NATURAL JOIN b) c"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: c".to_string()), + ParserError::ParserError { + message: "Expected end of statement, found: c".to_string(), + span: Span::new() + }, res.unwrap_err() ); let res = snowflake().parse_sql_statements("SELECT * FROM (a b) c"); assert_eq!( - ParserError::ParserError("duplicate alias b".to_string()), + ParserError::ParserError { + message: "duplicate alias b".to_string(), + span: Span::new() + }, res.unwrap_err() ); }