diff --git a/src/ast/operator.rs b/src/ast/operator.rs
index e70df344a..db6ed0564 100644
--- a/src/ast/operator.rs
+++ b/src/ast/operator.rs
@@ -151,7 +151,7 @@ pub enum BinaryOperator {
     Arrow,
     /// The `->>` operator.
     ///
-    /// On PostgreSQL, this operator that extracts a JSON object field or JSON
+    /// On PostgreSQL, this operator extracts a JSON object field or JSON
     /// array element and converts it to text, for example `'{"a":"b"}'::json
     /// ->> 'a'` or `[1, 2, 3]'::json ->> 2`.
     ///
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 22e0baeb2..fc45545d4 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -24,12 +24,13 @@ mod redshift;
 mod snowflake;
 mod sqlite;
 
-use crate::ast::{Expr, Statement};
 use core::any::{Any, TypeId};
 use core::fmt::Debug;
 use core::iter::Peekable;
 use core::str::Chars;
 
+use log::debug;
+
 pub use self::ansi::AnsiDialect;
 pub use self::bigquery::BigQueryDialect;
 pub use self::clickhouse::ClickHouseDialect;
@@ -43,8 +44,11 @@ pub use self::postgresql::PostgreSqlDialect;
 pub use self::redshift::RedshiftSqlDialect;
 pub use self::snowflake::SnowflakeDialect;
 pub use self::sqlite::SQLiteDialect;
+use crate::ast::{Expr, Statement};
 pub use crate::keywords;
+use crate::keywords::Keyword;
 use crate::parser::{Parser, ParserError};
+use crate::tokenizer::Token;
 
 #[cfg(not(feature = "std"))]
 use alloc::boxed::Box;
@@ -300,13 +304,172 @@ pub trait Dialect: Debug + Any {
         // return None to fall back to the default behavior
         None
     }
+
+    /// Get the precedence of the next token. This "full" method means all precedence logic can remain
+    /// in the dialect, while still allowing the `get_next_precedence` method to be overridden, with the
+    /// option to fall back to the default behavior.
+    ///
+    /// Higher number => higher precedence
+    fn get_next_precedence_full(&self, parser: &Parser) -> Result<u8, ParserError> {
+        if let Some(precedence) = self.get_next_precedence(parser) {
+            return precedence;
+        }
+
+        let token = parser.peek_token();
+        debug!("get_next_precedence() {:?}", token);
+        match token.token {
+            Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
+            Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
+            Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
+
+            Token::Word(w) if w.keyword == Keyword::AT => {
+                match (
+                    parser.peek_nth_token(1).token,
+                    parser.peek_nth_token(2).token,
+                ) {
+                    (Token::Word(w), Token::Word(w2))
+                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
+                    {
+                        Ok(AT_TZ_PREC)
+                    }
+                    _ => Ok(UNKNOWN_PREC),
+                }
+            }
+
+            Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
+                // The precedence of NOT varies depending on keyword that
+                // follows it. If it is followed by IN, BETWEEN, or LIKE,
+                // it takes on the precedence of those tokens. Otherwise, it
+                // is not an infix operator, and therefore has zero
+                // precedence.
+                Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
+                Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
+                Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
+                Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
+                _ => Ok(UNKNOWN_PREC),
+            },
+            Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
+            Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
+            Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
+            Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
+            Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
+            Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
+            Token::Eq
+            | Token::Lt
+            | Token::LtEq
+            | Token::Neq
+            | Token::Gt
+            | Token::GtEq
+            | Token::DoubleEq
+            | Token::Tilde
+            | Token::TildeAsterisk
+            | Token::ExclamationMarkTilde
+            | Token::ExclamationMarkTildeAsterisk
+            | Token::DoubleTilde
+            | Token::DoubleTildeAsterisk
+            | Token::ExclamationMarkDoubleTilde
+            | Token::ExclamationMarkDoubleTildeAsterisk
+            | Token::Spaceship => Ok(EQ_PREC),
+            Token::Pipe => Ok(PIPE_PREC),
+            Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
+            Token::Ampersand => Ok(AMPERSAND_PREC),
+            Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
+            Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
+                Ok(MUL_DIV_MOD_OP_PREC)
+            }
+            Token::DoubleColon
+            | Token::ExclamationMark
+            | Token::LBracket
+            | Token::Overlap
+            | Token::CaretAt => Ok(DOUBLE_COLON_PREC),
+            // Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
+            Token::Arrow
+            | Token::LongArrow
+            | Token::HashArrow
+            | Token::HashLongArrow
+            | Token::AtArrow
+            | Token::ArrowAt
+            | Token::HashMinus
+            | Token::AtQuestion
+            | Token::AtAt
+            | Token::Question
+            | Token::QuestionAnd
+            | Token::QuestionPipe
+            | Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
+            _ => Ok(UNKNOWN_PREC),
+        }
+    }
+
     /// Dialect-specific statement parser override
     fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
         // return None to fall back to the default behavior
         None
     }
+
+    /// The following precedence values are used directly by `Parser` or in dialects,
+    /// so they have to be made public by the dialect.
+    fn prec_double_colon(&self) -> u8 {
+        DOUBLE_COLON_PREC
+    }
+
+    fn prec_mul_div_mod_op(&self) -> u8 {
+        MUL_DIV_MOD_OP_PREC
+    }
+
+    fn prec_plus_minus(&self) -> u8 {
+        PLUS_MINUS_PREC
+    }
+
+    fn prec_between(&self) -> u8 {
+        BETWEEN_PREC
+    }
+
+    fn prec_like(&self) -> u8 {
+        LIKE_PREC
+    }
+
+    fn prec_unary_not(&self) -> u8 {
+        UNARY_NOT_PREC
+    }
+
+    fn prec_unknown(&self) -> u8 {
+        UNKNOWN_PREC
+    }
 }
 
+// Define the lexical precedence of operators.
+//
+// Uses <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> (APPROXIMATELY) as a reference
+// higher number = higher precedence
+//
+// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
+// actually has higher precedence than addition.
+// See <https://postgrespro.com/list/thread-id/2673331>.
+const DOUBLE_COLON_PREC: u8 = 50;
+const AT_TZ_PREC: u8 = 41;
+const MUL_DIV_MOD_OP_PREC: u8 = 40;
+const PLUS_MINUS_PREC: u8 = 30;
+const XOR_PREC: u8 = 24;
+const AMPERSAND_PREC: u8 = 23;
+const CARET_PREC: u8 = 22;
+const PIPE_PREC: u8 = 21;
+const BETWEEN_PREC: u8 = 20;
+const EQ_PREC: u8 = 20;
+const LIKE_PREC: u8 = 19;
+const IS_PREC: u8 = 17;
+const PG_OTHER_PREC: u8 = 16;
+const UNARY_NOT_PREC: u8 = 15;
+const AND_PREC: u8 = 10;
+const OR_PREC: u8 = 5;
+const UNKNOWN_PREC: u8 = 0;
+
 impl dyn Dialect {
     #[inline]
     pub fn is<T: Dialect>(&self) -> bool {
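
The hunk above moves the default precedence table into the `Dialect` trait. A minimal sketch (not part of this diff) of how a downstream crate could use the new hook: override `get_next_precedence` for a single token and return `None` otherwise, so that `get_next_precedence_full` falls back to the defaults. `MyDialect` and the choice of giving `|` the same precedence as `+`/`-` are hypothetical.

use sqlparser::dialect::Dialect;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::Token;

#[derive(Debug)]
struct MyDialect;

impl Dialect for MyDialect {
    fn is_identifier_start(&self, ch: char) -> bool {
        ch.is_ascii_alphabetic() || ch == '_'
    }

    fn is_identifier_part(&self, ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_'
    }

    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
        // Give `|` the same precedence as `+`/`-` in this dialect only; every
        // other token returns None and keeps the default behavior above.
        match parser.peek_token().token {
            Token::Pipe => Some(Ok(self.prec_plus_minus())),
            _ => None,
        }
    }
}
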
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index 8254e807b..293fb9e7d 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -9,6 +9,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+use log::debug;
 
 use crate::ast::{CommentObject, Statement};
 use crate::dialect::Dialect;
@@ -20,6 +21,23 @@ use crate::tokenizer::Token;
 #[derive(Debug)]
 pub struct PostgreSqlDialect {}
 
+const DOUBLE_COLON_PREC: u8 = 140;
+const BRACKET_PREC: u8 = 130;
+const COLLATE_PREC: u8 = 120;
+const AT_TZ_PREC: u8 = 110;
+const CARET_PREC: u8 = 100;
+const MUL_DIV_MOD_OP_PREC: u8 = 90;
+const PLUS_MINUS_PREC: u8 = 80;
+// there's no XOR operator in PostgreSQL, but support it here to avoid breaking tests
+const XOR_PREC: u8 = 75;
+const PG_OTHER_PREC: u8 = 70;
+const BETWEEN_LIKE_PREC: u8 = 60;
+const EQ_PREC: u8 = 50;
+const IS_PREC: u8 = 40;
+const NOT_PREC: u8 = 30;
+const AND_PREC: u8 = 20;
+const OR_PREC: u8 = 10;
+
 impl Dialect for PostgreSqlDialect {
     fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
         Some('"')
@@ -67,6 +85,102 @@ impl Dialect for PostgreSqlDialect {
         )
     }
 
+    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
+        let token = parser.peek_token();
+        debug!("get_next_precedence() {:?}", token);
+
+        let precedence = match token.token {
+            Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
+            Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
+            Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
+            Token::Word(w) if w.keyword == Keyword::AT => {
+                match (
+                    parser.peek_nth_token(1).token,
+                    parser.peek_nth_token(2).token,
+                ) {
+                    (Token::Word(w), Token::Word(w2))
+                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
+                    {
+                        AT_TZ_PREC
+                    }
+                    _ => self.prec_unknown(),
+                }
+            }
+
+            Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
+                // The precedence of NOT varies depending on keyword that
+                // follows it. If it is followed by IN, BETWEEN, or LIKE,
+                // it takes on the precedence of those tokens. Otherwise, it
+                // is not an infix operator, and therefore has zero
+                // precedence.
+                Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
+                Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
+                _ => self.prec_unknown(),
+            },
+            Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
+            Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
+            Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
+            Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
+            Token::Eq
+            | Token::Lt
+            | Token::LtEq
+            | Token::Neq
+            | Token::Gt
+            | Token::GtEq
+            | Token::DoubleEq
+            | Token::Tilde
+            | Token::TildeAsterisk
+            | Token::ExclamationMarkTilde
+            | Token::ExclamationMarkTildeAsterisk
+            | Token::DoubleTilde
+            | Token::DoubleTildeAsterisk
+            | Token::ExclamationMarkDoubleTilde
+            | Token::ExclamationMarkDoubleTildeAsterisk
+            | Token::Spaceship => EQ_PREC,
+            Token::Caret => CARET_PREC,
+            Token::Plus | Token::Minus => PLUS_MINUS_PREC,
+            Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
+            Token::DoubleColon => DOUBLE_COLON_PREC,
+            Token::LBracket => BRACKET_PREC,
+            Token::Arrow
+            | Token::LongArrow
+            | Token::HashArrow
+            | Token::HashLongArrow
+            | Token::AtArrow
+            | Token::ArrowAt
+            | Token::HashMinus
+            | Token::AtQuestion
+            | Token::AtAt
+            | Token::Question
+            | Token::QuestionAnd
+            | Token::QuestionPipe
+            | Token::ExclamationMark
+            | Token::Overlap
+            | Token::CaretAt
+            | Token::StringConcat
+            | Token::Sharp
+            | Token::ShiftRight
+            | Token::ShiftLeft
+            | Token::Pipe
+            | Token::Ampersand
+            | Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
+            _ => self.prec_unknown(),
+        };
+        Some(Ok(precedence))
+    }
+
     fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
         if parser.parse_keyword(Keyword::COMMENT) {
             Some(parse_comment(parser))
@@ -82,6 +196,26 @@ impl Dialect for PostgreSqlDialect {
     fn supports_group_by_expr(&self) -> bool {
         true
     }
+
+    fn prec_mul_div_mod_op(&self) -> u8 {
+        MUL_DIV_MOD_OP_PREC
+    }
+
+    fn prec_plus_minus(&self) -> u8 {
+        PLUS_MINUS_PREC
+    }
+
+    fn prec_between(&self) -> u8 {
+        BETWEEN_LIKE_PREC
+    }
+
+    fn prec_like(&self) -> u8 {
+        BETWEEN_LIKE_PREC
+    }
+
+    fn prec_unary_not(&self) -> u8 {
+        NOT_PREC
+    }
 }
 
 pub fn parse_comment(parser: &mut Parser) -> Result<Statement, ParserError> {
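
With the PostgreSQL-specific table above, `->` (PG_OTHER_PREC = 70) binds tighter than `=` (EQ_PREC = 50). A hedged usage sketch, not part of this diff, that exercises the dialect through the public `Parser::parse_sql` entry point:

use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // Under the constants above this should parse as `(foo -> 'bar') = 'spam'`,
    // which is what the new `arrow_precedence` test at the end of the diff asserts.
    let sql = "SELECT foo -> 'bar' = 'spam'";
    let statements = Parser::parse_sql(&PostgreSqlDialect {}, sql).expect("valid SQL");
    println!("{:?}", statements[0]);
}
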
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 9f1d7f27b..fe35d8da3 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -145,6 +145,15 @@ impl Dialect for SnowflakeDialect {
 
         None
     }
+
+    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
+        let token = parser.peek_token();
+        // Snowflake supports the `:` cast operator unlike other dialects
+        match token.token {
+            Token::Colon => Some(Ok(self.prec_double_colon())),
+            _ => None,
+        }
+    }
 }
 
 /// Parse snowflake create table statement.
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 725e24bfb..5d1f1f575 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -871,7 +871,7 @@ impl<'a> Parser<'a> {
     /// Parse a new expression.
     pub fn parse_expr(&mut self) -> Result<Expr, ParserError> {
         let _guard = self.recursion_counter.try_decrease()?;
-        self.parse_subexpr(0)
+        self.parse_subexpr(self.dialect.prec_unknown())
     }
 
     /// Parse tokens until the precedence changes.
@@ -893,7 +893,7 @@ impl<'a> Parser<'a> {
     }
 
     pub fn parse_interval_expr(&mut self) -> Result<Expr, ParserError> {
-        let precedence = 0;
+        let precedence = self.dialect.prec_unknown();
         let mut expr = self.parse_prefix()?;
 
         loop {
@@ -914,9 +914,9 @@ impl<'a> Parser<'a> {
         let token = self.peek_token();
 
         match token.token {
-            Token::Word(w) if w.keyword == Keyword::AND => Ok(0),
-            Token::Word(w) if w.keyword == Keyword::OR => Ok(0),
-            Token::Word(w) if w.keyword == Keyword::XOR => Ok(0),
+            Token::Word(w) if w.keyword == Keyword::AND => Ok(self.dialect.prec_unknown()),
+            Token::Word(w) if w.keyword == Keyword::OR => Ok(self.dialect.prec_unknown()),
+            Token::Word(w) if w.keyword == Keyword::XOR => Ok(self.dialect.prec_unknown()),
             _ => self.get_next_precedence(),
         }
     }
@@ -1075,7 +1075,7 @@ impl<'a> Parser<'a> {
                 self.parse_bigquery_struct_literal()
             }
             Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
-                let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?;
+                let expr = self.parse_subexpr(self.dialect.prec_plus_minus())?;
                 Ok(Expr::Prior(Box::new(expr)))
             }
             Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => {
@@ -1163,7 +1163,7 @@ impl<'a> Parser<'a> {
                 };
                 Ok(Expr::UnaryOp {
                     op,
-                    expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?),
+                    expr: Box::new(self.parse_subexpr(self.dialect.prec_mul_div_mod_op())?),
                 })
             }
             tok @ Token::DoubleExclamationMark
@@ -1183,7 +1183,7 @@ impl<'a> Parser<'a> {
                 };
                 Ok(Expr::UnaryOp {
                     op,
-                    expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
+                    expr: Box::new(self.parse_subexpr(self.dialect.prec_plus_minus())?),
                 })
             }
             Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
@@ -1712,12 +1712,13 @@ impl<'a> Parser<'a> {
     }
 
     pub fn parse_position_expr(&mut self, ident: Ident) -> Result<Expr, ParserError> {
+        let between_prec = self.dialect.prec_between();
         let position_expr = self.maybe_parse(|p| {
             // PARSE SELECT POSITION('@' in field)
             p.expect_token(&Token::LParen)?;
 
             // Parse the subexpr till the IN keyword
-            let expr = p.parse_subexpr(Self::BETWEEN_PREC)?;
+            let expr = p.parse_subexpr(between_prec)?;
             p.expect_keyword(Keyword::IN)?;
             let from = p.parse_expr()?;
             p.expect_token(&Token::RParen)?;
@@ -1967,12 +1968,12 @@ impl<'a> Parser<'a> {
             }
             _ => Ok(Expr::UnaryOp {
                 op: UnaryOperator::Not,
-                expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
+                expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
             }),
         },
         _ => Ok(Expr::UnaryOp {
             op: UnaryOperator::Not,
-            expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
+            expr: Box::new(self.parse_subexpr(self.dialect.prec_unary_not())?),
         }),
         }
     }
@@ -2648,7 +2649,7 @@ impl<'a> Parser<'a> {
             Ok(Expr::RLike {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 regexp,
             })
         } else if self.parse_keyword(Keyword::IN) {
@@ -2659,21 +2660,21 @@ impl<'a> Parser<'a> {
             Ok(Expr::Like {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 escape_char: self.parse_escape_char()?,
             })
         } else if self.parse_keyword(Keyword::ILIKE) {
             Ok(Expr::ILike {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 escape_char: self.parse_escape_char()?,
             })
         } else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) {
             Ok(Expr::SimilarTo {
                 negated,
                 expr: Box::new(expr),
-                pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?),
+                pattern: Box::new(self.parse_subexpr(self.dialect.prec_like())?),
                 escape_char: self.parse_escape_char()?,
             })
         } else {
@@ -2948,9 +2949,9 @@ impl<'a> Parser<'a> {
     pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> {
         // Stop parsing subexpressions for <low> and <high> on tokens with
         // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc.
-        let low = self.parse_subexpr(Self::BETWEEN_PREC)?;
+        let low = self.parse_subexpr(self.dialect.prec_between())?;
         self.expect_keyword(Keyword::AND)?;
-        let high = self.parse_subexpr(Self::BETWEEN_PREC)?;
+        let high = self.parse_subexpr(self.dialect.prec_between())?;
         Ok(Expr::Between {
             expr: Box::new(expr),
             negated,
@@ -2969,118 +2970,9 @@ impl<'a> Parser<'a> {
         })
     }
 
-    // Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference
-    // higher number = higher precedence
-    //
-    // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
-    // actually has higher precedence than addition.
-    // See https://postgrespro.com/list/thread-id/2673331.
-    const AT_TZ_PREC: u8 = 41;
-    const MUL_DIV_MOD_OP_PREC: u8 = 40;
-    const PLUS_MINUS_PREC: u8 = 30;
-    const XOR_PREC: u8 = 24;
-    const BETWEEN_PREC: u8 = 20;
-    const LIKE_PREC: u8 = 19;
-    const IS_PREC: u8 = 17;
-    const PG_OTHER_PREC: u8 = 16;
-    const UNARY_NOT_PREC: u8 = 15;
-    const AND_PREC: u8 = 10;
-    const OR_PREC: u8 = 5;
-
     /// Get the precedence of the next token
     pub fn get_next_precedence(&self) -> Result<u8, ParserError> {
-        // allow the dialect to override precedence logic
-        if let Some(precedence) = self.dialect.get_next_precedence(self) {
-            return precedence;
-        }
-
-        let token = self.peek_token();
-        debug!("get_next_precedence() {:?}", token);
-        let [token_0, token_1, token_2] = self.peek_tokens_with_location();
-        debug!("0: {token_0} 1: {token_1} 2: {token_2}");
-        match token.token {
-            Token::Word(w) if w.keyword == Keyword::OR => Ok(Self::OR_PREC),
-            Token::Word(w) if w.keyword == Keyword::AND => Ok(Self::AND_PREC),
-            Token::Word(w) if w.keyword == Keyword::XOR => Ok(Self::XOR_PREC),
-
-            Token::Word(w) if w.keyword == Keyword::AT => {
-                match (self.peek_nth_token(1).token, self.peek_nth_token(2).token) {
-                    (Token::Word(w), Token::Word(w2))
-                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
-                    {
-                        Ok(Self::AT_TZ_PREC)
-                    }
-                    _ => Ok(0),
-                }
-            }
-
-            Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token {
-                // The precedence of NOT varies depending on keyword that
-                // follows it. If it is followed by IN, BETWEEN, or LIKE,
-                // it takes on the precedence of those tokens. Otherwise, it
-                // is not an infix operator, and therefore has zero
-                // precedence.
-                Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
-                Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
-                Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
-                Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
-                Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
-                Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
-                Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
-                _ => Ok(0),
-            },
-            Token::Word(w) if w.keyword == Keyword::IS => Ok(Self::IS_PREC),
-            Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC),
-            Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC),
-            Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::LIKE_PREC),
-            Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(Self::LIKE_PREC),
-            Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC),
-            Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
-            Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
-            Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC),
-            Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC),
-            Token::Eq
-            | Token::Lt
-            | Token::LtEq
-            | Token::Neq
-            | Token::Gt
-            | Token::GtEq
-            | Token::DoubleEq
-            | Token::Tilde
-            | Token::TildeAsterisk
-            | Token::ExclamationMarkTilde
-            | Token::ExclamationMarkTildeAsterisk
-            | Token::DoubleTilde
-            | Token::DoubleTildeAsterisk
-            | Token::ExclamationMarkDoubleTilde
-            | Token::ExclamationMarkDoubleTildeAsterisk
-            | Token::Spaceship => Ok(20),
-            Token::Pipe => Ok(21),
-            Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
-            Token::Ampersand => Ok(23),
-            Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
-            Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
-                Ok(Self::MUL_DIV_MOD_OP_PREC)
-            }
-            Token::DoubleColon => Ok(50),
-            Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
-            Token::ExclamationMark => Ok(50),
-            Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
-            Token::Arrow
-            | Token::LongArrow
-            | Token::HashArrow
-            | Token::HashLongArrow
-            | Token::AtArrow
-            | Token::ArrowAt
-            | Token::HashMinus
-            | Token::AtQuestion
-            | Token::AtAt
-            | Token::Question
-            | Token::QuestionAnd
-            | Token::QuestionPipe
-            | Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC),
-            _ => Ok(0),
-        }
+        self.dialect.get_next_precedence_full(self)
     }
 
     /// Return the first non-whitespace token that has not yet been processed
@@ -8047,7 +7939,7 @@ impl<'a> Parser<'a> {
                 format_clause: None,
             })
         } else {
-            let body = self.parse_boxed_query_body(0)?;
+            let body = self.parse_boxed_query_body(self.dialect.prec_unknown())?;
 
             let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
                 let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index 7406bdd74..150f06913 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -4476,3 +4476,115 @@ fn test_unicode_string_literal() {
         }
     }
 }
+
+fn check_arrow_precedence(sql: &str, arrow_operator: BinaryOperator) {
+    assert_eq!(
+        pg().verified_stmt(sql),
+        Statement::Query(Box::new(Query {
+            with: None,
+            body: Box::new(SetExpr::Select(Box::new(Select {
+                distinct: None,
+                top: None,
+                projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp {
+                    left: Box::new(Expr::BinaryOp {
+                        left: Box::new(Expr::Identifier(Ident {
+                            value: "foo".to_string(),
+                            quote_style: None,
+                        })),
+                        op: arrow_operator,
+                        right: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))),
+                    }),
+                    op: BinaryOperator::Eq,
+                    right: Box::new(Expr::Value(Value::SingleQuotedString("spam".to_string()))),
+                })],
+                into: None,
+                from: vec![],
+                lateral_views: vec![],
+                prewhere: None,
+                selection: None,
+                group_by: GroupByExpr::Expressions(vec![], vec![]),
+                cluster_by: vec![],
+                distribute_by: vec![],
+                sort_by: vec![],
+                having: None,
+                named_window: vec![],
+                qualify: None,
+                window_before_qualify: false,
+                value_table_mode: None,
+                connect_by: None,
+            }))),
+            order_by: None,
+            limit: None,
+            limit_by: vec![],
+            offset: None,
+            fetch: None,
+            locks: vec![],
+            for_clause: None,
+            settings: None,
+            format_clause: None,
+        }))
+    )
+}
+
+#[test]
+fn arrow_precedence() {
+    check_arrow_precedence("SELECT foo -> 'bar' = 'spam'", BinaryOperator::Arrow);
+}
+
+#[test]
+fn long_arrow_precedence() {
+    check_arrow_precedence("SELECT foo ->> 'bar' = 'spam'", BinaryOperator::LongArrow);
+}
+
+#[test]
+fn arrow_cast_precedence() {
+    // check this matches postgres where you would need `(foo -> 'bar')::TEXT`
+    let stmt = pg().verified_stmt("SELECT foo -> 'bar'::TEXT");
+    assert_eq!(
+        stmt,
+        Statement::Query(Box::new(Query {
+            with: None,
+            body: Box::new(SetExpr::Select(Box::new(Select {
+                distinct: None,
+                top: None,
+                projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp {
+                    left: Box::new(Expr::Identifier(Ident {
+                        value: "foo".to_string(),
+                        quote_style: None,
+                    })),
+                    op: BinaryOperator::Arrow,
+                    right: Box::new(Expr::Cast {
+                        kind: CastKind::DoubleColon,
+                        expr: Box::new(Expr::Value(Value::SingleQuotedString("bar".to_string()))),
+                        data_type: DataType::Text,
+                        format: None,
+                    }),
+                })],
+                into: None,
+                from: vec![],
+                lateral_views: vec![],
+                prewhere: None,
+                selection: None,
+                group_by: GroupByExpr::Expressions(vec![], vec![]),
+                cluster_by: vec![],
+                distribute_by: vec![],
+                sort_by: vec![],
+                having: None,
+                named_window: vec![],
+                qualify: None,
+                window_before_qualify: false,
+                value_table_mode: None,
+                connect_by: None,
+            }))),
+            order_by: None,
+            limit: None,
+            limit_by: vec![],
+            offset: None,
+            fetch: None,
+            locks: vec![],
+            for_clause: None,
+            settings: None,
+            format_clause: None,
+        }))
+    )
+}
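
For contrast with the new tests, a hedged sketch (not part of the test suite) of how the two precedence tables diverge: under the default constants in `dialect/mod.rs` the `->` token (PG_OTHER_PREC = 16) binds looser than `=` (EQ_PREC = 20), while the PostgreSQL table makes it bind tighter, so the same input should nest differently.

use sqlparser::dialect::{GenericDialect, PostgreSqlDialect};
use sqlparser::parser::Parser;

fn main() {
    let sql = "SELECT foo -> 'bar' = 'spam'";
    let generic = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
    let pg = Parser::parse_sql(&PostgreSqlDialect {}, sql).unwrap();
    // Same token stream; only the nesting of the two binary operators in the
    // resulting AST is expected to differ between the dialects.
    assert_ne!(format!("{:?}", generic), format!("{:?}", pg));
    println!("generic:  {:?}", generic[0]);
    println!("postgres: {:?}", pg[0]);
}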