Skip to content

Commit f223530

Browse files
authored
Simple custom lexical precedence in PostgreSQL dialect (#1379)
1 parent 6a11a67 commit f223530

File tree

4 files changed

+137
-212
lines changed

4 files changed

+137
-212
lines changed

src/dialect/mod.rs

+81-108
Original file line numberDiff line numberDiff line change
@@ -354,13 +354,18 @@ pub trait Dialect: Debug + Any {
354354
if let Some(precedence) = self.get_next_precedence(parser) {
355355
return precedence;
356356
}
357+
macro_rules! p {
358+
($precedence:ident) => {
359+
self.prec_value(Precedence::$precedence)
360+
};
361+
}
357362

358363
let token = parser.peek_token();
359364
debug!("get_next_precedence_full() {:?}", token);
360365
match token.token {
361-
Token::Word(w) if w.keyword == Keyword::OR => Ok(OR_PREC),
362-
Token::Word(w) if w.keyword == Keyword::AND => Ok(AND_PREC),
363-
Token::Word(w) if w.keyword == Keyword::XOR => Ok(XOR_PREC),
366+
Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)),
367+
Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)),
368+
Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)),
364369

365370
Token::Word(w) if w.keyword == Keyword::AT => {
366371
match (
@@ -370,9 +375,9 @@ pub trait Dialect: Debug + Any {
370375
(Token::Word(w), Token::Word(w2))
371376
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
372377
{
373-
Ok(AT_TZ_PREC)
378+
Ok(p!(AtTz))
374379
}
375-
_ => Ok(UNKNOWN_PREC),
380+
_ => Ok(self.prec_unknown()),
376381
}
377382
}
378383

@@ -382,25 +387,25 @@ pub trait Dialect: Debug + Any {
382387
// it takes on the precedence of those tokens. Otherwise, it
383388
// is not an infix operator, and therefore has zero
384389
// precedence.
385-
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
386-
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
387-
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
388-
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
389-
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
390-
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
391-
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
392-
_ => Ok(UNKNOWN_PREC),
390+
Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
391+
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
392+
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
393+
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
394+
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
395+
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
396+
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
397+
_ => Ok(self.prec_unknown()),
393398
},
394-
Token::Word(w) if w.keyword == Keyword::IS => Ok(IS_PREC),
395-
Token::Word(w) if w.keyword == Keyword::IN => Ok(BETWEEN_PREC),
396-
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(BETWEEN_PREC),
397-
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(LIKE_PREC),
398-
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(LIKE_PREC),
399-
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(LIKE_PREC),
400-
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(LIKE_PREC),
401-
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(LIKE_PREC),
402-
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(BETWEEN_PREC),
403-
Token::Word(w) if w.keyword == Keyword::DIV => Ok(MUL_DIV_MOD_OP_PREC),
399+
Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
400+
Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
401+
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
402+
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
403+
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
404+
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
405+
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
406+
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
407+
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
408+
Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),
404409
Token::Eq
405410
| Token::Lt
406411
| Token::LtEq
@@ -416,20 +421,19 @@ pub trait Dialect: Debug + Any {
416421
| Token::DoubleTildeAsterisk
417422
| Token::ExclamationMarkDoubleTilde
418423
| Token::ExclamationMarkDoubleTildeAsterisk
419-
| Token::Spaceship => Ok(EQ_PREC),
420-
Token::Pipe => Ok(PIPE_PREC),
421-
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(CARET_PREC),
422-
Token::Ampersand => Ok(AMPERSAND_PREC),
423-
Token::Plus | Token::Minus => Ok(PLUS_MINUS_PREC),
424+
| Token::Spaceship => Ok(p!(Eq)),
425+
Token::Pipe => Ok(p!(Pipe)),
426+
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(p!(Caret)),
427+
Token::Ampersand => Ok(p!(Ampersand)),
428+
Token::Plus | Token::Minus => Ok(p!(PlusMinus)),
424429
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
425-
Ok(MUL_DIV_MOD_OP_PREC)
430+
Ok(p!(MulDivModOp))
426431
}
427432
Token::DoubleColon
428433
| Token::ExclamationMark
429434
| Token::LBracket
430435
| Token::Overlap
431-
| Token::CaretAt => Ok(DOUBLE_COLON_PREC),
432-
// Token::Colon if (self as dyn Dialect).is::<SnowflakeDialect>() => Ok(DOUBLE_COLON_PREC),
436+
| Token::CaretAt => Ok(p!(DoubleColon)),
433437
Token::Arrow
434438
| Token::LongArrow
435439
| Token::HashArrow
@@ -442,8 +446,8 @@ pub trait Dialect: Debug + Any {
442446
| Token::Question
443447
| Token::QuestionAnd
444448
| Token::QuestionPipe
445-
| Token::CustomBinaryOperator(_) => Ok(PG_OTHER_PREC),
446-
_ => Ok(UNKNOWN_PREC),
449+
| Token::CustomBinaryOperator(_) => Ok(p!(PgOther)),
450+
_ => Ok(self.prec_unknown()),
447451
}
448452
}
449453

@@ -457,88 +461,57 @@ pub trait Dialect: Debug + Any {
457461
None
458462
}
459463

460-
// The following precedence values are used directly by `Parse` or in dialects,
461-
// so have to be made public by the dialect.
462-
463-
/// Return the precedence of the `::` operator.
464+
/// Return the numeric lexical precedence of the given operator group.
464465
///
465-
/// Default is 50.
466-
fn prec_double_colon(&self) -> u8 {
467-
DOUBLE_COLON_PREC
468-
}
469-
470-
/// Return the precedence of `*`, `/`, and `%` operators.
471-
///
472-
/// Default is 40.
473-
fn prec_mul_div_mod_op(&self) -> u8 {
474-
MUL_DIV_MOD_OP_PREC
475-
}
476-
477-
/// Return the precedence of the `+` and `-` operators.
478-
///
479-
/// Default is 30.
480-
fn prec_plus_minus(&self) -> u8 {
481-
PLUS_MINUS_PREC
482-
}
483-
484-
/// Return the precedence of the `BETWEEN` operator.
485-
///
486-
/// For example `BETWEEN <low> AND <high>`
487-
///
488-
/// Default is 22.
489-
fn prec_between(&self) -> u8 {
490-
BETWEEN_PREC
491-
}
492-
493-
/// Return the precedence of the `LIKE` operator.
494-
///
495-
/// Default is 19.
496-
fn prec_like(&self) -> u8 {
497-
LIKE_PREC
498-
}
499-
500-
/// Return the precedence of the unary `NOT` operator.
501-
///
502-
/// For example `NOT (a OR b)`
503-
///
504-
/// Default is 15.
505-
fn prec_unary_not(&self) -> u8 {
506-
UNARY_NOT_PREC
466+
/// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
467+
fn prec_value(&self, prec: Precedence) -> u8 {
468+
match prec {
469+
Precedence::DoubleColon => 50,
470+
Precedence::AtTz => 41,
471+
Precedence::MulDivModOp => 40,
472+
Precedence::PlusMinus => 30,
473+
Precedence::Xor => 24,
474+
Precedence::Ampersand => 23,
475+
Precedence::Caret => 22,
476+
Precedence::Pipe => 21,
477+
Precedence::Between => 20,
478+
Precedence::Eq => 20,
479+
Precedence::Like => 19,
480+
Precedence::Is => 17,
481+
Precedence::PgOther => 16,
482+
Precedence::UnaryNot => 15,
483+
Precedence::And => 10,
484+
Precedence::Or => 5,
485+
}
507486
}
508487

509-
/// Return the default (unknown) precedence.
510-
///
511-
/// Default is 0.
512488
fn prec_unknown(&self) -> u8 {
513-
UNKNOWN_PREC
489+
0
514490
}
515491
}
516492

517-
// Define the lexical Precedence of operators.
518-
//
519-
// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
520-
// higher number = higher precedence
521-
//
522-
// NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator
523-
// actually has higher precedence than addition.
524-
// See <https://postgrespro.com/list/thread-id/2673331>.
525-
const DOUBLE_COLON_PREC: u8 = 50;
526-
const AT_TZ_PREC: u8 = 41;
527-
const MUL_DIV_MOD_OP_PREC: u8 = 40;
528-
const PLUS_MINUS_PREC: u8 = 30;
529-
const XOR_PREC: u8 = 24;
530-
const AMPERSAND_PREC: u8 = 23;
531-
const CARET_PREC: u8 = 22;
532-
const PIPE_PREC: u8 = 21;
533-
const BETWEEN_PREC: u8 = 20;
534-
const EQ_PREC: u8 = 20;
535-
const LIKE_PREC: u8 = 19;
536-
const IS_PREC: u8 = 17;
537-
const PG_OTHER_PREC: u8 = 16;
538-
const UNARY_NOT_PREC: u8 = 15;
539-
const AND_PREC: u8 = 10;
540-
const OR_PREC: u8 = 5;
541-
const UNKNOWN_PREC: u8 = 0;
493+
/// The operators (or operator groups) for which a precedence must be defined.
494+
///
495+
/// Each variant is mapped to a number by `Dialect::prec_value`; a higher number means higher precedence.
496+
#[derive(Debug, Clone, Copy)]
497+
pub enum Precedence {
498+
DoubleColon,
499+
AtTz,
500+
MulDivModOp,
501+
PlusMinus,
502+
Xor,
503+
Ampersand,
504+
Caret,
505+
Pipe,
506+
Between,
507+
Eq,
508+
Like,
509+
Is,
510+
PgOther,
511+
UnaryNot,
512+
And,
513+
Or,
514+
}
542515

543516
impl dyn Dialect {
544517
#[inline]

src/dialect/postgresql.rs

+28-90
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
use log::debug;
1313

1414
use crate::ast::{CommentObject, Statement};
15-
use crate::dialect::Dialect;
15+
use crate::dialect::{Dialect, Precedence};
1616
use crate::keywords::Keyword;
1717
use crate::parser::{Parser, ParserError};
1818
use crate::tokenizer::Token;
@@ -89,71 +89,11 @@ impl Dialect for PostgreSqlDialect {
8989
let token = parser.peek_token();
9090
debug!("get_next_precedence() {:?}", token);
9191

92-
let precedence = match token.token {
93-
Token::Word(w) if w.keyword == Keyword::OR => OR_PREC,
94-
Token::Word(w) if w.keyword == Keyword::XOR => XOR_PREC,
95-
Token::Word(w) if w.keyword == Keyword::AND => AND_PREC,
96-
Token::Word(w) if w.keyword == Keyword::AT => {
97-
match (
98-
parser.peek_nth_token(1).token,
99-
parser.peek_nth_token(2).token,
100-
) {
101-
(Token::Word(w), Token::Word(w2))
102-
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
103-
{
104-
AT_TZ_PREC
105-
}
106-
_ => self.prec_unknown(),
107-
}
108-
}
109-
110-
Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
111-
// The precedence of NOT varies depending on keyword that
112-
// follows it. If it is followed by IN, BETWEEN, or LIKE,
113-
// it takes on the precedence of those tokens. Otherwise, it
114-
// is not an infix operator, and therefore has zero
115-
// precedence.
116-
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
117-
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
118-
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
119-
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
120-
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
121-
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
122-
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
123-
_ => self.prec_unknown(),
124-
},
125-
Token::Word(w) if w.keyword == Keyword::IS => IS_PREC,
126-
Token::Word(w) if w.keyword == Keyword::IN => BETWEEN_LIKE_PREC,
127-
Token::Word(w) if w.keyword == Keyword::BETWEEN => BETWEEN_LIKE_PREC,
128-
Token::Word(w) if w.keyword == Keyword::LIKE => BETWEEN_LIKE_PREC,
129-
Token::Word(w) if w.keyword == Keyword::ILIKE => BETWEEN_LIKE_PREC,
130-
Token::Word(w) if w.keyword == Keyword::RLIKE => BETWEEN_LIKE_PREC,
131-
Token::Word(w) if w.keyword == Keyword::REGEXP => BETWEEN_LIKE_PREC,
132-
Token::Word(w) if w.keyword == Keyword::SIMILAR => BETWEEN_LIKE_PREC,
133-
Token::Word(w) if w.keyword == Keyword::OPERATOR => BETWEEN_LIKE_PREC,
134-
Token::Word(w) if w.keyword == Keyword::DIV => MUL_DIV_MOD_OP_PREC,
135-
Token::Word(w) if w.keyword == Keyword::COLLATE => COLLATE_PREC,
136-
Token::Eq
137-
| Token::Lt
138-
| Token::LtEq
139-
| Token::Neq
140-
| Token::Gt
141-
| Token::GtEq
142-
| Token::DoubleEq
143-
| Token::Tilde
144-
| Token::TildeAsterisk
145-
| Token::ExclamationMarkTilde
146-
| Token::ExclamationMarkTildeAsterisk
147-
| Token::DoubleTilde
148-
| Token::DoubleTildeAsterisk
149-
| Token::ExclamationMarkDoubleTilde
150-
| Token::ExclamationMarkDoubleTildeAsterisk
151-
| Token::Spaceship => EQ_PREC,
152-
Token::Caret => CARET_PREC,
153-
Token::Plus | Token::Minus => PLUS_MINUS_PREC,
154-
Token::Mul | Token::Div | Token::Mod => MUL_DIV_MOD_OP_PREC,
155-
Token::DoubleColon => DOUBLE_COLON_PREC,
156-
Token::LBracket => BRACKET_PREC,
92+
// We only return a custom value here when the behaviour (not merely the numeric value) differs
93+
// from the default implementation
94+
match token.token {
95+
Token::Word(w) if w.keyword == Keyword::COLLATE => Some(Ok(COLLATE_PREC)),
96+
Token::LBracket => Some(Ok(BRACKET_PREC)),
15797
Token::Arrow
15898
| Token::LongArrow
15999
| Token::HashArrow
@@ -173,12 +113,9 @@ impl Dialect for PostgreSqlDialect {
173113
| Token::Sharp
174114
| Token::ShiftRight
175115
| Token::ShiftLeft
176-
| Token::Pipe
177-
| Token::Ampersand
178-
| Token::CustomBinaryOperator(_) => PG_OTHER_PREC,
179-
_ => self.prec_unknown(),
180-
};
181-
Some(Ok(precedence))
116+
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
117+
_ => None,
118+
}
182119
}
183120

184121
fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
@@ -197,24 +134,25 @@ impl Dialect for PostgreSqlDialect {
197134
true
198135
}
199136

200-
fn prec_mul_div_mod_op(&self) -> u8 {
201-
MUL_DIV_MOD_OP_PREC
202-
}
203-
204-
fn prec_plus_minus(&self) -> u8 {
205-
PLUS_MINUS_PREC
206-
}
207-
208-
fn prec_between(&self) -> u8 {
209-
BETWEEN_LIKE_PREC
210-
}
211-
212-
fn prec_like(&self) -> u8 {
213-
BETWEEN_LIKE_PREC
214-
}
215-
216-
fn prec_unary_not(&self) -> u8 {
217-
NOT_PREC
137+
fn prec_value(&self, prec: Precedence) -> u8 {
138+
match prec {
139+
Precedence::DoubleColon => DOUBLE_COLON_PREC,
140+
Precedence::AtTz => AT_TZ_PREC,
141+
Precedence::MulDivModOp => MUL_DIV_MOD_OP_PREC,
142+
Precedence::PlusMinus => PLUS_MINUS_PREC,
143+
Precedence::Xor => XOR_PREC,
144+
Precedence::Ampersand => PG_OTHER_PREC,
145+
Precedence::Caret => CARET_PREC,
146+
Precedence::Pipe => PG_OTHER_PREC,
147+
Precedence::Between => BETWEEN_LIKE_PREC,
148+
Precedence::Eq => EQ_PREC,
149+
Precedence::Like => BETWEEN_LIKE_PREC,
150+
Precedence::Is => IS_PREC,
151+
Precedence::PgOther => PG_OTHER_PREC,
152+
Precedence::UnaryNot => NOT_PREC,
153+
Precedence::And => AND_PREC,
154+
Precedence::Or => OR_PREC,
155+
}
218156
}
219157
}
220158

0 commit comments

Comments
 (0)