Skip to content

Commit c1dd27d

Browse files
committed
added parsing for PostgreSQL operations
1 parent 2f71324 commit c1dd27d

File tree

6 files changed

+257
-7
lines changed

6 files changed

+257
-7
lines changed

src/ast/mod.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,11 @@ pub enum Expr {
191191
right: Box<Expr>,
192192
},
193193
/// Unary operation e.g. `NOT foo`
194-
UnaryOp { op: UnaryOperator, expr: Box<Expr> },
194+
UnaryOp {
195+
op: UnaryOperator,
196+
expr: Box<Expr>,
197+
infix: bool,
198+
},
195199
/// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))`
196200
Cast {
197201
expr: Box<Expr>,
@@ -282,7 +286,13 @@ impl fmt::Display for Expr {
282286
high
283287
),
284288
Expr::BinaryOp { left, op, right } => write!(f, "{} {} {}", left, op, right),
285-
Expr::UnaryOp { op, expr } => write!(f, "{} {}", op, expr),
289+
Expr::UnaryOp { op, expr, infix } => {
290+
if *infix {
291+
write!(f, "{}{}", expr, op)
292+
} else {
293+
write!(f, "{} {}", op, expr)
294+
}
295+
}
286296
Expr::Cast { expr, data_type } => write!(f, "CAST({} AS {})", expr, data_type),
287297
Expr::Extract { field, expr } => write!(f, "EXTRACT({} FROM {})", field, expr),
288298
Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation),

src/ast/operator.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ pub enum UnaryOperator {
2121
Plus,
2222
Minus,
2323
Not,
24+
PGBitwiseNot,
25+
PGSqrt,
26+
PGCbrt,
27+
PGFactorial,
28+
PGInfixFactorial,
29+
PGAbs,
2430
}
2531

2632
impl fmt::Display for UnaryOperator {
@@ -29,6 +35,12 @@ impl fmt::Display for UnaryOperator {
2935
UnaryOperator::Plus => "+",
3036
UnaryOperator::Minus => "-",
3137
UnaryOperator::Not => "NOT",
38+
UnaryOperator::PGBitwiseNot => "~",
39+
UnaryOperator::PGSqrt => "|/",
40+
UnaryOperator::PGCbrt => "||/",
41+
UnaryOperator::PGFactorial => "!",
42+
UnaryOperator::PGInfixFactorial => "!!",
43+
UnaryOperator::PGAbs => "@",
3244
})
3345
}
3446
}
@@ -56,6 +68,9 @@ pub enum BinaryOperator {
5668
BitwiseOr,
5769
BitwiseAnd,
5870
BitwiseXor,
71+
PGBitwiseXor,
72+
PGBitwiseShiftLeft,
73+
PGBitwiseShiftRight,
5974
}
6075

6176
impl fmt::Display for BinaryOperator {
@@ -80,6 +95,9 @@ impl fmt::Display for BinaryOperator {
8095
BinaryOperator::BitwiseOr => "|",
8196
BinaryOperator::BitwiseAnd => "&",
8297
BinaryOperator::BitwiseXor => "^",
98+
BinaryOperator::PGBitwiseXor => "#",
99+
BinaryOperator::PGBitwiseShiftLeft => "<<",
100+
BinaryOperator::PGBitwiseShiftRight => ">>",
83101
})
84102
}
85103
}

src/parser.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ impl<'a> Parser<'a> {
250250
Keyword::NOT => Ok(Expr::UnaryOp {
251251
op: UnaryOperator::Not,
252252
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
253+
infix: false,
253254
}),
254255
// Here `w` is a word, check if it's a part of a multi-part
255256
// identifier, a function call, or a simple identifier:
@@ -283,6 +284,31 @@ impl<'a> Parser<'a> {
283284
},
284285
}, // End of Token::Word
285286
Token::Mult => Ok(Expr::Wildcard),
287+
Token::Tilde => Ok(Expr::UnaryOp {
288+
op: UnaryOperator::PGBitwiseNot,
289+
expr: Box::new(self.parse_subexpr(0)?),
290+
infix: false,
291+
}),
292+
Token::DoubleExclamationMark => Ok(Expr::UnaryOp {
293+
op: UnaryOperator::PGInfixFactorial,
294+
expr: Box::new(self.parse_subexpr(0)?),
295+
infix: false,
296+
}),
297+
Token::SquareRoot => Ok(Expr::UnaryOp {
298+
op: UnaryOperator::PGSqrt,
299+
expr: Box::new(self.parse_subexpr(0)?),
300+
infix: false,
301+
}),
302+
Token::CubeRoot => Ok(Expr::UnaryOp {
303+
op: UnaryOperator::PGCbrt,
304+
expr: Box::new(self.parse_subexpr(0)?),
305+
infix: false,
306+
}),
307+
Token::Ampersat => Ok(Expr::UnaryOp {
308+
op: UnaryOperator::PGAbs,
309+
expr: Box::new(self.parse_subexpr(0)?),
310+
infix: false,
311+
}),
286312
tok @ Token::Minus | tok @ Token::Plus => {
287313
let op = if tok == Token::Plus {
288314
UnaryOperator::Plus
@@ -292,6 +318,7 @@ impl<'a> Parser<'a> {
292318
Ok(Expr::UnaryOp {
293319
op,
294320
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
321+
infix: false,
295322
})
296323
}
297324
Token::Number(_)
@@ -658,6 +685,9 @@ impl<'a> Parser<'a> {
658685
Token::Caret => Some(BinaryOperator::BitwiseXor),
659686
Token::Ampersand => Some(BinaryOperator::BitwiseAnd),
660687
Token::Div => Some(BinaryOperator::Divide),
688+
Token::ShiftLeft => Some(BinaryOperator::PGBitwiseShiftLeft),
689+
Token::ShiftRight => Some(BinaryOperator::PGBitwiseShiftRight),
690+
Token::Sharp => Some(BinaryOperator::PGBitwiseXor),
661691
Token::Word(w) => match w.keyword {
662692
Keyword::AND => Some(BinaryOperator::And),
663693
Keyword::OR => Some(BinaryOperator::Or),
@@ -707,6 +737,13 @@ impl<'a> Parser<'a> {
707737
}
708738
} else if Token::DoubleColon == tok {
709739
self.parse_pg_cast(expr)
740+
} else if Token::ExclamationMark == tok {
741+
// PostgreSQL factorial operation
742+
Ok(Expr::UnaryOp {
743+
op: UnaryOperator::PGFactorial,
744+
expr: Box::new(expr),
745+
infix: true,
746+
})
710747
} else {
711748
// Can only happen if `get_next_precedence` got out of sync with this function
712749
panic!("No infix parser for token {:?}", tok)
@@ -785,11 +822,12 @@ impl<'a> Parser<'a> {
785822
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC),
786823
Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20),
787824
Token::Pipe => Ok(21),
788-
Token::Caret => Ok(22),
825+
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22),
789826
Token::Ampersand => Ok(23),
790827
Token::Plus | Token::Minus => Ok(Self::PLUS_MINUS_PREC),
791828
Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40),
792829
Token::DoubleColon => Ok(50),
830+
Token::ExclamationMark => Ok(50),
793831
_ => Ok(0),
794832
}
795833
}

src/tokenizer.rs

Lines changed: 103 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pub enum Token {
5454
Neq,
5555
/// Less Than operator `<`
5656
Lt,
57-
/// Greater han operator `>`
57+
/// Greater Than operator `>`
5858
Gt,
5959
/// Less Than Or Equals operator `<=`
6060
LtEq,
@@ -102,6 +102,24 @@ pub enum Token {
102102
RBrace,
103103
/// Right Arrow `=>`
104104
RArrow,
105+
/// Sharp `#`, used for the PostgreSQL bitwise XOR operator
106+
Sharp,
107+
/// Tilde `~`, used for the PostgreSQL bitwise NOT operator
108+
Tilde,
109+
/// Bitwise shift-left operator `<<`, used for PostgreSQL
110+
ShiftLeft,
111+
/// Bitwise shift-right operator `>>`, used for PostgreSQL
112+
ShiftRight,
113+
/// Exclamation Mark `!`, used for the PostgreSQL postfix factorial operator
114+
ExclamationMark,
115+
/// Double Exclamation Mark `!!`, used for the PostgreSQL prefix factorial operator
116+
DoubleExclamationMark,
117+
/// Ampersat `@`, used for the PostgreSQL abs operator
118+
Ampersat,
119+
/// Square root `|/`, the PostgreSQL square root math operator
120+
SquareRoot,
121+
/// Cube root `||/`, the PostgreSQL cube root math operator
122+
CubeRoot,
105123
}
106124

107125
impl fmt::Display for Token {
@@ -143,6 +161,15 @@ impl fmt::Display for Token {
143161
Token::LBrace => f.write_str("{"),
144162
Token::RBrace => f.write_str("}"),
145163
Token::RArrow => f.write_str("=>"),
164+
Token::Sharp => f.write_str("#"),
165+
Token::ExclamationMark => f.write_str("!"),
166+
Token::DoubleExclamationMark => f.write_str("!!"),
167+
Token::Tilde => f.write_str("~"),
168+
Token::Ampersat => f.write_str("@"),
169+
Token::ShiftLeft => f.write_str("<<"),
170+
Token::ShiftRight => f.write_str(">>"),
171+
Token::SquareRoot => f.write_str("|/"),
172+
Token::CubeRoot => f.write_str("||/"),
146173
}
147174
}
148175
}
@@ -406,7 +433,14 @@ impl<'a> Tokenizer<'a> {
406433
'|' => {
407434
chars.next(); // consume the '|'
408435
match chars.peek() {
409-
Some('|') => self.consume_and_return(chars, Token::StringConcat),
436+
Some('/') => self.consume_and_return(chars, Token::SquareRoot),
437+
Some('|') => {
438+
chars.next(); // consume the second '|'
439+
match chars.peek() {
440+
Some('/') => self.consume_and_return(chars, Token::CubeRoot),
441+
_ => Ok(Some(Token::StringConcat)),
442+
}
443+
}
410444
// Bitwise OR '|' operator
411445
_ => Ok(Some(Token::Pipe)),
412446
}
@@ -423,21 +457,24 @@ impl<'a> Tokenizer<'a> {
423457
chars.next(); // consume
424458
match chars.peek() {
425459
Some('=') => self.consume_and_return(chars, Token::Neq),
426-
_ => self.tokenizer_error("Expected to see '=' after '!' character"),
460+
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
461+
_ => Ok(Some(Token::ExclamationMark)),
427462
}
428463
}
429464
'<' => {
430465
chars.next(); // consume
431466
match chars.peek() {
432467
Some('=') => self.consume_and_return(chars, Token::LtEq),
433468
Some('>') => self.consume_and_return(chars, Token::Neq),
469+
Some('<') => self.consume_and_return(chars, Token::ShiftLeft),
434470
_ => Ok(Some(Token::Lt)),
435471
}
436472
}
437473
'>' => {
438474
chars.next(); // consume
439475
match chars.peek() {
440476
Some('=') => self.consume_and_return(chars, Token::GtEq),
477+
Some('>') => self.consume_and_return(chars, Token::ShiftRight),
441478
_ => Ok(Some(Token::Gt)),
442479
}
443480
}
@@ -464,6 +501,9 @@ impl<'a> Tokenizer<'a> {
464501
comment,
465502
})))
466503
}
504+
'~' => self.consume_and_return(chars, Token::Tilde),
505+
'#' => self.consume_and_return(chars, Token::Sharp),
506+
'@' => self.consume_and_return(chars, Token::Ampersat),
467507
other => self.consume_and_return(chars, Token::Char(other)),
468508
},
469509
None => Ok(None),
@@ -586,6 +626,7 @@ mod tests {
586626
use super::super::dialect::GenericDialect;
587627
use super::super::dialect::MsSqlDialect;
588628
use super::*;
629+
use crate::dialect::PostgreSqlDialect;
589630

590631
#[test]
591632
fn tokenize_select_1() {
@@ -958,6 +999,65 @@ mod tests {
958999
compare(expected, tokens);
9591000
}
9601001

1002+
#[test]
1003+
fn tokenize_postgresql_bitwise_operations() {
1004+
let sql = String::from("SELECT ~one << two # three >> four");
1005+
let dialect = PostgreSqlDialect {};
1006+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
1007+
let tokens = tokenizer.tokenize().unwrap();
1008+
1009+
let expected = vec![
1010+
Token::make_keyword("SELECT"),
1011+
Token::Whitespace(Whitespace::Space),
1012+
Token::Tilde,
1013+
Token::make_word("one", None),
1014+
Token::Whitespace(Whitespace::Space),
1015+
Token::ShiftLeft,
1016+
Token::Whitespace(Whitespace::Space),
1017+
Token::make_word("two", None),
1018+
Token::Whitespace(Whitespace::Space),
1019+
Token::Sharp,
1020+
Token::Whitespace(Whitespace::Space),
1021+
Token::make_word("three", None),
1022+
Token::Whitespace(Whitespace::Space),
1023+
Token::ShiftRight,
1024+
Token::Whitespace(Whitespace::Space),
1025+
Token::make_word("four", None),
1026+
];
1027+
1028+
compare(expected, tokens);
1029+
}
1030+
1031+
#[test]
1032+
fn tokenize_postgresql_math_operations() {
1033+
let sql = String::from("SELECT !!5 5! @-6 |/4 ||/8");
1034+
let dialect = PostgreSqlDialect {};
1035+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
1036+
let tokens = tokenizer.tokenize().unwrap();
1037+
1038+
let expected = vec![
1039+
Token::make_keyword("SELECT"),
1040+
Token::Whitespace(Whitespace::Space),
1041+
Token::DoubleExclamationMark,
1042+
Token::Number("5".to_string()),
1043+
Token::Whitespace(Whitespace::Space),
1044+
Token::Number("5".to_string()),
1045+
Token::ExclamationMark,
1046+
Token::Whitespace(Whitespace::Space),
1047+
Token::Ampersat,
1048+
Token::Minus,
1049+
Token::Number("6".to_string()),
1050+
Token::Whitespace(Whitespace::Space),
1051+
Token::SquareRoot,
1052+
Token::Number("4".to_string()),
1053+
Token::Whitespace(Whitespace::Space),
1054+
Token::CubeRoot,
1055+
Token::Number("8".to_string()),
1056+
];
1057+
1058+
compare(expected, tokens);
1059+
}
1060+
9611061
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
9621062
//println!("------------------------------");
9631063
//println!("tokens = {:?}", actual);

tests/sqlparser_common.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,8 @@ fn parse_select_count_distinct() {
343343
name: ObjectName(vec![Ident::new("COUNT")]),
344344
args: vec![FunctionArg::Unnamed(Expr::UnaryOp {
345345
op: UnaryOperator::Plus,
346-
expr: Box::new(Expr::Identifier(Ident::new("x")))
346+
expr: Box::new(Expr::Identifier(Ident::new("x"))),
347+
infix: false,
347348
})],
348349
over: None,
349350
distinct: true,
@@ -506,11 +507,13 @@ fn parse_unary_math() {
506507
left: Box::new(UnaryOp {
507508
op: UnaryOperator::Minus,
508509
expr: Box::new(Identifier(Ident::new("a"))),
510+
infix: false,
509511
}),
510512
op: BinaryOperator::Plus,
511513
right: Box::new(UnaryOp {
512514
op: UnaryOperator::Minus,
513515
expr: Box::new(Identifier(Ident::new("b"))),
516+
infix: false,
514517
}),
515518
},
516519
verified_expr(sql)
@@ -565,6 +568,7 @@ fn parse_not_precedence() {
565568
high: Box::new(Expr::Value(number("2"))),
566569
negated: true,
567570
}),
571+
infix: false,
568572
},
569573
);
570574

@@ -579,6 +583,7 @@ fn parse_not_precedence() {
579583
op: BinaryOperator::NotLike,
580584
right: Box::new(Expr::Value(Value::SingleQuotedString("b".into()))),
581585
}),
586+
infix: false,
582587
},
583588
);
584589

@@ -593,6 +598,7 @@ fn parse_not_precedence() {
593598
list: vec![Expr::Value(Value::SingleQuotedString("a".into()))],
594599
negated: true,
595600
}),
601+
infix: false,
596602
},
597603
);
598604
}
@@ -2604,6 +2610,7 @@ fn parse_exists_subquery() {
26042610
Expr::UnaryOp {
26052611
op: UnaryOperator::Not,
26062612
expr: Box::new(Expr::Exists(Box::new(expected_inner))),
2613+
infix: false,
26072614
},
26082615
select.selection.unwrap(),
26092616
);

0 commit comments

Comments
 (0)