
Commit 7676257

alamb and serprex authored and committed
fix parsing of identifiers after % symbol (apache#927)
1 parent 1badab4 commit 7676257

File tree

3 files changed: +48 −8 lines changed


src/test_utils.rs

+11 −5
@@ -116,6 +116,16 @@ impl TestedDialects {
         only_statement
     }

+    /// Ensures that `sql` parses as an [`Expr`], and that
+    /// re-serializing the parse result produces canonical
+    pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
+        let ast = self
+            .run_parser_method(sql, |parser| parser.parse_expr())
+            .unwrap();
+        assert_eq!(canonical, &ast.to_string());
+        ast
+    }
+
     /// Ensures that `sql` parses as a single [Statement], and that
     /// re-serializing the parse result produces the same `sql`
     /// string (is not modified after a serialization round-trip).
@@ -147,11 +157,7 @@ impl TestedDialects {
     /// re-serializing the parse result produces the same `sql`
     /// string (is not modified after a serialization round-trip).
     pub fn verified_expr(&self, sql: &str) -> Expr {
-        let ast = self
-            .run_parser_method(sql, |parser| parser.parse_expr())
-            .unwrap();
-        assert_eq!(sql, &ast.to_string(), "round-tripping without changes");
-        ast
+        self.expr_parses_to(sql, sql)
     }
 }
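As a side note, here is a minimal sketch of how the new helper complements the existing one, written as a hypothetical test. The function name `modulo_expr_examples` is invented, and the imports are assumed to be the same ones tests/sqlparser_common.rs already uses (TestedDialects, the dialect types, Expr, and so on); `verified_expr` asserts an exact round-trip, while `expr_parses_to` lets the input differ from the canonical output.

#[test]
fn modulo_expr_examples() {
    // Built the same way as pg_and_generic() in tests/sqlparser_common.rs.
    let dialects = TestedDialects {
        dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})],
    };

    // verified_expr: the SQL must already be canonical and survive a
    // serialization round-trip unchanged.
    dialects.verified_expr("a % b");

    // expr_parses_to: the SQL may be non-canonical; only the re-serialized
    // output is compared against the expected canonical string.
    dialects.expr_parses_to("a1%b1", "a1 % b1");
}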

src/tokenizer.rs

+5 −3
@@ -424,6 +424,7 @@ struct State<'a> {
 }

 impl<'a> State<'a> {
+    /// return the next character and advance the stream
     pub fn next(&mut self) -> Option<char> {
         match self.peekable.next() {
             None => None,
@@ -439,6 +440,7 @@ impl<'a> State<'a> {
         }
     }

+    /// return the next character but do not advance the stream
     pub fn peek(&mut self) -> Option<&char> {
         self.peekable.peek()
     }
@@ -849,13 +851,13 @@ impl<'a> Tokenizer<'a> {
             '+' => self.consume_and_return(chars, Token::Plus),
             '*' => self.consume_and_return(chars, Token::Mul),
             '%' => {
-                chars.next();
+                chars.next(); // advance past '%'
                 match chars.peek() {
-                    Some(' ') => self.consume_and_return(chars, Token::Mod),
+                    Some(' ') => Ok(Some(Token::Mod)),
                     Some(sch) if self.dialect.is_identifier_start('%') => {
                         self.tokenize_identifier_or_keyword([ch, *sch], chars)
                     }
-                    _ => self.consume_and_return(chars, Token::Mod),
+                    _ => Ok(Some(Token::Mod)),
                 }
             }
             '|' => {
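To illustrate what the '%' branch change buys, here is a hedged, self-contained sketch using the crate's public Tokenizer API (Tokenizer::new and tokenize do exist in sqlparser). The fn main wrapper and the input string are assumptions for illustration; the point is that, with the fix, the character after '%' is no longer consumed, so the identifier on the right of the operator survives tokenization.

use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::{Token, Tokenizer};

fn main() {
    let dialect = GenericDialect {};
    // With the fix, '%' emits Token::Mod without eating the next character,
    // so "a1%b1" keeps both identifiers intact.
    let tokens = Tokenizer::new(&dialect, "a1%b1").tokenize().unwrap();
    let significant: Vec<&Token> = tokens
        .iter()
        .filter(|t| !matches!(t, Token::Whitespace(_)))
        .collect();
    // Expected shape: a Word for "a1", Token::Mod, a Word for "b1".
    println!("{significant:?}");
}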

tests/sqlparser_common.rs

+32 −0
@@ -1143,6 +1143,20 @@ fn parse_unary_math_with_multiply() {
     );
 }

+#[test]
+fn parse_mod() {
+    use self::Expr::*;
+    let sql = "a % b";
+    assert_eq!(
+        BinaryOp {
+            left: Box::new(Identifier(Ident::new("a"))),
+            op: BinaryOperator::Modulo,
+            right: Box::new(Identifier(Ident::new("b"))),
+        },
+        verified_expr(sql)
+    );
+}
+
 fn pg_and_generic() -> TestedDialects {
     TestedDialects {
         dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})],
@@ -1178,6 +1192,24 @@ fn parse_json_ops_without_colon() {
     }
 }

+#[test]
+fn parse_mod_no_spaces() {
+    use self::Expr::*;
+    let canonical = "a1 % b1";
+    let sqls = ["a1 % b1", "a1% b1", "a1 %b1", "a1%b1"];
+    for sql in sqls {
+        println!("Parsing {sql}");
+        assert_eq!(
+            BinaryOp {
+                left: Box::new(Identifier(Ident::new("a1"))),
+                op: BinaryOperator::Modulo,
+                right: Box::new(Identifier(Ident::new("b1"))),
+            },
+            pg_and_generic().expr_parses_to(sql, canonical)
+        );
+    }
+}
+
 #[test]
 fn parse_is_null() {
     use self::Expr::*;
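Finally, a hedged end-to-end sketch of the behavior these tests pin down, using the crate's public Parser::parse_sql entry point; the SELECT statement and its expected canonical form are assumptions chosen to line up with the canonical string in parse_mod_no_spaces above, not part of the commit itself.

use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    // Before this commit the tokenizer swallowed the character after '%',
    // so "a1%b1" could not round-trip as `a1 % b1`.
    let statements = Parser::parse_sql(&dialect, "SELECT a1%b1").unwrap();
    // Re-serializing normalizes the spacing around the modulo operator.
    assert_eq!(statements[0].to_string(), "SELECT a1 % b1");
    println!("{}", statements[0]);
}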
