Skip to content

fix parsing of identifiers after % symbol #927

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ impl TestedDialects {
only_statement
}

/// Ensures that `sql` parses as an [`Expr`], and that
/// re-serializing the parse result produces canonical
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
let ast = self
.run_parser_method(sql, |parser| parser.parse_expr())
.unwrap();
assert_eq!(canonical, &ast.to_string());
ast
}

/// Ensures that `sql` parses as a single [Statement], and that
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).
Expand Down Expand Up @@ -147,11 +157,7 @@ impl TestedDialects {
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).
pub fn verified_expr(&self, sql: &str) -> Expr {
let ast = self
.run_parser_method(sql, |parser| parser.parse_expr())
.unwrap();
assert_eq!(sql, &ast.to_string(), "round-tripping without changes");
ast
self.expr_parses_to(sql, sql)
}
}

Expand Down
8 changes: 5 additions & 3 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ struct State<'a> {
}

impl<'a> State<'a> {
/// return the next character and advance the stream
pub fn next(&mut self) -> Option<char> {
match self.peekable.next() {
None => None,
Expand All @@ -439,6 +440,7 @@ impl<'a> State<'a> {
}
}

/// return the next character but do not advance the stream
pub fn peek(&mut self) -> Option<&char> {
self.peekable.peek()
}
Expand Down Expand Up @@ -849,13 +851,13 @@ impl<'a> Tokenizer<'a> {
'+' => self.consume_and_return(chars, Token::Plus),
'*' => self.consume_and_return(chars, Token::Mul),
'%' => {
chars.next();
chars.next(); // advance past '%'
match chars.peek() {
Some(' ') => self.consume_and_return(chars, Token::Mod),
Some(' ') => Ok(Some(Token::Mod)),
Some(sch) if self.dialect.is_identifier_start('%') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => self.consume_and_return(chars, Token::Mod),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the bug fix. At is at this point the % has been consumed by chars.next() so calling consume_and_return` skips another character by accident

It probably doesn't matter for ' ' but it does matter for everything else in this match arm

_ => Ok(Some(Token::Mod)),
}
}
'|' => {
Expand Down
32 changes: 32 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,20 @@ fn parse_unary_math_with_multiply() {
);
}

#[test]
fn parse_mod() {
use self::Expr::*;
let sql = "a % b";
assert_eq!(
BinaryOp {
left: Box::new(Identifier(Ident::new("a"))),
op: BinaryOperator::Modulo,
right: Box::new(Identifier(Ident::new("b"))),
},
verified_expr(sql)
);
}

fn pg_and_generic() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})],
Expand Down Expand Up @@ -1178,6 +1192,24 @@ fn parse_json_ops_without_colon() {
}
}

#[test]
fn parse_mod_no_spaces() {
use self::Expr::*;
let canonical = "a1 % b1";
let sqls = ["a1 % b1", "a1% b1", "a1 %b1", "a1%b1"];
for sql in sqls {
println!("Parsing {sql}");
assert_eq!(
BinaryOp {
left: Box::new(Identifier(Ident::new("a1"))),
op: BinaryOperator::Modulo,
right: Box::new(Identifier(Ident::new("b1"))),
},
pg_and_generic().expr_parses_to(sql, canonical)
);
}
}

#[test]
fn parse_is_null() {
use self::Expr::*;
Expand Down