From 5b51ba5a203ebedc96f3f09e2074b707bf028131 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 20 Jul 2023 14:57:56 -0500 Subject: [PATCH 1/2] fix parsing of identifiers after `%` symbol --- src/test_utils.rs | 17 ++++++++++++----- src/tokenizer.rs | 8 +++++--- tests/sqlparser_common.rs | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 47fb00d5d..1a37375e7 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -116,6 +116,17 @@ impl TestedDialects { only_statement } + /// Ensures that `sql` parses as an [`Expr`], and that + /// re-serializing the parse result produces the `canonical` string + pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr { + let ast = self + .run_parser_method(sql, |parser| parser.parse_expr()) + .unwrap(); + assert_eq!(canonical, &ast.to_string()); + ast + } + + /// Ensures that `sql` parses as a single [Statement], and that /// re-serializing the parse result produces the same `sql` /// string (is not modified after a serialization round-trip). 
pub fn verified_expr(&self, sql: &str) -> Expr { - let ast = self - .run_parser_method(sql, |parser| parser.parse_expr()) - .unwrap(); - assert_eq!(sql, &ast.to_string(), "round-tripping without changes"); - ast + self.expr_parses_to(sql, sql) } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 83e9f317e..a153907e4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -424,6 +424,7 @@ struct State<'a> { } impl<'a> State<'a> { + /// return the next character and advance the stream pub fn next(&mut self) -> Option<char> { match self.peekable.next() { None => None, @@ -439,6 +440,7 @@ impl<'a> State<'a> { } } + /// return the next character but do not advance the stream pub fn peek(&mut self) -> Option<&char> { self.peekable.peek() } @@ -849,13 +851,13 @@ impl<'a> Tokenizer<'a> { '+' => self.consume_and_return(chars, Token::Plus), '*' => self.consume_and_return(chars, Token::Mul), '%' => { - chars.next(); + chars.next(); // advance past '%' match chars.peek() { - Some(' ') => self.consume_and_return(chars, Token::Mod), + Some(' ') => Ok(Some(Token::Mod)), Some(sch) if self.dialect.is_identifier_start('%') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } - _ => self.consume_and_return(chars, Token::Mod), + _ => Ok(Some(Token::Mod)) } } '|' => { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 356926e13..c5d974270 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1143,6 +1143,20 @@ fn parse_unary_math_with_multiply() { ); } +#[test] +fn parse_mod() { + use self::Expr::*; + let sql = "a % b"; + assert_eq!( + BinaryOp { + left: Box::new(Identifier(Ident::new("a"))), + op: BinaryOperator::Modulo, + right: Box::new(Identifier(Ident::new("b"))), + }, + verified_expr(sql) + ); +} + fn pg_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})], @@ -1178,6 +1192,31 @@ fn parse_json_ops_without_colon() { } } + +#[test] +fn parse_mod_no_spaces() 
{ + use self::Expr::*; + let canonical = "a1 % b1"; + let sqls = [ + "a1 % b1", + "a1% b1", + "a1 %b1", + "a1%b1" + ]; + for sql in sqls { + println!("Parsing {sql}"); + assert_eq!( + BinaryOp { + left: Box::new(Identifier(Ident::new("a1"))), + op: BinaryOperator::Modulo, + right: Box::new(Identifier(Ident::new("b1"))), + }, + pg_and_generic().expr_parses_to(sql, canonical) + ); + } +} + + #[test] fn parse_is_null() { use self::Expr::*; From 54bcbf673348fa8318ea3bd24bc2262fd3f92373 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 20 Jul 2023 16:27:06 -0500 Subject: [PATCH 2/2] fmt --- src/test_utils.rs | 1 - src/tokenizer.rs | 2 +- tests/sqlparser_common.rs | 9 +-------- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 1a37375e7..0ec595095 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -126,7 +126,6 @@ impl TestedDialects { ast } - /// Ensures that `sql` parses as a single [Statement], and that /// re-serializing the parse result produces the same `sql` /// string (is not modified after a serialization round-trip). 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a153907e4..f20e01b71 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -857,7 +857,7 @@ impl<'a> Tokenizer<'a> { Some(sch) if self.dialect.is_identifier_start('%') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } - _ => Ok(Some(Token::Mod)) + _ => Ok(Some(Token::Mod)), } } '|' => { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c5d974270..a9fd419ea 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1192,17 +1192,11 @@ fn parse_json_ops_without_colon() { } } - #[test] fn parse_mod_no_spaces() { use self::Expr::*; let canonical = "a1 % b1"; - let sqls = [ - "a1 % b1", - "a1% b1", - "a1 %b1", - "a1%b1" - ]; + let sqls = ["a1 % b1", "a1% b1", "a1 %b1", "a1%b1"]; for sql in sqls { println!("Parsing {sql}"); assert_eq!( @@ -1216,7 +1210,6 @@ fn parse_mod_no_spaces() { } } - #[test] fn parse_is_null() { use self::Expr::*;