diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index b887eef0a..6bb392e68 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -822,15 +822,10 @@ impl Parser {
             ));
         };
         let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]);
-        let mut names = vec![self.parse_object_name()?];
+        let mut names = vec![];
         loop {
-            let token = &self.next_token();
-            if let Some(Token::Comma) = token {
-                names.push(self.parse_object_name()?)
-            } else {
-                if token.is_some() {
-                    self.prev_token();
-                }
+            names.push(self.parse_object_name()?);
+            if !self.consume_token(&Token::Comma) {
                 break;
             }
         }
@@ -1014,10 +1009,9 @@ impl Parser {
             self.expect_token(&Token::Eq)?;
             let value = self.parse_value()?;
             options.push(SQLOption { name, value });
-            match self.peek_token() {
-                Some(Token::Comma) => self.next_token(),
-                _ => break,
-            };
+            if !self.consume_token(&Token::Comma) {
+                break;
+            }
         }
         self.expect_token(&Token::RParen)?;
         Ok(options)
@@ -1279,29 +1273,13 @@ impl Parser {
     /// Parse one or more identifiers with the specified separator between them
     pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result<Vec<SQLIdent>, ParserError> {
         let mut idents = vec![];
-        let mut expect_identifier = true;
         loop {
-            let token = &self.next_token();
-            match token {
-                Some(Token::SQLWord(s)) if expect_identifier => {
-                    expect_identifier = false;
-                    idents.push(s.as_sql_ident());
-                }
-                Some(token) if token == separator && !expect_identifier => {
-                    expect_identifier = true;
-                    continue;
-                }
-                _ => {
-                    self.prev_token();
-                    break;
-                }
+            idents.push(self.parse_identifier()?);
+            if !self.consume_token(separator) {
+                break;
             }
         }
-        if expect_identifier {
-            self.expected("identifier", self.peek_token())
-        } else {
-            Ok(idents)
-        }
+        Ok(idents)
     }
 
     /// Parse a possibly qualified, possibly quoted identifier, e.g.
@@ -1844,10 +1822,9 @@ impl Parser {
             self.expect_token(&Token::LParen)?;
             values.push(self.parse_expr_list()?);
             self.expect_token(&Token::RParen)?;
-            match self.peek_token() {
-                Some(Token::Comma) => self.next_token(),
-                _ => break,
-            };
+            if !self.consume_token(&Token::Comma) {
+                break;
+            }
         }
         Ok(SQLValues(values))
     }
diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs
index e8b345973..ea7ca49c9 100644
--- a/src/sqltokenizer.rs
+++ b/src/sqltokenizer.rs
@@ -319,29 +319,25 @@ impl<'a> Tokenizer<'a> {
                 }
                 // delimited (quoted) identifier
                 quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
-                    let mut s = String::new();
                     chars.next(); // consume the opening quote
                     let quote_end = SQLWord::matching_end_quote(quote_start);
-                    while let Some(ch) = chars.next() {
-                        match ch {
-                            c if c == quote_end => break,
-                            _ => s.push(ch),
-                        }
+                    let s = peeking_take_while(chars, |ch| ch != quote_end);
+                    if chars.next() == Some(quote_end) {
+                        Ok(Some(Token::make_word(&s, Some(quote_start))))
+                    } else {
+                        Err(TokenizerError(format!(
+                            "Expected close delimiter '{}' before EOF.",
+                            quote_end
+                        )))
                     }
-                    Ok(Some(Token::make_word(&s, Some(quote_start))))
                 }
                 // numbers
                 '0'..='9' => {
-                    let mut s = String::new();
-                    while let Some(&ch) = chars.peek() {
-                        match ch {
-                            '0'..='9' | '.' => {
-                                chars.next(); // consume
-                                s.push(ch);
-                            }
-                            _ => break,
-                        }
-                    }
+                    // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal
+                    let s = peeking_take_while(chars, |ch| match ch {
+                        '0'..='9' | '.' => true,
+                        _ => false,
+                    });
                     Ok(Some(Token::Number(s)))
                 }
                 // punctuation
@@ -354,22 +350,12 @@ impl<'a> Tokenizer<'a> {
                     match chars.peek() {
                         Some('-') => {
                             chars.next(); // consume the second '-', starting a single-line comment
-                            let mut s = String::new();
-                            loop {
-                                match chars.next() {
-                                    Some(ch) if ch != '\n' => {
-                                        s.push(ch);
-                                    }
-                                    other => {
-                                        if other.is_some() {
-                                            s.push('\n');
-                                        }
-                                        break Ok(Some(Token::Whitespace(
-                                            Whitespace::SingleLineComment(s),
-                                        )));
-                                    }
-                                }
+                            let mut s = peeking_take_while(chars, |ch| ch != '\n');
+                            if let Some(ch) = chars.next() {
+                                assert_eq!(ch, '\n');
+                                s.push(ch);
                             }
+                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s))))
                         }
                         // a regular '-' operator
                         _ => Ok(Some(Token::Minus)),
@@ -394,14 +380,8 @@ impl<'a> Tokenizer<'a> {
                 '!' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            '=' => self.consume_and_return(chars, Token::Neq),
-                            _ => Err(TokenizerError(format!(
-                                "Tokenizer Error at Line: {}, Col: {}",
-                                self.line, self.col
-                            ))),
-                        },
-                        None => Err(TokenizerError(format!(
+                        Some('=') => self.consume_and_return(chars, Token::Neq),
+                        _ => Err(TokenizerError(format!(
                             "Tokenizer Error at Line: {}, Col: {}",
                             self.line, self.col
                         ))),
@@ -410,39 +390,27 @@ impl<'a> Tokenizer<'a> {
                 '<' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            '=' => self.consume_and_return(chars, Token::LtEq),
-                            '>' => self.consume_and_return(chars, Token::Neq),
-                            _ => Ok(Some(Token::Lt)),
-                        },
-                        None => Ok(Some(Token::Lt)),
+                        Some('=') => self.consume_and_return(chars, Token::LtEq),
+                        Some('>') => self.consume_and_return(chars, Token::Neq),
+                        _ => Ok(Some(Token::Lt)),
                     }
                 }
                 '>' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            '=' => self.consume_and_return(chars, Token::GtEq),
-                            _ => Ok(Some(Token::Gt)),
-                        },
-                        None => Ok(Some(Token::Gt)),
+                        Some('=') => self.consume_and_return(chars, Token::GtEq),
+                        _ => Ok(Some(Token::Gt)),
                     }
                 }
-                // colon
                 ':' => {
                     chars.next();
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            // double colon
-                            ':' => self.consume_and_return(chars, Token::DoubleColon),
-                            _ => Ok(Some(Token::Colon)),
-                        },
-                        None => Ok(Some(Token::Colon)),
+                        Some(':') => self.consume_and_return(chars, Token::DoubleColon),
+                        _ => Ok(Some(Token::Colon)),
                     }
                 }
                 ';' => self.consume_and_return(chars, Token::SemiColon),
                 '\\' => self.consume_and_return(chars, Token::Backslash),
-                // brakets
                 '[' => self.consume_and_return(chars, Token::LBracket),
                 ']' => self.consume_and_return(chars, Token::RBracket),
                 '&' => self.consume_and_return(chars, Token::Ampersand),
@@ -456,16 +424,10 @@ impl<'a> Tokenizer<'a> {
 
     /// Tokenize an identifier or keyword, after the first char is already consumed.
     fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
-        let mut s = String::new();
-        s.push(first_char);
-        while let Some(&ch) = chars.peek() {
-            if self.dialect.is_identifier_part(ch) {
-                chars.next(); // consume
-                s.push(ch);
-            } else {
-                break;
-            }
-        }
+        let mut s = first_char.to_string();
+        s.push_str(&peeking_take_while(chars, |ch| {
+            self.dialect.is_identifier_part(ch)
+        }));
         s
     }
@@ -539,6 +501,25 @@ impl<'a> Tokenizer<'a> {
     }
 }
 
+/// Read from `chars` until `predicate` returns `false` or EOF is hit.
+/// Return the characters read as String, and keep the first non-matching
+/// char available as `chars.next()`.
+fn peeking_take_while(
+    chars: &mut Peekable<Chars<'_>>,
+    mut predicate: impl FnMut(char) -> bool,
+) -> String {
+    let mut s = String::new();
+    while let Some(&ch) = chars.peek() {
+        if predicate(ch) {
+            chars.next(); // consume
+            s.push(ch);
+        } else {
+            break;
+        }
+    }
+    s
+}
+
 #[cfg(test)]
 mod tests {
     use super::super::dialect::GenericSqlDialect;
@@ -768,6 +749,20 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_mismatched_quotes() {
+        let sql = String::from("\"foo");
+
+        let dialect = GenericSqlDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        assert_eq!(
+            tokenizer.tokenize(),
+            Err(TokenizerError(
+                "Expected close delimiter '\"' before EOF.".to_string(),
+            ))
+        );
+    }
+
     #[test]
     fn tokenize_newlines() {
         let sql = String::from("line1\nline2\rline3\r\nline4\r");
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 8998dca58..39a47ed50 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -1902,7 +1902,7 @@ fn parse_drop_table() {
             assert_eq!(SQLObjectType::Table, object_type);
             assert_eq!(
                 vec!["foo"],
-                names.iter().map(|n| n.to_string()).collect::<Vec<_>>()
+                names.iter().map(ToString::to_string).collect::<Vec<_>>()
             );
             assert_eq!(false, cascade);
         }
@@ -1921,7 +1921,7 @@ fn parse_drop_table() {
             assert_eq!(SQLObjectType::Table, object_type);
             assert_eq!(
                 vec!["foo", "bar"],
-                names.iter().map(|n| n.to_string()).collect::<Vec<_>>()
+                names.iter().map(ToString::to_string).collect::<Vec<_>>()
             );
             assert_eq!(true, cascade);
         }
@@ -1950,7 +1950,7 @@ fn parse_drop_view() {
         } => {
             assert_eq!(
                 vec!["myschema.myview"],
-                names.iter().map(|n| n.to_string()).collect::<Vec<_>>()
+                names.iter().map(ToString::to_string).collect::<Vec<_>>()
            );
            assert_eq!(SQLObjectType::View, object_type);
        }
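
The new `peeking_take_while` helper is the heart of the tokenizer cleanup: it consumes characters only while the predicate holds, leaving the first non-matching character un-consumed so the next call to `next_token` starts in the right place. A plain `while let Some(ch) = chars.next()` loop would eat that character, which is what forced the old hand-rolled break-out logic in each arm. A minimal standalone sketch of the behavior follows; the helper body is copied verbatim from the diff, while the surrounding `main` is illustrative only:

```rust
use std::iter::Peekable;
use std::str::Chars;

/// Copied from the diff: read chars while `predicate` holds, keeping the
/// first non-matching char available as `chars.next()`.
fn peeking_take_while(
    chars: &mut Peekable<Chars<'_>>,
    mut predicate: impl FnMut(char) -> bool,
) -> String {
    let mut s = String::new();
    while let Some(&ch) = chars.peek() {
        if predicate(ch) {
            chars.next(); // consume
            s.push(ch);
        } else {
            break;
        }
    }
    s
}

fn main() {
    let mut chars = "123abc".chars().peekable();
    let num = peeking_take_while(&mut chars, |ch| ch.is_ascii_digit());
    assert_eq!(num, "123");
    // The first non-matching char is still there; a `chars.next()`-based
    // loop would already have consumed it.
    assert_eq!(chars.next(), Some('a'));
}
```

This is also why the delimited-identifier arm can now detect a missing closing quote: after `peeking_take_while(chars, |ch| ch != quote_end)` returns, `chars.next()` is either the closing quote or `None` at EOF, which the new `tokenize_mismatched_quotes` test exercises.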
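The parser-side hunks all follow one shape: replace the `peek_token`/`next_token`/`prev_token` bookkeeping with "parse an item, then `consume_token(&Token::Comma)` or break". A toy sketch of that pattern over a plain token slice is below; the `Cursor` type and its methods are hypothetical stand-ins for illustration (the real `Parser::consume_token` lives in src/sqlparser.rs and is not shown in this diff):

```rust
#[derive(Debug, PartialEq)]
enum Token {
    Word(String),
    Comma,
}

/// Hypothetical stand-in for Parser: an index into a token slice.
struct Cursor<'a> {
    tokens: &'a [Token],
    index: usize,
}

impl<'a> Cursor<'a> {
    /// Like Parser::consume_token: advance past `expected` if it is next,
    /// and report whether it was consumed. No prev_token() rewind needed.
    fn consume_token(&mut self, expected: &Token) -> bool {
        if self.tokens.get(self.index) == Some(expected) {
            self.index += 1;
            true
        } else {
            false
        }
    }

    fn parse_word(&mut self) -> Result<String, String> {
        match self.tokens.get(self.index) {
            Some(Token::Word(w)) => {
                self.index += 1;
                Ok(w.clone())
            }
            other => Err(format!("expected identifier, found {:?}", other)),
        }
    }

    /// The simplified comma-separated-list loop used throughout the diff.
    fn parse_word_list(&mut self) -> Result<Vec<String>, String> {
        let mut words = vec![];
        loop {
            words.push(self.parse_word()?);
            if !self.consume_token(&Token::Comma) {
                break;
            }
        }
        Ok(words)
    }
}

fn main() {
    let tokens = vec![
        Token::Word("foo".into()),
        Token::Comma,
        Token::Word("bar".into()),
    ];
    let mut cursor = Cursor { tokens: &tokens, index: 0 };
    assert_eq!(cursor.parse_word_list().unwrap(), vec!["foo", "bar"]);
}
```

Note the semantics this loop pins down, matching the rewritten `parse_list_of_ids` and DROP handling: at least one item is required, and every separator must be followed by another item, so a trailing comma is a parse error rather than being silently skipped.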