
Commit 518c883

Merge pull request #110 from nickolay/pr/cleanups
Minor code clean-ups
2 parents: 2d1e05e + 9976871

File tree (3 files changed: +80 / -108 lines)

  src/sqlparser.rs
  src/sqltokenizer.rs
  tests/sqlparser_common.rs

src/sqlparser.rs

Lines changed: 13 additions & 36 deletions
@@ -894,15 +894,10 @@ impl Parser {
             ));
         };
         let if_exists = self.parse_keywords(vec!["IF", "EXISTS"]);
-        let mut names = vec![self.parse_object_name()?];
+        let mut names = vec![];
         loop {
-            let token = &self.next_token();
-            if let Some(Token::Comma) = token {
-                names.push(self.parse_object_name()?)
-            } else {
-                if token.is_some() {
-                    self.prev_token();
-                }
+            names.push(self.parse_object_name()?);
+            if !self.consume_token(&Token::Comma) {
                 break;
             }
         }
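Note: the replacement leans on `Parser::consume_token`, which (as its uses in this diff imply) consumes the next token only when it matches and returns whether it did. That turns "parse one item, keep going only while a comma follows" into a two-line loop body. A minimal standalone sketch of the idiom, with `Peekable::next_if_eq` standing in for `consume_token` and a hypothetical `parse_item` closure standing in for `parse_object_name`/`parse_identifier`:

    // Sketch of the comma-separated-list idiom this commit converges on:
    // parse one item, then keep looping only while a separator gets consumed.
    use std::iter::Peekable;

    fn parse_comma_separated<I: Iterator<Item = char>>(
        chars: &mut Peekable<I>,
        mut parse_item: impl FnMut(&mut Peekable<I>) -> Result<char, String>,
    ) -> Result<Vec<char>, String> {
        let mut items = vec![];
        loop {
            items.push(parse_item(chars)?); // at least one item is required
            if chars.next_if_eq(&',').is_none() {
                break; // next char is not a comma: the list is complete
            }
        }
        Ok(items)
    }

    fn main() {
        let mut chars = "a,b,c".chars().peekable();
        let items = parse_comma_separated(&mut chars, |c| {
            c.next().ok_or_else(|| "expected an item".to_string())
        });
        assert_eq!(items, Ok(vec!['a', 'b', 'c']));
    }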
@@ -1086,10 +1081,9 @@ impl Parser {
             self.expect_token(&Token::Eq)?;
             let value = self.parse_value()?;
             options.push(SQLOption { name, value });
-            match self.peek_token() {
-                Some(Token::Comma) => self.next_token(),
-                _ => break,
-            };
+            if !self.consume_token(&Token::Comma) {
+                break;
+            }
         }
         self.expect_token(&Token::RParen)?;
         Ok(options)
@@ -1355,29 +1349,13 @@ impl Parser {
     /// Parse one or more identifiers with the specified separator between them
     pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result<Vec<SQLIdent>, ParserError> {
         let mut idents = vec![];
-        let mut expect_identifier = true;
         loop {
-            let token = &self.next_token();
-            match token {
-                Some(Token::SQLWord(s)) if expect_identifier => {
-                    expect_identifier = false;
-                    idents.push(s.as_sql_ident());
-                }
-                Some(token) if token == separator && !expect_identifier => {
-                    expect_identifier = true;
-                    continue;
-                }
-                _ => {
-                    self.prev_token();
-                    break;
-                }
+            idents.push(self.parse_identifier()?);
+            if !self.consume_token(separator) {
+                break;
             }
         }
-        if expect_identifier {
-            self.expected("identifier", self.peek_token())
-        } else {
-            Ok(idents)
-        }
+        Ok(idents)
     }

     /// Parse a possibly qualified, possibly quoted identifier, e.g.
@@ -1920,10 +1898,9 @@ impl Parser {
             self.expect_token(&Token::LParen)?;
             values.push(self.parse_expr_list()?);
             self.expect_token(&Token::RParen)?;
-            match self.peek_token() {
-                Some(Token::Comma) => self.next_token(),
-                _ => break,
-            };
+            if !self.consume_token(&Token::Comma) {
+                break;
+            }
         }
         Ok(SQLValues(values))
     }

src/sqltokenizer.rs

Lines changed: 64 additions & 69 deletions
@@ -319,29 +319,25 @@ impl<'a> Tokenizer<'a> {
             }
             // delimited (quoted) identifier
             quote_start if self.dialect.is_delimited_identifier_start(quote_start) => {
-                let mut s = String::new();
                 chars.next(); // consume the opening quote
                 let quote_end = SQLWord::matching_end_quote(quote_start);
-                while let Some(ch) = chars.next() {
-                    match ch {
-                        c if c == quote_end => break,
-                        _ => s.push(ch),
-                    }
+                let s = peeking_take_while(chars, |ch| ch != quote_end);
+                if chars.next() == Some(quote_end) {
+                    Ok(Some(Token::make_word(&s, Some(quote_start))))
+                } else {
+                    Err(TokenizerError(format!(
+                        "Expected close delimiter '{}' before EOF.",
+                        quote_end
+                    )))
                 }
-                Ok(Some(Token::make_word(&s, Some(quote_start))))
             }
             // numbers
             '0'..='9' => {
-                let mut s = String::new();
-                while let Some(&ch) = chars.peek() {
-                    match ch {
-                        '0'..='9' | '.' => {
-                            chars.next(); // consume
-                            s.push(ch);
-                        }
-                        _ => break,
-                    }
-                }
+                // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal
+                let s = peeking_take_while(chars, |ch| match ch {
+                    '0'..='9' | '.' => true,
+                    _ => false,
+                });
                 Ok(Some(Token::Number(s)))
             }
             // punctuation
@@ -354,22 +350,12 @@ impl<'a> Tokenizer<'a> {
                 match chars.peek() {
                     Some('-') => {
                         chars.next(); // consume the second '-', starting a single-line comment
-                        let mut s = String::new();
-                        loop {
-                            match chars.next() {
-                                Some(ch) if ch != '\n' => {
-                                    s.push(ch);
-                                }
-                                other => {
-                                    if other.is_some() {
-                                        s.push('\n');
-                                    }
-                                    break Ok(Some(Token::Whitespace(
-                                        Whitespace::SingleLineComment(s),
-                                    )));
-                                }
-                            }
+                        let mut s = peeking_take_while(chars, |ch| ch != '\n');
+                        if let Some(ch) = chars.next() {
+                            assert_eq!(ch, '\n');
+                            s.push(ch);
                         }
+                        Ok(Some(Token::Whitespace(Whitespace::SingleLineComment(s))))
                     }
                     // a regular '-' operator
                     _ => Ok(Some(Token::Minus)),
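Note: after `peeking_take_while(chars, |ch| ch != '\n')` returns, the stream is either at EOF or positioned exactly on a '\n', so the `assert_eq!` cannot fire; the newline is pushed so the comment token preserves exactly the text it consumed. A standalone sketch of that scan, with the helper's loop inlined:

    // Sketch of the single-line-comment scan, after "--" has been consumed.
    fn main() {
        let mut chars = "-- hi\nSELECT 1".chars().peekable();
        chars.next(); // consume the first '-'
        chars.next(); // consume the second '-'
        let mut s = String::new();
        while let Some(&ch) = chars.peek() {
            if ch != '\n' {
                chars.next();
                s.push(ch);
            } else {
                break;
            }
        }
        // The scan stopped at EOF or at '\n'; keep the newline in the token.
        if let Some(ch) = chars.next() {
            assert_eq!(ch, '\n');
            s.push(ch);
        }
        assert_eq!(s, " hi\n");
        assert_eq!(chars.next(), Some('S')); // tokenizing resumes after the comment
    }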
@@ -394,14 +380,8 @@ impl<'a> Tokenizer<'a> {
                 '!' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            '=' => self.consume_and_return(chars, Token::Neq),
-                            _ => Err(TokenizerError(format!(
-                                "Tokenizer Error at Line: {}, Col: {}",
-                                self.line, self.col
-                            ))),
-                        },
-                        None => Err(TokenizerError(format!(
+                        Some('=') => self.consume_and_return(chars, Token::Neq),
+                        _ => Err(TokenizerError(format!(
                             "Tokenizer Error at Line: {}, Col: {}",
                             self.line, self.col
                         ))),
@@ -410,39 +390,27 @@ impl<'a> Tokenizer<'a> {
                 '<' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            '=' => self.consume_and_return(chars, Token::LtEq),
-                            '>' => self.consume_and_return(chars, Token::Neq),
-                            _ => Ok(Some(Token::Lt)),
-                        },
-                        None => Ok(Some(Token::Lt)),
+                        Some('=') => self.consume_and_return(chars, Token::LtEq),
+                        Some('>') => self.consume_and_return(chars, Token::Neq),
+                        _ => Ok(Some(Token::Lt)),
                     }
                 }
                 '>' => {
                     chars.next(); // consume
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            '=' => self.consume_and_return(chars, Token::GtEq),
-                            _ => Ok(Some(Token::Gt)),
-                        },
-                        None => Ok(Some(Token::Gt)),
+                        Some('=') => self.consume_and_return(chars, Token::GtEq),
+                        _ => Ok(Some(Token::Gt)),
                     }
                 }
-                // colon
                 ':' => {
                     chars.next();
                     match chars.peek() {
-                        Some(&ch) => match ch {
-                            // double colon
-                            ':' => self.consume_and_return(chars, Token::DoubleColon),
-                            _ => Ok(Some(Token::Colon)),
-                        },
-                        None => Ok(Some(Token::Colon)),
+                        Some(':') => self.consume_and_return(chars, Token::DoubleColon),
+                        _ => Ok(Some(Token::Colon)),
                     }
                 }
                 ';' => self.consume_and_return(chars, Token::SemiColon),
                 '\\' => self.consume_and_return(chars, Token::Backslash),
-                // brakets
                 '[' => self.consume_and_return(chars, Token::LBracket),
                 ']' => self.consume_and_return(chars, Token::RBracket),
                 '&' => self.consume_and_return(chars, Token::Ampersand),
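Note: the collapsed arms rely on match ergonomics: `chars.peek()` yields `Option<&char>`, and a literal pattern such as `Some('=')` matches through the reference, so the intermediate `Some(&ch) => match ch { ... }` layer and its duplicated `None` fallback are unnecessary. A quick standalone check:

    fn main() {
        let mut chars = "=>".chars().peekable();
        // peek() returns Option<&char>; the literal '=' matches through the
        // reference, so no `Some(&ch) => match ch` nesting is needed.
        match chars.peek() {
            Some('=') => println!("saw '='"),
            _ => unreachable!(),
        }
    }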
@@ -456,16 +424,10 @@ impl<'a> Tokenizer<'a> {

     /// Tokenize an identifier or keyword, after the first char is already consumed.
     fn tokenize_word(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
-        let mut s = String::new();
-        s.push(first_char);
-        while let Some(&ch) = chars.peek() {
-            if self.dialect.is_identifier_part(ch) {
-                chars.next(); // consume
-                s.push(ch);
-            } else {
-                break;
-            }
-        }
+        let mut s = first_char.to_string();
+        s.push_str(&peeking_take_while(chars, |ch| {
+            self.dialect.is_identifier_part(ch)
+        }));
         s
     }

@@ -539,6 +501,25 @@ impl<'a> Tokenizer<'a> {
     }
 }

+/// Read from `chars` until `predicate` returns `false` or EOF is hit.
+/// Return the characters read as String, and keep the first non-matching
+/// char available as `chars.next()`.
+fn peeking_take_while(
+    chars: &mut Peekable<Chars<'_>>,
+    mut predicate: impl FnMut(char) -> bool,
+) -> String {
+    let mut s = String::new();
+    while let Some(&ch) = chars.peek() {
+        if predicate(ch) {
+            chars.next(); // consume
+            s.push(ch);
+        } else {
+            break;
+        }
+    }
+    s
+}
+
 #[cfg(test)]
 mod tests {
     use super::super::dialect::GenericSqlDialect;
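Note: because the helper only peeks before consuming, the first character that fails the predicate stays in the stream for the caller, which is what lets each call site hand off cleanly to the next token. A small self-contained demonstration, duplicating the helper's body verbatim so it runs on its own:

    use std::iter::Peekable;
    use std::str::Chars;

    // Same body as the helper introduced by this commit.
    fn peeking_take_while(
        chars: &mut Peekable<Chars<'_>>,
        mut predicate: impl FnMut(char) -> bool,
    ) -> String {
        let mut s = String::new();
        while let Some(&ch) = chars.peek() {
            if predicate(ch) {
                chars.next(); // consume only after the peek matched
                s.push(ch);
            } else {
                break;
            }
        }
        s
    }

    fn main() {
        let mut chars = "123abc".chars().peekable();
        let digits = peeking_take_while(&mut chars, |ch| ch.is_ascii_digit());
        assert_eq!(digits, "123");
        // The first non-matching char was only peeked, never consumed:
        assert_eq!(chars.next(), Some('a'));
    }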
@@ -768,6 +749,20 @@ mod tests {
         compare(expected, tokens);
     }

+    #[test]
+    fn tokenize_mismatched_quotes() {
+        let sql = String::from("\"foo");
+
+        let dialect = GenericSqlDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        assert_eq!(
+            tokenizer.tokenize(),
+            Err(TokenizerError(
+                "Expected close delimiter '\"' before EOF.".to_string(),
+            ))
+        );
+    }
+
     #[test]
     fn tokenize_newlines() {
         let sql = String::from("line1\nline2\rline3\r\nline4\r");

tests/sqlparser_common.rs

Lines changed: 3 additions & 3 deletions
@@ -1994,7 +1994,7 @@ fn parse_drop_table() {
             assert_eq!(SQLObjectType::Table, object_type);
             assert_eq!(
                 vec!["foo"],
-                names.iter().map(|n| n.to_string()).collect::<Vec<_>>()
+                names.iter().map(ToString::to_string).collect::<Vec<_>>()
             );
             assert_eq!(false, cascade);
         }
@@ -2013,7 +2013,7 @@ fn parse_drop_table() {
             assert_eq!(SQLObjectType::Table, object_type);
             assert_eq!(
                 vec!["foo", "bar"],
-                names.iter().map(|n| n.to_string()).collect::<Vec<_>>()
+                names.iter().map(ToString::to_string).collect::<Vec<_>>()
             );
             assert_eq!(true, cascade);
         }
@@ -2042,7 +2042,7 @@ fn parse_drop_view() {
         } => {
             assert_eq!(
                 vec!["myschema.myview"],
-                names.iter().map(|n| n.to_string()).collect::<Vec<_>>()
+                names.iter().map(ToString::to_string).collect::<Vec<_>>()
             );
             assert_eq!(SQLObjectType::View, object_type);
         }
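Note: passing the `ToString::to_string` path to `map` is behavior-preserving; a method path is already a function item, so the closure wrapper added nothing. For instance:

    fn main() {
        let names = vec!["foo", "bar"];
        let with_closure: Vec<String> = names.iter().map(|n| n.to_string()).collect();
        let with_path: Vec<String> = names.iter().map(ToString::to_string).collect();
        assert_eq!(with_closure, with_path);
    }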
