Skip to content

Commit 64b1ea7

Browse files
authored
Merge pull request #54 from nickolay/windows-newlines
Support \r and \r\n line breaks in tokenizer
2 parents 07d66a9 + bf3110f commit 64b1ea7

File tree

1 file changed

+28
-9
lines changed

1 file changed

+28
-9
lines changed

src/sqltokenizer.rs

Lines changed: 28 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -260,16 +260,15 @@ impl<'a> Tokenizer<'a> {
260 260
//println!("next_token: {:?}", chars.peek());
261 261
match chars.peek() {
262 262
Some(&ch) => match ch {
263-
' ' => {
264-
chars.next();
265-
Ok(Some(Token::Whitespace(Whitespace::Space)))
266-
}
267-
'\t' => {
268-
chars.next();
269-
Ok(Some(Token::Whitespace(Whitespace::Tab)))
270-
}
271-
'\n' => {
263+
' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
264+
'\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
265+
'\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
266+
'\r' => {
267+
// Emit a single Whitespace::Newline token for \r and \r\n
272 268
chars.next();
269+
if let Some('\n') = chars.peek() {
270+
chars.next();
271+
}
273 272
Ok(Some(Token::Whitespace(Whitespace::Newline)))
274 273
}
275 274
'N' => {
@@ -749,6 +748,26 @@ mod tests {
749 748
compare(expected, tokens);
750 749
}
751 750

751+
#[test]
752+
fn tokenize_newlines() {
753+
let sql = String::from("line1\nline2\rline3\r\nline4\r");
754+
755+
let dialect = GenericSqlDialect {};
756+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
757+
let tokens = tokenizer.tokenize().unwrap();
758+
let expected = vec![
759+
Token::make_word("line1", None),
760+
Token::Whitespace(Whitespace::Newline),
761+
Token::make_word("line2", None),
762+
Token::Whitespace(Whitespace::Newline),
763+
Token::make_word("line3", None),
764+
Token::Whitespace(Whitespace::Newline),
765+
Token::make_word("line4", None),
766+
Token::Whitespace(Whitespace::Newline),
767+
];
768+
compare(expected, tokens);
769+
}
770+
752 771
fn compare(expected: Vec<Token>, actual: Vec<Token>) {
753 772
//println!("------------------------------");
754 773
//println!("tokens = {:?}", actual);

0 commit comments

Comments
 (0)