Skip to content

Commit b984fc3

Browse files
michael-2956 authored and mobuchowski committed
Count characters instead of bytes (apache#529)
* Count characters instead of bytes
* cargo fmt
* add tests to PR apache#529
1 parent 521e00d commit b984fc3

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

src/tokenizer.rs

Lines changed: 25 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -354,11 +354,15 @@ impl<'a> Tokenizer<'a> {
354354
}
355355

356356
Token::Whitespace(Whitespace::Tab) => self.col += 4,
357-
Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64,
358-
Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2,
359-
Token::Number(s, _) => self.col += s.len() as u64,
360-
Token::SingleQuotedString(s) => self.col += s.len() as u64,
361-
Token::Placeholder(s) => self.col += s.len() as u64,
357+
Token::Word(w) if w.quote_style == None => {
358+
self.col += w.value.chars().count() as u64
359+
}
360+
Token::Word(w) if w.quote_style != None => {
361+
self.col += w.value.chars().count() as u64 + 2
362+
}
363+
Token::Number(s, _) => self.col += s.chars().count() as u64,
364+
Token::SingleQuotedString(s) => self.col += s.chars().count() as u64,
365+
Token::Placeholder(s) => self.col += s.chars().count() as u64,
362366
_ => self.col += 1,
363367
}
364368

@@ -1220,6 +1224,22 @@ mod tests {
12201224
);
12211225
}
12221226

1227+
#[test]
1228+
fn tokenize_unterminated_string_literal_utf8() {
1229+
let sql = String::from("SELECT \"なにか\" FROM Y WHERE \"なにか\" = 'test;");
1230+
1231+
let dialect = GenericDialect {};
1232+
let mut tokenizer = Tokenizer::new(&dialect, &sql);
1233+
assert_eq!(
1234+
tokenizer.tokenize(),
1235+
Err(TokenizerError {
1236+
message: "Unterminated string literal".to_string(),
1237+
line: 1,
1238+
col: 35
1239+
})
1240+
);
1241+
}
1242+
12231243
#[test]
12241244
fn tokenize_invalid_string_cols() {
12251245
let sql = String::from("\n\nSELECT * FROM table\tمصطفىh");

0 commit comments

Comments
 (0)