@@ -354,11 +354,15 @@ impl<'a> Tokenizer<'a> {
354
354
}
355
355
356
356
Token :: Whitespace ( Whitespace :: Tab ) => self . col += 4 ,
357
- Token :: Word ( w) if w. quote_style == None => self . col += w. value . len ( ) as u64 ,
358
- Token :: Word ( w) if w. quote_style != None => self . col += w. value . len ( ) as u64 + 2 ,
359
- Token :: Number ( s, _) => self . col += s. len ( ) as u64 ,
360
- Token :: SingleQuotedString ( s) => self . col += s. len ( ) as u64 ,
361
- Token :: Placeholder ( s) => self . col += s. len ( ) as u64 ,
357
+ Token :: Word ( w) if w. quote_style == None => {
358
+ self . col += w. value . chars ( ) . count ( ) as u64
359
+ }
360
+ Token :: Word ( w) if w. quote_style != None => {
361
+ self . col += w. value . chars ( ) . count ( ) as u64 + 2
362
+ }
363
+ Token :: Number ( s, _) => self . col += s. chars ( ) . count ( ) as u64 ,
364
+ Token :: SingleQuotedString ( s) => self . col += s. chars ( ) . count ( ) as u64 ,
365
+ Token :: Placeholder ( s) => self . col += s. chars ( ) . count ( ) as u64 ,
362
366
_ => self . col += 1 ,
363
367
}
364
368
@@ -1220,6 +1224,22 @@ mod tests {
1220
1224
) ;
1221
1225
}
1222
1226
1227
+ #[ test]
1228
+ fn tokenize_unterminated_string_literal_utf8 ( ) {
1229
+ let sql = String :: from ( "SELECT \" なにか\" FROM Y WHERE \" なにか\" = 'test;" ) ;
1230
+
1231
+ let dialect = GenericDialect { } ;
1232
+ let mut tokenizer = Tokenizer :: new ( & dialect, & sql) ;
1233
+ assert_eq ! (
1234
+ tokenizer. tokenize( ) ,
1235
+ Err ( TokenizerError {
1236
+ message: "Unterminated string literal" . to_string( ) ,
1237
+ line: 1 ,
1238
+ col: 35
1239
+ } )
1240
+ ) ;
1241
+ }
1242
+
1223
1243
#[ test]
1224
1244
fn tokenize_invalid_string_cols ( ) {
1225
1245
let sql = String :: from ( "\n \n SELECT * FROM table\t مصطفىh" ) ;
0 commit comments