@@ -1603,28 +1603,33 @@ impl<'a> Tokenizer<'a> {
1603
1603
) -> Result < Option < Token > , TokenizerError > {
1604
1604
let mut s = String :: new ( ) ;
1605
1605
let mut nested = 1 ;
1606
- let mut last_ch = ' ' ;
1606
+ let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
1607
1607
1608
1608
loop {
1609
1609
match chars. next ( ) {
1610
- Some ( ch) => {
1611
- if last_ch == '/' && ch == '*' {
1612
- nested += 1 ;
1613
- } else if last_ch == '*' && ch == '/' {
1614
- nested -= 1 ;
1615
- if nested == 0 {
1616
- s. pop ( ) ;
1617
- break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
1618
- }
1610
+ Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
1611
+ chars. next ( ) ; // consume the '*'
1612
+ s. push ( '/' ) ;
1613
+ s. push ( '*' ) ;
1614
+ nested += 1 ;
1615
+ }
1616
+ Some ( '*' ) if matches ! ( chars. peek( ) , Some ( '/' ) ) => {
1617
+ chars. next ( ) ; // consume the '/'
1618
+ nested -= 1 ;
1619
+ if nested == 0 {
1620
+ break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
1619
1621
}
1622
+ s. push ( '*' ) ;
1623
+ s. push ( '/' ) ;
1624
+ }
1625
+ Some ( ch) => {
1620
1626
s. push ( ch) ;
1621
- last_ch = ch;
1622
1627
}
1623
1628
None => {
1624
1629
break self . tokenizer_error (
1625
1630
chars. location ( ) ,
1626
1631
"Unexpected EOF while in a multi-line comment" ,
1627
- )
1632
+ ) ;
1628
1633
}
1629
1634
}
1630
1635
}
@@ -2466,18 +2471,90 @@ mod tests {
2466
2471
2467
2472
/// Nested `/* ... */` comments under `GenericDialect`: the comment token must
/// end at the `*/` that balances the outermost `/*`, and the inner delimiters
/// must be kept verbatim in the comment text.
#[test]
fn tokenize_nested_multiline_comment() {
    let dialect = GenericDialect {};

    // Tokenizes `sql` and compares the result with `expected`.
    let check = |sql: &str, expected: Vec<Token>| {
        let actual = Tokenizer::new(&dialect, sql).tokenize().unwrap();
        compare(expected, actual);
    };

    // The comment closes before ` /comment*/1`, so the tail is tokenized as
    // ordinary SQL (space, `/`, the word `comment`, `*`, `/`, `1`).
    check(
        "0/*multi-line\n * \n /* comment \n /*comment*/*/ */ /comment*/1",
        vec![
            Token::Number(String::from("0"), false),
            Token::Whitespace(Whitespace::MultiLineComment(String::from(
                "multi-line\n * \n /* comment \n /*comment*/*/ ",
            ))),
            Token::Whitespace(Whitespace::Space),
            Token::Div,
            Token::Word(Word {
                value: String::from("comment"),
                quote_style: None,
                keyword: Keyword::COMMENT,
            }),
            Token::Mul,
            Token::Div,
            Token::Number(String::from("1"), false),
        ],
    );

    // Here the nesting only balances at the final `*/`, so everything between
    // the leading `0` and the trailing `1` is a single comment.
    check(
        "0/*multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/*/1",
        vec![
            Token::Number(String::from("0"), false),
            Token::Whitespace(Whitespace::MultiLineComment(String::from(
                "multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/",
            ))),
            Token::Number(String::from("1"), false),
        ],
    );

    // A single level of nesting embedded in a statement.
    check(
        "SELECT 1/* a /* b */ c */0",
        vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            Token::Number(String::from("1"), false),
            Token::Whitespace(Whitespace::MultiLineComment(String::from(" a /* b */ c "))),
            Token::Number(String::from("0"), false),
        ],
    );
}
2522
+
2523
/// An empty comment nested directly inside another one (`/*/**/*/`) must
/// produce a single comment token whose text is the inner `/**/`.
#[test]
fn tokenize_nested_multiline_comment_empty() {
    let dialect = GenericDialect {};
    let query = "select 1/*/**/*/0";

    let want = vec![
        Token::make_keyword("select"),
        Token::Whitespace(Whitespace::Space),
        Token::Number(String::from("1"), false),
        Token::Whitespace(Whitespace::MultiLineComment(String::from("/**/"))),
        Token::Number(String::from("0"), false),
    ];
    let got = Tokenizer::new(&dialect, query).tokenize().unwrap();

    compare(want, got);
}
2539
+
2540
/// With `SQLiteDialect` the expected tokens treat an inner `/*` as plain
/// comment text: the comment ends at the first `*/`, and the leftover `*/`
/// is tokenized as a multiplication sign followed by a division sign.
#[test]
fn tokenize_nested_comments_if_not_supported() {
    let dialect = SQLiteDialect {};
    let query = "SELECT 1/*/* nested comment */*/0";

    let want = vec![
        Token::make_keyword("SELECT"),
        Token::Whitespace(Whitespace::Space),
        Token::Number(String::from("1"), false),
        Token::Whitespace(Whitespace::MultiLineComment(String::from(
            "/* nested comment ",
        ))),
        Token::Mul,
        Token::Div,
        Token::Number(String::from("0"), false),
    ];
    let got = Tokenizer::new(&dialect, query).tokenize().unwrap();

    compare(want, got);
}
2482
2559
2483
2560
#[ test]
0 commit comments