@@ -7,8 +7,9 @@ import core.Names.SimpleName
7
7
import Scanners ._
8
8
import util .SourceFile
9
9
import JavaTokens ._
10
- import scala .annotation .{ switch , tailrec }
10
+ import scala .annotation .{switch , tailrec }
11
11
import util .Chars ._
12
+ import PartialFunction .cond
12
13
13
14
object JavaScanners {
14
15
@@ -31,23 +32,29 @@ object JavaScanners {
31
32
// Get next token ------------------------------------------------------------
32
33
33
34
/** Advance the scanner to the next token.
 *
 *  If a token has been buffered in `next`, it becomes the current token and the
 *  buffer is marked empty again. Otherwise `lastOffset` is updated from
 *  `lastCharOffset` and a fresh token is read with `fetchToken()`.
 */
def nextToken(): Unit =
  val hasBufferedToken = next.token != EMPTY
  if hasBufferedToken then
    // Re-use the token that was stashed in `next`.
    copyFrom(next)
    next.token = EMPTY
  else
    lastOffset = lastCharOffset
    fetchToken()
42
41
43
/** The token following the current one, without consuming the current token.
 *
 *  Steps forward with `lookAhead()`, records the token found there, and then
 *  restores the scanner with `reset()`.
 */
def lookaheadToken: Int = {
  lookAhead()
  val upcoming = token
  reset()
  upcoming
}
47
+
48
/** Save the current token data into `prev`, then advance to the next token. */
def lookAhead() = {
  prev.copyFrom(this)
  nextToken()
}
51
+
52
/** Undo a `lookAhead()`: stash the current token data in `next`, then restore
 *  this scanner's token data from `prev`.
 */
def reset() = {
  next.copyFrom(this)
  copyFrom(prev)
}
55
+
56
/** A scanner over the same `source`, started at `charOffset - 1` of the
 *  enclosing scanner. Its initialization only reads the first character and
 *  does not fetch a first token.
 */
class LookaheadScanner extends JavaScanner(source, startFrom = charOffset - 1) {
  override protected def initialize(): Unit =
    nextChar()
}
51
58
52
59
/** read next token
53
60
*/
@@ -93,15 +100,23 @@ object JavaScanners {
93
100
94
101
case '\" ' =>
95
102
nextChar()
96
- while (ch != '\" ' && (isUnicodeEscape || ch != CR && ch != LF && ch != SU ))
97
- getlitch()
98
- if (ch == '\" ' ) {
99
- token = STRINGLIT
100
- setStrVal()
101
- nextChar()
102
- }
103
+ if ch != '\" ' then // "..." non-empty string literal
104
+ while ch != '\" ' && (isUnicodeEscape || ch != CR && ch != LF && ch != SU ) do
105
+ getlitch()
106
+ if ch == '\" ' then
107
+ token = STRINGLIT
108
+ setStrVal()
109
+ nextChar()
110
+ else
111
+ error(" unclosed string literal" )
103
112
else
104
- error(" unclosed string literal" )
113
+ nextChar()
114
+ if ch != '\" ' then // "" empty string literal
115
+ token = STRINGLIT
116
+ setStrVal()
117
+ else
118
+ nextChar()
119
+ getTextBlock()
105
120
106
121
case '\' ' =>
107
122
nextChar()
@@ -399,46 +414,177 @@ object JavaScanners {
399
414
400
415
// Literals -----------------------------------------------------------------
401
416
402
/** Read the next character of a character or string literal, with error
 *  reporting and buffering enabled and text-block handling disabled.
 */
protected def getlitch(): Unit =
  getlitch(scanOnly = false, inTextBlock = false)
420
+
421
/** Read the next character of a character or string literal, decoding a
 *  backslash escape if one starts at the current character.
 *
 *  @param scanOnly    when true, no errors are emitted and nothing is added to
 *                     the literal buffer; the input is only advanced
 *  @param inTextBlock when true, a backslash directly followed by CR or LF is
 *                     accepted and yields no character
 */
def getlitch(scanOnly: Boolean, inTextBlock: Boolean): Unit =
  /** Decode an octal escape of up to three digits; the third digit is only
   *  taken when the first one is at most '3'.
   */
  def octal: Char =
    val first: Char = ch
    var value: Int = digit2int(ch, 8)
    nextChar()
    if '0' <= ch && ch <= '7' then
      value = value * 8 + digit2int(ch, 8)
      nextChar()
      if first <= '3' && '0' <= ch && ch <= '7' then
        value = value * 8 + digit2int(ch, 8)
        nextChar()
    value.toChar
  end octal

  /** Decode the escape that follows a backslash. Yields 0 for an escaped
   *  line terminator inside a text block.
   */
  def greatEscape: Char =
    nextChar() // step over the backslash
    if '0' <= ch && ch <= '7' then octal
    else
      val unescaped = ch match
        case 'b'  => '\b'
        case 's'  => ' '
        case 't'  => '\t'
        case 'n'  => '\n'
        case 'f'  => '\f'
        case 'r'  => '\r'
        case '\"' => '\"'
        case '\'' => '\''
        case '\\' => '\\'
        case CR | LF if inTextBlock =>
          // The line terminator is only consumed when not merely scanning.
          if !scanOnly then nextChar()
          0
        case _ =>
          if !scanOnly then error("invalid escape character", charOffset - 1)
          ch
      // Step over the escape letter, except in the escaped-newline case.
      if unescaped != 0 then nextChar()
      unescaped
  end greatEscape

  // begin getlitch
  val startsEscape = ch == '\\'
  val litChar: Char =
    if startsEscape then greatEscape
    else
      val plain = ch
      nextChar()
      plain
  if !scanOnly && litChar != 0 then putChar(litChar)
end getlitch
474
+
475
/** Read a triple-quote delimited text block, starting after the first three double quotes.
 *
 *  The block is processed in two passes. A `LookaheadScanner` first walks the
 *  whole block to find the offset of the closing delimiter and the width of the
 *  white space prefix shared by all non-blank lines. The real scanner then
 *  re-reads the block, dropping that prefix and any trailing white space from
 *  each line, normalizing line terminators to `'\n'`, and joining lines that end
 *  in a backslash.
 *
 *  On success `token` is set to `STRINGLIT`, the string value is set, and the
 *  closing delimiter is consumed. An error is reported, and the method returns
 *  early, if the opening delimiter is not followed by a line terminator or if
 *  the block is never closed.
 */
private def getTextBlock(): Unit = {
  // Open delimiter is followed by optional space, then a newline
  while (ch == ' ' || ch == '\t' || ch == FF) {
    nextChar()
  }
  if (ch != LF && ch != CR) { // CR-LF is already normalized into LF by `JavaCharArrayReader`
    error("illegal text block open delimiter sequence, missing line terminator")
    return
  }
  nextChar()

  /* Do a lookahead scan over the full text block to:
   * - compute common white space prefix
   * - find the offset where the text block ends
   */
  var commonWhiteSpacePrefix = Int.MaxValue
  var blockEndOffset = 0
  var blockClosed = false
  var lineWhiteSpacePrefix = 0
  var lineIsOnlyWhitespace = true
  val in = LookaheadScanner()
  // Only `in` advances in this loop, so end of input must be detected on `in`.
  // Testing this scanner's own `ch` would never observe SU, and an unclosed
  // block would keep the loop running instead of reaching the error below.
  while (!blockClosed && (in.isUnicodeEscape || in.ch != SU)) {
    if (in.ch == '\"') { // Potential end of the block
      in.nextChar()
      if (in.ch == '\"') {
        in.nextChar()
        if (in.ch == '\"') {
          blockClosed = true
          commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
          blockEndOffset = in.charOffset - 2
        }
      }

      // Not the end of the block - just a single or double " character
      if (!blockClosed) {
        lineIsOnlyWhitespace = false
      }
    } else if (in.ch == CR || in.ch == LF) { // new line in the block
      in.nextChar()
      // Blank lines do not take part in the common prefix computation
      if (!lineIsOnlyWhitespace) {
        commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
      }
      lineWhiteSpacePrefix = 0
      lineIsOnlyWhitespace = true
    } else if (lineIsOnlyWhitespace && Character.isWhitespace(in.ch)) { // extend white space prefix
      in.nextChar()
      lineWhiteSpacePrefix += 1
    } else {
      lineIsOnlyWhitespace = false
      in.getlitch(scanOnly = true, inTextBlock = true)
    }
  }

  // Bail out if the block never did have an end
  if (!blockClosed) {
    error("unclosed text block")
    return
  }

  // Second pass: construct the literal string value this time
  while (charOffset < blockEndOffset) {
    // Drop the line's leading whitespace
    var remainingPrefix = commonWhiteSpacePrefix
    while (remainingPrefix > 0 && ch != CR && ch != LF && charOffset < blockEndOffset) {
      nextChar()
      remainingPrefix -= 1
    }

    var trailingWhitespaceLength = 0
    var escapedNewline = false // Does the line end with `\`?
    while (ch != CR && ch != LF && charOffset < blockEndOffset && !escapedNewline) {
      if (Character.isWhitespace(ch)) {
        trailingWhitespaceLength += 1
      } else {
        trailingWhitespaceLength = 0
      }

      // Detect if the line is about to end with `\`
      if ch == '\\' && cond(lookaheadChar()) { case CR | LF => true } then
        escapedNewline = true

      getlitch(scanOnly = false, inTextBlock = true)
    }

    // Remove the last N characters from the buffer
    def popNChars(n: Int): Unit =
      if n > 0 then
        val text = litBuf.toString
        litBuf.clear()
        val trimmed = text.substring(0, text.length - (n min text.length))
        trimmed.nn.foreach(litBuf.append)

    // Drop the line's trailing whitespace
    popNChars(trailingWhitespaceLength)

    // Normalize line terminators
    if ((ch == CR || ch == LF) && !escapedNewline) {
      nextChar()
      putChar('\n')
    }
  }

  token = STRINGLIT
  setStrVal()

  // Trailing """
  nextChar()
  nextChar()
  nextChar()
}
end getTextBlock
442
588
443
589
/** read fractional part and exponent of floating point number
444
590
* if one is present.
@@ -585,8 +731,10 @@ object JavaScanners {
585
731
}
586
732
587
733
/* Initialization: read the first char, then the first token.
 * Kept in an overridable method; `LookaheadScanner` overrides it.
 */
protected def initialize(): Unit = {
  nextChar()
  nextToken()
}
initialize()
590
738
}
591
739
592
740
private val (lastKeywordStart, kwArray) = buildKeywordArray(keywords)
0 commit comments