Skip to content

Commit 213fdbc

Browse files
authored
Merge pull request #14882 from som-snytt/forward/text-blocks
Port text block support
2 parents fb6d004 + 1dc5fca commit 213fdbc

File tree

11 files changed

+475
-90
lines changed

11 files changed

+475
-90
lines changed

compiler/src/dotty/tools/dotc/config/CommandLineParser.scala

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,19 +95,17 @@ object CommandLineParser:
9595

9696
def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x))
9797

98-
/**
99-
* Expands all arguments starting with @ to the contents of the
100-
* file named like each argument.
98+
/** Expands all arguments starting with @ to the contents of the file named like each argument.
10199
*/
102100
def expandArg(arg: String): List[String] =
103-
def stripComment(s: String) = s takeWhile (_ != '#')
104-
val path = Paths.get(arg stripPrefix "@")
105-
if (!Files.exists(path))
101+
val path = Paths.get(arg.stripPrefix("@"))
102+
if !Files.exists(path) then
106103
System.err.nn.println(s"Argument file ${path.nn.getFileName} could not be found")
107104
Nil
108105
else
109-
val lines = Files.readAllLines(path).nn // default to UTF-8 encoding
110-
val params = lines.asScala map stripComment mkString " "
106+
def stripComment(s: String) = s.indexOf('#') match { case -1 => s case i => s.substring(0, i) }
107+
val lines = Files.readAllLines(path).nn
108+
val params = lines.asScala.map(stripComment).filter(!_.nn.isEmpty).mkString(" ")
111109
tokenize(params)
112110

113111
class ParseException(msg: String) extends RuntimeException(msg)

compiler/src/dotty/tools/dotc/parsing/JavaScanners.scala

Lines changed: 197 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ import core.Names.SimpleName
77
import Scanners._
88
import util.SourceFile
99
import JavaTokens._
10-
import scala.annotation.{ switch, tailrec }
10+
import scala.annotation.{switch, tailrec}
1111
import util.Chars._
12+
import PartialFunction.cond
1213

1314
object JavaScanners {
1415

@@ -31,23 +32,29 @@ object JavaScanners {
3132
// Get next token ------------------------------------------------------------
3233

3334
def nextToken(): Unit =
34-
if (next.token == EMPTY) {
35+
if next.token == EMPTY then
3536
lastOffset = lastCharOffset
3637
fetchToken()
37-
}
38-
else {
39-
this copyFrom next
38+
else
39+
this.copyFrom(next)
4040
next.token = EMPTY
41-
}
4241

43-
def lookaheadToken: Int = {
44-
prev copyFrom this
45-
nextToken()
42+
def lookaheadToken: Int =
43+
lookAhead()
4644
val t = token
47-
next copyFrom this
48-
this copyFrom prev
45+
reset()
4946
t
50-
}
47+
48+
def lookAhead() =
49+
prev.copyFrom(this)
50+
nextToken()
51+
52+
def reset() =
53+
next.copyFrom(this)
54+
this.copyFrom(prev)
55+
56+
class LookaheadScanner extends JavaScanner(source, startFrom = charOffset - 1):
57+
override protected def initialize(): Unit = nextChar()
5158

5259
/** read next token
5360
*/
@@ -93,15 +100,23 @@ object JavaScanners {
93100

94101
case '\"' =>
95102
nextChar()
96-
while (ch != '\"' && (isUnicodeEscape || ch != CR && ch != LF && ch != SU))
97-
getlitch()
98-
if (ch == '\"') {
99-
token = STRINGLIT
100-
setStrVal()
101-
nextChar()
102-
}
103+
if ch != '\"' then // "..." non-empty string literal
104+
while ch != '\"' && (isUnicodeEscape || ch != CR && ch != LF && ch != SU) do
105+
getlitch()
106+
if ch == '\"' then
107+
token = STRINGLIT
108+
setStrVal()
109+
nextChar()
110+
else
111+
error("unclosed string literal")
103112
else
104-
error("unclosed string literal")
113+
nextChar()
114+
if ch != '\"' then // "" empty string literal
115+
token = STRINGLIT
116+
setStrVal()
117+
else
118+
nextChar()
119+
getTextBlock()
105120

106121
case '\'' =>
107122
nextChar()
@@ -399,46 +414,177 @@ object JavaScanners {
399414

400415
// Literals -----------------------------------------------------------------
401416

402-
/** read next character in character or string literal:
417+
/** Read next character in character or string literal.
403418
*/
404-
protected def getlitch(): Unit =
405-
if (ch == '\\') {
419+
protected def getlitch(): Unit = getlitch(scanOnly = false, inTextBlock = false)
420+
421+
/** Read next character in character or string literal.
422+
*
423+
* @param scanOnly skip emitting errors or adding to the literal buffer
424+
* @param inTextBlock is this for a text block?
425+
*/
426+
def getlitch(scanOnly: Boolean, inTextBlock: Boolean): Unit =
427+
def octal: Char =
428+
val leadch: Char = ch
429+
var oct: Int = digit2int(ch, 8)
406430
nextChar()
407431
if ('0' <= ch && ch <= '7') {
408-
val leadch: Char = ch
409-
var oct: Int = digit2int(ch, 8)
432+
oct = oct * 8 + digit2int(ch, 8)
410433
nextChar()
411-
if ('0' <= ch && ch <= '7') {
434+
if (leadch <= '3' && '0' <= ch && ch <= '7') {
412435
oct = oct * 8 + digit2int(ch, 8)
413436
nextChar()
414-
if (leadch <= '3' && '0' <= ch && ch <= '7') {
415-
oct = oct * 8 + digit2int(ch, 8)
416-
nextChar()
437+
}
438+
}
439+
oct.asInstanceOf[Char]
440+
end octal
441+
def greatEscape: Char =
442+
nextChar()
443+
if '0' <= ch && ch <= '7' then octal
444+
else
445+
val x = ch match
446+
case 'b' => '\b'
447+
case 's' => ' '
448+
case 't' => '\t'
449+
case 'n' => '\n'
450+
case 'f' => '\f'
451+
case 'r' => '\r'
452+
case '\"' => '\"'
453+
case '\'' => '\''
454+
case '\\' => '\\'
455+
case CR | LF if inTextBlock =>
456+
if !scanOnly then nextChar()
457+
0
458+
case _ =>
459+
if !scanOnly then error("invalid escape character", charOffset - 1)
460+
ch
461+
if x != 0 then nextChar()
462+
x
463+
end greatEscape
464+
465+
// begin getlitch
466+
val c: Char =
467+
if ch == '\\' then greatEscape
468+
else
469+
val res = ch
470+
nextChar()
471+
res
472+
if c != 0 && !scanOnly then putChar(c)
473+
end getlitch
474+
475+
/** Read a triple-quote delimited text block, starting after the first three double quotes.
476+
*/
477+
private def getTextBlock(): Unit = {
478+
// Open delimiter is followed by optional space, then a newline
479+
while (ch == ' ' || ch == '\t' || ch == FF) {
480+
nextChar()
481+
}
482+
if (ch != LF && ch != CR) { // CR-LF is already normalized into LF by `JavaCharArrayReader`
483+
error("illegal text block open delimiter sequence, missing line terminator")
484+
return
485+
}
486+
nextChar()
487+
488+
/* Do a lookahead scan over the full text block to:
489+
* - compute common white space prefix
490+
* - find the offset where the text block ends
491+
*/
492+
var commonWhiteSpacePrefix = Int.MaxValue
493+
var blockEndOffset = 0
494+
var blockClosed = false
495+
var lineWhiteSpacePrefix = 0
496+
var lineIsOnlyWhitespace = true
497+
val in = LookaheadScanner()
498+
while (!blockClosed && (isUnicodeEscape || ch != SU)) {
499+
if (in.ch == '\"') { // Potential end of the block
500+
in.nextChar()
501+
if (in.ch == '\"') {
502+
in.nextChar()
503+
if (in.ch == '\"') {
504+
blockClosed = true
505+
commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
506+
blockEndOffset = in.charOffset - 2
417507
}
418508
}
419-
putChar(oct.asInstanceOf[Char])
509+
510+
// Not the end of the block - just a single or double " character
511+
if (!blockClosed) {
512+
lineIsOnlyWhitespace = false
513+
}
514+
} else if (in.ch == CR || in.ch == LF) { // new line in the block
515+
in.nextChar()
516+
if (!lineIsOnlyWhitespace) {
517+
commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
518+
}
519+
lineWhiteSpacePrefix = 0
520+
lineIsOnlyWhitespace = true
521+
} else if (lineIsOnlyWhitespace && Character.isWhitespace(in.ch)) { // extend white space prefix
522+
in.nextChar()
523+
lineWhiteSpacePrefix += 1
524+
} else {
525+
lineIsOnlyWhitespace = false
526+
in.getlitch(scanOnly = true, inTextBlock = true)
420527
}
421-
else {
422-
ch match {
423-
case 'b' => putChar('\b')
424-
case 't' => putChar('\t')
425-
case 'n' => putChar('\n')
426-
case 'f' => putChar('\f')
427-
case 'r' => putChar('\r')
428-
case '\"' => putChar('\"')
429-
case '\'' => putChar('\'')
430-
case '\\' => putChar('\\')
431-
case _ =>
432-
error("invalid escape character", charOffset - 1)
433-
putChar(ch)
528+
}
529+
530+
// Bail out if the block never did have an end
531+
if (!blockClosed) {
532+
error("unclosed text block")
533+
return
534+
}
535+
536+
// Second pass: construct the literal string value this time
537+
while (charOffset < blockEndOffset) {
538+
// Drop the line's leading whitespace
539+
var remainingPrefix = commonWhiteSpacePrefix
540+
while (remainingPrefix > 0 && ch != CR && ch != LF && charOffset < blockEndOffset) {
541+
nextChar()
542+
remainingPrefix -= 1
543+
}
544+
545+
var trailingWhitespaceLength = 0
546+
var escapedNewline = false // Does the line end with `\`?
547+
while (ch != CR && ch != LF && charOffset < blockEndOffset && !escapedNewline) {
548+
if (Character.isWhitespace(ch)) {
549+
trailingWhitespaceLength += 1
550+
} else {
551+
trailingWhitespaceLength = 0
434552
}
553+
554+
// Detect if the line is about to end with `\`
555+
if ch == '\\' && cond(lookaheadChar()) { case CR | LF => true } then
556+
escapedNewline = true
557+
558+
getlitch(scanOnly = false, inTextBlock = true)
559+
}
560+
561+
// Remove the last N characters from the buffer
562+
def popNChars(n: Int): Unit =
563+
if n > 0 then
564+
val text = litBuf.toString
565+
litBuf.clear()
566+
val trimmed = text.substring(0, text.length - (n min text.length))
567+
trimmed.nn.foreach(litBuf.append)
568+
569+
// Drop the line's trailing whitespace
570+
popNChars(trailingWhitespaceLength)
571+
572+
// Normalize line terminators
573+
if ((ch == CR || ch == LF) && !escapedNewline) {
435574
nextChar()
575+
putChar('\n')
436576
}
437577
}
438-
else {
439-
putChar(ch)
440-
nextChar()
441-
}
578+
579+
token = STRINGLIT
580+
setStrVal()
581+
582+
// Trailing """
583+
nextChar()
584+
nextChar()
585+
nextChar()
586+
}
587+
end getTextBlock
442588

443589
/** read fractional part and exponent of floating point number
444590
* if one is present.
@@ -585,8 +731,10 @@ object JavaScanners {
585731
}
586732

587733
/* Initialization: read first char, then first token */
588-
nextChar()
589-
nextToken()
734+
protected def initialize(): Unit =
735+
nextChar()
736+
nextToken()
737+
initialize()
590738
}
591739

592740
private val (lastKeywordStart, kwArray) = buildKeywordArray(keywords)

compiler/test/dotty/tools/dotc/transform/PatmatExhaustivityTest.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class PatmatExhaustivityTest {
2323
val options = List("-pagewidth", "80", "-color:never", "-Ystop-after:explicitSelf", "-classpath", TestConfiguration.basicClasspath)
2424

2525
private def compile(files: List[JPath]): Seq[String] = {
26-
val opts = toolArgsFor(files)
26+
val opts = toolArgsFor(files).get(ToolName.Scalac).getOrElse(Nil)
2727
val stringBuffer = new StringWriter()
2828
val printWriter = new PrintWriter(stringBuffer)
2929
val reporter = TestReporter.simplifiedReporter(printWriter)

compiler/test/dotty/tools/repl/ReplTest.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ extends ReplDriver(options, new PrintStream(out, true, StandardCharsets.UTF_8.na
6969

7070
val expectedOutput = lines.filter(nonBlank)
7171
val actualOutput = {
72-
val opts = toolArgsParse(lines.take(1))
72+
val opts = toolArgsFor(ToolName.Scalac)(lines.take(1))
7373
val (optsLine, inputLines) = if opts.isEmpty then ("", lines) else (lines.head, lines.drop(1))
7474
resetToInitial(opts)
7575

0 commit comments

Comments
 (0)