Skip to content

Commit 1c6fe6b

Browse files
authored
Merge pull request #15497 from som-snytt/forward/command-tokener
Command line parser respects outer escaped quote
2 parents 0632405 + 3a851c3 commit 1c6fe6b

File tree

2 files changed

+38
-16
lines changed

2 files changed

+38
-16
lines changed

compiler/src/dotty/tools/dotc/config/CommandLineParser.scala

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,44 @@
11
package dotty.tools.dotc.config
22

3-
import scala.annotation.tailrec
4-
import scala.collection.mutable.ArrayBuffer
53
import java.lang.Character.isWhitespace
64
import java.nio.file.{Files, Paths}
7-
import scala.jdk.CollectionConverters._
5+
import scala.annotation.tailrec
6+
import scala.collection.mutable.ArrayBuffer
7+
import scala.jdk.CollectionConverters.*
88

9-
/** A simple enough command line parser.
9+
/** Split a line of text using shell conventions.
1010
*/
1111
object CommandLineParser:
1212
inline private val DQ = '"'
1313
inline private val SQ = '\''
1414
inline private val EOF = -1
1515

16-
/** Split the line into tokens separated by whitespace or quotes.
16+
/** Split the line into tokens separated by whitespace.
17+
*
18+
* Single or double quotes can be embedded to preserve internal whitespace:
1719
*
18-
* Invoke `errorFn` with message on bad quote.
20+
* `""" echo "hello, world!" """` => "echo" :: "hello, world!" :: Nil
21+
* `""" echo hello,' 'world! """` => "echo" :: "hello, world!" :: Nil
22+
* `""" echo \"hello, world!\" """` => "echo" :: "\"hello," :: "world!\"" :: Nil
23+
*
24+
* The embedded quotes are stripped. Escaping backslash is not stripped.
25+
*
26+
* Invoke `errorFn` with a descriptive message if an end quote is missing.
1927
*/
2028
def tokenize(line: String, errorFn: String => Unit): List[String] =
2129

2230
var accum: List[String] = Nil
2331

2432
var pos = 0
2533
var start = 0
26-
val qpos = new ArrayBuffer[Int](16) // positions of paired quotes
34+
val qpos = new ArrayBuffer[Int](16) // positions of paired quotes in current token
2735

2836
inline def cur = if done then EOF else line.charAt(pos): Int
2937
inline def bump() = pos += 1
3038
inline def done = pos >= line.length
3139

32-
def skipToQuote(q: Int): Boolean =
40+
// Skip to the given unescaped end quote; false on no more input.
41+
def skipToEndQuote(q: Int): Boolean =
3342
var escaped = false
3443
def terminal = cur match
3544
case _ if escaped => escaped = false ; false
@@ -39,13 +48,18 @@ object CommandLineParser:
3948
while !terminal do bump()
4049
!done
4150

42-
// Skip to the next whitespace word boundary; record unescaped embedded quotes; false on missing quote.
//
// A backslash escapes the single character that follows it: that character is consumed as part
// of the current word and is never treated as a quote or as a delimiter. The backslash itself
// is left in place (nothing is recorded in `qpos` for it).
//
// On return, `pos` is at the delimiting whitespace or at end of input, and never beyond it.
def skipToDelim(): Boolean =
  var escaped = false
  // Record the position of an embedded quote character, then step over it.
  inline def quote() = { qpos += pos ; bump() }
  @tailrec def advance(): Boolean = cur match
    // End of input always terminates the word. This case must precede the `escaped` case:
    // for a line ending in a lone backslash, the `escaped` case would otherwise bump `pos`
    // to `line.length + 1`, and the subsequent `line.substring(start, pos)` would throw.
    case EOF                  => true
    // Previous char was a backslash: swallow this char verbatim, whatever it is.
    case _ if escaped         => escaped = false ; bump() ; advance()
    case '\\'                 => escaped = true ; bump() ; advance()
    // Opening quote: record it, find its unescaped partner, record that too, then carry on.
    // A missing partner short-circuits the `&&` and yields false.
    case q @ (DQ | SQ)        => { quote() ; skipToEndQuote(q) } && { quote() ; advance() }
    case c if isWhitespace(c) => true
    case _                    => bump(); advance()
  advance()
4963

5064
def copyText(): String =
5165
val buf = new java.lang.StringBuilder
@@ -64,6 +78,7 @@ object CommandLineParser:
6478
p = qpos(i)
6579
buf.toString
6680

81+
// the current token, stripped of any embedded quotes.
6782
def text(): String =
6883
val res =
6984
if qpos.isEmpty then line.substring(start, pos)
@@ -74,7 +89,7 @@ object CommandLineParser:
7489

7590
inline def badquote() = errorFn(s"Unmatched quote [${qpos.last}](${line.charAt(qpos.last)})")
7691

77-
inline def skipWhitespace() = while isWhitespace(cur) do pos += 1
92+
inline def skipWhitespace() = while isWhitespace(cur) do bump()
7893

7994
@tailrec def loop(): List[String] =
8095
skipWhitespace()
@@ -85,12 +100,11 @@ object CommandLineParser:
85100
badquote()
86101
Nil
87102
else
88-
accum = text() :: accum
103+
accum ::= text()
89104
loop()
90105
end loop
91106

92107
loop()
93-
94108
end tokenize
95109

96110
/** Split the line into tokens, failing fast on a malformed line.
 *
 *  Delegates to the two-argument `tokenize`, supplying an error callback that
 *  throws a `ParseException` carrying the reported message.
 */
def tokenize(line: String): List[String] =
  tokenize(line, msg => throw new ParseException(msg))

compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,11 @@ class CommandLineParserTest:
4444
// missing quotes
4545
checkFails(""""x""", "Unmatched quote [0](\")") // was assertEquals(List("\"x"), tokenize(""""x"""))
4646
checkFails("""x'""", "Unmatched quote [1](')")
47+
48+
// Exercises quoting and backslash-escaping in the tokenizer.
// NOTE(review): `check` is defined outside this view; from its use here it appears to take the
// expected tokens in the first argument list and the input line in the second -- confirm.
@Test def `leading quote is escaped`: Unit =
49+
// Paired double quotes keep the inner whitespace in one token; the quotes themselves are dropped.
check("echo", "hello, world!")("""echo "hello, world!" """)
50+
// Quotes embedded in the middle of a word work the same way (here single quotes around a space).
check("echo", "hello, world!")("""echo hello,' 'world! """)
51+
// Backslash-escaped quotes do not group: the line splits on the space, and the backslashes are kept.
check("echo", """\"hello,""", """world!\"""")("""echo \"hello, world!\" """)
52+
// Escaped quotes inside a word leave the word untouched.
check("""a\"b\"c""")("""a\"b\"c""")
53+
// Same for escaped single quotes, including a token that is only an escaped quote.
check("a", "\\'b", "\\'", "c")("""a \'b \' c""")
54+
// An escaped backslash does not escape the quote after it: the quote opens a group ending at the
// next quote, so the trailing space after `b` is preserved and both quotes are stripped.
check("a", "\\\\b ", "c")("""a \\'b ' c""")

0 commit comments

Comments
 (0)