Skip to content

Commit d010ef7

Browse files
authored
Merge pull request #8480 from martijnhoekstra/uni
Unicode escapes are ordinary escape sequences
2 parents bcce959 + c64f004 commit d010ef7

17 files changed

+352
-34
lines changed

compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ abstract class CharArrayReader { self =>
1010
protected def startFrom: Int = 0
1111

1212
/** Switch whether unicode should be decoded */
13-
protected def decodeUni: Boolean = true
13+
protected def decodeUni: Boolean = false
1414

1515
/** An error routine to call on bad unicode escapes \\uxxxx. */
1616
protected def error(msg: String, offset: Int): Unit

compiler/src/dotty/tools/dotc/parsing/JavaScanners.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ object JavaScanners {
1414

1515
class JavaScanner(source: SourceFile, override val startFrom: Offset = 0)(implicit ctx: Context) extends ScannerCommon(source)(ctx) {
1616

17+
override def decodeUni: Boolean = true
18+
1719
def toToken(name: SimpleName): Token = {
1820
val idx = name.start
1921
if (idx >= 0 && idx <= lastKeywordStart) kwArray(idx) else IDENTIFIER

compiler/src/dotty/tools/dotc/parsing/Scanners.scala

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,26 @@ object Scanners {
11371137
* and advance to next character.
11381138
*/
11391139
protected def getLitChar(): Unit =
1140+
def invalidUnicodeEscape() = {
1141+
error("invalid character in unicode escape sequence", charOffset - 1)
1142+
putChar(ch)
1143+
}
1144+
def putUnicode(): Unit = {
1145+
while ch == 'u' || ch == 'U' do nextChar()
1146+
var i = 0
1147+
var cp = 0
1148+
while (i < 4) {
1149+
val shift = (3 - i) * 4
1150+
val d = digit2int(ch, 16)
1151+
if(d < 0) {
1152+
return invalidUnicodeEscape()
1153+
}
1154+
cp += (d << shift)
1155+
nextChar()
1156+
i += 1
1157+
}
1158+
putChar(cp.asInstanceOf[Char])
1159+
}
11401160
if (ch == '\\') {
11411161
nextChar()
11421162
if ('0' <= ch && ch <= '7') {
@@ -1153,6 +1173,9 @@ object Scanners {
11531173
}
11541174
putChar(oct.toChar)
11551175
}
1176+
else if (ch == 'u' || ch == 'U') {
1177+
putUnicode()
1178+
}
11561179
else {
11571180
ch match {
11581181
case 'b' => putChar('\b')

compiler/src/dotty/tools/dotc/transform/localopt/StringInterpolatorOpt.scala

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,20 @@ class StringInterpolatorOpt extends MiniPhase {
6363
}
6464
}
6565

66+
//Extract the position from InvalidUnicodeEscapeException
67+
//which due to bincompat reasons is inaccessible.
68+
//TODO: remove once there is less restrictive bincompat
69+
private object InvalidEscapePosition {
70+
def unapply(t: Throwable): Option[Int] = t match {
71+
case iee: StringContext.InvalidEscapeException => Some(iee.index)
72+
case il: IllegalArgumentException => il.getMessage() match {
73+
case s"""invalid unicode escape at index $index of $_""" => index.toIntOption
74+
case _ => None
75+
}
76+
case _ => None
77+
}
78+
}
79+
6680
/**
6781
* Match trees that resemble s and raw string interpolations. In the case of the s
6882
* interpolator, escapes the string constants. Exposes the string constants as well as
@@ -74,14 +88,22 @@ class StringInterpolatorOpt extends MiniPhase {
7488
case SOrRawInterpolator(strs, elems) =>
7589
if (tree.symbol == defn.StringContext_raw) Some(strs, elems)
7690
else { // tree.symbol == defn.StringContextS
91+
import dotty.tools.dotc.util.SourcePosition
92+
var stringPosition: SourcePosition = null
7793
try {
78-
val escapedStrs = strs.map { str =>
79-
val escapedValue = StringContext.processEscapes(str.const.stringValue)
80-
cpy.Literal(str)(Constant(escapedValue))
81-
}
94+
val escapedStrs = strs.map(str => {
95+
stringPosition = str.sourcePos
96+
val escaped = StringContext.processEscapes(str.const.stringValue)
97+
cpy.Literal(str)(Constant(escaped))
98+
})
8299
Some(escapedStrs, elems)
83100
} catch {
84-
case _: StringContext.InvalidEscapeException => None
101+
case t @ InvalidEscapePosition(p) => {
102+
val errorSpan = stringPosition.span.startPos.shift(p)
103+
val errorPosition = stringPosition.withSpan(errorSpan)
104+
ctx.error(t.getMessage() + "\n", errorPosition)
105+
None
106+
}
85107
}
86108
}
87109
case _ => None
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package dotty.tools
2+
package dotc
3+
package parsing
4+
5+
import ast.untpd._
6+
import org.junit.Test
7+
8+
class StringInterpolationPositionTest extends ParserTest {
9+
10+
val tq = "\"\"\""
11+
val program = s"""
12+
|class A {
13+
| val expr = 42
14+
| val s0 = s"string1"
15+
| val s1 = s"string1$${expr}string2"
16+
| val s2 = s"string1$${expr}string2$${expr}string3"
17+
| val s0m = s${tq}string1${tq}
18+
| val s1m = s${tq}string1$${expr}string2${tq}
19+
| val s2m = s${tq}string1$${expr}string2$${expr}string3${tq}
20+
|}""".stripMargin
21+
22+
@Test
23+
def interpolationLiteralPosition: Unit = {
24+
val t = parseText(program)
25+
t match {
26+
case PackageDef(_, List(TypeDef(_, Template(_, _, _, statements: List[Tree])))) => {
27+
val interpolations = statements.collect{ case ValDef(_, _, InterpolatedString(_, int)) => int }
28+
val lits = interpolations.flatten.flatMap {
29+
case l @ Literal(_) => List(l)
30+
case Thicket(trees) => trees.collect { case l @ Literal(_) => l }
31+
}
32+
for {
33+
lit <- lits
34+
Literal(c) = lit
35+
str <- List(c.value).collect { case str: String => str}
36+
} {
37+
val fromPos = program.substring(lit.span.start, lit.span.end)
38+
assert(fromPos == str, s"$fromPos == $str")
39+
}
40+
}
41+
}
42+
}
43+
}

tests/neg/firstError.scala

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1 @@
1-
. // error: expected class or object definition
2-
3-
\u890u3084eu // error: error in unicode escape // error: illegal character '\uffff'
4-
1+
. // error: expected class or object definition
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:2:27 -----------------------------------------------------------
2+
2 | val badInters1 = s"foo \unope that's wrong" // error
3+
| ^
4+
| invalid unicode escape at index 6 of foo \unope that's wrong
5+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:3:32 -----------------------------------------------------------
6+
3 | val badIntersEnd1 = s"foo \u12" // error
7+
| ^
8+
| invalid unicode escape at index 8 of foo \u12
9+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:4:29 -----------------------------------------------------------
10+
4 | val badInters3 = s"""foo \unope that's wrong""" // error
11+
| ^
12+
| invalid unicode escape at index 6 of foo \unope that's wrong
13+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:5:28 -----------------------------------------------------------
14+
5 | val caretPos1 = s"foo \u12x3 pos @ x" // error
15+
| ^
16+
| invalid unicode escape at index 8 of foo \u12x3 pos @ x
17+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:6:34 -----------------------------------------------------------
18+
6 | val caretPos2 = s"foo \uuuuuuu12x3 pos @ x" // error
19+
| ^
20+
| invalid unicode escape at index 14 of foo \uuuuuuu12x3 pos @ x
21+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:7:30 -----------------------------------------------------------
22+
7 | val caretPos3 = s"""foo \u12x3 pos @ x""" // error
23+
| ^
24+
| invalid unicode escape at index 8 of foo \u12x3 pos @ x
25+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:8:36 -----------------------------------------------------------
26+
8 | val caretPos4 = s"""foo \uuuuuuu12x3 pos @ x""" // error
27+
| ^
28+
| invalid unicode escape at index 14 of foo \uuuuuuu12x3 pos @ x
29+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:10:53 ----------------------------------------------------------
30+
10 | val badIntersmultiAfter = s"foo $placeholder bar \unope that's wrong" // error
31+
| ^
32+
| invalid unicode escape at index 7 of bar \unope that's wrong
33+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:11:37 ----------------------------------------------------------
34+
11 | val badIntersmultiBefore = s"foo \unope $placeholder that's wrong" // error
35+
| ^
36+
| invalid unicode escape at index 6 of foo \unope
37+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:12:56 ----------------------------------------------------------
38+
12 | val badInterstmultiAfter = s"""foo $placeholder bar \unope that's wrong""" // error
39+
| ^
40+
| invalid unicode escape at index 7 of bar \unope that's wrong
41+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:13:40 ----------------------------------------------------------
42+
13 | val badInterstmultiBefore = s"""foo \unope $placeholder that's wrong""" // error
43+
| ^
44+
| invalid unicode escape at index 6 of foo \unope
45+
-- Error: tests/neg/unicodeEscapes-interpolations.scala:14:29 ----------------------------------------------------------
46+
14 | val badInterother = s"this \p ain't legal either" // error
47+
| ^
48+
|invalid escape '\p' not one of [\b, \t, \n, \f, \r, \\, \", \', \uxxxx] at index 5 in "this \p ain't legal either". Use \\ for literal \.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
object Example {
2+
val badInters1 = s"foo \unope that's wrong" // error
3+
val badIntersEnd1 = s"foo \u12" // error
4+
val badInters3 = s"""foo \unope that's wrong""" // error
5+
val caretPos1 = s"foo \u12x3 pos @ x" // error
6+
val caretPos2 = s"foo \uuuuuuu12x3 pos @ x" // error
7+
val caretPos3 = s"""foo \u12x3 pos @ x""" // error
8+
val caretPos4 = s"""foo \uuuuuuu12x3 pos @ x""" // error
9+
val placeholder = "place"
10+
val badIntersmultiAfter = s"foo $placeholder bar \unope that's wrong" // error
11+
val badIntersmultiBefore = s"foo \unope $placeholder that's wrong" // error
12+
val badInterstmultiAfter = s"""foo $placeholder bar \unope that's wrong""" // error
13+
val badInterstmultiBefore = s"""foo \unope $placeholder that's wrong""" // error
14+
val badInterother = s"this \p ain't legal either" // error
15+
}

tests/neg/unicodeEscapes.check

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
-- Error: tests/neg/unicodeEscapes.scala:3:25 --------------------------------------------------------------------------
2+
3 | val badsingle = "foo \unope that's wrong" // error
3+
| ^
4+
| invalid character in unicode escape sequence
5+
-- Error: tests/neg/unicodeEscapes.scala:4:26 --------------------------------------------------------------------------
6+
4 | val caretPos = "foo \u12x3 pos @ x" // error
7+
| ^
8+
| invalid character in unicode escape sequence
9+
-- Error: tests/neg/unicodeEscapes.scala:5:33 --------------------------------------------------------------------------
10+
5 | val caretPos2 = "foo \uuuuuuu12x3 pos @ x" // error
11+
| ^
12+
| invalid character in unicode escape sequence
13+
-- Error: tests/neg/unicodeEscapes.scala:6:29 --------------------------------------------------------------------------
14+
6 | val carPosTerm = "foo \u123" // error
15+
| ^
16+
| invalid character in unicode escape sequence
17+
-- Error: tests/neg/unicodeEscapes.scala:7:30 --------------------------------------------------------------------------
18+
7 | val halfAnEscape = "foo \u12" // error
19+
| ^
20+
| invalid character in unicode escape sequence
21+
-- Error: tests/neg/unicodeEscapes.scala:8:30 --------------------------------------------------------------------------
22+
8 | val halfAnEscapeChar = '\u45' // error
23+
| ^
24+
| invalid character in unicode escape sequence
25+
-- Error: tests/neg/unicodeEscapes.scala:9:29 --------------------------------------------------------------------------
26+
9 | val `half An Identifier\u45` = "nope" // error
27+
| ^
28+
| invalid character in unicode escape sequence

tests/neg/unicodeEscapes.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2+
object Example {
3+
val badsingle = "foo \unope that's wrong" // error
4+
val caretPos = "foo \u12x3 pos @ x" // error
5+
val caretPos2 = "foo \uuuuuuu12x3 pos @ x" // error
6+
val carPosTerm = "foo \u123" // error
7+
val halfAnEscape = "foo \u12" // error
8+
val halfAnEscapeChar = '\u45' // error
9+
val `half An Identifier\u45` = "nope" // error
10+
}

tests/run/literals.scala

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,38 @@
1+
// scalac: -deprecation
2+
//
13
//############################################################################
24
// Literals
35
//############################################################################
46

5-
import scala.util.{Failure, Success, Try}
7+
//############################################################################
68

79
object Test {
810

9-
/* I add a couple of Unicode identifier tests here "temporarily" */
10-
11-
def \u03b1\u03c1\u03b5\u03c4\u03b7 = "alpha rho epsilon tau eta"
12-
13-
case class GGG(i: Int) {
14-
def \u03b1\u03b1(that: GGG) = i + that.i
11+
def check_success[A](name: String, closure: => A, expected: A): Unit = {
12+
val res: Option[String] =
13+
try {
14+
val actual: A = closure
15+
if (actual == expected) None //print(" was successful")
16+
else Some(s" failed: expected $expected, found $actual")
17+
} catch {
18+
case exception: Throwable => Some(s" raised exception $exception")
19+
}
20+
for (e <- res) println(s"test $name $e")
1521
}
1622

17-
def check_success[A](name: String, closure: => A, expected: A): Unit =
18-
Try(closure) match {
19-
case Success(actual) => assert(actual == expected, s"test $name failed: expected $expected, found $actual")
20-
case Failure(error) => throw new AssertionError(s"test $name raised exception $error")
21-
}
22-
2323
def main(args: Array[String]): Unit = {
2424
// char
25+
26+
//unicode escapes escape in char literals
2527
check_success("'\\u0024' == '$'", '\u0024', '$')
2628
check_success("'\\u005f' == '_'", '\u005f', '_')
29+
30+
//unicode escapes escape in interpolations
31+
check_success("\"\\u0024\" == \"$\"", s"\u0024", "$")
32+
check_success("\"\"\"\\u0024\"\"\" == \"$\"", s"""\u0024""", "$")
33+
34+
//Int#asInstanceOf[Char] gets the char at the code point
2735
check_success("65.asInstanceOf[Char] == 'A'", 65.asInstanceOf[Char], 'A')
28-
check_success("\"\\141\\142\" == \"ab\"", "\141\142", "ab")
29-
check_success("\"\\0x61\\0x62\".trim() == \"x61\\0x62\"", "\0x61\0x62".substring(1), "x61\0x62")
3036

3137
// boolean
3238
check_success("(65 : Byte) == 'A'", (65: Byte) == 'A', true) // contrib #176
@@ -77,7 +83,6 @@ object Test {
7783
check_success("01.23f == 1.23f", 01.23f, 1.23f)
7884
check_success("3.14f == 3.14f", 3.14f, 3.14f)
7985
check_success("6.022e23f == 6.022e23f", 6.022e23f, 6.022e23f)
80-
check_success("9f == 9.0f", 9f, 9.0f)
8186
check_success("09f == 9.0f", 09f, 9.0f)
8287
check_success("1.00000017881393421514957253748434595763683319091796875001f == 1.0000001f",
8388
1.00000017881393421514957253748434595763683319091796875001f,
@@ -107,11 +112,7 @@ object Test {
107112
check_success("1L.asInstanceOf[Double] == 1.0", 1L.asInstanceOf[Double], 1.0)
108113

109114
check_success("\"\".length()", "\u001a".length(), 1)
110-
111-
val ggg = GGG(1) \u03b1\u03b1 GGG(2)
112-
check_success("ggg == 3", ggg, 3)
113-
114115
}
115116
}
116117

117-
//############################################################################
118+
//############################################################################

tests/run/t3220-3.check

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
processed...OK
2+
unprocessed...OK
3+
after backslashes
4+
List(\, \, u, 0, 0, 4, 0)
5+
List(\, u, 0, 0, 4, 0)
6+
List(\, \, u, 0, 0, 4, 0)
7+
List(\, u, 0, 0, 4, 0)
8+
List(", (, [, ^, ", \, x, 0, 0, -, \, x, 1, F, \, x, 7, F, \, \, ], |, \, \, [, \, \, ', ", b, f, n, r, t, ], |, \, \, u, [, a, -, f, A, -, F, 0, -, 9, ], {, 4, }, ), *, ")
9+
List(b, a, d, \)

0 commit comments

Comments
 (0)