diff --git a/compiler/src/dotty/tools/dotc/core/NameOps.scala b/compiler/src/dotty/tools/dotc/core/NameOps.scala
index 7c1073852681..4e075953d7fa 100644
--- a/compiler/src/dotty/tools/dotc/core/NameOps.scala
+++ b/compiler/src/dotty/tools/dotc/core/NameOps.scala
@@ -86,11 +86,17 @@ object NameOps {
     def isVarPattern: Boolean =
       testSimple { n =>
         n.length > 0 && {
+          def isLowerLetterSupplementary: Boolean =
+            import Character.{isHighSurrogate, isLowSurrogate, isLetter, isLowerCase, isValidCodePoint, toCodePoint}
+            isHighSurrogate(n(0)) && n.length > 1 && isLowSurrogate(n(1)) && {
+              val codepoint = toCodePoint(n(0), n(1))
+              isValidCodePoint(codepoint) && isLetter(codepoint) && isLowerCase(codepoint)
+            }
           val first = n.head
-          (((first.isLower && first.isLetter) || first == '_')
-            && (n != false_)
-            && (n != true_)
-            && (n != null_))
+          ((first.isLower && first.isLetter || first == '_' || isLowerLetterSupplementary)
+            && n != false_
+            && n != true_
+            && n != null_)
         }
       } || name.is(PatMatGivenVarName)
 
@@ -98,7 +104,7 @@ object NameOps {
       case raw.NE | raw.LE | raw.GE | EMPTY =>
         false
       case name: SimpleName =>
-        name.length > 0 && name.last == '=' && name.head != '=' && isOperatorPart(name.head)
+        name.length > 0 && name.last == '=' && name.head != '=' && isOperatorPart(name.firstCodePoint)
       case _ =>
         false
     }
@@ -352,6 +358,18 @@ object NameOps {
       val unmangled = kinds.foldLeft(name)(_.unmangle(_))
       if (unmangled eq name) name else unmangled.unmangle(kinds)
     }
+
+    /** The first code point of this name's first part: a leading high/low surrogate
+     *  pair is combined into one supplementary code point; otherwise the first `Char`
+     *  is returned as is. The first part must be non-empty.
+     */
+    def firstCodePoint: Int =
+      val first = name.firstPart
+      import Character.{isHighSurrogate, isLowSurrogate, isValidCodePoint, toCodePoint}
+      if isHighSurrogate(first(0)) && first.length > 1 && isLowSurrogate(first(1)) then
+        val codepoint = toCodePoint(first(0), first(1))
+        if isValidCodePoint(codepoint) then codepoint else first(0)
+      else first(0)
   }
 
   extension (name: TermName) {
diff --git a/compiler/src/dotty/tools/dotc/core/Names.scala b/compiler/src/dotty/tools/dotc/core/Names.scala
index 7932ad7727ef..1e08379b57f0 100644
--- a/compiler/src/dotty/tools/dotc/core/Names.scala
+++ b/compiler/src/dotty/tools/dotc/core/Names.scala
@@ -25,7 +25,7 @@ object Names {
    */
   abstract class Designator
 
-  /** A name if either a term name or a type name. Term names can be simple
+  /** A name is either a term name or a type name. Term names can be simple
    *  or derived. A simple term name is essentially an interned string stored
    *  in a name table. A derived term name adds a tag, and possibly a number
    *  or a further simple name to some other name.
diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala
index 8f9ac582bc12..2d1dd22aa28b 100644
--- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala
+++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala
@@ -21,6 +21,8 @@ import config.Feature.{migrateTo3, fewerBracesEnabled}
 import config.SourceVersion.`3.0`
 import reporting.{NoProfile, Profile, Message}
 
+import java.util.Objects
+
 object Scanners {
 
   /** Offset into source character array */
@@ -777,19 +779,21 @@ object Scanners {
     private def isSupplementary(high: Char, test: Int => Boolean, strict: Boolean = true): Boolean =
       isHighSurrogate(high) && {
         var res = false
-        nextChar()
-        val low = ch
+        val low = lookaheadChar()
         if isLowSurrogate(low) then
-          nextChar()
           val codepoint = toCodePoint(high, low)
-          if isValidCodePoint(codepoint) && test(codepoint) then
-            putChar(high)
-            putChar(low)
-            res = true
+          if isValidCodePoint(codepoint) then
+            if test(codepoint) then
+              putChar(high)
+              putChar(low)
+              nextChar()
+              nextChar()
+              res = true
           else
             error(em"illegal character '${toUnicode(high)}${toUnicode(low)}'")
         else if !strict then
           putChar(high)
+          nextChar()
           res = true
         else
           error(em"illegal character '${toUnicode(high)}' missing low surrogate")
@@ -889,7 +893,6 @@ object Scanners {
           if (ch == '\"') {
             if (lookaheadChar() == '\"') {
               nextRawChar()
-              //offset += 3 // first part is positioned at the quote
               nextRawChar()
               stringPart(multiLine = true)
             }
@@ -900,7 +903,6 @@ object Scanners {
             }
           }
           else {
-            //offset += 1 // first part is positioned at the quote
             stringPart(multiLine = false)
           }
         }
@@ -977,30 +979,29 @@ object Scanners {
           }
         case _ =>
           def fetchOther() =
-            if (ch == '\u21D2') {
+            if ch == '\u21D2' then
               nextChar(); token = ARROW
               report.deprecationWarning(em"The unicode arrow `⇒` is deprecated, use `=>` instead. If you still wish to display it as one character, consider using a font with programming ligatures such as Fira Code.", sourcePos(offset))
-            }
-            else if (ch == '\u2190') {
+            else if ch == '\u2190' then
               nextChar(); token = LARROW
               report.deprecationWarning(em"The unicode arrow `←` is deprecated, use `<-` instead. If you still wish to display it as one character, consider using a font with programming ligatures such as Fira Code.", sourcePos(offset))
-            }
-            else if (Character.isUnicodeIdentifierStart(ch)) {
+            else if isUnicodeIdentifierStart(ch) then
               putChar(ch)
               nextChar()
               getIdentRest()
-            }
-            else if (isSpecial(ch)) {
+              if ch == '"' && token == IDENTIFIER then token = INTERPOLATIONID
+            else if isSpecial(ch) then
               putChar(ch)
               nextChar()
               getOperatorRest()
-            }
             else if isSupplementary(ch, isUnicodeIdentifierStart) then
               getIdentRest()
-            else {
+              if ch == '"' && token == IDENTIFIER then token = INTERPOLATIONID
+            else if isSupplementary(ch, isSpecial) then
+              getOperatorRest()
+            else
               error(em"illegal character '${toUnicode(ch)}'")
               nextChar()
-            }
           fetchOther()
       }
     }
@@ -1115,7 +1116,7 @@ object Scanners {
       else error(em"unclosed quoted identifier")
     }
 
-    private def getIdentRest(): Unit = (ch: @switch) match {
+    @tailrec private def getIdentRest(): Unit = (ch: @switch) match {
       case 'A' | 'B' | 'C' | 'D' | 'E' |
            'F' | 'G' | 'H' | 'I' | 'J' |
            'K' | 'L' | 'M' | 'N' | 'O' |
@@ -1150,7 +1151,7 @@ object Scanners {
         finishNamed()
     }
 
-    private def getOperatorRest(): Unit = (ch: @switch) match {
+    @tailrec private def getOperatorRest(): Unit = (ch: @switch) match {
       case '~' | '!' | '@' | '#' | '%' |
            '^' | '*' | '+' | '-' | '<' |
            '>' | '?' | ':' | '=' | '&' |
@@ -1161,23 +1162,13 @@ object Scanners {
         if nxch == '/' || nxch == '*' then finishNamed()
         else { putChar(ch); nextChar(); getOperatorRest() }
       case _ =>
-        if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() }
+        if isSpecial(ch) then { putChar(ch); nextChar(); getOperatorRest() }
+        else if isSupplementary(ch, isSpecial) then getOperatorRest()
         else finishNamed()
     }
 
     private def getIdentOrOperatorRest(): Unit =
-      if (isIdentifierPart(ch))
-        getIdentRest()
-      else ch match {
-        case '~' | '!' | '@' | '#' | '%' |
-             '^' | '*' | '+' | '-' | '<' |
-             '>' | '?' | ':' | '=' | '&' |
-             '|' | '\\' | '/' =>
-          getOperatorRest()
-        case _ =>
-          if (isSpecial(ch)) getOperatorRest()
-          else finishNamed()
-      }
+      if (isIdentifierPart(ch) || isSupplementary(ch, isIdentifierPart)) getIdentRest() else getOperatorRest()
 
     def isSoftModifier: Boolean =
       token == IDENTIFIER
@@ -1500,7 +1491,7 @@ object Scanners {
       if (ch == '\'') finishCharLit()
       else {
         token = op
-        strVal = if (name != null) name.toString else null
+        strVal = Objects.toString(name, null)
         litBuf.clear()
       }
     }
diff --git a/compiler/src/dotty/tools/dotc/parsing/package.scala b/compiler/src/dotty/tools/dotc/parsing/package.scala
index a1f9c8d73ad4..ee3ecda60aee 100644
--- a/compiler/src/dotty/tools/dotc/parsing/package.scala
+++ b/compiler/src/dotty/tools/dotc/parsing/package.scala
@@ -17,7 +17,7 @@ package object parsing {
   def precedence(operator: Name): Int =
     if (operator eq nme.ERROR) -1
     else {
-      val firstCh = operator.firstPart.head
+      val firstCh = operator.firstCodePoint
       if (isScalaLetter(firstCh)) 1
       else if (operator.isOpAssignmentName) 0
       else firstCh match {
diff --git a/compiler/src/dotty/tools/dotc/util/Chars.scala b/compiler/src/dotty/tools/dotc/util/Chars.scala
index 471b68d6247e..cde1a63f5293 100644
--- a/compiler/src/dotty/tools/dotc/util/Chars.scala
+++ b/compiler/src/dotty/tools/dotc/util/Chars.scala
@@ -1,21 +1,21 @@
 package dotty.tools.dotc.util
 
 import scala.annotation.switch
-import java.lang.{Character => JCharacter}
-import java.lang.Character.LETTER_NUMBER
-import java.lang.Character.LOWERCASE_LETTER
-import java.lang.Character.OTHER_LETTER
-import java.lang.Character.TITLECASE_LETTER
-import java.lang.Character.UPPERCASE_LETTER
+import Character.{LETTER_NUMBER, LOWERCASE_LETTER, OTHER_LETTER, TITLECASE_LETTER, UPPERCASE_LETTER}
+import Character.{MATH_SYMBOL, OTHER_SYMBOL}
+import Character.{isJavaIdentifierPart, isUnicodeIdentifierStart, isUnicodeIdentifierPart}
 
 /** Contains constants and classifier methods for characters */
-object Chars {
+object Chars:
 
   inline val LF = '\u000A'
   inline val FF = '\u000C'
   inline val CR = '\u000D'
   inline val SU = '\u001A'
 
+  /** A Unicode code point held in an `Int`, as taken by the `Int` overloads of `java.lang.Character`. */
+  type CodePoint = Int
+
   /** Convert a character digit to an Int according to given base,
    *  -1 if no success
    */
@@ -59,17 +59,21 @@ object Chars {
     '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
 
   /** Can character start an alphanumeric Scala identifier? */
-  def isIdentifierStart(c: Char): Boolean =
-    (c == '_') || (c == '$') || JCharacter.isUnicodeIdentifierStart(c)
+  def isIdentifierStart(c: Char): Boolean = (c == '_') || (c == '$') || isUnicodeIdentifierStart(c)
+  def isIdentifierStart(c: CodePoint): Boolean = (c == '_') || (c == '$') || isUnicodeIdentifierStart(c)
 
   /** Can character form part of an alphanumeric Scala identifier? */
-  def isIdentifierPart(c: Char): Boolean =
-    (c == '$') || JCharacter.isUnicodeIdentifierPart(c)
+  def isIdentifierPart(c: Char): Boolean = (c == '$') || isUnicodeIdentifierPart(c)
+  def isIdentifierPart(c: CodePoint): Boolean = (c == '$') || isUnicodeIdentifierPart(c)
 
   /** Is character a math or other symbol in Unicode?  */
   def isSpecial(c: Char): Boolean = {
-    val chtp = JCharacter.getType(c)
-    chtp == JCharacter.MATH_SYMBOL.toInt || chtp == JCharacter.OTHER_SYMBOL.toInt
+    val chtp = Character.getType(c)
+    chtp == MATH_SYMBOL.toInt || chtp == OTHER_SYMBOL.toInt
+  }
+  def isSpecial(codePoint: CodePoint): Boolean = {
+    val chtp = Character.getType(codePoint)
+    chtp == MATH_SYMBOL.toInt || chtp == OTHER_SYMBOL.toInt
   }
 
   def isValidJVMChar(c: Char): Boolean =
@@ -78,15 +82,26 @@ object Chars {
   def isValidJVMMethodChar(c: Char): Boolean =
     !(c == '.' || c == ';' || c =='[' || c == '/' || c == '<' || c == '>')
 
-  private final val otherLetters = Set[Char]('\u0024', '\u005F')  // '$' and '_'
-  private final val letterGroups = {
-    import JCharacter._
-    Set[Byte](LOWERCASE_LETTER, UPPERCASE_LETTER, OTHER_LETTER, TITLECASE_LETTER, LETTER_NUMBER)
-  }
-  def isScalaLetter(ch: Char): Boolean = letterGroups(JCharacter.getType(ch).toByte) || otherLetters(ch)
+  def isScalaLetter(c: Char): Boolean =
+    Character.getType(c) match {
+      case LOWERCASE_LETTER | UPPERCASE_LETTER | OTHER_LETTER | TITLECASE_LETTER | LETTER_NUMBER => true
+      case _ => c == '$' || c == '_'
+    }
+  def isScalaLetter(c: CodePoint): Boolean =
+    Character.getType(c) match {
+      case LOWERCASE_LETTER | UPPERCASE_LETTER | OTHER_LETTER | TITLECASE_LETTER | LETTER_NUMBER => true
+      case _ => c == '$' || c == '_'
+    }
 
   /** Can character form part of a Scala operator name? */
-  def isOperatorPart(c : Char) : Boolean = (c: @switch) match {
+  def isOperatorPart(c: Char): Boolean = (c: @switch) match {
+    case '~' | '!' | '@' | '#' | '%' |
+         '^' | '*' | '+' | '-' | '<' |
+         '>' | '?' | ':' | '=' | '&' |
+         '|' | '/' | '\\' => true
+    case c => isSpecial(c)
+  }
+  def isOperatorPart(c: CodePoint): Boolean = (c: @switch) match {
     case '~' | '!' | '@' | '#' | '%' |
          '^' | '*' | '+' | '-' | '<' |
          '>' | '?' | ':' | '=' | '&' |
@@ -95,5 +110,4 @@ object Chars {
   }
 
   /** Would the character be encoded by `NameTransformer.encode`? */
-  def willBeEncoded(c : Char) : Boolean = !JCharacter.isJavaIdentifierPart(c)
+  def willBeEncoded(c: Char): Boolean = !isJavaIdentifierPart(c)
-}
diff --git a/tests/pos/surrogates.scala b/tests/pos/surrogates.scala
index 1b710ad901ae..9cfd10deb669 100644
--- a/tests/pos/surrogates.scala
+++ b/tests/pos/surrogates.scala
@@ -25,4 +25,8 @@ class Construction {
   def reversed = "xyz\udc00\ud801abc"
 }
 
+class Demon {
+  val 😈 = 42
+}
+
 // was: error: illegal character '\ud801', '\udc00'
diff --git a/tests/pos/t1406.scala b/tests/pos/t1406.scala
new file mode 100644
index 000000000000..76800629e15d
--- /dev/null
+++ b/tests/pos/t1406.scala
@@ -0,0 +1,32 @@
+
+class Identifiers {
+
+  def f(x: Any): Boolean = x match {
+    case 𐐨XYZ: String => true
+    case 𐐨 => true
+  }
+  def g(x: Any) = x match {
+    case 𐐨 @ _ => 𐐨
+  }
+}
+class Ops {
+  def 𝆗 = 42        // was error: illegal character
+  def op_𝆗 = 42     // was error: illegal character
+  def 🌀 = 42
+  def op_🌀 = 42
+  def 🚀 = 42
+  def op_🚀 = 42
+  def 🜀 = 42
+  def op_🜀 = 42
+  def 𝓅 = 42
+  def op_𝓅 = 42
+}
+class Strings {
+  implicit class Interps(sc: StringContext) {
+    def 𝓅(parts: Any*) = "done"
+  }
+  def 𝓅 = 42
+  def interpolated = s"$𝓅"
+  def e = "a 𝓅 b"
+  def f = 𝓅"one"
+}
diff --git a/tests/run/t1406b.scala b/tests/run/t1406b.scala
new file mode 100644
index 000000000000..fa2fab0f2183
--- /dev/null
+++ b/tests/run/t1406b.scala
@@ -0,0 +1,28 @@
+
+case class C(n: Int) {
+  def 𐀀(c: C): C = C(n * c.n)  // actually a letter but supplementary 0x10000
+  def ☀(c: C): C = C(n * c.n)  // just a symbol
+  def ☀=(c: C): C = C(n * c.n)  // just a symbol
+  def 🌀(c: C): C = C(n * c.n)  // cyclone operator is symbol, supplementary
+  def 🌀=(c: C): C = C(n * c.n)  // cyclone operator is symbol, supplementary
+  def *(c: C): C = C(n * c.n)
+  def +(c: C): C = C(n + c.n)
+}
+object Test extends App {
+  val Sum = 84
+  val Product = 1764
+  val ProductSum = 1806
+  val SumProduct = 3528
+  val c, d = C(42)
+  def assertEquals(expected: Int, actual: C) = assert(expected == actual.n)
+  assertEquals(Sum, c + d)
+  assertEquals(Product, c * d)
+  assertEquals(Product, c ☀ d)
+  assertEquals(ProductSum, c * d + d)
+  assertEquals(ProductSum, c ☀ d + d)
+  assertEquals(SumProduct, c ☀= d + d)  // assignment op is low precedence
+  assertEquals(SumProduct, c 𐀀 d + d)  // the first one, letter should be low precedence
+  assertEquals(ProductSum, c 🌀d + d)   // the second one, cyclone should be high precedence
+  assertEquals(SumProduct, c 🌀= d + d) // assignment op is low precedence
+}
+