Skip to content

Commit c2e97c6

Browse files
committed
Add UTF8 abstraction in the TASTy format
We add a `Utf8` encoding to the grammar. This should not be confused with the `UTF8` name tag; that mistake was made in the `Comment` format. We also add the corresponding `writeUtf8` method to `TastyBuffer` and `readUtf8` method to `TastyReader`.
1 parent 55c2002 commit c2e97c6

File tree

6 files changed

+33
-24
lines changed

6 files changed

+33
-24
lines changed

compiler/src/dotty/tools/dotc/core/tasty/CommentPickler.scala

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,8 @@ object CommentPickler:
2222

2323
def pickleComment(addr: Addr, comment: Comment): Unit =
2424
if addr != NoAddr then
25-
val bytes = comment.raw.getBytes(StandardCharsets.UTF_8).nn
26-
val length = bytes.length
2725
buf.writeAddr(addr)
28-
buf.writeNat(length)
29-
buf.writeBytes(bytes, length)
26+
buf.writeUtf8(comment.raw)
3027
buf.writeLongInt(comment.span.coords)
3128

3229
def traverse(x: Any): Unit = x match

compiler/src/dotty/tools/dotc/core/tasty/CommentUnpickler.scala

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,9 @@ class CommentUnpickler(reader: TastyReader) {
2020
while (!isAtEnd) {
2121
val addr = readAddr()
2222
val length = readNat()
23-
if (length > 0) {
24-
val bytes = readBytes(length)
25-
val position = new Span(readLongInt())
26-
val rawComment = new String(bytes, StandardCharsets.UTF_8)
27-
comments(addr) = Comment(position, rawComment)
28-
}
23+
val rawComment = readUtf8()
24+
val position = new Span(readLongInt())
25+
comments(addr) = Comment(position, rawComment)
2926
}
3027
comments
3128
}

compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,8 @@ import collection.mutable
1313
import core.Symbols.ClassSymbol
1414
import Decorators.*
1515

16-
object TastyPickler {
17-
18-
private val versionStringBytes = {
19-
val compilerString = s"Scala ${config.Properties.simpleVersionString}"
20-
compilerString.getBytes(java.nio.charset.StandardCharsets.UTF_8)
21-
}
22-
23-
}
16+
object TastyPickler:
17+
private val versionString = s"Scala ${config.Properties.simpleVersionString}"
2418

2519
class TastyPickler(val rootCls: ClassSymbol) {
2620

@@ -48,13 +42,12 @@ class TastyPickler(val rootCls: ClassSymbol) {
4842
val uuidHi: Long = otherSectionHashes.fold(0L)(_ ^ _)
4943

5044
val headerBuffer = {
51-
val buf = new TastyBuffer(header.length + TastyPickler.versionStringBytes.length + 32)
45+
val buf = new TastyBuffer(header.length + TastyPickler.versionString.length + 32)
5246
for (ch <- header) buf.writeByte(ch.toByte)
5347
buf.writeNat(MajorVersion)
5448
buf.writeNat(MinorVersion)
5549
buf.writeNat(ExperimentalVersion)
56-
buf.writeNat(TastyPickler.versionStringBytes.length)
57-
buf.writeBytes(TastyPickler.versionStringBytes, TastyPickler.versionStringBytes.length)
50+
buf.writeUtf8(TastyPickler.versionString)
5851
buf.writeUncompressedLong(uuidLow)
5952
buf.writeUncompressedLong(uuidHi)
6053
buf

tasty/src/dotty/tools/tasty/TastyBuffer.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package dotty.tools.tasty
22

33
import util.Util.dble
4+
import java.nio.charset.StandardCharsets
45

56
object TastyBuffer {
67

@@ -115,6 +116,16 @@ class TastyBuffer(initialSize: Int) {
115116
writeBytes(bytes, 8)
116117
}
117118

119+
/** Write a UTF8 string encoded as `Nat UTF8-CodePoint*`,
120+
* where the `Nat` is the length, in bytes, of the UTF-8 encoded code points.
121+
*/
122+
def writeUtf8(x: String): Unit = {
123+
val bytes = x.getBytes(StandardCharsets.UTF_8)
124+
val length = bytes.length
125+
writeNat(length)
126+
writeBytes(bytes, length)
127+
}
128+
118129
// -- Address handling --------------------------------------------
119130

120131
/** Write natural number `x` right-adjusted in a field of `width` bytes

tasty/src/dotty/tools/tasty/TastyFormat.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Micro-syntax:
1616
Nat = LongInt -- non-negative value, fits in an Int without overflow
1717
Digit = 0 | ... | 127
1818
StopDigit = 128 | ... | 255 -- value = digit - 128
19+
Utf8 = Nat UTF8-CodePoint*
1920
```
2021
2122
Macro-format:
@@ -24,12 +25,12 @@ Macro-format:
2425
nameTable_Length Name* Section*
2526
Header = 0x5CA1AB1F
2627
UUID = Byte*16 -- random UUID
27-
VersionString = Length UTF8-CodePoint* -- string that represents the compiler that produced the TASTy
28+
VersionString = Utf8 -- string that represents the compiler that produced the TASTy
2829
2930
Section = NameRef Length Bytes
3031
Length = Nat -- length of rest of entry in bytes
3132
32-
Name = UTF8 Length UTF8-CodePoint*
33+
Name = UTF8 Utf8
3334
QUALIFIED Length qualified_NameRef selector_NameRef -- A.B
3435
EXPANDED Length qualified_NameRef selector_NameRef -- A$$B, semantically a NameKinds.ExpandedName
3536
EXPANDPREFIX Length qualified_NameRef selector_NameRef -- A$B, prefix of expanded name, see NamedKinds.ExpandPrefixName
@@ -265,7 +266,7 @@ All elements of a position section are serialized as Ints
265266
266267
Standard Section: "Comments" Comment*
267268
```none
268-
Comment = UTF8 LongInt // Raw comment's bytes encoded as UTF-8, followed by the comment's coordinates.
269+
Comment = Utf8 LongInt // Raw comment's bytes encoded as UTF-8, followed by the comment's coordinates.
269270
```
270271
271272
Standard Section: "Attributes" Attribute*

tasty/src/dotty/tools/tasty/TastyReader.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package dotty.tools.tasty
33
import collection.mutable
44

55
import TastyBuffer._
6+
import java.nio.charset.StandardCharsets
67

78
/** A byte array buffer that can be filled with bytes or natural numbers in TASTY format,
89
* and that supports reading and patching addresses represented as natural numbers.
@@ -104,6 +105,15 @@ class TastyReader(val bytes: Array[Byte], start: Int, end: Int, val base: Int =
104105
x
105106
}
106107

108+
/** Read a UTF8 string encoded as `Nat UTF8-CodePoint*`,
109+
* where the `Nat` is the length, in bytes, of the UTF-8 encoded code points.
110+
*/
111+
def readUtf8(): String = {
112+
val length = readNat()
113+
if (length == 0) ""
114+
else new String(readBytes(length), StandardCharsets.UTF_8)
115+
}
116+
107117
/** Read a natural number and return as a NameRef */
108118
def readNameRef(): NameRef = NameRef(readNat())
109119

0 commit comments

Comments
 (0)