Skip to content

Add TASTY pickling of quotes and implement ~ on quotes #3662

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jan 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions compiler/src/dotty/tools/dotc/core/Definitions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1047,7 +1047,7 @@ class Definitions {

// private val unboxedTypeRef = mutable.Map[TypeName, TypeRef]()
// private val javaTypeToValueTypeRef = mutable.Map[Class[_], TypeRef]()
// private val valueTypeNameToJavaType = mutable.Map[TypeName, Class[_]]()
private val valueTypeNamesToJavaType = mutable.Map[TypeName, Class[_]]()

private def valueTypeRef(name: String, boxed: TypeRef, jtype: Class[_], enc: Int, tag: Name): TypeRef = {
val vcls = ctx.requiredClassRef(name)
Expand All @@ -1056,7 +1056,7 @@ class Definitions {
typeTags(vcls.name) = tag
// unboxedTypeRef(boxed.name) = vcls
// javaTypeToValueTypeRef(jtype) = vcls
// valueTypeNameToJavaType(vcls.name) = jtype
valueTypeNamesToJavaType(vcls.name) = jtype
vcls
}

Expand All @@ -1066,6 +1066,10 @@ class Definitions {
/** The JVM tag for `tp` if it's a primitive, `java.lang.Object` otherwise.
 *  The tag is looked up in `typeTags` under the Scala class name of `tp`.
 */
def typeTag(tp: Type)(implicit ctx: Context): Name = {
  val className = scalaClassName(tp)
  typeTags(className)
}

/** The `Class[_]` of a primitive value type name, if one was registered.
 *
 *  A name whose first part is `scala` is reduced to its last part before
 *  the lookup; any other name is looked up unchanged.
 */
def valueTypeNameToJavaType(name: TypeName)(implicit ctx: Context): Option[Class[_]] = {
  val lookupKey: TypeName =
    if (name.firstPart eq nme.scala_) name.lastPart.toTypeName
    else name
  valueTypeNamesToJavaType.get(lookupKey)
}

type PrimitiveClassEnc = Int

val ByteEnc = 2
Expand Down
97 changes: 97 additions & 0 deletions compiler/src/dotty/tools/dotc/core/quoted/PickledQuotes.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package dotty.tools.dotc.core.quoted

import dotty.tools.dotc.ast.Trees._
import dotty.tools.dotc.ast.{tpd, untpd}
import dotty.tools.dotc.config.Printers._
import dotty.tools.dotc.core.Constants.Constant
import dotty.tools.dotc.core.Contexts._
import dotty.tools.dotc.core.Decorators._
import dotty.tools.dotc.core.Flags._
import dotty.tools.dotc.core.Symbols._
import dotty.tools.dotc.core.tasty.{TastyPickler, TastyPrinter, TastyString}
import dotty.tools.dotc.interpreter.RawQuoted

object PickledQuotes {
import tpd._

/** Pickle the quoted `tree` into a TASTY string.
 *
 *  If the reporter already has errors, nothing is pickled and the
 *  placeholder string `"<error>"` is returned instead.
 */
def pickleQuote(tree: Tree)(implicit ctx: Context): String =
  if (ctx.reporter.hasErrors) "<error>"
  else TastyString.tastyToString(pickle(encapsulateQuote(tree)))

/** Transform the quoted expression into its fully spliced Tree.
 *
 *  - a `TastyQuoted` is unpickled from its TASTY string,
 *  - a `ConstantExpr` becomes a literal of its value,
 *  - a `RawQuoted` already carries its tree.
 */
def quotedToTree(expr: quoted.Quoted)(implicit ctx: Context): Tree = expr match {
  case tastyQuoted: quoted.TastyQuoted => unpickleQuote(tastyQuoted)
  case constant: quoted.Liftable.ConstantExpr[_] => Literal(Constant(constant.value))
  case raw: RawQuoted => raw.tree
}

/** Unpickle the tree contained in the TastyQuoted.
 *
 *  The unpickled tree is expected to be a package definition holding exactly
 *  one `ValDef` or one `TypeDef`; the right-hand side of that definition is
 *  the quoted tree. Any other shape is not matched and yields a `MatchError`.
 */
private def unpickleQuote(expr: quoted.TastyQuoted)(implicit ctx: Context): Tree =
  unpickle(TastyString.stringToTasty(expr.tasty), expr.args) match {
    case PackageDef(_, (valDef: ValDef) :: Nil) => valDef.rhs
    case PackageDef(_, (typeDef: TypeDef) :: Nil) => typeDef.rhs
  }

/** Encapsulate the tree in a top level `val` or `type`
 *    `<tree>` ==> `package _root_ { val ': Any = <tree> }`
 *  or
 *    `<type tree>` ==> `package _root_ { type ' = <type tree> }`
 *
 *  Every synthesized node is given the position of `tree`.
 */
private def encapsulateQuote(tree: Tree)(implicit ctx: Context): Tree = {
  val pos = tree.pos
  val wrapped =
    if (tree.isTerm) {
      // Term quote: bind the tree to a synthetic `val '` of type Any.
      val holder = ctx.newSymbol(ctx.owner, "'".toTermName, Synthetic, defn.AnyType, coord = pos)
      ValDef(holder, tree).withPos(pos)
    }
    else {
      // Type quote: bind the tree to a `type '` definition.
      untpd.TypeDef("'".toTypeName, tree).withPos(pos).withType(defn.AnyType)
    }
  PackageDef(ref(defn.RootPackage).asInstanceOf[Ident], wrapped :: Nil).withPos(pos)
}

// TASTY pickling -- tests/pos/quoteTest.scala

/** Pickle `tree` into its TASTY bytes.
 *
 *  Only the tree section is pickled; position pickling is currently
 *  disabled (see the commented-out code below). When the `pickling`
 *  printer is enabled, the tree and the pickled contents are printed.
 */
private def pickle(tree: Tree)(implicit ctx: Context): Array[Byte] = {
  val tastyPickler = new TastyPickler(defn.RootClass)
  val treePickler = tastyPickler.treePkl
  treePickler.pickle(tree :: Nil)
  treePickler.compactify()
  // Record the address tables on the pickler once the tree buffer is compacted.
  tastyPickler.addrOfTree = treePickler.buf.addrOfTree
  tastyPickler.addrOfSym = treePickler.addrOfSym
  // if (tree.pos.exists)
  //   new PositionPickler(tastyPickler, treePickler.buf.addrOfTree).picklePositions(tree :: Nil)

  // other pickle sections go here.
  val tastyBytes = tastyPickler.assembleParts()

  if (pickling ne noPrinter) {
    println(i"**** pickled quote of \n${tree.show}")
    new TastyPrinter(tastyBytes).printContents()
  }

  tastyBytes
}

/** Unpickle TASTY bytes into their tree.
 *
 *  @param bytes   the TASTY bytes of a pickled quote
 *  @param splices the values handed to the unpickler to fill the holes of the quote
 *  @return the first tree of the unpickled body
 */
private def unpickle(bytes: Array[Byte], splices: Seq[Any])(implicit ctx: Context): Tree = {
  val quoteUnpickler = new TastyUnpickler(bytes, splices)
  quoteUnpickler.enter(roots = Set(defn.RootPackage))
  val tree = quoteUnpickler.body.head

  if (pickling ne noPrinter) {
    println(i"**** unpickled quote for \n${tree.show}")
    new TastyPrinter(bytes).printContents()
  }

  tree
}
}
20 changes: 20 additions & 0 deletions compiler/src/dotty/tools/dotc/core/quoted/Quoted.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package dotty.tools.dotc.core.quoted

import dotty.tools.dotc.ast.Trees.GenericApply
import dotty.tools.dotc.ast.tpd
import dotty.tools.dotc.core.Contexts.Context
import dotty.tools.dotc.core.Types.Type
import dotty.tools.dotc.transform.SymUtils._

/** Extractors for quotes */
object Quoted {

  /** Extracts the content of a quoted tree.
   *
   *  Matches any application whose symbol is a quote and yields its first
   *  argument. The result can be the contents of a term or type quote, in
   *  which case it is a term or type tree respectively.
   */
  def unapply(tree: tpd.Tree)(implicit ctx: Context): Option[tpd.Tree] = tree match {
    case app: GenericApply[Type] if app.symbol.isQuote => Some(app.args.head)
    case _ => None
  }
}
24 changes: 24 additions & 0 deletions compiler/src/dotty/tools/dotc/core/quoted/TastyUnpickler.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package dotty.tools.dotc.core.quoted

import dotty.tools.dotc.core.tasty._
import dotty.tools.dotc.core.tasty.TastyUnpickler.NameTable

object TastyUnpickler {

  /** A tree section unpickler that hands `splices` on to the `TreeUnpickler` it creates.
   *  @param posUnpickler the optional unpickler for the positions section
   *  @param splices      splices that will fill the holes in the quote
   */
  class QuotedTreeSectionUnpickler(posUnpickler: Option[PositionUnpickler], splices: Seq[Any])
    extends DottyUnpickler.TreeSectionUnpickler(posUnpickler) {

    override def unpickle(reader: TastyReader, nameAtRef: NameTable): TreeUnpickler = {
      new TreeUnpickler(reader, nameAtRef, posUnpickler, splices)
    }
  }
}

/** A class for unpickling quoted Tasty trees and symbols.
 *  @param bytes   the byte array containing the Tasty data from which we unpickle
 *  @param splices splices that will fill the holes in the quote
 */
class TastyUnpickler(bytes: Array[Byte], splices: Seq[Any]) extends DottyUnpickler(bytes) {

  /** Supplies a tree section unpickler that knows about `splices`. */
  protected override def treeSectionUnpickler(
      posUnpicklerOpt: Option[PositionUnpickler]): DottyUnpickler.TreeSectionUnpickler =
    new TastyUnpickler.QuotedTreeSectionUnpickler(posUnpicklerOpt, splices)
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,18 @@ class DottyUnpickler(bytes: Array[Byte]) extends ClassfileParser.Embedded {

val unpickler = new TastyUnpickler(bytes)
private val posUnpicklerOpt = unpickler.unpickle(new PositionsSectionUnpickler)
private val treeUnpickler = unpickler.unpickle(new TreeSectionUnpickler(posUnpicklerOpt)).get
private val treeUnpickler = unpickler.unpickle(treeSectionUnpickler(posUnpicklerOpt)).get

/** Enter all toplevel classes and objects into their scopes.
 *  Delegates to the tree unpickler.
 *  @param roots a set of SymDenotations that should be overwritten by unpickling
 */
def enter(roots: Set[SymDenotation])(implicit ctx: Context): Unit = {
  treeUnpickler.enterTopLevel(roots)
}

/** Creates the unpickler used for the tree section.
 *  Protected so that a subclass can supply a different `TreeSectionUnpickler`.
 */
protected def treeSectionUnpickler(posUnpicklerOpt: Option[PositionUnpickler]): TreeSectionUnpickler =
  new TreeSectionUnpickler(posUnpicklerOpt)

/** Only used if `-Yretain-trees` is set. */
private[this] var myBody: List[Tree] = _
/** The unpickled trees, and the source file they come from. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class TastyPrinter(bytes: Array[Byte])(implicit ctx: Context) {
printName(); printTrees()
case REFINEDtype =>
printName(); printTree(); printTrees()
case RETURN =>
case RETURN | HOLE =>
printNat(); printTrees()
case METHODtype | POLYtype | TYPELAMBDAtype =>
printTree()
Expand Down
20 changes: 20 additions & 0 deletions compiler/src/dotty/tools/dotc/core/tasty/TastyString.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package dotty.tools.dotc.core.tasty

/** Utils for String representation of TASTY */
object TastyString {

/** Decode the TASTY String into TASTY bytes.
 *
 *  Each character of `str` yields one byte, obtained with `Char.toByte`,
 *  so the result has exactly `str.length` elements.
 */
def stringToTasty(str: String): Array[Byte] =
  Array.tabulate(str.length)(idx => str.charAt(idx).toByte)

/** Encode TASTY bytes into a TASTY String */
def tastyToString(bytes: Array[Byte]): String = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

new String(bytes)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same problem

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a tricky problem. Looking at Stackoverflow, people say you should use a Codec for this, typically Base64. The scheme of mapping all bytes to ranges 0..255 looks like it would work, but it's not optimal. Strings are represented in Classfiles as UTF8 characters, with one byte for the range 0..127 and two bytes for the range 128..255. This means that, assuming a uniform bit distribution, you get an overhead of 50%. Doing an 8->7 bit codec would give an overhead of less than 15%.

There's another problem of string size. Strings are limited to 65535 bytes in their classfile encoding. This might not be enough for a larger quoted program.

scalac solves both of these problems when serializing its pickles as annotations. I think we should copy that scheme. I tried to find it but could not. @retronym @lrytz @adriaanm does one of you have an idea where the code that serializes a Pickle as an annotation is?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can leave it like this for this PR, but then we should open an issue for future improvements.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will start looking at the alternatives. I also think we should start with this for now to unblock the next PRs and allow people to use it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.

Copy link
Member

@lrytz lrytz Jan 5, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lrytz thanks for the link. Could you also point me to the place where the String/Array[Strings] are converted back into an Array[Byte]. Thanks.

Copy link
Member

@lrytz lrytz Jan 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It took me a while to find it.. Need to clean this up / document. Method parseScalaSigBytes calls ConstantPool.getBytes which goes through ByteCodecs.decode.

The encoding is explained here http://www.scala-lang.org/old/sites/default/files/sids/dubochet/Mon,%202010-05-31,%2015:25/Storage%20of%20pickled%20Scala%20signatures%20in%20class%20files.pdf

  • first map all 8-bit bytes to 7 bits (shifting the rest)
  • then increment all by 1 (in 7 bits), so 0x7f becomes 0x00
  • then encode 0x00 as 0xc0 0x80, which is an overlong utf 8 encoding for zero. it's what the jvm classfile spec uses to avoid having 0x00 in strings. it's called "modified utf 8".

The reason for incrementing by 1 is that 0x7f is expected to be less common than 0x00, so the two-byte encoding is hit less often.

The confusing part is that the class ScalaSigBytes used in the backend to encode the signature uses ByteCodecs.encode8to7, but does the +1 itself. It doesn't need to map 0x00 to the two byte version because ASM will do it when writing the annotation to the classfile. However, in the unpickler, we don't use ASM to read the annotation, but just get the bytes from the classfile directly. So there we'll see the two byte encoding. ByteCodecs.decode does the necessary work.

val chars = new Array[Char](bytes.length)
for (i <- bytes.indices) chars(i) = (bytes(i) & 0xff).toChar
new String(chars)
}

}
2 changes: 1 addition & 1 deletion compiler/src/dotty/tools/dotc/core/tasty/TreePickler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ class TreePickler(pickler: TastyPickler) {
pickleName(sym.name)
pickleParams
tpt match {
case templ: Template => pickleTree(tpt)
case _: Template | _: Hole => pickleTree(tpt)
case _ if tpt.isType => pickleTpt(tpt)
}
pickleTreeUnlessEmpty(rhs)
Expand Down
9 changes: 7 additions & 2 deletions compiler/src/dotty/tools/dotc/core/tasty/TreeUnpickler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ import scala.collection.{ mutable, immutable }
import config.Printers.pickling
import typer.Checking
import config.Config
import dotty.tools.dotc.core.quoted.PickledQuotes
import dotty.tools.dotc.interpreter.RawQuoted
import scala.quoted.Expr

/** Unpickler for typed trees
* @param reader the reader from which to unpickle
Expand Down Expand Up @@ -1030,8 +1033,10 @@ class TreeUnpickler(reader: TastyReader,
val idx = readNat()
val args = until(end)(readTerm())
val splice = splices(idx)
if (args.isEmpty) splice.asInstanceOf[Tree]
else splice.asInstanceOf[Seq[Any] => Tree](args)
val expr =
if (args.isEmpty) splice.asInstanceOf[Expr[_]]
else splice.asInstanceOf[Seq[Any] => Expr[_]](args.map(RawQuoted.apply))
PickledQuotes.quotedToTree(expr)
case _ =>
readPathTerm()
}
Expand Down
Loading