Skip to content

Commit e01d193

Browse files
committed
Add REPL disassembler framework
This commit only provides a framework to support bytecode disassembly from within the REPL; it does not supply any concrete implementations using any particular disassembler -- those will follow in subsequent commits. Adapted from the Scala 2 :javap implementation, which was written by Paul Phillips and Som Snytt / A. P. Marki.
1 parent 8c3e7a2 commit e01d193

File tree

5 files changed

+686
-39
lines changed

5 files changed

+686
-39
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package dotty.tools.repl
2+
3+
/** Extension methods for reading class file bytes through a `ClassLoader`. */
object ClassLoaderOps:
  extension (cl: ClassLoader)
    /** The actual bytes for a class file, or an empty array if it can't be found.
     *
     *  @param className either a dotted class name or a resource path already
     *                   ending in ".class"
     */
    def classBytes(className: String): Array[Byte] =
      val in = classAsStream(className)
      // getResourceAsStream answers null when the resource does not exist
      if in == null then Array()
      else dotty.tools.io.Streamable.bytes(in)

    /** Open the class file resource for `className`; null if there is none. */
    private inline def classAsStream(className: String) =
      val resourcePath =
        if className.endsWith(".class") then className
        else s"${className.replace('.', '/')}.class" // classNameToPath
      cl.getResourceAsStream(resourcePath)
end ClassLoaderOps
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
package dotty.tools
2+
package repl
3+
4+
import scala.annotation.internal.sharable
5+
import scala.util.{Failure, Success, Try}
6+
import scala.util.matching.Regex
7+
8+
import dotc.core.StdNames.*
9+
import DisResult.*
10+
11+
/** Abstract representation of a disassembler.
 *
 *  The high-level disassembly process is as follows:
 *   1. parse the arguments to the disassembly command
 *   2. map input targets to class bytes via DisassemblyClass
 *   3. select a DisassemblyTool implementation and run it to generate disassembly text
 *   4. perform any post-processing/filtering of the output text
 */
abstract class Disassembler:
  import Disassembler.*

  /** Run the disassembly tool with the supplied options, in the context of a DisassemblerRepl. */
  def apply(opts: DisassemblerOptions)(using DisassemblerRepl): List[DisResult]

  /** Help strings for the flags supported by this disassembler.
   *  Each entry is of the form: "-flag" -> "help text"
   */
  def helps: List[(String, String)]

  /** Formatted help text for this disassembler: one line per entry of `helps`,
   *  with the flag name padded/truncated to a 12-character column.
   */
  def helpText: String =
    val rows =
      for (flag, text) <- helps
      yield f"${flag}%-12.12s${text}%s%n"
    rows.mkString

  /** The post-processing filters to be applied to the text results of this disassembler,
   *  based on the options in effect and the disassembly target.
   *
   *  This base implementation only knows about the removal of REPL naming artifacts,
   *  which is switched on by the special `-filter` flag; subclasses may provide
   *  additional filters as appropriate.
   */
  def filters(target: String, opts: DisassemblerOptions): List[String => String] =
    if !opts.filterReplNames then Nil
    else List(filterReplNames(_))

  /** All filters for `target` composed, in order, into one function over the output text. */
  final def outputFilter(target: String, opts: DisassemblerOptions): String => String =
    val active = filters(target, opts)
    if active.isEmpty then identity
    else Function.chain(active)
45+
46+
/** Text-filtering helpers shared by disassembler implementations. */
object Disassembler:
  // Matches a REPL wrapper reference: the REPL package name, a '.' or '/'
  // separator, the session line prefix followed by digits, and an optional '$'.
  @sharable private val ReplWrapperName =
    val replPackage = Regex.quote(nme.REPL_PACKAGE.toString)
    val sessionLine = Regex.quote(str.REPL_SESSION_LINE)
    val dollar = Regex.quote("$")
    (replPackage + "[./]" + sessionLine + raw"\d+" + dollar + "?").r

  /** A filter to remove REPL wrapper names from the output. */
  def filterReplNames(in: String): String =
    ReplWrapperName.replaceAllIn(in, "")

  /** Keep only the lines of `text` accepted by `pred`.
   *  Every retained line is terminated by '\n' in the result.
   */
  def filteredLines(text: String, pred: String => Boolean): String =
    text.linesIterator
      .filter(pred)
      .map(kept => kept + "\n")
      .mkString

  /** Extract any member name from a disassembly target, e.g. `Foo#bar`.
   *
   *  The member is whatever follows the last '#'; `Foo#` yields a zero-length
   *  member part, and a target without '#' yields `None`.
   */
  def splitHashMember(s: String): Option[String] =
    val hashAt = s.lastIndexOf('#')
    Option.when(hashAt >= 0)(s.substring(hashAt + 1))
end Disassembler
72+
73+
/** The result of a disassembly command. */
enum DisResult:
  /** Disassembly failed; `message` describes the problem. */
  case DisError(message: String)

  /** Disassembly of `target` produced the text `output`. */
  case DisSuccess(target: String, output: String)
77+
78+
/** The REPL context used for disassembly.
 *
 *  @param driver the REPL driver that is queried for the class loader and targets
 *  @param state  the REPL state passed to those queries
 */
case class DisassemblerRepl(driver: ReplDriver, state: State):

  /** The driver's REPL class loader, obtained under the context of `state`. */
  def classLoader: ClassLoader =
    driver.replClassLoader()(using state.context)

  /** The disassembly targets the driver reports for the last wrapper of `state`;
   *  this is what a bare "-" target expands to.
   */
  def mostRecentEntry: Seq[String] =
    driver.disassemblyTargetsLastWrapper(state)
82+
83+
/** The parsed arguments of a disassembly command.
 *
 *  @param flags           the options to hand to the disassembly tool
 *  @param targets         the things to disassemble
 *  @param filterReplNames whether REPL naming artifacts should be filtered from the output
 */
final case class DisassemblerOptions(
  flags: Seq[String],
  targets: Seq[String],
  filterReplNames: Boolean
)
84+
85+
/** A generic option parser; the available options are taken from `helps`.
 *
 *  The first component of each `helps` entry is an option spec: either a long
 *  form alone ("-foo") or a long form plus a one-character short form ("-foo/-f").
 */
abstract class DisassemblerOptionParser(helps: List[(String, String)]):
  /** The tool options to use when the user does not supply any. */
  def defaultToolOptions: List[String]

  // Option spec "-foo/-f": group 1 is the long form, group 2 the short form
  // (null when the spec has none). Compiled once per parser rather than once
  // per parsed argument.
  private val OptionSpec = """(-[^/]*)(?:/(-.))?""".r

  /** Parse the arguments to the disassembly tool.
   *  Option args start with "-", except that "-" itself denotes the last REPL result.
   *
   *  @param args the raw command arguments, options and targets in any order
   *  @return the tool options (or `defaultToolOptions` if none were given), the
   *          expanded targets, and whether the REPL's `-filter` flag was present
   */
  def parse(args: Seq[String])(using repl: DisassemblerRepl): DisassemblerOptions =
    val (rawOptions, rawTargets) = args.partition(s => s.startsWith("-") && s.length > 1)
    val (toolOptions, filterReplNames) = toolArgs(rawOptions)
    val options = if toolOptions.isEmpty then defaultToolOptions else toolOptions

    // "-" may expand into multiple targets (e.g. if multiple type defs in a single wrapper)
    val targets = rawTargets.flatMap {
      case "-" => repl.mostRecentEntry
      case s   => Seq(s)
    }
    DisassemblerOptions(options, targets, filterReplNames)

  /** Split tool options from the REPL's `-filter` flag, after expanding
   *  prefixes and packed short options to full flag names.
   */
  private def toolArgs(args: Seq[String]): (Seq[String], Boolean) =
    val (opts, rest) = args.flatMap(massage).partition(_ != "-filter")
    (opts, rest.nonEmpty)

  /** Normalize one option argument to the flag name(s) it denotes.
   *
   *  `arg` is resolved, in order, as:
   *   - a unique prefix of a long form, or an exact short form;
   *   - a pack of short option characters ("-abc" as "-a", "-b", "-c"),
   *     each of which must decode to exactly one option;
   *   - otherwise "-help".
   */
  private def massage(arg: String): Seq[String] =
    require(arg.startsWith("-"))

    // arg matches opt "-foo/-f" if prefix of -foo or exactly -f
    def maybe(opt: String, s: String): Option[String] = opt match
      // disambiguate by preferring short form; sf is null if the spec has none
      case OptionSpec(_, sf) if s == sf           => Some(sf)
      case OptionSpec(lf, _) if lf.startsWith(s)  => Some(lf)
      case _                                      => None

    def candidates(s: String): List[String] = helps.flatMap(h => maybe(h._1, s))

    // one candidate or one single-char candidate; never more than one element
    def uniqueOf(maybes: Seq[String]): Seq[String] =
      def single(s: String) = s.length == 2
      if maybes.length == 1 then maybes
      else if maybes.count(single) == 1 then maybes.filter(single)
      else Nil

    // each optchar must decode to exactly one option, else the whole pack is rejected
    def unpacked(s: String): Option[Seq[String]] =
      val decoded = s.drop(1).map(c => uniqueOf(candidates(s"-$c")).headOption)
      Option.when(decoded.forall(_.isDefined))(decoded.flatten)

    val res = uniqueOf(candidates(arg))
    if res.nonEmpty then res
    else unpacked(arg).getOrElse(Seq("-help")) // or else someone needs help
  end massage
end DisassemblerOptionParser
143+
144+
/** A tool to perform disassembly of class bytes. */
abstract class DisassemblyTool:
  /** Run the tool.
   *
   *  @param options the options to pass to the tool
   *  @param inputs  the targets to disassemble, each paired with its class bytes
   *  @return the disassembly results
   */
  def apply(options: Seq[String])(inputs: Seq[DisassemblyTool.Input]): List[DisResult]
148+
149+
object DisassemblyTool:
  /** The input to a disassembly tool.
   *
   *  @param target The disassembly target as given by the user.
   *  @param actual The class name or file name where the target data was found.
   *  @param data   The class bytes to be disassembled, or the failure from looking them up.
   */
  case class Input(
    target: String,
    actual: String,
    data: Try[Array[Byte]]
  )
157+
158+
/** A provider of the bytes to be disassembled.
 *
 *  Handles translation of an input path to a (possibly empty) array of bytes
 *  from the specified classloader, where the input path may be:
 *   - a class name (possibly qualified)
 *   - the name of a type or term symbol in scope
 *   - the filesystem path to a .class file
 *
 *  The REPL uses an in-memory classloader, so depending on the target of the
 *  disassembly, the bytes under examination may not exist on disk.
 */
class DisassemblyClass(loader: ClassLoader)(using repl: DisassemblerRepl):
  import DisassemblyClass.*
  import DisassemblyTool.*
  import ClassLoaderOps.*
  import dotty.tools.io.File
  import java.io.FileNotFoundException

  /** Associate the requested path with a possibly failed or empty array of bytes. */
  def bytes(path: String): Input =
    val located = bytesFor(path)
    // when the lookup failed, the requested path doubles as the "actual" name
    val actual = located.map(_._1).getOrElse(path)
    Input(path, actual, located.map(_._2))

  /** Find bytes. Handle "Foo#bar" (by ignoring member), "#bar" (by taking "bar").
   *  @return the path to use for filtering, and the byte array
   */
  private def bytesFor(path: String): Try[(String, Array[Byte])] =
    val lookupName = Try {
      path match
        // either group is null when its part of the target is absent
        case HashSplit(prefix, member) =>
          Option(prefix).orElse(Option(member)).getOrElse(path)
        case _ => path
    }
    lookupName.flatMap(findBytes)

  // data paired with actual path where it was found
  private def findBytes(path: String) =
    tryFile(path).orElse(tryClass(path))

  /** Assume the string is a path and try to find the classfile it represents. */
  private def tryFile(path: String): Try[(String, Array[Byte])] =
    for
      file <- Try(File(path.asClassResource))
      if file.exists
    yield (path, file.toByteArray())

  /** Assume the string is a fully qualified class name and try to
   *  find the class object it represents.
   *  There are other symbols of interest, too:
   *   - a definition that is wrapped in an enclosing class
   *   - a synthetic that is not in scope but its associated class is
   */
  private def tryClass(path: String): Try[(String, Array[Byte])] =
    given State = repl.state

    def loadable(name: String) = loader.resourceable(name)

    // if path has an interior dollar, take it as a synthetic
    // if the prefix up to the dollar is a symbol in scope,
    // result is the translated prefix + suffix
    def desynthesize(s: String): Option[String] =
      val dollarAt = s.indexOf('$')
      if dollarAt < 0 || dollarAt >= s.length - 1 then None
      else
        val (prefix, tail) = s.splitAt(dollarAt)

        def translated(dropTrailingDollar: Boolean): Option[String] =
          repl.driver.binaryClassOfType(prefix)
            .map(cls => (if dropTrailingDollar then cls.stripSuffix("$") else cls) + tail)
            .filter(loadable)

        // try loading translated+suffix
        // some synthetics lack a dollar, (e.g., suffix = delayedInit$body)
        // so as a hack, if prefix$$suffix fails, also try prefix$suffix
        translated(dropTrailingDollar = false)
          .orElse(translated(dropTrailingDollar = true))
    end desynthesize

    def scopedClass(name: String): Option[String] =
      repl.driver.binaryClassOfType(name).filter(loadable)
    def enclosingClass(name: String): Option[String] =
      repl.driver.binaryClassOfTerm(name).filter(loadable)
    def qualifiedName(name: String): Option[String] =
      Option.when(name.contains('.') && loadable(name))(name)

    val scrubbed = path.asClassName // scrub any suffix

    // resolution strategies, tried lazily in this order; the first hit wins
    val strategies: List[String => Option[String]] =
      List(qualifiedName, scopedClass, enclosingClass, desynthesize)
    val className =
      strategies.iterator
        .flatMap(resolve => resolve(scrubbed))
        .nextOption()
        .getOrElse(scrubbed)

    val data = loader.classBytes(className)
    if data.nonEmpty then Success((className, data))
    else Failure(FileNotFoundException(s"Could not find class bytes for '$path'"))
  end tryClass
256+
257+
object DisassemblyClass:
  private final val classSuffix = ".class"

  /** Match foo#bar, both groups are optional (may be null). */
  @sharable private val HashSplit = "([^#]+)?(?:#(.+)?)?".r

  // We enjoy flexibility in specifying either a fully-qualified class name com.acme.Widget
  // or a resource path com/acme/Widget.class; but not widget.out
  extension (s: String)
    /** This string as a dotted class name: any ".class" suffix dropped, '/' turned into '.'. */
    def asClassName: String =
      val withoutSuffix = s.stripSuffix(classSuffix)
      withoutSuffix.replace('/', '.')

    /** This string as a class file resource path: unchanged if it already ends
     *  in ".class", otherwise '.' turned into '/' and ".class" appended.
     */
    def asClassResource: String =
      if !s.endsWith(classSuffix) then s.replace('.', '/') + classSuffix
      else s

  extension (cl: ClassLoader)
    /** Would classBytes succeed with a nonempty array */
    def resourceable(className: String): Boolean =
      Option(cl.getResource(className.asClassResource)).isDefined
end DisassemblyClass

0 commit comments

Comments
 (0)