Skip to content

Commit 8587414

Browse files
committed
Add REPL disassembler framework
This commit only provides a framework to support bytecode disassembly from within the REPL, it does not supply any concrete implementations using any particular disassembler -- those will follow in subsequent commits. Adapted from the Scala 2 :javap implementation, which was written by Paul Phillips and Som Snytt / A. P. Marki
1 parent 0573a77 commit 8587414

File tree

5 files changed

+705
-60
lines changed

5 files changed

+705
-60
lines changed
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
package dotty.tools
2+
package repl
3+
4+
import scala.annotation.internal.sharable
5+
import scala.util.{Failure, Success, Try}
6+
import scala.util.matching.Regex
7+
8+
import dotc.core.StdNames.*
9+
import DisResult.*
10+
11+
/** Abstract representation of a disassembler.
 *
 *  The high-level disassembly process is as follows:
 *    1. parse the arguments to the disassembly command
 *    2. map input targets to class bytes via DisassemblyClass
 *    3. select a DisassemblyTool implementation and run it to generate disassembly text
 *    4. perform any post-processing/filtering of the output text
 */
abstract class Disassembler:
  import Disassembler.*

  /** Run the disassembly tool with the supplied options.
   *
   *  @param opts the parsed flags and targets for this invocation
   *  @return     one result per processed target (success text or error message)
   */
  def apply(opts: DisassemblerOptions)(using DisassemblerRepl): List[DisResult]

  /** Help entries for the flags supported by this disassembler.
   *  Each entry is of the form: "-flag" -> "help text"
   */
  def helps: List[(String, String)]

  /** Formatted help text for this disassembler: one line per entry of `helps`,
   *  with the flag name left-justified in (and truncated to) a 12-character column,
   *  followed by its help text.
   */
  def helpText: String =
    helps
      .map { case (name, help) => f"${name}%-12.12s${help}%s%n" }
      .mkString

  /** The post-processing filters to apply to the text output of this disassembler,
   *  given the options in effect and the disassembly target.
   *
   *  This base implementation only contributes the REPL naming-artifact filter, and
   *  only when `opts.filterReplNames` is set; subclasses may supply further filters.
   */
  def filters(target: String, opts: DisassemblerOptions): List[String => String] =
    if opts.filterReplNames then List(filterReplNames) else Nil

  /** All applicable filters composed into a single function (identity when there are none). */
  final def outputFilter(target: String, opts: DisassemblerOptions): String => String =
    val active = filters(target, opts)
    if active.isEmpty then identity else Function.chain(active)
46+
object Disassembler:
  /** Matches a REPL wrapper name: the session-line prefix, one or more digits,
   *  and an optional trailing `$`.
   */
  @sharable private val ReplWrapperName: Regex =
    val sessionLine = Regex.quote(str.REPL_SESSION_LINE)
    val optionalDollar = Regex.quote("$") + "?"
    (sessionLine + raw"\d+" + optionalDollar).r

  /** A filter that deletes every REPL wrapper name occurring in `in`. */
  def filterReplNames(in: String): String =
    ReplWrapperName.replaceAllIn(in, "")

  /** Keep only the lines of `text` that satisfy `pred`.
   *  Every retained line is terminated by a single '\n' in the result.
   */
  def filteredLines(text: String, pred: String => Boolean): String =
    text.linesIterator
      .filter(pred)
      .map(kept => kept + '\n')
      .mkString

  /** Extract the member part of a disassembly target such as `Foo#bar`.
   *
   *  @return the text after the last '#' (zero-length for `Foo#`),
   *          or `None` when the target contains no '#'
   */
  def splitHashMember(s: String): Option[String] =
    val hashAt = s.lastIndexOf('#')
    Option.when(hashAt >= 0)(s.drop(hashAt + 1))
end Disassembler
71+
72+
/** The result of a disassembly command. */
enum DisResult:
  /** Disassembly failed; `message` describes the problem and may be null. */
  case DisError(message: String | Null)

  /** Disassembly of `target` succeeded and produced the text `output`. */
  case DisSuccess(target: String, output: String)
76+
77+
/** The REPL context used for disassembly: a driver paired with a REPL state. */
case class DisassemblerRepl(driver: ReplDriver, state: State):

  /** The class loader obtained from the driver, using the context of `state`. */
  def classLoader: ClassLoader =
    driver.replClassLoader()(using state.context)

  /** The disassembly targets the driver reports for the last wrapper of `state`. */
  def mostRecentEntry: Seq[String] =
    driver.disassemblyTargetsLastWrapper(state)
81+
82+
/** The parsed arguments of a disassembly command.
 *
 *  @param flags            the options to pass to the disassembly tool
 *  @param targets          the targets to disassemble
 *  @param filterReplNames  whether REPL naming artifacts are filtered from the output
 */
final case class DisassemblerOptions(
  flags: Seq[String],
  targets: Seq[String],
  filterReplNames: Boolean
)
83+
84+
/** A generic option parser; the available options are taken from `helps`,
 *  whose flag names are spelled either "-foo" or "-foo/-f" (long form with
 *  an optional short form).
 */
abstract class DisassemblerOptionParser(helps: List[(String, String)]):
  /** The tool options to use when the command line supplies none. */
  def defaultToolOptions: List[String]

  /** Parse the arguments to the disassembly tool.
   *
   *  Option args start with "-", except that "-" itself is a target denoting
   *  the last REPL result. When no tool option remains after parsing,
   *  `defaultToolOptions` is used instead.
   */
  def parse(args: Seq[String])(using repl: DisassemblerRepl): DisassemblerOptions =
    def isOption(arg: String) = arg.length > 1 && arg.startsWith("-")

    val (rawOptions, rawTargets) = args.partition(isOption)
    val (toolOptions, filterReplNames) = toolArgs(rawOptions)
    val effectiveOptions = if toolOptions.isEmpty then defaultToolOptions else toolOptions

    // "-" may expand into multiple targets (e.g. if multiple type defs in a single wrapper)
    val targets = rawTargets.flatMap(t => if t == "-" then repl.mostRecentEntry else Seq(t))

    DisassemblerOptions(effectiveOptions, targets, filterReplNames)

  /** Normalize the option args and separate the tool options from the REPL's own
   *  `-filter` flag.
   *
   *  @return the normalized tool options, and whether `-filter` was present
   */
  private def toolArgs(args: Seq[String]): (Seq[String], Boolean) =
    val normalized = args.flatMap(massage)
    (normalized.filterNot(_ == "-filter"), normalized.contains("-filter"))

  /** Resolve one option arg against the known flags.
   *
   *  The arg is accepted if it is a unique prefix of a long form or exactly a
   *  short form; otherwise it is treated as a bundle of single-character options,
   *  each of which must resolve on its own. Anything else becomes "-help".
   */
  private def massage(arg: String): Seq[String] =
    require(arg.startsWith("-"))
    // arg matches opt "-foo/-f" if prefix of -foo or exactly -f
    val OptSpec = """(-[^/]*)(?:/(-.))?""".r

    def maybe(opt: String, s: String): Option[String] = opt match
      // disambiguate by preferring short form
      case OptSpec(_, shortForm) if s == shortForm => Some(shortForm)
      case OptSpec(longForm, _) if longForm.startsWith(s) => Some(longForm)
      case _ => None

    def candidates(s: String) = helps.flatMap(h => maybe(h._1, s))

    // one candidate or one single-char candidate
    def uniqueOf(maybes: Seq[String]): Seq[String] =
      lazy val shortForms = maybes.filter(_.length == 2)
      if maybes.length == 1 then maybes
      else if shortForms.length == 1 then shortForms
      else Nil

    // each optchar must decode to exactly one option
    def unpacked(s: String): Option[Seq[String]] =
      val decoded = s.drop(1).map { c =>
        uniqueOf(candidates(s"-$c")) match
          case Seq(only) => Some(only)
          case _ => None
      }
      Option.when(decoded.forall(_.isDefined))(decoded.flatten)

    uniqueOf(candidates(arg)) match
      case Seq() => unpacked(arg).getOrElse(Seq("-help")) // or else someone needs help
      case resolved => resolved
  end massage
end DisassemblerOptionParser
142+
143+
/** A tool to perform disassembly of class bytes. */
abstract class DisassemblyTool:
  import DisassemblyTool.*

  /** Disassemble the given inputs.
   *
   *  @param options the options to run the tool with
   *  @param inputs  the targets together with their (possibly failed) class bytes
   *  @return        the disassembly results
   */
  def apply(options: Seq[String])(inputs: Seq[Input]): List[DisResult]
147+
148+
object DisassemblyTool:
  /** The input to a disassembly tool.
   *
   *  @param target  The disassembly target as given by the user.
   *  @param actual  The class name or file name where the target data was found.
   *  @param data    The class bytes to be disassembled, or the failure met while locating them.
   */
  case class Input(
    target: String,
    actual: String,
    data: Try[Array[Byte]]
  )
156+
157+
/** A provider of the bytes to be disassembled.
 *
 *  Handles translation of an input path to a (possibly empty) array of bytes
 *  from the specified classloader, where the input path may be:
 *    - a class name (possibly qualified)
 *    - the name of a type or term symbol in scope
 *    - the filesystem path to a .class file
 *
 *  The REPL uses an in-memory classloader, so depending on the target of the
 *  disassembly, the bytes under examination may not exist on disk.
 */
class DisassemblyClass(loader: ClassLoader)(using repl: DisassemblerRepl):
  import DisassemblyClass.*
  import DisassemblyTool.*
  import dotty.tools.io.File
  import dotty.tools.runner.ClassLoaderOps.*
  import java.io.FileNotFoundException

  /** Associate the requested path with a possibly failed or empty array of bytes.
   *  On failure the requested path also stands in for the actual location.
   */
  def bytes(path: String): Input =
    val located = bytesFor(path)
    val actual = located.map(_._1).getOrElse(path)
    Input(path, actual, located.map(_._2))

  /** Find bytes. Handle "Foo#bar" (by ignoring member), "#bar" (by taking "bar").
   *  @return the path to use for filtering, and the byte array
   */
  private def bytesFor(path: String) =
    import scala.language.unsafeNulls // lampepfl/dotty#14672
    val lookupName = Try {
      path match
        case HashSplit(prefix, member) =>
          // both groups are optional: prefer the class part, then the member part
          if prefix != null then prefix
          else if member != null then member
          else path
        case other => other
    }
    lookupName.flatMap(findBytes)

  // data paired with actual path where it was found; a file on disk is tried first
  private def findBytes(path: String) =
    tryFile(path).orElse(tryClass(path))

  /** Assume the string is a path and try to find the classfile it represents. */
  private def tryFile(path: String): Try[(String, Array[Byte])] =
    for
      file <- Try(File(path.asClassResource))
      if file.exists
    yield (path, file.toByteArray())

  /** Assume the string is a fully qualified class name and try to
   *  find the class object it represents.
   *  There are other symbols of interest, too:
   *    - a definition that is wrapped in an enclosing class
   *    - a synthetic that is not in scope but its associated class is
   */
  private def tryClass(path: String): Try[(String, Array[Byte])] =
    given State = repl.state

    def loadable(name: String) = loader.resourceable(name)

    // if path has an interior dollar, take it as a synthetic
    // if the prefix up to the dollar is a symbol in scope,
    // result is the translated prefix + suffix
    def desynthesize(s: String): Option[String] =
      val dollarAt = s.indexOf('$')
      val hasUsableDollar = dollarAt >= 0 && dollarAt < s.length - 1
      if !hasUsableDollar then None
      else
        val name = s.substring(0, dollarAt).nn
        val sufx = s.substring(dollarAt)

        def translated(strip: Boolean) =
          repl.driver.binaryClassOfType(name)
            .map(cls => (if strip && cls.endsWith("$") then cls.init else cls) + sufx)
            .filter(loadable)

        // try loading translated+suffix
        // some synthetics lack a dollar, (e.g., suffix = delayedInit$body)
        // so as a hack, if prefix$$suffix fails, also try prefix$suffix
        translated(strip = false).orElse(translated(strip = true))
    end desynthesize

    def scopedClass(name: String): Option[String] =
      repl.driver.binaryClassOfType(name).filter(loadable)

    def enclosingClass(name: String): Option[String] =
      repl.driver.binaryClassOfTerm(name).filter(loadable)

    def qualifiedName(name: String): Option[String] =
      Option.when(name.contains('.') && loadable(name))(name)

    val scrubbed = path.asClassName // scrub any suffix
    val className =
      qualifiedName(scrubbed)
        .orElse(scopedClass(scrubbed))
        .orElse(enclosingClass(scrubbed))
        .orElse(desynthesize(scrubbed))
        .getOrElse(scrubbed)

    val data = loader.classBytes(className)
    if data.nonEmpty then Success((className, data))
    else Failure(FileNotFoundException(s"Could not find class bytes for '$path'"))
  end tryClass
256+
257+
object DisassemblyClass:
  private final val classSuffix = ".class"

  /** Match foo#bar, both groups are optional (may be null). */
  @sharable private val HashSplit = "([^#]+)?(?:#(.+)?)?".r

  // We enjoy flexibility in specifying either a fully-qualified class name com.acme.Widget
  // or a resource path com/acme/Widget.class; but not widget.out
  extension (s: String)
    /** The dotted class name: a trailing ".class" is dropped and '/' becomes '.'. */
    def asClassName =
      s.stripSuffix(classSuffix).replace('/', '.').nn

    /** The resource path: unchanged if it already ends in ".class",
     *  otherwise '.' becomes '/' and ".class" is appended.
     */
    def asClassResource =
      if s.endsWith(classSuffix) then s
      else s.replace('.', '/').nn + classSuffix

  extension (cl: ClassLoader)
    /** Would classBytes succeed with a nonempty array */
    def resourceable(className: String): Boolean =
      val resource = cl.getResource(className.asClassResource)
      resource != null
end DisassemblyClass

0 commit comments

Comments
 (0)