Skip to content

Commit a66bd61

Browse files
committed
refactor: move bytecode code into bytecode.py
1 parent d64ce5f commit a66bd61

File tree

4 files changed

+149
-147
lines changed

4 files changed

+149
-147
lines changed

coverage/bytecode.py

+141-1
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
22
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
33

4-
"""Bytecode manipulation for coverage.py"""
4+
"""Bytecode analysis for coverage.py"""
55

66
from __future__ import annotations
77

8+
import dis
9+
810
from types import CodeType
11+
from typing import Iterable, Optional
912
from collections.abc import Iterator
1013

14+
from coverage.types import TArc, TOffset
15+
1116

1217
def code_objects(code: CodeType) -> Iterator[CodeType]:
1318
"""Iterate over all the code objects in `code`."""
@@ -20,3 +25,138 @@ def code_objects(code: CodeType) -> Iterator[CodeType]:
2025
if isinstance(c, CodeType):
2126
stack.append(c)
2227
yield code
28+
29+
30+
def op_set(*op_names: str) -> set[int]:
    """Make a set of opcodes from instruction names.

    Names that don't exist in this version of Python are skipped, so the
    result can have fewer members than the number of names given, and can
    be empty.

    Args:
        op_names: instruction names, looked up in `dis.opmap`.

    Returns:
        The opcode numbers of the names that `dis.opmap` knows about.
    """
    # Test against None instead of truthiness: 0 is a valid opcode number and
    # must not be discarded as if the name were unknown.
    return {op for name in op_names if (op := dis.opmap.get(name)) is not None}
36+
37+
38+
# Unconditional jumps: after one of these, execution always continues at the
# jump target rather than at the following instruction.
ALWAYS_JUMPS = op_set("JUMP_FORWARD", "JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT")

# Opcodes that exit from a function.
RETURNS = op_set(
    "RETURN_GENERATOR",
    "RETURN_VALUE",
)
48+
49+
class InstructionWalker:
    """Step through the instructions of a code object.

    `walk` supports two kinds of traversal:

    - `walk(follow_jumps=False)` visits every instruction in strict offset
      order.
    - `walk(follow_jumps=True)` goes where unconditional jumps lead, to
      understand how execution will flow.

    """

    def __init__(self, code: CodeType) -> None:
        self.code = code

        instructions = list(dis.get_instructions(code))
        # Index the instructions by bytecode offset for direct lookup.
        self.insts: dict[TOffset, dis.Instruction] = {
            instruction.offset: instruction for instruction in instructions
        }

        assert instructions
        # Offset of the last instruction: walks stop once they pass it.
        self.max_offset = instructions[-1].offset

    def walk(
        self, *, start_at: TOffset = 0, follow_jumps: bool = True
    ) -> Iterable[dis.Instruction]:
        """
        Yield instructions beginning at offset `start_at`.

        When `follow_jumps` is true, an instruction in ALWAYS_JUMPS sends the
        walk to its jump target instead of to the next offset.  The walk ends
        when it passes the last instruction or returns to an offset it has
        already visited.
        """
        visited: set[TOffset] = set()
        position = start_at
        while position <= self.max_offset and position not in visited:
            visited.add(position)
            current = self.insts.get(position)
            if current is None:
                # No instruction starts at this offset: try the next slot.
                position += 2
                continue
            yield current
            if follow_jumps and current.opcode in ALWAYS_JUMPS:
                position = current.jump_target
            else:
                position += 2
89+
90+
91+
TBranchTrail = tuple[list[TOffset], Optional[TArc]]
92+
TBranchTrails = dict[TOffset, list[TBranchTrail]]
93+
94+
95+
def branch_trails(code: CodeType) -> TBranchTrails:
96+
"""
97+
Calculate branch trails for `code`.
98+
99+
Instructions can have a jump_target, where they might jump to next. Some
100+
instructions with a jump_target are unconditional jumps (ALWAYS_JUMPS), so
101+
they aren't interesting to us, since they aren't the start of a branch
102+
possibility.
103+
104+
Instructions that might or might not jump somewhere else are branch
105+
possibilities. For each of those, we track a trail of instructions. These
106+
are lists of instruction offsets, the next instructions that can execute.
107+
We follow the trail until we get to a new source line. That gives us the
108+
arc from the original instruction's line to the new source line.
109+
110+
"""
111+
the_trails: TBranchTrails = {}
112+
iwalker = InstructionWalker(code)
113+
for inst in iwalker.walk(follow_jumps=False):
114+
if not inst.jump_target:
115+
# We only care about instructions with jump targets.
116+
continue
117+
if inst.opcode in ALWAYS_JUMPS:
118+
# We don't care about unconditional jumps.
119+
continue
120+
121+
from_line = inst.line_number
122+
if from_line is None:
123+
continue
124+
125+
def walk_one_branch(start_at: TOffset) -> TBranchTrail:
126+
# pylint: disable=cell-var-from-loop
127+
inst_offsets: list[TOffset] = []
128+
to_line = None
129+
for inst2 in iwalker.walk(start_at=start_at):
130+
inst_offsets.append(inst2.offset)
131+
if inst2.line_number and inst2.line_number != from_line:
132+
to_line = inst2.line_number
133+
break
134+
elif inst2.jump_target and (inst2.opcode not in ALWAYS_JUMPS):
135+
break
136+
elif inst2.opcode in RETURNS:
137+
to_line = -code.co_firstlineno
138+
break
139+
if to_line is not None:
140+
return inst_offsets, (from_line, to_line)
141+
else:
142+
return [], None
143+
144+
# Calculate two trails: one from the next instruction, and one from the
145+
# jump_target instruction.
146+
trails = [
147+
walk_one_branch(start_at=inst.offset + 2),
148+
walk_one_branch(start_at=inst.jump_target),
149+
]
150+
the_trails[inst.offset] = trails
151+
152+
# Sometimes we get BRANCH_RIGHT or BRANCH_LEFT events from instructions
153+
# other than the original jump possibility instruction. Register each
154+
# trail under all of their offsets so we can pick up in the middle of a
155+
# trail if need be.
156+
for trail in trails:
157+
for offset in trail[0]:
158+
if offset not in the_trails:
159+
the_trails[offset] = []
160+
the_trails[offset].append(trail)
161+
162+
return the_trails

coverage/sysmon.py

+4-145
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from __future__ import annotations
77

8-
import dis
98
import functools
109
import inspect
1110
import os
@@ -19,20 +18,20 @@
1918
from typing import (
2019
Any,
2120
Callable,
22-
Iterable,
2321
NewType,
2422
Optional,
2523
cast,
2624
)
2725

2826
from coverage import env
27+
from coverage.bytecode import TBranchTrails, branch_trails
2928
from coverage.debug import short_filename, short_stack
3029
from coverage.misc import isolate_module
3130
from coverage.types import (
3231
AnyCallable,
33-
TArc,
3432
TFileDisposition,
3533
TLineNo,
34+
TOffset,
3635
TShouldStartContextFn,
3736
TShouldTraceFn,
3837
TTraceData,
@@ -58,18 +57,6 @@
5857
DISABLE_TYPE = NewType("DISABLE_TYPE", object)
5958
MonitorReturn = Optional[DISABLE_TYPE]
6059
DISABLE = cast(MonitorReturn, getattr(sys_monitoring, "DISABLE", None))
61-
TOffset = int
62-
63-
ALWAYS_JUMPS: set[int] = set()
64-
RETURNS: set[int] = set()
65-
66-
if env.PYBEHAVIOR.branch_right_left:
67-
ALWAYS_JUMPS.update(
68-
dis.opmap[name]
69-
for name in ["JUMP_FORWARD", "JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT"]
70-
)
71-
72-
RETURNS.update(dis.opmap[name] for name in ["RETURN_VALUE", "RETURN_GENERATOR"])
7360

7461

7562
if LOG: # pragma: debugging
@@ -181,131 +168,6 @@ def _decorator(meth: AnyCallable) -> AnyCallable:
181168
return _decorator
182169

183170

184-
class InstructionWalker:
185-
"""Utility to step through trails of instructions.
186-
187-
We have two reasons to need sequences of instructions from a code object:
188-
First, in strict sequence to visit all the instructions in the object.
189-
This is `walk(follow_jumps=False)`. Second, we want to follow jumps to
190-
understand how execution will flow: `walk(follow_jumps=True)`.
191-
192-
"""
193-
194-
def __init__(self, code: CodeType) -> None:
195-
self.code = code
196-
self.insts: dict[TOffset, dis.Instruction] = {}
197-
198-
inst = None
199-
for inst in dis.get_instructions(code):
200-
self.insts[inst.offset] = inst
201-
202-
assert inst is not None
203-
self.max_offset = inst.offset
204-
205-
def walk(
206-
self, *, start_at: TOffset = 0, follow_jumps: bool = True
207-
) -> Iterable[dis.Instruction]:
208-
"""
209-
Yield instructions starting from `start_at`. Follow unconditional
210-
jumps if `follow_jumps` is true.
211-
"""
212-
seen = set()
213-
offset = start_at
214-
while offset < self.max_offset + 1:
215-
if offset in seen:
216-
break
217-
seen.add(offset)
218-
if inst := self.insts.get(offset):
219-
yield inst
220-
if follow_jumps and inst.opcode in ALWAYS_JUMPS:
221-
offset = inst.jump_target
222-
continue
223-
offset += 2
224-
225-
226-
def populate_branch_trails(code: CodeType, code_info: CodeInfo) -> None:
227-
"""
228-
Populate the `branch_trails` attribute on `code_info`.
229-
230-
Instructions can have a jump_target, where they might jump to next. Some
231-
instructions with a jump_target are unconditional jumps (ALWAYS_JUMPS), so
232-
they aren't interesting to us, since they aren't the start of a branch
233-
possibility.
234-
235-
Instructions that might or might not jump somewhere else are branch
236-
possibilities. For each of those, we track a trail of instructions. These
237-
are lists of instruction offsets, the next instructions that can execute.
238-
We follow the trail until we get to a new source line. That gives us the
239-
arc from the original instruction's line to the new source line.
240-
241-
"""
242-
# log(f"populate_branch_trails: {code}")
243-
iwalker = InstructionWalker(code)
244-
for inst in iwalker.walk(follow_jumps=False):
245-
# log(f"considering {inst=}")
246-
if not inst.jump_target:
247-
# We only care about instructions with jump targets.
248-
# log("no jump_target")
249-
continue
250-
if inst.opcode in ALWAYS_JUMPS:
251-
# We don't care about unconditional jumps.
252-
# log("always jumps")
253-
continue
254-
255-
from_line = inst.line_number
256-
if from_line is None:
257-
continue
258-
259-
def walk_one_branch(
260-
start_at: TOffset, branch_kind: str
261-
) -> tuple[list[TOffset], TArc | None]:
262-
# pylint: disable=cell-var-from-loop
263-
inst_offsets: list[TOffset] = []
264-
to_line = None
265-
for inst2 in iwalker.walk(start_at=start_at):
266-
inst_offsets.append(inst2.offset)
267-
if inst2.line_number and inst2.line_number != from_line:
268-
to_line = inst2.line_number
269-
break
270-
elif inst2.jump_target and (inst2.opcode not in ALWAYS_JUMPS):
271-
# log(
272-
# f"stop: {inst2.jump_target=}, "
273-
# + f"{inst2.opcode=} ({dis.opname[inst2.opcode]}), "
274-
# + f"{ALWAYS_JUMPS=}"
275-
# )
276-
break
277-
elif inst2.opcode in RETURNS:
278-
to_line = -code.co_firstlineno
279-
break
280-
if to_line is not None:
281-
# log(
282-
# f"possible branch from @{start_at}: "
283-
# + f"{inst_offsets}, {(from_line, to_line)} {code}"
284-
# )
285-
return inst_offsets, (from_line, to_line)
286-
else:
287-
# log(f"no possible branch from @{start_at}: {inst_offsets}")
288-
return [], None
289-
290-
# Calculate two trails: one from the next instruction, and one from the
291-
# jump_target instruction.
292-
trails = [
293-
walk_one_branch(start_at=inst.offset + 2, branch_kind="not-taken"),
294-
walk_one_branch(start_at=inst.jump_target, branch_kind="taken"),
295-
]
296-
code_info.branch_trails[inst.offset] = trails
297-
298-
# Sometimes we get BRANCH_RIGHT or BRANCH_LEFT events from instructions
299-
# other than the original jump possibility instruction. Register each
300-
# trail under all of their offsets so we can pick up in the middle of a
301-
# trail if need be.
302-
for trail in trails:
303-
for offset in trail[0]:
304-
if offset not in code_info.branch_trails:
305-
code_info.branch_trails[offset] = []
306-
code_info.branch_trails[offset].append(trail)
307-
308-
309171
@dataclass
310172
class CodeInfo:
311173
"""The information we want about each code object."""
@@ -321,10 +183,7 @@ class CodeInfo:
321183
# ([offset, offset, ...], (from_line, to_line)),
322184
# ]
323185
# Two possible trails from the branch point, left and right.
324-
branch_trails: dict[
325-
TOffset,
326-
list[tuple[list[TOffset], TArc | None]],
327-
]
186+
branch_trails: TBranchTrails
328187

329188

330189
def bytes_to_lines(code: CodeType) -> dict[TOffset, TLineNo]:
@@ -571,7 +430,7 @@ def sysmon_branch_either(
571430
if not code_info.branch_trails:
572431
if self.stats is not None:
573432
self.stats["branch_trails"] += 1
574-
populate_branch_trails(code, code_info)
433+
code_info.branch_trails = branch_trails(code)
575434
# log(f"branch_trails for {code}:\n {code_info.branch_trails}")
576435
added_arc = False
577436
dest_info = code_info.branch_trails.get(instruction_offset)

coverage/types.py

+3
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def __call__(
5353
# Line numbers are pervasive enough that they deserve their own type.
5454
TLineNo = int
5555

56+
# Bytecode offsets are pervasive enough that they deserve their own type.
57+
TOffset = int
58+
5659
TArc = tuple[TLineNo, TLineNo]
5760

5861
class TFileDisposition(Protocol):

tox.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ setenv =
116116

117117
commands =
118118
# PYVERSIONS
119-
mypy --python-version=3.9 --strict --exclude=sysmon {env:TYPEABLE}
119+
mypy --python-version=3.9 --strict --exclude=sysmon --exclude=bytecode {env:TYPEABLE}
120120
mypy --python-version=3.13 --strict {env:TYPEABLE}
121121

122122
[gh]

0 commit comments

Comments
 (0)