Skip to content

Commit d8c3522

Browse files
authored
Merge pull request #4162 from tybug/explain-ir-actual
Migrate `explain` phase to the typed choice sequence
2 parents e575bf3 + f01d9d6 commit d8c3522

File tree

7 files changed

+120
-65
lines changed

7 files changed

+120
-65
lines changed

hypothesis-python/RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch migrates the :obj:`~hypothesis.Phase.explain` :ref:`phase <phases>` to our IR layer (:issue:`3921`). This should improve both its speed and precision.

hypothesis-python/src/hypothesis/control.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,10 @@ def prep_args_kwargs_from_strategies(self, kwarg_strategies):
149149
arg_labels = {}
150150
kwargs = {}
151151
for k, s in kwarg_strategies.items():
152-
start_idx = self.data.index
152+
start_idx = self.data.index_ir
153153
with deprecate_random_in_strategy("from {}={!r}", k, s) as check:
154154
obj = check(self.data.draw(s, observe_as=f"generate:{k}"))
155-
end_idx = self.data.index
155+
end_idx = self.data.index_ir
156156
kwargs[k] = obj
157157

158158
# This high up the stack, we can't see or really do much with the conjecture

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,11 @@
6767
PreviouslyUnseenBehaviour,
6868
TreeRecordingObserver,
6969
)
70-
from hypothesis.internal.conjecture.junkdrawer import clamp, ensure_free_stackframes
70+
from hypothesis.internal.conjecture.junkdrawer import (
71+
clamp,
72+
ensure_free_stackframes,
73+
startswith,
74+
)
7175
from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser
7276
from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key
7377
from hypothesis.internal.healthcheck import fail_health_check
@@ -1478,14 +1482,17 @@ def check_result(
14781482
self.__data_cache[buffer] = result
14791483
return result
14801484

1481-
def passing_buffers(self, prefix: bytes = b"") -> frozenset[bytes]:
1482-
"""Return a collection of bytestrings which cause the test to pass.
1485+
def passing_choice_sequences(
1486+
self, prefix: Sequence[IRNode] = ()
1487+
) -> frozenset[bytes]:
1488+
"""Return a collection of choice sequences (sequences of IR nodes) which cause the test to pass.
14831489
Optionally restrict this by a certain prefix, which is useful for explain mode.
14841490
"""
14851491
return frozenset(
1486-
buf
1487-
for buf in self.__data_cache
1488-
if buf.startswith(prefix) and self.__data_cache[buf].status == Status.VALID
1492+
result.examples.ir_tree_nodes
1493+
for key in self.__data_cache_ir
1494+
if (result := self.__data_cache_ir[key]).status is Status.VALID
1495+
and startswith(result.examples.ir_tree_nodes, prefix)
14891496
)
14901497

14911498

hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,14 @@
2525
ConjectureResult,
2626
IRNode,
2727
Status,
28+
ir_size_nodes,
29+
ir_to_buffer,
2830
ir_value_equal,
2931
ir_value_key,
3032
ir_value_permitted,
3133
)
3234
from hypothesis.internal.conjecture.junkdrawer import (
35+
endswith,
3336
find_integer,
3437
replace_all,
3538
startswith,
@@ -540,20 +543,21 @@ def s(n):
540543
self.explain()
541544

542545
def explain(self):
546+
from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR
547+
543548
if not self.should_explain or not self.shrink_target.arg_slices:
544549
return
545-
from hypothesis.internal.conjecture.engine import BUFFER_SIZE
546550

547551
self.max_stall = 1e999
548552
shrink_target = self.shrink_target
549-
buffer = shrink_target.buffer
553+
nodes = self.nodes
550554
chunks = defaultdict(list)
551555

552556
# Before we start running experiments, let's check for known inputs which would
553557
# make them redundant. The shrinking process means that we've already tried many
554558
# variations on the minimal example, so this can save a lot of time.
555-
seen_passing_buffers = self.engine.passing_buffers(
556-
prefix=buffer[: min(self.shrink_target.arg_slices)[0]]
559+
seen_passing_seq = self.engine.passing_choice_sequences(
560+
prefix=self.nodes[: min(self.shrink_target.arg_slices)[0]]
557561
)
558562

559563
# Now that we've shrunk to a minimal failing example, it's time to try
@@ -565,8 +569,8 @@ def explain(self):
565569
# Check for any previous examples that match the prefix and suffix,
566570
# so we can skip if we found a passing example while shrinking.
567571
if any(
568-
seen.startswith(buffer[:start]) and seen.endswith(buffer[end:])
569-
for seen in seen_passing_buffers
572+
startswith(seen, nodes[:start]) and endswith(seen, nodes[end:])
573+
for seen in seen_passing_seq
570574
):
571575
continue
572576

@@ -581,47 +585,61 @@ def explain(self):
581585
# stop early if we're seeing mostly invalid examples
582586
break # pragma: no cover
583587

584-
buf_attempt_fixed = bytearray(buffer)
585-
buf_attempt_fixed[start:end] = [
586-
self.random.randint(0, 255) for _ in range(end - start)
587-
]
588-
result = self.engine.cached_test_function(
589-
buf_attempt_fixed, extend=BUFFER_SIZE - len(buf_attempt_fixed)
588+
# replace start:end with random values
589+
replacement = []
590+
for i in range(start, end):
591+
node = nodes[i]
592+
if not node.was_forced:
593+
(value, _buf) = ir_to_buffer(
594+
node.ir_type, node.kwargs, random=self.random
595+
)
596+
node = node.copy(with_value=value)
597+
replacement.append(node)
598+
599+
attempt = nodes[:start] + tuple(replacement) + nodes[end:]
600+
result = self.engine.cached_test_function_ir(
601+
attempt, extend=BUFFER_SIZE_IR - ir_size_nodes(attempt)
590602
)
591603

592604
# Turns out this was a variable-length part, so grab the infix...
593-
if result.status == Status.OVERRUN:
605+
if result.status is Status.OVERRUN:
594606
continue # pragma: no cover # flakily covered
595607
if not (
596-
len(buf_attempt_fixed) == len(result.buffer)
597-
and result.buffer.endswith(buffer[end:])
608+
len(attempt) == len(result.examples.ir_tree_nodes)
609+
and endswith(result.examples.ir_tree_nodes, nodes[end:])
598610
):
599611
for ex, res in zip(shrink_target.examples, result.examples):
600-
assert ex.start == res.start
601-
assert ex.start <= start
612+
assert ex.ir_start == res.ir_start
613+
assert ex.ir_start <= start
602614
assert ex.label == res.label
603-
if start == ex.start and end == ex.end:
604-
res_end = res.end
615+
if start == ex.ir_start and end == ex.ir_end:
616+
res_end = res.ir_end
605617
break
606618
else:
607619
raise NotImplementedError("Expected matching prefixes")
608620

609-
buf_attempt_fixed = (
610-
buffer[:start] + result.buffer[start:res_end] + buffer[end:]
621+
attempt = (
622+
nodes[:start]
623+
+ result.examples.ir_tree_nodes[start:res_end]
624+
+ nodes[end:]
611625
)
612-
chunks[(start, end)].append(result.buffer[start:res_end])
613-
result = self.engine.cached_test_function(buf_attempt_fixed)
626+
chunks[(start, end)].append(
627+
result.examples.ir_tree_nodes[start:res_end]
628+
)
629+
result = self.engine.cached_test_function_ir(attempt)
614630

615-
if result.status == Status.OVERRUN:
631+
if result.status is Status.OVERRUN:
616632
continue # pragma: no cover # flakily covered
617633
else:
618-
chunks[(start, end)].append(result.buffer[start:end])
634+
chunks[(start, end)].append(
635+
result.examples.ir_tree_nodes[start:end]
636+
)
619637

620638
if shrink_target is not self.shrink_target: # pragma: no cover
621639
# If we've shrunk further without meaning to, bail out.
622640
self.shrink_target.slice_comments.clear()
623641
return
624-
if result.status == Status.VALID:
642+
if result.status is Status.VALID:
625643
# The test passed, indicating that this param can't vary freely.
626644
# However, it's really hard to write a simple and reliable covering
627645
# test, because of our `seen_passing_seq` check above.
@@ -640,15 +658,15 @@ def explain(self):
640658
chunks_by_start_index = sorted(chunks.items())
641659
for _ in range(500): # pragma: no branch
642660
# no-branch here because we don't coverage-test the abort-at-500 logic.
643-
new_buf = bytearray()
661+
new_nodes = []
644662
prev_end = 0
645663
for (start, end), ls in chunks_by_start_index:
646664
assert prev_end <= start < end, "these chunks must be nonoverlapping"
647-
new_buf.extend(buffer[prev_end:start])
648-
new_buf.extend(self.random.choice(ls))
665+
new_nodes.extend(nodes[prev_end:start])
666+
new_nodes.extend(self.random.choice(ls))
649667
prev_end = end
650668

651-
result = self.engine.cached_test_function(new_buf)
669+
result = self.engine.cached_test_function_ir(new_nodes)
652670

653671
# This *can't* be a shrink because none of the components were.
654672
assert shrink_target is self.shrink_target

hypothesis-python/tests/conjecture/test_engine.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,6 +1527,19 @@ def test(data):
15271527
assert d2.status == Status.VALID
15281528

15291529

1530+
def test_draw_bits_partly_from_prefix_and_partly_random():
1531+
# a draw_bits call which straddles the end of our prefix has a slightly
1532+
# different code branch.
1533+
def test(data):
1534+
# float consumes draw_bits(64)
1535+
data.draw_float()
1536+
1537+
with deterministic_PRNG():
1538+
runner = ConjectureRunner(test, settings=TEST_SETTINGS)
1539+
d = runner.cached_test_function(bytes(10), extend=100)
1540+
assert d.status == Status.VALID
1541+
1542+
15301543
def test_can_be_set_to_ignore_limits():
15311544
def test(data):
15321545
data.draw_bytes(1, 1)

hypothesis-python/tests/conjecture/test_inquisitor.py

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,36 +16,20 @@
1616

1717

1818
def fails_with_output(expected, error=AssertionError, **kw):
19-
expected = [expected] if isinstance(expected, str) else expected
20-
2119
def _inner(f):
2220
def _new():
2321
with pytest.raises(error) as err:
2422
settings(print_blob=False, derandomize=True, **kw)(f)()
2523
got = "\n".join(err.value.__notes__).strip() + "\n"
26-
assert any(got == s.strip() + "\n" for s in expected)
24+
assert got == expected.strip() + "\n"
2725

2826
return _new
2927

3028
return _inner
3129

3230

33-
# this should have a marked as freely varying, but
34-
# false negatives in our inquisitor code skip over it sometimes, depending on the
35-
# seen_passed_buffers. yet another thing that should be improved by moving to the ir.
3631
@fails_with_output(
37-
[
38-
"""
39-
Falsifying example: test_inquisitor_comments_basic_fail_if_either(
40-
# The test always failed when commented parts were varied together.
41-
a=False,
42-
b=True,
43-
c=[], # or any other generated value
44-
d=True,
45-
e=False, # or any other generated value
46-
)
47-
""",
48-
"""
32+
"""
4933
Falsifying example: test_inquisitor_comments_basic_fail_if_either(
5034
# The test always failed when commented parts were varied together.
5135
a=False, # or any other generated value
@@ -54,8 +38,7 @@ def _new():
5438
d=True,
5539
e=False, # or any other generated value
5640
)
57-
""",
58-
]
41+
"""
5942
)
6043
@given(st.booleans(), st.booleans(), st.lists(st.none()), st.booleans(), st.booleans())
6144
def test_inquisitor_comments_basic_fail_if_either(a, b, c, d, e):
@@ -91,6 +74,27 @@ def test_inquisitor_no_together_comment_if_single_argument(a, b):
9174
assert a
9275

9376

77+
@st.composite
78+
def ints_with_forced_draw(draw):
79+
data = draw(st.data())
80+
n = draw(st.integers())
81+
data.conjecture_data.draw_boolean(forced=True)
82+
return n
83+
84+
85+
@fails_with_output(
86+
"""
87+
Falsifying example: test_inquisitor_doesnt_break_on_varying_forced_nodes(
88+
n1=100,
89+
n2=0, # or any other generated value
90+
)
91+
"""
92+
)
93+
@given(st.integers(), ints_with_forced_draw())
94+
def test_inquisitor_doesnt_break_on_varying_forced_nodes(n1, n2):
95+
assert n1 < 100
96+
97+
9498
@fails_with(ZeroDivisionError)
9599
@settings(database=None)
96100
@given(start_date=st.datetimes(), data=st.data())

whole_repo_tests/test_mypy.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,22 @@ def get_mypy_output(fname, *extra_args):
4141

4242

4343
def get_mypy_analysed_type(fname):
44-
out = get_mypy_output(fname).rstrip()
45-
msg = "Success: no issues found in 1 source file"
46-
if out.endswith(msg):
47-
out = out[: -len(msg)]
48-
assert len(out.splitlines()) == 1, out
44+
attempts = 0
45+
while True:
46+
out = get_mypy_output(fname).rstrip()
47+
msg = "Success: no issues found in 1 source file"
48+
if out.endswith(msg):
49+
out = out[: -len(msg)]
50+
# we've noticed some flakiness in getting an empty output here. Give it
51+
# a couple tries.
52+
if len(out.splitlines()) == 0:
53+
attempts += 1
54+
continue
55+
56+
assert len(out.splitlines()) == 1, out
57+
assert attempts < 2, "too many failed retries"
58+
break
59+
4960
# See https://mypy.readthedocs.io/en/latest/common_issues.html#reveal-type
5061
# The shell output for `reveal_type([1, 2, 3])` looks like a literal:
5162
# file.py:2: error: Revealed type is 'builtins.list[builtins.int*]'
@@ -327,8 +338,7 @@ def test_stateful_target_params_mutually_exclusive(tmp_path, decorator):
327338
"target_args",
328339
[
329340
"target=b1",
330-
# FIXME: temporary workaround for mypy bug, see hypothesis/pull/4136
331-
pytest.param("targets=(b1,)", marks=pytest.mark.xfail(strict=False)),
341+
"targets=(b1,)",
332342
"targets=(b1, b2)",
333343
"",
334344
],

0 commit comments

Comments
 (0)