
Commit 462c5fc

Merge pull request #4215 from tybug/sort-key-ir
Implement and use `sort_key_ir`
2 parents: 3dbfae2 + b089be7

8 files changed: +57 -77 lines changed

hypothesis-python/RELEASE.rst

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+The shrinker now uses the typed choice sequence (:issue:`3921`) when ordering failing examples. As a result, Hypothesis may now report a different minimal failing example for some tests. We expect most cases to remain unchanged.
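
For intuition, the new ordering is shortlex over the typed choice sequence: fewer choices always sorts first, and equal-length sequences compare elementwise by each choice's complexity index (see `sort_key_ir` in shrinker.py below). A minimal self-contained sketch of that ordering, with plain integer tuples standing in for the per-choice indices that `choice_to_index` computes:

    # Shortlex: compare by length first, then elementwise.
    def shortlex_key(choices):
        return (len(choices), tuple(choices))

    examples = [(2, 0), (1,), (0, 1)]
    # The single-choice example sorts first; the equal-length sequences
    # are then ordered elementwise: [(1,), (0, 1), (2, 0)]
    print(sorted(examples, key=shortlex_key))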

hypothesis-python/src/hypothesis/core.py

Lines changed: 2 additions & 2 deletions
@@ -85,7 +85,7 @@
     ensure_free_stackframes,
     gc_cumulative_time,
 )
-from hypothesis.internal.conjecture.shrinker import sort_key
+from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
 from hypothesis.internal.entropy import deterministic_PRNG
 from hypothesis.internal.escalation import (
     InterestingOrigin,
@@ -1226,7 +1226,7 @@ def run_engine(self):
         if runner.interesting_examples:
             self.falsifying_examples = sorted(
                 runner.interesting_examples.values(),
-                key=lambda d: sort_key(d.buffer),
+                key=lambda d: sort_key_ir(d.ir_nodes),
                 reverse=True,
             )
         else:

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

Lines changed: 6 additions & 6 deletions
@@ -74,7 +74,7 @@
     startswith,
 )
 from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser
-from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key
+from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key, sort_key_ir
 from hypothesis.internal.healthcheck import fail_health_check
 from hypothesis.reporting import base_report, report

@@ -562,8 +562,8 @@ def test_function(self, data: ConjectureData) -> None:
                 if v < existing_score:
                     continue

-                if v > existing_score or sort_key(data.buffer) < sort_key(
-                    existing_example.buffer
+                if v > existing_score or sort_key_ir(data.ir_nodes) < sort_key_ir(
+                    existing_example.ir_nodes
                 ):
                     data_as_result = data.as_result()
                     assert not isinstance(data_as_result, _Overrun)
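
Read in words: a candidate replaces the current best for a target if it scores strictly higher, or ties on score while having a shortlex-simpler choice sequence. A hedged sketch of that rule in isolation — `Candidate`, `shortlex_key`, and `should_replace` are illustrative stand-ins, not engine APIs:

    from dataclasses import dataclass

    @dataclass
    class Candidate:
        score: float
        choices: tuple  # stand-in for the typed choice sequence

    def shortlex_key(choices):
        return (len(choices), choices)

    def should_replace(new: Candidate, old: Candidate) -> bool:
        # Mirrors the guard above: skip strictly worse scores, then
        # accept a better score or a simpler (shortlex-smaller) witness.
        if new.score < old.score:
            return False
        return new.score > old.score or shortlex_key(new.choices) < shortlex_key(old.choices)

    # Same score: the one-choice witness beats the two-choice one.
    assert should_replace(Candidate(1.0, (7,)), Candidate(1.0, (0, 0)))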
@@ -619,7 +619,7 @@ def test_function(self, data: ConjectureData) -> None:
             if self.first_bug_found_at is None:
                 self.first_bug_found_at = self.call_count
         else:
-            if sort_key(data.buffer) < sort_key(existing.buffer):
+            if sort_key_ir(data.ir_nodes) < sort_key_ir(existing.ir_nodes):
                 self.shrinks += 1
                 self.downgrade_buffer(existing.buffer)
                 self.__data_cache.unpin(existing.buffer)
@@ -1376,7 +1376,7 @@ def shrink_interesting_examples(self) -> None:
         self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS

         for prev_data in sorted(
-            self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
+            self.interesting_examples.values(), key=lambda d: sort_key_ir(d.ir_nodes)
         ):
             assert prev_data.status == Status.INTERESTING
             data = self.new_conjecture_data_ir(prev_data.ir_nodes)
@@ -1393,7 +1393,7 @@ def shrink_interesting_examples(self) -> None:
                 for k, v in self.interesting_examples.items()
                 if k not in self.shrunk_examples
             ),
-            key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
+            key=lambda kv: (sort_key_ir(kv[1].ir_nodes), sort_key(repr(kv[0]))),
         )
         self.debug(f"Shrinking {target!r}: {data.choices}")

hypothesis-python/src/hypothesis/internal/conjecture/pareto.py

Lines changed: 7 additions & 5 deletions
@@ -14,7 +14,7 @@

 from hypothesis.internal.conjecture.data import ConjectureData, ConjectureResult, Status
 from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, swap
-from hypothesis.internal.conjecture.shrinker import sort_key
+from hypothesis.internal.conjecture.shrinker import sort_key_ir


 NO_SCORE = float("-inf")

@@ -45,10 +45,12 @@ def dominance(left, right):
     more structured or failing tests it can be useful to track, and future work
     will depend on it more."""

-    if left.buffer == right.buffer:
+    left_key = sort_key_ir(left.ir_nodes)
+    right_key = sort_key_ir(right.ir_nodes)
+    if left_key == right_key:
         return DominanceRelation.EQUAL

-    if sort_key(right.buffer) < sort_key(left.buffer):
+    if right_key < left_key:
         result = dominance(left=right, right=left)
         if result == DominanceRelation.LEFT_DOMINATES:
             return DominanceRelation.RIGHT_DOMINATES
@@ -60,7 +62,7 @@ def dominance(left, right):
         return result

     # Either left is better or there is no dominance relationship.
-    assert sort_key(left.buffer) < sort_key(right.buffer)
+    assert left_key < right_key

     # The right is more interesting
     if left.status < right.status:
@@ -126,7 +128,7 @@ def __init__(self, random):
         self.__random = random
         self.__eviction_listeners = []

-        self.front = SortedList(key=lambda d: sort_key(d.buffer))
+        self.front = SortedList(key=lambda d: sort_key_ir(d.ir_nodes))
         self.__pending = None

     def add(self, data):
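
The front container mirrors the same ordering: `SortedList` here comes from the sortedcontainers package, which keeps elements sorted by the supplied key, so `front[0]` is always the shortlex-simplest member. A small standalone illustration with plain tuples in place of result objects:

    from sortedcontainers import SortedList

    # Keep elements permanently ordered by the shortlex key.
    front = SortedList(key=lambda seq: (len(seq), tuple(seq)))
    front.add((2, 0))
    front.add((1,))
    front.add((0, 1))
    assert front[0] == (1,)     # fewest choices sorts first
    assert front[-1] == (2, 0)  # most complex sorts last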

hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py

Lines changed: 26 additions & 39 deletions
@@ -15,7 +15,7 @@
 import attr

 from hypothesis.internal.compat import int_from_bytes, int_to_bytes
-from hypothesis.internal.conjecture.choice import choice_from_index
+from hypothesis.internal.conjecture.choice import choice_from_index, choice_to_index
 from hypothesis.internal.conjecture.data import (
     ConjectureData,
     ConjectureResult,
@@ -80,6 +80,13 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]:
     return (len(buffer), buffer)


+def sort_key_ir(nodes: Sequence[IRNode]) -> tuple[int, tuple[int, ...]]:
+    return (
+        len(nodes),
+        tuple(choice_to_index(node.value, node.kwargs) for node in nodes),
+    )
+
+
 SHRINK_PASS_DEFINITIONS: dict[str, "ShrinkPassDefinition"] = {}
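
Because Python compares tuples lexicographically, this key yields a shortlex order: length dominates outright, and only equal-length sequences fall through to elementwise comparison of the `choice_to_index` values. Two quick illustrative checks of the resulting key tuples, with bare integers in place of real choice indices:

    # A one-node sequence beats any two-node sequence, no matter how
    # complex its single choice is.
    assert (1, (100,)) < (2, (0, 0))
    # Equal lengths compare elementwise, left to right.
    assert (2, (0, 5)) < (2, (1, 0))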

@@ -305,7 +312,7 @@ def __init__(
         self.__derived_values: dict = {}
         self.__pending_shrink_explanation = None

-        self.initial_size = len(initial.buffer)
+        self.initial_size = len(initial.choices)

         # We keep track of the current best example on the shrink_target
         # attribute.
@@ -401,7 +408,7 @@ def consider_new_tree(self, tree: Sequence[IRNode]) -> bool:
         if startswith(tree, self.nodes):
             return True

-        if startswith(self.nodes, tree):
+        if sort_key_ir(self.nodes) < sort_key_ir(tree):
             return False

         previous = self.shrink_target
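
Note the guard changes meaning here: previously it rejected candidates that were a strict prefix of the current nodes, whereas now it rejects any candidate that the current best already beats under the shortlex order. An illustrative check of that comparison, with plain tuples standing in for node sequences:

    def shortlex_key(seq):
        return (len(seq), tuple(seq))

    current = (1, 0)
    # A longer tree is shortlex-larger, so it is rejected outright.
    assert shortlex_key(current) < shortlex_key((1, 0, 0))
    # A shorter tree is shortlex-smaller, so it is still considered.
    assert not shortlex_key(current) < shortlex_key((0,))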
@@ -445,7 +452,7 @@ def incorporate_test_data(self, data):
             return
         if (
             self.__predicate(data)
-            and sort_key(data.buffer) < sort_key(self.shrink_target.buffer)
+            and sort_key_ir(data.ir_nodes) < sort_key_ir(self.shrink_target.ir_nodes)
            and self.__allow_transition(self.shrink_target, data)
         ):
             self.update_shrink_target(data)
@@ -474,28 +481,6 @@ def shrink(self):
         This method is "mostly idempotent" - calling it twice is unlikely to
         have any effect, though it has a non-zero probability of doing so.
         """
-        # We assume that if an all-zero block of bytes is an interesting
-        # example then we're not going to do better than that.
-        # This might not technically be true: e.g. for integers() | booleans()
-        # the simplest example is actually [1, 0]. Missing this case is fairly
-        # harmless and this allows us to make various simplifying assumptions
-        # about the structure of the data (principally that we're never
-        # operating on a block of all zero bytes so can use non-zeroness as a
-        # signpost of complexity).
-        if not any(self.shrink_target.buffer) or self.incorporate_new_buffer(
-            bytes(len(self.shrink_target.buffer))
-        ):
-            self.explain()
-            return
-
-        # There are multiple buffers that represent the same counterexample, eg
-        # n=2 (from the 16 bit integer bucket) and n=2 (from the 32 bit integer
-        # bucket). Before we start shrinking, we need to normalize to the minimal
-        # such buffer, else a buffer-smaller but ir-larger value may be chosen
-        # as the minimal counterexample.
-        data = self.engine.new_conjecture_data_ir(self.nodes)
-        self.engine.test_function(data)
-        self.incorporate_test_data(data.as_result())

         try:
             self.greedy_shrink()
@@ -509,7 +494,7 @@ def shrink(self):
                 def s(n):
                     return "s" if n != 1 else ""

-                total_deleted = self.initial_size - len(self.shrink_target.buffer)
+                total_deleted = self.initial_size - len(self.shrink_target.choices)
                 calls = self.engine.call_count - self.initial_calls
                 misaligned = self.engine.misaligned_count - self.initial_misaligned

@@ -518,7 +503,7 @@ def s(n):
                     "Shrink pass profiling\n"
                     "---------------------\n\n"
                     f"Shrinking made a total of {calls} call{s(calls)} of which "
-                    f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} bytes out "
+                    f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} choices out "
                     f"of {self.initial_size}."
                 )
                 for useful in [True, False]:
@@ -540,7 +525,7 @@ def s(n):
                         self.debug(
                             f" * {p.name} made {p.calls} call{s(p.calls)} of which "
                             f"{p.shrinks} shrank and {p.misaligned} were misaligned, "
-                            f"deleting {p.deletions} byte{s(p.deletions)}."
+                            f"deleting {p.deletions} choice{s(p.deletions)}."
                         )
                 self.debug("")
         self.explain()
@@ -797,7 +782,7 @@ def fixate_shrink_passes(self, passes):
                 # the length are the best.
                 if self.shrink_target is before_sp:
                     reordering[sp] = 1
-                elif len(self.buffer) < len(before_sp.buffer):
+                elif len(self.choices) < len(before_sp.choices):
                     reordering[sp] = -1
                 else:
                     reordering[sp] = 0
@@ -988,7 +973,7 @@ def __changed_nodes(self):
         assert prev_target is not new_target
         prev_nodes = prev_target.ir_nodes
         new_nodes = new_target.ir_nodes
-        assert sort_key(new_target.buffer) < sort_key(prev_target.buffer)
+        assert sort_key_ir(new_target.ir_nodes) < sort_key_ir(prev_target.ir_nodes)

         if len(prev_nodes) != len(new_nodes) or any(
             n1.ir_type != n2.ir_type for n1, n2 in zip(prev_nodes, new_nodes)
@@ -1186,11 +1171,11 @@ def remove_discarded(self):

             for ex in self.shrink_target.examples:
                 if (
-                    ex.length > 0
+                    ex.ir_length > 0
                     and ex.discarded
-                    and (not discarded or ex.start >= discarded[-1][-1])
+                    and (not discarded or ex.ir_start >= discarded[-1][-1])
                 ):
-                    discarded.append((ex.start, ex.end))
+                    discarded.append((ex.ir_start, ex.ir_end))

             # This can happen if we have discards but they are all of
             # zero length. This shouldn't happen very often so it's
@@ -1199,11 +1184,11 @@ def remove_discarded(self):
             if not discarded:
                 break

-            attempt = bytearray(self.shrink_target.buffer)
+            attempt = list(self.nodes)
             for u, v in reversed(discarded):
                 del attempt[u:v]

-            if not self.incorporate_new_buffer(attempt):
+            if not self.consider_new_tree(tuple(attempt)):
                 return False
         return True
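
One detail worth calling out: the deletion loop iterates the discarded spans in reverse, so removing a later (u, v) span never shifts the indices of an earlier one. A small self-contained demonstration of why that matters:

    # Deleting right-to-left keeps the earlier index pairs valid.
    attempt = list("abcdefg")
    spans = [(1, 3), (4, 6)]
    for u, v in reversed(spans):
        del attempt[u:v]
    assert attempt == list("adg")  # b, c and e, f removed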

@@ -1563,7 +1548,9 @@ def test_not_equal(x, y):
                 ],
             )
         ),
-        key=lambda i: st.buffer[examples[i].start : examples[i].end],
+        key=lambda i: sort_key_ir(
+            st.ir_nodes[examples[i].ir_start : examples[i].ir_end]
+        ),
     )

     def run_node_program(self, i, description, original, repeats=1):
@@ -1670,7 +1657,7 @@ def step(self, *, random_order=False):
         initial_shrinks = self.shrinker.shrinks
         initial_calls = self.shrinker.calls
         initial_misaligned = self.shrinker.misaligned
-        size = len(self.shrinker.shrink_target.buffer)
+        size = len(self.shrinker.shrink_target.choices)
         self.shrinker.engine.explain_next_call_as(self.name)

         if random_order:
@@ -1687,7 +1674,7 @@ def step(self, *, random_order=False):
         self.calls += self.shrinker.calls - initial_calls
         self.misaligned += self.shrinker.misaligned - initial_misaligned
         self.shrinks += self.shrinker.shrinks - initial_shrinks
-        self.deletions += size - len(self.shrinker.shrink_target.buffer)
+        self.deletions += size - len(self.shrinker.shrink_target.choices)
         self.shrinker.engine.clear_call_explanation()
         return True

hypothesis-python/tests/cover/test_deadline.py

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ def test_flaky_slow(i):


 def test_deadlines_participate_in_shrinking():
-    @settings(deadline=500, max_examples=1000)
+    @settings(deadline=500, max_examples=1000, database=None)
     @given(st.integers(min_value=0))
     def slow_if_large(i):
         if i >= 1000:

hypothesis-python/tests/nocover/test_duplication.py

Lines changed: 7 additions & 7 deletions
@@ -52,11 +52,11 @@ def test(b):
         test()
     except ValueError:
         pass
-    # There are three circumstances in which a duplicate is allowed: We replay
-    # the failing test once to check for flakiness, once when shrinking to normalize
-    # to the minimal buffer, and then we replay the fully minimized failing test
-    # at the end to display the error. The complication comes from the fact that
-    # these may or may not be the same test case, so we can see either two test
-    # cases each run twice or one test case which has been run three times.
-    assert set(counts.values()) in ({1, 2, 3}, {1, 4})
+    # There are two circumstances in which a duplicate is allowed: We replay
+    # the failing test once to check for flakiness, and then we replay the
+    # fully minimized failing test at the end to display the error. The
+    # complication comes from the fact that these may or may not be the same
+    # test case, so we can see either two test cases each run twice or one
+    # test case which has been run three times.
+    assert set(counts.values()) in ({1, 2}, {1, 3})
     assert len([k for k, v in counts.items() if v > 1]) <= 2

hypothesis-python/tests/quality/test_float_shrinking.py

Lines changed: 5 additions & 17 deletions
@@ -10,14 +10,7 @@

 import pytest

-from hypothesis import (
-    HealthCheck,
-    Verbosity,
-    example,
-    given,
-    settings,
-    strategies as st,
-)
+from hypothesis import example, given, strategies as st
 from hypothesis.internal.compat import ceil

 from tests.common.debug import minimal
@@ -39,21 +32,16 @@ def test_can_shrink_in_variable_sized_context(n):
 @example(1.7976931348623157e308)
 @example(1.5)
 @given(st.floats(min_value=0, allow_infinity=False, allow_nan=False))
-@settings(deadline=None, suppress_health_check=list(HealthCheck))
 def test_shrinks_downwards_to_integers(f):
-    g = minimal(
-        st.floats().filter(lambda x: x >= f),
-        settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
-    )
-    assert g == ceil(f)
+    assert minimal(st.floats(min_value=f)) == ceil(f)


 @example(1)
 @given(st.integers(1, 2**16 - 1))
-@settings(deadline=None, suppress_health_check=list(HealthCheck), max_examples=10)
 def test_shrinks_downwards_to_integers_when_fractional(b):
     g = minimal(
-        st.floats().filter(lambda x: b < x < 2**53 and int(x) != x),
-        settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
+        st.floats(
+            min_value=b, max_value=2**53, exclude_min=True, exclude_max=True
+        ).filter(lambda x: int(x) != x)
     )
     assert g == b + 0.5
