
Commit 462c5fc

Merge pull request #4215 from tybug/sort-key-ir
Implement and use `sort_key_ir`
2 parents: 3dbfae2 + b089be7

8 files changed: +57 -77 lines changed

hypothesis-python/RELEASE.rst

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+The shrinker now uses the typed choice sequence (:issue:`3921`) when ordering failing examples. As a result, Hypothesis may now report a different minimal failing example for some tests. We expect most cases to remain unchanged.
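
For intuition, the new ordering is shortlex over the typed choice sequence: fewer choices always sorts first, and equal-length sequences compare elementwise by each choice's complexity index (see `sort_key_ir` in shrinker.py below). A minimal self-contained sketch of that ordering, with plain integer tuples standing in for the per-choice indices that `choice_to_index` computes:

    # Shortlex: compare by length first, then elementwise.
    def shortlex_key(choices):
        return (len(choices), tuple(choices))

    examples = [(2, 0), (1,), (0, 1)]
    # The single-choice example sorts first; the equal-length sequences
    # are then ordered elementwise: [(1,), (0, 1), (2, 0)]
    print(sorted(examples, key=shortlex_key))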

hypothesis-python/src/hypothesis/core.py

Lines changed: 2 additions & 2 deletions
@@ -85,7 +85,7 @@
     ensure_free_stackframes,
     gc_cumulative_time,
 )
-from hypothesis.internal.conjecture.shrinker import sort_key
+from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
 from hypothesis.internal.entropy import deterministic_PRNG
 from hypothesis.internal.escalation import (
     InterestingOrigin,
@@ -1226,7 +1226,7 @@ def run_engine(self):
         if runner.interesting_examples:
             self.falsifying_examples = sorted(
                 runner.interesting_examples.values(),
-                key=lambda d: sort_key(d.buffer),
+                key=lambda d: sort_key_ir(d.ir_nodes),
                 reverse=True,
             )
         else:

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

Lines changed: 6 additions & 6 deletions
@@ -74,7 +74,7 @@
     startswith,
 )
 from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser
-from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key
+from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key, sort_key_ir
 from hypothesis.internal.healthcheck import fail_health_check
 from hypothesis.reporting import base_report, report

@@ -562,8 +562,8 @@ def test_function(self, data: ConjectureData) -> None:
                 if v < existing_score:
                     continue

-                if v > existing_score or sort_key(data.buffer) < sort_key(
-                    existing_example.buffer
+                if v > existing_score or sort_key_ir(data.ir_nodes) < sort_key_ir(
+                    existing_example.ir_nodes
                 ):
                     data_as_result = data.as_result()
                     assert not isinstance(data_as_result, _Overrun)
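
Read in words: a candidate replaces the current best for a target if it scores strictly higher, or ties on score while having a shortlex-simpler choice sequence. A hedged sketch of that rule in isolation — `Candidate`, `shortlex_key`, and `should_replace` are illustrative stand-ins, not engine APIs:

    from dataclasses import dataclass

    @dataclass
    class Candidate:
        score: float
        choices: tuple  # stand-in for the typed choice sequence

    def shortlex_key(choices):
        return (len(choices), choices)

    def should_replace(new: Candidate, old: Candidate) -> bool:
        # Mirrors the guard above: skip strictly worse scores, then
        # accept a better score or a simpler (shortlex-smaller) witness.
        if new.score < old.score:
            return False
        return new.score > old.score or shortlex_key(new.choices) < shortlex_key(old.choices)

    # Same score: the one-choice witness beats the two-choice one.
    assert should_replace(Candidate(1.0, (7,)), Candidate(1.0, (0, 0)))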
@@ -619,7 +619,7 @@ def test_function(self, data: ConjectureData) -> None:
             if self.first_bug_found_at is None:
                 self.first_bug_found_at = self.call_count
         else:
-            if sort_key(data.buffer) < sort_key(existing.buffer):
+            if sort_key_ir(data.ir_nodes) < sort_key_ir(existing.ir_nodes):
                 self.shrinks += 1
                 self.downgrade_buffer(existing.buffer)
                 self.__data_cache.unpin(existing.buffer)
@@ -1376,7 +1376,7 @@ def shrink_interesting_examples(self) -> None:
         self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS

         for prev_data in sorted(
-            self.interesting_examples.values(), key=lambda d: sort_key(d.buffer)
+            self.interesting_examples.values(), key=lambda d: sort_key_ir(d.ir_nodes)
         ):
             assert prev_data.status == Status.INTERESTING
             data = self.new_conjecture_data_ir(prev_data.ir_nodes)
@@ -1393,7 +1393,7 @@ def shrink_interesting_examples(self) -> None:
                 for k, v in self.interesting_examples.items()
                 if k not in self.shrunk_examples
             ),
-            key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))),
+            key=lambda kv: (sort_key_ir(kv[1].ir_nodes), sort_key(repr(kv[0]))),
         )
         self.debug(f"Shrinking {target!r}: {data.choices}")

hypothesis-python/src/hypothesis/internal/conjecture/pareto.py

Lines changed: 7 additions & 5 deletions
@@ -14,7 +14,7 @@

 from hypothesis.internal.conjecture.data import ConjectureData, ConjectureResult, Status
 from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, swap
-from hypothesis.internal.conjecture.shrinker import sort_key
+from hypothesis.internal.conjecture.shrinker import sort_key_ir


 NO_SCORE = float("-inf")

@@ -45,10 +45,12 @@ def dominance(left, right):
     more structured or failing tests it can be useful to track, and future work
     will depend on it more."""

-    if left.buffer == right.buffer:
+    left_key = sort_key_ir(left.ir_nodes)
+    right_key = sort_key_ir(right.ir_nodes)
+    if left_key == right_key:
         return DominanceRelation.EQUAL

-    if sort_key(right.buffer) < sort_key(left.buffer):
+    if right_key < left_key:
         result = dominance(left=right, right=left)
         if result == DominanceRelation.LEFT_DOMINATES:
             return DominanceRelation.RIGHT_DOMINATES
@@ -60,7 +62,7 @@ def dominance(left, right):
         return result

     # Either left is better or there is no dominance relationship.
-    assert sort_key(left.buffer) < sort_key(right.buffer)
+    assert left_key < right_key

     # The right is more interesting
     if left.status < right.status:
@@ -126,7 +128,7 @@ def __init__(self, random):
         self.__random = random
         self.__eviction_listeners = []

-        self.front = SortedList(key=lambda d: sort_key(d.buffer))
+        self.front = SortedList(key=lambda d: sort_key_ir(d.ir_nodes))
         self.__pending = None

     def add(self, data):
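
The front container mirrors the same ordering: `SortedList` here comes from the sortedcontainers package, which keeps elements sorted by the supplied key, so `front[0]` is always the shortlex-simplest member. A small standalone illustration with plain tuples in place of result objects:

    from sortedcontainers import SortedList

    # Keep elements permanently ordered by the shortlex key.
    front = SortedList(key=lambda seq: (len(seq), tuple(seq)))
    front.add((2, 0))
    front.add((1,))
    front.add((0, 1))
    assert front[0] == (1,)     # fewest choices sorts first
    assert front[-1] == (2, 0)  # most complex sorts last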

hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py

Lines changed: 26 additions & 39 deletions
@@ -15,7 +15,7 @@
 import attr

 from hypothesis.internal.compat import int_from_bytes, int_to_bytes
-from hypothesis.internal.conjecture.choice import choice_from_index
+from hypothesis.internal.conjecture.choice import choice_from_index, choice_to_index
 from hypothesis.internal.conjecture.data import (
     ConjectureData,
     ConjectureResult,
@@ -80,6 +80,13 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]:
     return (len(buffer), buffer)


+def sort_key_ir(nodes: Sequence[IRNode]) -> tuple[int, tuple[int, ...]]:
+    return (
+        len(nodes),
+        tuple(choice_to_index(node.value, node.kwargs) for node in nodes),
+    )
+
+
 SHRINK_PASS_DEFINITIONS: dict[str, "ShrinkPassDefinition"] = {}
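
Because Python compares tuples lexicographically, this key yields a shortlex order: length dominates outright, and only equal-length sequences fall through to elementwise comparison of the `choice_to_index` values. Two quick illustrative checks of the resulting key tuples, with bare integers in place of real choice indices:

    # A one-node sequence beats any two-node sequence, no matter how
    # complex its single choice is.
    assert (1, (100,)) < (2, (0, 0))
    # Equal lengths compare elementwise, left to right.
    assert (2, (0, 5)) < (2, (1, 0))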

@@ -305,7 +312,7 @@ def __init__(
         self.__derived_values: dict = {}
         self.__pending_shrink_explanation = None

-        self.initial_size = len(initial.buffer)
+        self.initial_size = len(initial.choices)

         # We keep track of the current best example on the shrink_target
         # attribute.
@@ -401,7 +408,7 @@ def consider_new_tree(self, tree: Sequence[IRNode]) -> bool:
         if startswith(tree, self.nodes):
             return True

-        if startswith(self.nodes, tree):
+        if sort_key_ir(self.nodes) < sort_key_ir(tree):
             return False

         previous = self.shrink_target
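
Note the guard changes meaning here: previously it rejected candidates that were a strict prefix of the current nodes, whereas now it rejects any candidate that the current best already beats under the shortlex order. An illustrative check of that comparison, with plain tuples standing in for node sequences:

    def shortlex_key(seq):
        return (len(seq), tuple(seq))

    current = (1, 0)
    # A longer tree is shortlex-larger, so it is rejected outright.
    assert shortlex_key(current) < shortlex_key((1, 0, 0))
    # A shorter tree is shortlex-smaller, so it is still considered.
    assert not shortlex_key(current) < shortlex_key((0,))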
@@ -445,7 +452,7 @@ def incorporate_test_data(self, data):
             return
         if (
             self.__predicate(data)
-            and sort_key(data.buffer) < sort_key(self.shrink_target.buffer)
+            and sort_key_ir(data.ir_nodes) < sort_key_ir(self.shrink_target.ir_nodes)
            and self.__allow_transition(self.shrink_target, data)
         ):
             self.update_shrink_target(data)
@@ -474,28 +481,6 @@ def shrink(self):
         This method is "mostly idempotent" - calling it twice is unlikely to
         have any effect, though it has a non-zero probability of doing so.
         """
-        # We assume that if an all-zero block of bytes is an interesting
-        # example then we're not going to do better than that.
-        # This might not technically be true: e.g. for integers() | booleans()
-        # the simplest example is actually [1, 0]. Missing this case is fairly
-        # harmless and this allows us to make various simplifying assumptions
-        # about the structure of the data (principally that we're never
-        # operating on a block of all zero bytes so can use non-zeroness as a
-        # signpost of complexity).
-        if not any(self.shrink_target.buffer) or self.incorporate_new_buffer(
-            bytes(len(self.shrink_target.buffer))
-        ):
-            self.explain()
-            return
-
-        # There are multiple buffers that represent the same counterexample, eg
-        # n=2 (from the 16 bit integer bucket) and n=2 (from the 32 bit integer
-        # bucket). Before we start shrinking, we need to normalize to the minimal
-        # such buffer, else a buffer-smaller but ir-larger value may be chosen
-        # as the minimal counterexample.
-        data = self.engine.new_conjecture_data_ir(self.nodes)
-        self.engine.test_function(data)
-        self.incorporate_test_data(data.as_result())

         try:
             self.greedy_shrink()
@@ -509,7 +494,7 @@ def shrink(self):
                 def s(n):
                     return "s" if n != 1 else ""

-                total_deleted = self.initial_size - len(self.shrink_target.buffer)
+                total_deleted = self.initial_size - len(self.shrink_target.choices)
                 calls = self.engine.call_count - self.initial_calls
                 misaligned = self.engine.misaligned_count - self.initial_misaligned

@@ -518,7 +503,7 @@ def s(n):
                     "Shrink pass profiling\n"
                     "---------------------\n\n"
                     f"Shrinking made a total of {calls} call{s(calls)} of which "
-                    f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} bytes out "
+                    f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} choices out "
                     f"of {self.initial_size}."
                 )
                 for useful in [True, False]:
@@ -540,7 +525,7 @@ def s(n):
                         self.debug(
                             f" * {p.name} made {p.calls} call{s(p.calls)} of which "
                             f"{p.shrinks} shrank and {p.misaligned} were misaligned, "
-                            f"deleting {p.deletions} byte{s(p.deletions)}."
+                            f"deleting {p.deletions} choice{s(p.deletions)}."
                         )
                 self.debug("")
         self.explain()
@@ -797,7 +782,7 @@ def fixate_shrink_passes(self, passes):
                 # the length are the best.
                 if self.shrink_target is before_sp:
                     reordering[sp] = 1
-                elif len(self.buffer) < len(before_sp.buffer):
+                elif len(self.choices) < len(before_sp.choices):
                     reordering[sp] = -1
                 else:
                     reordering[sp] = 0
@@ -988,7 +973,7 @@ def __changed_nodes(self):
         assert prev_target is not new_target
         prev_nodes = prev_target.ir_nodes
         new_nodes = new_target.ir_nodes
-        assert sort_key(new_target.buffer) < sort_key(prev_target.buffer)
+        assert sort_key_ir(new_target.ir_nodes) < sort_key_ir(prev_target.ir_nodes)

         if len(prev_nodes) != len(new_nodes) or any(
             n1.ir_type != n2.ir_type for n1, n2 in zip(prev_nodes, new_nodes)
@@ -1186,11 +1171,11 @@ def remove_discarded(self):

             for ex in self.shrink_target.examples:
                 if (
-                    ex.length > 0
+                    ex.ir_length > 0
                     and ex.discarded
-                    and (not discarded or ex.start >= discarded[-1][-1])
+                    and (not discarded or ex.ir_start >= discarded[-1][-1])
                 ):
-                    discarded.append((ex.start, ex.end))
+                    discarded.append((ex.ir_start, ex.ir_end))

             # This can happen if we have discards but they are all of
             # zero length. This shouldn't happen very often so it's
@@ -1199,11 +1184,11 @@ def remove_discarded(self):
             if not discarded:
                 break

-            attempt = bytearray(self.shrink_target.buffer)
+            attempt = list(self.nodes)
             for u, v in reversed(discarded):
                 del attempt[u:v]

-            if not self.incorporate_new_buffer(attempt):
+            if not self.consider_new_tree(tuple(attempt)):
                 return False
         return True
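
One detail worth calling out: the deletion loop iterates the discarded spans in reverse, so removing a later (u, v) span never shifts the indices of an earlier one. A small self-contained demonstration of why that matters:

    # Deleting right-to-left keeps the earlier index pairs valid.
    attempt = list("abcdefg")
    spans = [(1, 3), (4, 6)]
    for u, v in reversed(spans):
        del attempt[u:v]
    assert attempt == list("adg")  # b, c and e, f removed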

@@ -1563,7 +1548,9 @@ def test_not_equal(x, y):
                 ],
             )
         ),
-        key=lambda i: st.buffer[examples[i].start : examples[i].end],
+        key=lambda i: sort_key_ir(
+            st.ir_nodes[examples[i].ir_start : examples[i].ir_end]
+        ),
     )

     def run_node_program(self, i, description, original, repeats=1):
@@ -1670,7 +1657,7 @@ def step(self, *, random_order=False):
         initial_shrinks = self.shrinker.shrinks
         initial_calls = self.shrinker.calls
         initial_misaligned = self.shrinker.misaligned
-        size = len(self.shrinker.shrink_target.buffer)
+        size = len(self.shrinker.shrink_target.choices)
         self.shrinker.engine.explain_next_call_as(self.name)

         if random_order:
@@ -1687,7 +1674,7 @@ def step(self, *, random_order=False):
         self.calls += self.shrinker.calls - initial_calls
         self.misaligned += self.shrinker.misaligned - initial_misaligned
         self.shrinks += self.shrinker.shrinks - initial_shrinks
-        self.deletions += size - len(self.shrinker.shrink_target.buffer)
+        self.deletions += size - len(self.shrinker.shrink_target.choices)
         self.shrinker.engine.clear_call_explanation()
         return True

hypothesis-python/tests/cover/test_deadline.py

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ def test_flaky_slow(i):


 def test_deadlines_participate_in_shrinking():
-    @settings(deadline=500, max_examples=1000)
+    @settings(deadline=500, max_examples=1000, database=None)
     @given(st.integers(min_value=0))
     def slow_if_large(i):
         if i >= 1000:

hypothesis-python/tests/nocover/test_duplication.py

Lines changed: 7 additions & 7 deletions
@@ -52,11 +52,11 @@ def test(b):
         test()
     except ValueError:
         pass
-    # There are three circumstances in which a duplicate is allowed: We replay
-    # the failing test once to check for flakiness, once when shrinking to normalize
-    # to the minimal buffer, and then we replay the fully minimized failing test
-    # at the end to display the error. The complication comes from the fact that
-    # these may or may not be the same test case, so we can see either two test
-    # cases each run twice or one test case which has been run three times.
-    assert set(counts.values()) in ({1, 2, 3}, {1, 4})
+    # There are two circumstances in which a duplicate is allowed: We replay
+    # the failing test once to check for flakiness, and then we replay the
+    # fully minimized failing test at the end to display the error. The
+    # complication comes from the fact that these may or may not be the same
+    # test case, so we can see either two test cases each run twice or one
+    # test case which has been run three times.
+    assert set(counts.values()) in ({1, 2}, {1, 3})
     assert len([k for k, v in counts.items() if v > 1]) <= 2

hypothesis-python/tests/quality/test_float_shrinking.py

Lines changed: 5 additions & 17 deletions
@@ -10,14 +10,7 @@

 import pytest

-from hypothesis import (
-    HealthCheck,
-    Verbosity,
-    example,
-    given,
-    settings,
-    strategies as st,
-)
+from hypothesis import example, given, strategies as st
 from hypothesis.internal.compat import ceil

 from tests.common.debug import minimal
@@ -39,21 +32,16 @@ def test_can_shrink_in_variable_sized_context(n):
 @example(1.7976931348623157e308)
 @example(1.5)
 @given(st.floats(min_value=0, allow_infinity=False, allow_nan=False))
-@settings(deadline=None, suppress_health_check=list(HealthCheck))
 def test_shrinks_downwards_to_integers(f):
-    g = minimal(
-        st.floats().filter(lambda x: x >= f),
-        settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
-    )
-    assert g == ceil(f)
+    assert minimal(st.floats(min_value=f)) == ceil(f)


 @example(1)
 @given(st.integers(1, 2**16 - 1))
-@settings(deadline=None, suppress_health_check=list(HealthCheck), max_examples=10)
 def test_shrinks_downwards_to_integers_when_fractional(b):
     g = minimal(
-        st.floats().filter(lambda x: b < x < 2**53 and int(x) != x),
-        settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
+        st.floats(
+            min_value=b, max_value=2**53, exclude_min=True, exclude_max=True
+        ).filter(lambda x: int(x) != x)
     )
     assert g == b + 0.5
