Merge pull request #4216 from HypothesisWorks/DRMacIver/more-shrinking-of-alternatives

Zac-HD · web-flow · commit 4582957fc249 · 2025-01-06T19:19:57.000+11:00
Add initial coarse reduction pass for reducing alternatives
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,4 @@
+RELEASE_TYPE: patch
+
+This release further improves shrinking of strategies using :func:`~hypothesis.strategies.one_of`,
+allowing the shrinker to more reliably move between branches of the strategy.
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
@@ -483,6 +483,7 @@ def shrink(self):
         """
 
         try:
+            self.initial_coarse_reduction()
             self.greedy_shrink()
         except StopShrinking:
             # If we stopped shrinking because we're making slow progress (instead of
@@ -689,6 +690,123 @@ def greedy_shrink(self):
             ]
         )
 
+    def initial_coarse_reduction(self):
+        """Performs some preliminary reductions that should not be
+        repeated as part of the main shrink passes.
+
+        The main reason why these can't be included as part of shrink
+        passes is that they have much more ability to make the test
+        case "worse". e.g. they might rerandomise part of it, significantly
+        increasing the value of individual nodes, which works in direct
+        opposition to the lexical shrinking and will frequently undo
+        its work.
+
+        ``shrink`` calls this immediately before ``greedy_shrink``. At
+        present the only reduction performed is ``reduce_each_alternative``.
+        """
+        self.reduce_each_alternative()
+
+    @derived_value  # type: ignore
+    def examples_starting_at(self):
+        """Index the examples by the position of their first IR node.
+
+        Returns a tuple with one entry per node of the shrink target.
+        Entry ``k`` is a tuple of the indices into ``self.examples`` of
+        every example whose ``ir_start`` equals ``k``, in increasing
+        index order.
+        """
+        # NOTE(review): ``derived_value`` presumably caches this per shrink
+        # target -- confirm against its definition.
+        result = [[] for _ in self.shrink_target.ir_nodes]
+        for i, ex in enumerate(self.examples):
+            # We can have zero-length examples that start at the end.
+            # Those have no slot in ``result`` and are left out.
+            if ex.ir_start < len(result):
+                result[ex.ir_start].append(i)
+        # Freeze both levels so the returned value is immutable.
+        return tuple(map(tuple, result))
+
+    def reduce_each_alternative(self):
+        """This is a pass that is designed to rerandomise use of the
+        one_of strategy or things that look like it, in order to try
+        to move from later strategies to earlier ones in the branch
+        order.
+
+        It does this by trying to systematically lower each value it
+        finds that looks like it might be the branch decision for
+        one_of, and then attempts to repair any changes in shape that
+        this causes.
+
+        A node is treated as a candidate when it is an integer node that
+        was not forced, has ``min_value == 0`` and currently holds a value
+        of at most 10. A candidate is only worked on if setting it to zero
+        changes the shape of the test case (see the inline comments).
+        """
+        # The shrink target can be replaced while this loop runs, so the
+        # node tuple and its length are re-read on every iteration.
+        i = 0
+        while i < len(self.shrink_target.ir_nodes):
+            nodes = self.shrink_target.ir_nodes
+            node = nodes[i]
+            if (
+                node.ir_type == "integer"
+                and not node.was_forced
+                and node.value <= 10
+                and node.kwargs["min_value"] == 0
+            ):
+                assert isinstance(node.value, int)
+
+                # We've found a plausible candidate for a ``one_of`` choice.
+                # We now want to see if the shape of the test case actually depends
+                # on it. If it doesn't, then we don't need to do this (comparatively
+                # costly) pass, and can let much simpler lexicographic reduction
+                # handle it later.
+                #
+                # We test this by trying to set the value to zero and seeing if the
+                # shape changes, as measured by either changing the number of subsequent
+                # nodes, or changing the nodes in such a way as to cause one of the
+                # previous values to no longer be valid in its position.
+                zero_attempt = self.cached_test_function_ir(
+                    nodes[:i] + (nodes[i].copy(with_value=0),) + nodes[i + 1 :]
+                )
+                # NOTE(review): a result that *is* the shrink target presumably
+                # means the value was already zero or the zeroed test case was
+                # adopted as the new target -- confirm against
+                # ``cached_test_function_ir``. Either way there is nothing
+                # left to do for this node.
+                if (
+                    zero_attempt is not self.shrink_target
+                    and zero_attempt is not None
+                    and zero_attempt.status >= Status.VALID
+                ):
+                    changed_shape = len(zero_attempt.ir_nodes) != len(nodes)
+
+                    if not changed_shape:
+                        # Same length, so compare position by position after
+                        # ``i``: the shape counts as changed if a node's type
+                        # differs, or if the original value is not permitted
+                        # by the kwargs of the node now at that position.
+                        for j in range(i + 1, len(nodes)):
+                            zero_node = zero_attempt.ir_nodes[j]
+                            orig_node = nodes[j]
+                            if (
+                                zero_node.ir_type != orig_node.ir_type
+                                or not ir_value_permitted(
+                                    orig_node.value, zero_node.ir_type, zero_node.kwargs
+                                )
+                            ):
+                                changed_shape = True
+                                break
+                    if changed_shape:
+                        # Try every smaller value in increasing order and
+                        # stop at the first one that succeeds.
+                        for v in range(node.value):
+                            if self.try_lower_node_as_alternative(i, v):
+                                break
+            # Always move on to the next position, whether or not this
+            # node was lowered.
+            i += 1
+
+    def try_lower_node_as_alternative(self, i, v):
+        """Attempt to lower `self.shrink_target.ir_nodes[i]` to `v`,
+        while rerandomising and attempting to repair any subsequent
+        changes to the shape of the test case that this causes.
+
+        Returns True if the direct replacement evaluates to the current
+        shrink target, or if one of the repair attempts causes the shrink
+        target to change. Returns False otherwise.
+        """
+        nodes = self.shrink_target.ir_nodes
+        # First try the plain substitution, leaving every other node as is.
+        initial_attempt = self.cached_test_function_ir(
+            nodes[:i] + (nodes[i].copy(with_value=v),) + nodes[i + 1 :]
+        )
+        if initial_attempt is self.shrink_target:
+            return True
+
+        # From here on only the nodes up to and including the lowered one
+        # are fixed.
+        prefix = nodes[:i] + (nodes[i].copy(with_value=v),)
+        # Keep the starting target so a change of ``self.shrink_target`` can
+        # be detected by identity below.
+        initial = self.shrink_target
+        # Indices of the examples that begin at node ``i``.
+        examples = self.examples_starting_at[i]
+        # Make up to three attempts.
+        for _ in range(3):
+            # NOTE(review): this goes through ``self.engine`` rather than the
+            # shrinker's own wrapper; ``extend`` presumably bounds how many
+            # further choices may be generated past the prefix -- confirm.
+            random_attempt = self.engine.cached_test_function_ir(
+                prefix, extend=len(nodes) * 2
+            )
+            # Attempts that did not reach at least VALID are of no use here.
+            if random_attempt.status < Status.VALID:
+                continue
+            self.incorporate_test_data(random_attempt)
+            for j in examples:
+                # NOTE(review): assumes example ``j`` of the random attempt
+                # corresponds to example ``j`` of the initial target --
+                # confirm.
+                initial_ex = initial.examples[j]
+                attempt_ex = random_attempt.examples[j]
+                contents = random_attempt.ir_nodes[
+                    attempt_ex.ir_start : attempt_ex.ir_end
+                ]
+                # Splice the regenerated example over the span of the original
+                # one, keeping the original nodes that followed it.
+                self.consider_new_tree(
+                    nodes[:i] + contents + nodes[initial_ex.ir_end :]
+                )
+                if initial is not self.shrink_target:
+                    return True
+        return False
+
     @derived_value  # type: ignore
     def shrink_pass_choice_trees(self):
         return defaultdict(ChoiceTree)
diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py
@@ -134,6 +134,7 @@ def generate_new_examples(self):
     runner.run()
     (last_data,) = runner.interesting_examples.values()
     assert last_data.status == Status.INTERESTING
+    assert runner.exit_reason == ExitReason.max_shrinks
     assert runner.shrinks == n
     in_db = set(db.data[runner.secondary_key])
     assert len(in_db) == n
diff --git a/hypothesis-python/tests/conjecture/test_shrinker.py b/hypothesis-python/tests/conjecture/test_shrinker.py
@@ -518,3 +518,30 @@ def shrinker(data: ConjectureData):
     # shrinking. Since the second draw is forced, this isn't possible to shrink
     # with just this pass.
     assert shrinker.choices == (15, 10)
+
+
+def test_alternative_shrinking_will_lower_to_alternate_value():
+    """Check that ``initial_coarse_reduction`` moves the first choice from
+    1 to 0 even though the first non-zero integer seen on the 0 branch is
+    never interesting."""
+    # We want to reject the first integer value we see when shrinking
+    # this alternative, because it will be the result of transmuting the
+    # bytes value, and we want to ensure that we can find other values
+    # there when we detect the shape change.
+    seen_int = None
+
+    @shrinking_from(ir(1, b"hello world"))
+    def shrinker(data: ConjectureData):
+        nonlocal seen_int
+        i = data.draw_integer(min_value=0, max_value=1)
+        if i == 1:
+            # Branch 1: interesting whenever the drawn bytes are non-empty.
+            if data.draw_bytes():
+                data.mark_interesting()
+        else:
+            # Branch 0: zero is never interesting; the first non-zero value
+            # is only recorded, and any later different non-zero value is
+            # interesting.
+            n = data.draw_integer(0, 100)
+            if n == 0:
+                return
+            if seen_int is None:
+                seen_int = n
+            elif n != seen_int:
+                data.mark_interesting()
+
+    shrinker.initial_coarse_reduction()
+    # The branch choice must have been lowered to the first alternative.
+    assert shrinker.choices[0] == 0
diff --git a/hypothesis-python/tests/nocover/test_precise_shrinking.py b/hypothesis-python/tests/nocover/test_precise_shrinking.py
@@ -135,8 +135,12 @@ def test_function(data):
 
 
 @lru_cache
+def minimal_for_strategy(s):
+    """Return ``precisely_shrink(s, end_marker=st.none())``, cached per
+    strategy by ``lru_cache``."""
+    return precisely_shrink(s, end_marker=st.none())
+
+
 def minimal_buffer_for_strategy(s):
-    return precisely_shrink(s, end_marker=st.none())[0].buffer
+    """Return the ``buffer`` of the first element of
+    ``minimal_for_strategy(s)``."""
+    return minimal_for_strategy(s)[0].buffer
 
 
 def test_strategy_list_is_in_sorted_order():
@@ -274,12 +278,11 @@ def shortlex(s):
     result_list = []
 
     for k, v in sorted(results.items(), key=lambda x: shortlex(x[0])):
-        if shortlex(k) < shortlex(buffer):
-            t = repr(v)
-            if t in seen:
-                continue
-            seen.add(t)
-            result_list.append((k, v))
+        t = repr(v)
+        if t in seen:
+            continue
+        seen.add(t)
+        result_list.append((k, v))
     return result_list
 
 
@@ -296,3 +299,34 @@ def test_always_shrinks_to_none(a, seed, block_falsey, allow_sloppy):
         combined_strategy, result.buffer, allow_sloppy=allow_sloppy, seed=seed
     )
     assert shrunk_values[0][1] is None
+
+
+@pytest.mark.parametrize(
+    "i,alts", [(i, alt) for alt in alternatives for i in range(1, len(alt))]
+)
+@pytest.mark.parametrize("force_small", [False, True])
+@pytest.mark.parametrize("seed", [0, 2452, 99085240570])
+def test_can_shrink_to_every_smaller_alternative(i, alts, seed, force_small):
+    """Starting from a value of the ``i``-th alternative's type, the
+    shrinks of ``one_of`` over all alternatives must include a value of
+    every earlier alternative's type."""
+    # Each entry of ``alts`` is unpacked as a (type, strategy) pair.
+    types = [t for t, _ in alts]
+    strats = [s for _, s in alts]
+    combined_strategy = st.one_of(*strats)
+    # Obtain a starting example whose type is exactly ``types[i]``, either
+    # via ``precisely_shrink`` or via ``find_random``.
+    if force_small:
+        result, value = precisely_shrink(
+            combined_strategy, is_interesting=lambda x: type(x) is types[i], seed=seed
+        )
+    else:
+        result, value = find_random(
+            combined_strategy, lambda x: type(x) is types[i], seed=seed
+        )
+
+    shrunk = shrinks(
+        combined_strategy,
+        result.buffer,
+        allow_sloppy=False,
+        # Arbitrary change so we don't use the same seed for each Random.
+        seed=seed * 17,
+    )
+    # Only the second element of each pair returned by ``shrinks`` is
+    # needed.
+    shrunk_values = [t for _, t in shrunk]
+
+    # Every alternative before index ``i`` must be represented.
+    for j in range(i):
+        assert any(isinstance(x, types[j]) for x in shrunk_values)