Add initial coarse reduction pass for reducing alternatives

DRMacIver · DRMacIver · commit 15f040d79dc9 · 2024-12-29T11:00:40.000Z
This adds an initial phase to shrinking that is allowed to make
changes that would be bad to make as part of the main shrink pass,
with the main goal of producing better results for ``one_of``.
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,4 @@
+RELEASE_TYPE: patch
+
+This release further improves shrinking of strategies using :func:`~hypothesis.strategies.one_of`,
+allowing the shrinker to more reliably move between branches of the strategy.
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
@@ -483,6 +483,7 @@ def shrink(self):
         """
 
         try:
+            self.initial_coarse_reduction()
             self.greedy_shrink()
         except StopShrinking:
             # If we stopped shrinking because we're making slow progress (instead of
@@ -689,6 +690,123 @@ def greedy_shrink(self):
             ]
         )
 
+    def initial_coarse_reduction(self):
+        """Performs some preliminary reductions that should not be
+        repeated as part of the main shrink passes.
+
+        The main reason why these can't be included as part of shrink
+        passes is that they have much more ability to make the test
+        case "worse". For example, they might rerandomise part of it,
+        significantly increasing the value of individual nodes, which works
+        in direct opposition to the lexical shrinking and will frequently
+        undo its work.
+
+        At present the only such reduction is ``reduce_each_alternative``.
+        """
+        self.reduce_each_alternative()
+
+    @derived_value  # type: ignore
+    def examples_starting_at(self):
+        """For each node index of the shrink target, the indices (into
+        ``self.examples``) of the examples whose ``ir_start`` is that
+        index, returned as a tuple of tuples.
+        """
+        node_count = len(self.shrink_target.ir_nodes)
+        buckets = [[] for _ in range(node_count)]
+        for index, example in enumerate(self.examples):
+            start = example.ir_start
+            # Zero-length examples can start at the very end, one past the
+            # last node; there is no slot for those, so leave them out.
+            if start >= node_count:
+                continue
+            buckets[start].append(index)
+        return tuple(tuple(bucket) for bucket in buckets)
+
+    def reduce_each_alternative(self):
+        """This is a pass that is designed to rerandomise use of the
+        one_of strategy or things that look like it, in order to try
+        to move from later strategies to earlier ones in the branch
+        order.
+
+        It does this by trying to systematically lower each value it
+        finds that looks like it might be the branch decision for
+        one_of, and then attempts to repair any changes in shape that
+        this causes.
+
+        The pass walks the nodes of the current shrink target once, from
+        left to right, and returns nothing.
+        """
+        i = 0
+        while i < len(self.shrink_target.ir_nodes):
+            # Re-read the nodes on every iteration: a successful lowering
+            # below can replace the shrink target, so the node tuple (and
+            # its length) may differ from the previous iteration.
+            nodes = self.shrink_target.ir_nodes
+            node = nodes[i]
+            # Heuristic for "might be a branch decision": a small, unforced
+            # integer whose permitted range starts at zero.
+            # NOTE(review): presumably this matches how one_of draws its
+            # branch index - confirm against the strategy implementation.
+            if (
+                node.ir_type == "integer"
+                and not node.was_forced
+                and node.value <= 10
+                and node.kwargs["min_value"] == 0
+            ):
+                assert isinstance(node.value, int)
+
+                # We've found a plausible candidate for a ``one_of`` choice.
+                # We now want to see if the shape of the test case actually depends
+                # on it. If it doesn't, then we don't need to do this (comparatively
+                # costly) pass, and can let much simpler lexicographic reduction
+                # handle it later.
+                #
+                # We test this by trying to set the value to zero and seeing if the
+                # shape changes, as measured by either changing the number of subsequent
+                # nodes, or changing the nodes in such a way as to cause one of the
+                # previous values to no longer be valid in its position.
+                zero_attempt = self.cached_test_function_ir(
+                    nodes[:i] + (nodes[i].copy(with_value=0),) + nodes[i + 1 :]
+                )
+                # Only inspect the zero attempt when it is a distinct, usable
+                # result: not the current target itself, not missing, and at
+                # least VALID.
+                if (
+                    zero_attempt is not self.shrink_target
+                    and zero_attempt is not None
+                    and zero_attempt.status >= Status.VALID
+                ):
+                    changed_shape = len(zero_attempt.ir_nodes) != len(nodes)
+
+                    if not changed_shape:
+                        # Same number of nodes, so compare position by
+                        # position: the shape has changed if a later node has
+                        # a different type, or if the original value would not
+                        # be permitted by the kwargs of the node now found at
+                        # that position.
+                        for j in range(i + 1, len(nodes)):
+                            zero_node = zero_attempt.ir_nodes[j]
+                            orig_node = nodes[j]
+                            if (
+                                zero_node.ir_type != orig_node.ir_type
+                                or not ir_value_permitted(
+                                    orig_node.value, zero_node.ir_type, zero_node.kwargs
+                                )
+                            ):
+                                changed_shape = True
+                                break
+                    if changed_shape:
+                        # Try every smaller value in increasing order and stop
+                        # at the first one that succeeds.
+                        for v in range(node.value):
+                            if self.try_lower_node_as_alternative(i, v):
+                                break
+            # Always advance, whether or not node i was lowered.
+            i += 1
+
+    def try_lower_node_as_alternative(self, i, v):
+        """Attempt to lower `self.shrink_target.ir_nodes[i]` to `v`,
+        while rerandomising and attempting to repair any subsequent
+        changes to the shape of the test case that this causes.
+
+        Returns True if the result of the direct replacement is the
+        shrink target, or if one of the repaired candidates causes the
+        shrink target to change. Returns False once the fixed budget of
+        three rerandomised attempts has been used without that happening.
+        """
+        nodes = self.shrink_target.ir_nodes
+        # First try the simplest edit: the same nodes with only node i
+        # replaced by the lowered value.
+        initial_attempt = self.cached_test_function_ir(
+            nodes[:i] + (nodes[i].copy(with_value=v),) + nodes[i + 1 :]
+        )
+        # NOTE(review): presumably the shrinker's cached_test_function_ir
+        # adopts a successful result as the new shrink target, which is why
+        # identity with self.shrink_target is treated as success - confirm.
+        if initial_attempt is self.shrink_target:
+            return True
+
+        # Everything up to and including the lowered node; what follows is
+        # left for the engine to fill in.
+        prefix = nodes[:i] + (nodes[i].copy(with_value=v),)
+        # Remember the current target so the identity check below can detect
+        # that it has been replaced.
+        initial = self.shrink_target
+        # Indices of the examples that begin at node i.
+        examples = self.examples_starting_at[i]
+        for _ in range(3):
+            # NOTE(review): `extend` presumably bounds how many choices beyond
+            # the prefix may be generated - confirm against the engine.
+            random_attempt = self.engine.cached_test_function_ir(
+                prefix, extend=len(nodes) * 2
+            )
+            if random_attempt.status < Status.VALID:
+                continue
+            # NOTE(review): presumably this lets the shrinker adopt the random
+            # attempt directly if it is an improvement. The success check only
+            # happens inside the loop below, so with an empty `examples` a
+            # change made here would not be reported - confirm this is intended.
+            self.incorporate_test_data(random_attempt)
+            for j in examples:
+                # NOTE(review): assumes example index j refers to the
+                # corresponding example in the random attempt - confirm.
+                initial_ex = initial.examples[j]
+                attempt_ex = random_attempt.examples[j]
+                contents = random_attempt.ir_nodes[
+                    attempt_ex.ir_start : attempt_ex.ir_end
+                ]
+                # Splice the random attempt's version of example j into the
+                # original nodes: the original nodes before i, then the new
+                # example contents, then the original nodes after the end of
+                # the original example.
+                self.consider_new_tree(
+                    nodes[:i] + contents + nodes[initial_ex.ir_end :]
+                )
+                if initial is not self.shrink_target:
+                    return True
+        return False
+
     @derived_value  # type: ignore
     def shrink_pass_choice_trees(self):
         return defaultdict(ChoiceTree)
diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py
@@ -134,6 +134,7 @@ def generate_new_examples(self):
     runner.run()
     (last_data,) = runner.interesting_examples.values()
     assert last_data.status == Status.INTERESTING
+    assert runner.exit_reason == ExitReason.max_shrinks
     assert runner.shrinks == n
     in_db = set(db.data[runner.secondary_key])
     assert len(in_db) == n
diff --git a/hypothesis-python/tests/conjecture/test_shrinker.py b/hypothesis-python/tests/conjecture/test_shrinker.py
@@ -518,3 +518,30 @@ def shrinker(data: ConjectureData):
     # shrinking. Since the second draw is forced, this isn't possible to shrink
     # with just this pass.
     assert shrinker.choices == (15, 10)
+
+
+def test_alternative_shrinking_will_lower_to_alternate_value():
+    """The coarse reduction must be able to move to branch 0 even when the
+    first integer it sees there is not accepted.
+
+    The first integer value seen on the integer branch is deliberately
+    rejected, because it will be the result of transmuting the bytes value;
+    only a different non-zero integer is interesting. This checks that other
+    values can be found there once the shape change is detected.
+    """
+    first_int_seen = None
+
+    @shrinking_from(ir(1, b"hello world"))
+    def shrinker(data: ConjectureData):
+        nonlocal first_int_seen
+        branch = data.draw_integer(min_value=0, max_value=1)
+        if branch == 1:
+            # Bytes branch: interesting whenever the bytes are non-empty.
+            if data.draw_bytes():
+                data.mark_interesting()
+            return
+
+        # Integer branch.
+        n = data.draw_integer(0, 100)
+        if n == 0:
+            return
+        if first_int_seen is None:
+            # Remember (and reject) the first non-zero integer we see.
+            first_int_seen = n
+            return
+        if n != first_int_seen:
+            data.mark_interesting()
+
+    shrinker.initial_coarse_reduction()
+    assert shrinker.choices[0] == 0
diff --git a/hypothesis-python/tests/nocover/test_precise_shrinking.py b/hypothesis-python/tests/nocover/test_precise_shrinking.py
@@ -135,8 +135,12 @@ def test_function(data):
 
 
 @lru_cache
+def minimal_for_strategy(s):
+    """Return the result of ``precisely_shrink`` for strategy ``s``, using
+    ``st.none()`` as the end marker.
+
+    Results are memoised with ``lru_cache``, so ``s`` must be hashable.
+    """
+    return precisely_shrink(s, end_marker=st.none())
+
+
 def minimal_buffer_for_strategy(s):
-    return precisely_shrink(s, end_marker=st.none())[0].buffer
+    """Return the ``buffer`` of the first element of ``minimal_for_strategy(s)``."""
+    return minimal_for_strategy(s)[0].buffer
 
 
 def test_strategy_list_is_in_sorted_order():
@@ -274,12 +278,11 @@ def shortlex(s):
     result_list = []
 
     for k, v in sorted(results.items(), key=lambda x: shortlex(x[0])):
-        if shortlex(k) < shortlex(buffer):
-            t = repr(v)
-            if t in seen:
-                continue
-            seen.add(t)
-            result_list.append((k, v))
+        t = repr(v)
+        if t in seen:
+            continue
+        seen.add(t)
+        result_list.append((k, v))
     return result_list
 
 
@@ -296,3 +299,34 @@ def test_always_shrinks_to_none(a, seed, block_falsey, allow_sloppy):
         combined_strategy, result.buffer, allow_sloppy=allow_sloppy, seed=seed
     )
     assert shrunk_values[0][1] is None
+
+
+@pytest.mark.parametrize(
+    "i,alts", [(i, alt) for alt in alternatives for i in range(1, len(alt))]
+)
+@pytest.mark.parametrize("force_small", [False, True])
+@pytest.mark.parametrize("seed", [0, 2452, 99085240570])
+def test_can_shrink_to_every_smaller_alternative(i, alts, seed, force_small):
+    """Starting from a value of the ``i``-th alternative's type, shrinking
+    must be able to reach a value of every earlier alternative's type.
+
+    With ``force_small`` the starting point is a precisely shrunk example of
+    the target type; otherwise it is a randomly found one.
+    """
+    types = [alt_type for alt_type, _ in alts]
+    combined_strategy = st.one_of(*(strategy for _, strategy in alts))
+    target_type = types[i]
+
+    if force_small:
+        result, _ = precisely_shrink(
+            combined_strategy,
+            is_interesting=lambda x: type(x) is target_type,
+            seed=seed,
+        )
+    else:
+        result, _ = find_random(
+            combined_strategy, lambda x: type(x) is target_type, seed=seed
+        )
+
+    shrunk_values = [
+        value
+        for _, value in shrinks(
+            combined_strategy,
+            result.buffer,
+            allow_sloppy=False,
+            # Arbitrary change so we don't use the same seed for each Random.
+            # NOTE(review): for seed == 0 this is still 0, so that parameter
+            # does reuse the same seed - confirm whether that matters.
+            seed=seed * 17,
+        )
+    ]
+
+    for earlier_type in types[:i]:
+        assert any(isinstance(x, earlier_type) for x in shrunk_values)