Merge pull request #4162 from tybug/explain-ir-actual

Zac-HD · web-flow · commit d8c3522fb407 · 2024-11-09T11:27:57.000-08:00
Migrate `explain` phase to the typed choice sequence
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+This patch migrates the :obj:`~hypothesis.Phase.explain` :ref:`phase <phases>` to our IR layer (:issue:`3921`). This should improve both its speed and precision.
diff --git a/hypothesis-python/src/hypothesis/control.py b/hypothesis-python/src/hypothesis/control.py
@@ -149,10 +149,10 @@ def prep_args_kwargs_from_strategies(self, kwarg_strategies):
         arg_labels = {}
         kwargs = {}
         for k, s in kwarg_strategies.items():
-            start_idx = self.data.index
+            start_idx = self.data.index_ir
             with deprecate_random_in_strategy("from {}={!r}", k, s) as check:
                 obj = check(self.data.draw(s, observe_as=f"generate:{k}"))
-            end_idx = self.data.index
+            end_idx = self.data.index_ir
             kwargs[k] = obj
 
             # This high up the stack, we can't see or really do much with the conjecture
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -67,7 +67,11 @@
     PreviouslyUnseenBehaviour,
     TreeRecordingObserver,
 )
-from hypothesis.internal.conjecture.junkdrawer import clamp, ensure_free_stackframes
+from hypothesis.internal.conjecture.junkdrawer import (
+    clamp,
+    ensure_free_stackframes,
+    startswith,
+)
 from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser
 from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key
 from hypothesis.internal.healthcheck import fail_health_check
@@ -1478,14 +1482,17 @@ def check_result(
             self.__data_cache[buffer] = result
         return result
 
-    def passing_buffers(self, prefix: bytes = b"") -> frozenset[bytes]:
-        """Return a collection of bytestrings which cause the test to pass.
+    def passing_choice_sequences(
+        self, prefix: Sequence[IRNode] = ()
+    ) -> frozenset[bytes]:
+        """Return the set of choice sequences (as IR nodes) which make the test pass.
         Optionally restrict this by a certain prefix, which is useful for explain mode.
         """
         return frozenset(
-            buf
-            for buf in self.__data_cache
-            if buf.startswith(prefix) and self.__data_cache[buf].status == Status.VALID
+            result.examples.ir_tree_nodes
+            for key in self.__data_cache_ir
+            if (result := self.__data_cache_ir[key]).status is Status.VALID
+            and startswith(result.examples.ir_tree_nodes, prefix)
         )
 
 
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
@@ -25,11 +25,14 @@
     ConjectureResult,
     IRNode,
     Status,
+    ir_size_nodes,
+    ir_to_buffer,
     ir_value_equal,
     ir_value_key,
     ir_value_permitted,
 )
 from hypothesis.internal.conjecture.junkdrawer import (
+    endswith,
     find_integer,
     replace_all,
     startswith,
@@ -540,20 +543,21 @@ def s(n):
         self.explain()
 
     def explain(self):
+        from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR
+
         if not self.should_explain or not self.shrink_target.arg_slices:
             return
-        from hypothesis.internal.conjecture.engine import BUFFER_SIZE
 
         self.max_stall = 1e999
         shrink_target = self.shrink_target
-        buffer = shrink_target.buffer
+        nodes = self.nodes
         chunks = defaultdict(list)
 
         # Before we start running experiments, let's check for known inputs which would
         # make them redundant.  The shrinking process means that we've already tried many
         # variations on the minimal example, so this can save a lot of time.
-        seen_passing_buffers = self.engine.passing_buffers(
-            prefix=buffer[: min(self.shrink_target.arg_slices)[0]]
+        seen_passing_seq = self.engine.passing_choice_sequences(
+            prefix=self.nodes[: min(self.shrink_target.arg_slices)[0]]
         )
 
         # Now that we've shrunk to a minimal failing example, it's time to try
@@ -565,8 +569,8 @@ def explain(self):
             # Check for any previous examples that match the prefix and suffix,
             # so we can skip if we found a passing example while shrinking.
             if any(
-                seen.startswith(buffer[:start]) and seen.endswith(buffer[end:])
-                for seen in seen_passing_buffers
+                startswith(seen, nodes[:start]) and endswith(seen, nodes[end:])
+                for seen in seen_passing_seq
             ):
                 continue
 
@@ -581,47 +585,61 @@ def explain(self):
                     # stop early if we're seeing mostly invalid examples
                     break  # pragma: no cover
 
-                buf_attempt_fixed = bytearray(buffer)
-                buf_attempt_fixed[start:end] = [
-                    self.random.randint(0, 255) for _ in range(end - start)
-                ]
-                result = self.engine.cached_test_function(
-                    buf_attempt_fixed, extend=BUFFER_SIZE - len(buf_attempt_fixed)
+                # replace the non-forced nodes in start:end with random values
+                replacement = []
+                for i in range(start, end):
+                    node = nodes[i]
+                    if not node.was_forced:
+                        (value, _buf) = ir_to_buffer(
+                            node.ir_type, node.kwargs, random=self.random
+                        )
+                        node = node.copy(with_value=value)
+                    replacement.append(node)
+
+                attempt = nodes[:start] + tuple(replacement) + nodes[end:]
+                result = self.engine.cached_test_function_ir(
+                    attempt, extend=BUFFER_SIZE_IR - ir_size_nodes(attempt)
                 )
 
                 # Turns out this was a variable-length part, so grab the infix...
-                if result.status == Status.OVERRUN:
+                if result.status is Status.OVERRUN:
                     continue  # pragma: no cover  # flakily covered
                 if not (
-                    len(buf_attempt_fixed) == len(result.buffer)
-                    and result.buffer.endswith(buffer[end:])
+                    len(attempt) == len(result.examples.ir_tree_nodes)
+                    and endswith(result.examples.ir_tree_nodes, nodes[end:])
                 ):
                     for ex, res in zip(shrink_target.examples, result.examples):
-                        assert ex.start == res.start
-                        assert ex.start <= start
+                        assert ex.ir_start == res.ir_start
+                        assert ex.ir_start <= start
                         assert ex.label == res.label
-                        if start == ex.start and end == ex.end:
-                            res_end = res.end
+                        if start == ex.ir_start and end == ex.ir_end:
+                            res_end = res.ir_end
                             break
                     else:
                         raise NotImplementedError("Expected matching prefixes")
 
-                    buf_attempt_fixed = (
-                        buffer[:start] + result.buffer[start:res_end] + buffer[end:]
+                    attempt = (
+                        nodes[:start]
+                        + result.examples.ir_tree_nodes[start:res_end]
+                        + nodes[end:]
                     )
-                    chunks[(start, end)].append(result.buffer[start:res_end])
-                    result = self.engine.cached_test_function(buf_attempt_fixed)
+                    chunks[(start, end)].append(
+                        result.examples.ir_tree_nodes[start:res_end]
+                    )
+                    result = self.engine.cached_test_function_ir(attempt)
 
-                    if result.status == Status.OVERRUN:
+                    if result.status is Status.OVERRUN:
                         continue  # pragma: no cover  # flakily covered
                 else:
-                    chunks[(start, end)].append(result.buffer[start:end])
+                    chunks[(start, end)].append(
+                        result.examples.ir_tree_nodes[start:end]
+                    )
 
                 if shrink_target is not self.shrink_target:  # pragma: no cover
                     # If we've shrunk further without meaning to, bail out.
                     self.shrink_target.slice_comments.clear()
                     return
-                if result.status == Status.VALID:
+                if result.status is Status.VALID:
                     # The test passed, indicating that this param can't vary freely.
                     # However, it's really hard to write a simple and reliable covering
                     # test, because of our `seen_passing_buffers` check above.
@@ -640,15 +658,15 @@ def explain(self):
         chunks_by_start_index = sorted(chunks.items())
         for _ in range(500):  # pragma: no branch
             # no-branch here because we don't coverage-test the abort-at-500 logic.
-            new_buf = bytearray()
+            new_nodes = []
             prev_end = 0
             for (start, end), ls in chunks_by_start_index:
                 assert prev_end <= start < end, "these chunks must be nonoverlapping"
-                new_buf.extend(buffer[prev_end:start])
-                new_buf.extend(self.random.choice(ls))
+                new_nodes.extend(nodes[prev_end:start])
+                new_nodes.extend(self.random.choice(ls))
                 prev_end = end
 
-            result = self.engine.cached_test_function(new_buf)
+            result = self.engine.cached_test_function_ir(new_nodes)
 
             # This *can't* be a shrink because none of the components were.
             assert shrink_target is self.shrink_target
diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py
@@ -1527,6 +1527,19 @@ def test(data):
         assert d2.status == Status.VALID
 
 
+def test_draw_bits_partly_from_prefix_and_partly_random():
+    # a draw_bits call which straddles the end of our prefix takes a slightly
+    # different code path.
+    def test(data):
+        # float consumes draw_bits(64)
+        data.draw_float()
+
+    with deterministic_PRNG():
+        runner = ConjectureRunner(test, settings=TEST_SETTINGS)
+        d = runner.cached_test_function(bytes(10), extend=100)
+        assert d.status == Status.VALID
+
+
 def test_can_be_set_to_ignore_limits():
     def test(data):
         data.draw_bytes(1, 1)
diff --git a/hypothesis-python/tests/conjecture/test_inquisitor.py b/hypothesis-python/tests/conjecture/test_inquisitor.py
@@ -16,36 +16,20 @@
 
 
 def fails_with_output(expected, error=AssertionError, **kw):
-    expected = [expected] if isinstance(expected, str) else expected
-
     def _inner(f):
         def _new():
             with pytest.raises(error) as err:
                 settings(print_blob=False, derandomize=True, **kw)(f)()
             got = "\n".join(err.value.__notes__).strip() + "\n"
-            assert any(got == s.strip() + "\n" for s in expected)
+            assert got == expected.strip() + "\n"
 
         return _new
 
     return _inner
 
 
-# this should have a marked as freely varying, but
-# false negatives in our inquisitor code skip over it sometimes, depending on the
-# seen_passed_buffers. yet another thing that should be improved by moving to the ir.
 @fails_with_output(
-    [
-        """
-Falsifying example: test_inquisitor_comments_basic_fail_if_either(
-    # The test always failed when commented parts were varied together.
-    a=False,
-    b=True,
-    c=[],  # or any other generated value
-    d=True,
-    e=False,  # or any other generated value
-)
-""",
-        """
+    """
 Falsifying example: test_inquisitor_comments_basic_fail_if_either(
     # The test always failed when commented parts were varied together.
     a=False,  # or any other generated value
@@ -54,8 +38,7 @@ def _new():
     d=True,
     e=False,  # or any other generated value
 )
-""",
-    ]
+"""
 )
 @given(st.booleans(), st.booleans(), st.lists(st.none()), st.booleans(), st.booleans())
 def test_inquisitor_comments_basic_fail_if_either(a, b, c, d, e):
@@ -91,6 +74,27 @@ def test_inquisitor_no_together_comment_if_single_argument(a, b):
     assert a
 
 
+@st.composite
+def ints_with_forced_draw(draw):
+    data = draw(st.data())
+    n = draw(st.integers())
+    data.conjecture_data.draw_boolean(forced=True)
+    return n
+
+
+@fails_with_output(
+    """
+Falsifying example: test_inquisitor_doesnt_break_on_varying_forced_nodes(
+    n1=100,
+    n2=0,  # or any other generated value
+)
+"""
+)
+@given(st.integers(), ints_with_forced_draw())
+def test_inquisitor_doesnt_break_on_varying_forced_nodes(n1, n2):
+    assert n1 < 100
+
+
 @fails_with(ZeroDivisionError)
 @settings(database=None)
 @given(start_date=st.datetimes(), data=st.data())
diff --git a/whole_repo_tests/test_mypy.py b/whole_repo_tests/test_mypy.py
@@ -41,11 +41,22 @@ def get_mypy_output(fname, *extra_args):
 
 
 def get_mypy_analysed_type(fname):
-    out = get_mypy_output(fname).rstrip()
-    msg = "Success: no issues found in 1 source file"
-    if out.endswith(msg):
-        out = out[: -len(msg)]
-    assert len(out.splitlines()) == 1, out
+    attempts = 0
+    while True:
+        out = get_mypy_output(fname).rstrip()
+        msg = "Success: no issues found in 1 source file"
+        if out.endswith(msg):
+            out = out[: -len(msg)]
+        # we've noticed some flakiness in getting an empty output here. Give it
+        # a couple tries.
+        if len(out.splitlines()) == 0:
+            attempts += 1
+            continue
+
+        assert len(out.splitlines()) == 1, out
+        assert attempts < 2, "too many failed retries"
+        break
+
     # See https://mypy.readthedocs.io/en/latest/common_issues.html#reveal-type
     # The shell output for `reveal_type([1, 2, 3])` looks like a literal:
     # file.py:2: error: Revealed type is 'builtins.list[builtins.int*]'
@@ -327,8 +338,7 @@ def test_stateful_target_params_mutually_exclusive(tmp_path, decorator):
     "target_args",
     [
         "target=b1",
-        # FIXME: temporary workaround for mypy bug, see hypothesis/pull/4136
-        pytest.param("targets=(b1,)", marks=pytest.mark.xfail(strict=False)),
+        "targets=(b1,)",
         "targets=(b1, b2)",
         "",
     ],

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+RELEASE_TYPE: patch`
	`2`	`+`
	`3`	+This patch migrates the :obj:`~hypothesis.Phase.explain` :ref:`phase <phases>` to our IR layer (:issue:`3921`). This should improve both its speed and precision.