Merge pull request #1745 from HypothesisWorks/DRMacIver/cache-overflows

DRMacIver · web-flow · commit 29fb9ebd777d · 2019-01-09T13:00:00.000Z
Detect potential overflows in cached_test_function
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,5 @@
+RELEASE_TYPE: patch
+
+This release improves the internal logic that determines whether a test case in Hypothesis's internal representation could lead to a valid test case.
+In some circumstances this can lead to a significant speedup during shrinking.
+It may have a minor negative impact on the quality of the final result, because certain shrink passes now have access to less information about test cases in some circumstances, but this should rarely matter.
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -138,6 +138,15 @@ def trivial(self):
         return self.forced or self.all_zero
 
 
+class _Overrun(object):
+    status = Status.OVERRUN
+
+    def __repr__(self):
+        return "Overrun"
+
+
+Overrun = _Overrun()
+
 global_test_counter = 0
 
 
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -38,6 +38,7 @@
 from hypothesis.internal.conjecture.data import (
     MAX_DEPTH,
     ConjectureData,
+    Overrun,
     Status,
     StopTest,
 )
@@ -811,16 +812,17 @@ def generate_new_examples(self):
                 HealthCheck.large_base_example,
             )
 
-        # If the language starts with writes of length >= cap then there is
-        # only one string in it: Everything after cap is forced to be zero (or
-        # to be whatever value is written there). That means that once we've
-        # tried the zero value, there's nothing left for us to do, so we
-        # exit early here.
-        for i in hrange(self.cap):
-            if i not in zero_data.forced_indices:
-                break
-        else:
-            self.exit_with(ExitReason.finished)
+        if zero_data is not Overrun:
+            # If the language starts with writes of length >= cap then there is
+            # only one string in it: Everything after cap is forced to be zero (or
+            # to be whatever value is written there). That means that once we've
+            # tried the zero value, there's nothing left for us to do, so we
+            # exit early here.
+            for i in hrange(self.cap):
+                if i not in zero_data.forced_indices:
+                    break
+            else:
+                self.exit_with(ExitReason.finished)
 
         self.health_check_state = HealthCheckState()
 
@@ -1000,70 +1002,18 @@ def shrink(self, example, predicate):
     def new_shrinker(self, example, predicate):
         return Shrinker(self, example, predicate)
 
-    def prescreen_buffer(self, buffer):
-        """Attempt to rule out buffer as a possible interesting candidate.
-
-        Returns False if we know for sure that running this buffer will not
-        produce an interesting result. Returns True if it might (because it
-        explores territory we have not previously tried).
-
-        This is purely an optimisation to try to reduce the number of tests we
-        run. "return True" would be a valid but inefficient implementation.
-        """
-
-        # Traverse the tree, to see if we have already tried this buffer
-        # (or a prefix of it).
-        node_index = 0
-        n = len(buffer)
-        for k, b in enumerate(buffer):
-            if node_index in self.dead:
-                # This buffer (or a prefix of it) has already been tested,
-                # or has already had its descendants fully explored.
-                # Testing it again would not be helpful.
-                return False
-            try:
-                # The block size at that point provides a lower bound on how
-                # many more bytes are required. If the buffer does not have
-                # enough bytes to fulfill that block size then we can rule out
-                # this buffer.
-                if k + self.block_sizes[node_index] > n:
-                    return False
-            except KeyError:
-                pass
-
-            # If there's a forced value or a mask at this position, then
-            # pretend that the buffer already contains a matching value,
-            # because the test function is going to do the same.
-            try:
-                b = self.forced[node_index]
-            except KeyError:
-                pass
-            try:
-                b = b & self.masks[node_index]
-            except KeyError:
-                pass
-
-            try:
-                node_index = self.tree[node_index][b]
-            except KeyError:
-                # The buffer wasn't in the tree, which means we haven't tried
-                # it. That makes it a possible candidate.
-                return True
-        else:
-            # We ran out of buffer before reaching a leaf or a missing node.
-            # That means the test function is going to draw beyond the end
-            # of this buffer, which makes it a bad candidate.
-            return False
-
     def cached_test_function(self, buffer):
         """Checks the tree to see if we've tested this buffer, and returns the
         previous result if we have.
 
         Otherwise we call through to ``test_function``, and return a
         fresh result.
         """
+        rewritten = bytearray()
+        would_overrun = False
+
         node_index = 0
-        for c in buffer:
+        for i, c in enumerate(buffer):
             # If there's a forced value or a mask at this position, then
             # pretend that the buffer already contains a matching value,
             # because the test function is going to do the same.
@@ -1076,6 +1026,18 @@ def cached_test_function(self, buffer):
             except KeyError:
                 pass
 
+            try:
+                # If we know how many bytes are read at this point and
+                # there aren't enough, then it doesn't actually matter
+                # what the values are, we're definitely going to overrun.
+                if i + self.block_sizes[node_index] > len(buffer):
+                    would_overrun = True
+                    break
+            except KeyError:
+                pass
+
+            rewritten.append(c)
+
             try:
                 node_index = self.tree[node_index][c]
             except KeyError:
@@ -1087,18 +1049,23 @@ def cached_test_function(self, buffer):
             if isinstance(node, ConjectureData):
                 # This buffer (or a prefix of it) has already been tested.
                 # Return the stored result instead of trying it again.
+                assert node.status != Status.OVERRUN
                 return node
         else:
             # Falling off the end of this loop means that we're about to test
-            # a prefix of a previously-tested byte stream. The test is going
-            # to draw beyond the end of the buffer, and fail due to overrun.
-            # Currently there is no special handling for this case.
-            pass
+            # a prefix of a previously-tested byte stream, so the test would
+            # overrun.
+            would_overrun = True
+
+        if would_overrun:
+            return Overrun
 
         # We didn't find a match in the tree, so we need to run the test
         # function normally.
         result = ConjectureData.for_buffer(buffer)
         self.test_function(result)
+        if result.status == Status.OVERRUN:
+            return Overrun
         return result
 
     def event_to_string(self, event):
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py b/hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
@@ -25,7 +25,7 @@
 import attr
 
 from hypothesis.internal.compat import hbytes, hrange, int_from_bytes, int_to_bytes
-from hypothesis.internal.conjecture.data import ConjectureData, Status
+from hypothesis.internal.conjecture.data import Overrun, Status
 from hypothesis.internal.conjecture.shrinking import Integer, Length, Lexical, Ordering
 from hypothesis.internal.conjecture.shrinking.common import find_integer
 
@@ -297,16 +297,13 @@ def incorporate_new_buffer(self, buffer):
         if self.shrink_target.buffer.startswith(buffer):
             return False
 
-        if not self.__engine.prescreen_buffer(buffer):
-            return False
-
-        assert sort_key(buffer) <= sort_key(self.shrink_target.buffer)
-        data = ConjectureData.for_buffer(buffer)
-        self.__engine.test_function(data)
-        self.__test_function_cache[buffer] = data
-        return self.incorporate_test_data(data)
+        previous = self.shrink_target
+        self.cached_test_function(buffer)
+        return previous is not self.shrink_target
 
     def incorporate_test_data(self, data):
+        if data is Overrun:
+            return
         self.__test_function_cache[data.buffer] = data
         if self.__predicate(data) and sort_key(data.buffer) < sort_key(
             self.shrink_target.buffer
@@ -1047,14 +1044,7 @@ def zero_examples(self):
                 self.buffer[:u] + hbytes(v - u) + self.buffer[v:]
             )
 
-            # FIXME: IOU one attempt to debug this - DRMacIver
-            # This is a mysterious problem that should be impossible to trigger
-            # but isn't. I don't know what's going on, and it defeated my
-            # my attempts to reproduce or debug it. I'd *guess* it's related to
-            # nondeterminism in the test function. That should be impossible in
-            # the cases where I'm seeing it, but I haven't been able to put
-            # together a reliable reproduction of it.
-            if ex.index >= len(attempt.examples):  # pragma: no cover
+            if attempt is Overrun:
                 continue
 
             in_replacement = attempt.examples[ex.index]
diff --git a/hypothesis-python/tests/cover/test_conjecture_engine.py b/hypothesis-python/tests/cover/test_conjecture_engine.py
@@ -30,7 +30,12 @@
 from hypothesis.database import ExampleDatabase, InMemoryExampleDatabase
 from hypothesis.errors import FailedHealthCheck
 from hypothesis.internal.compat import hbytes, hrange, int_from_bytes
-from hypothesis.internal.conjecture.data import MAX_DEPTH, ConjectureData, Status
+from hypothesis.internal.conjecture.data import (
+    MAX_DEPTH,
+    ConjectureData,
+    Overrun,
+    Status,
+)
 from hypothesis.internal.conjecture.engine import (
     ConjectureRunner,
     ExitReason,
@@ -51,17 +56,17 @@
 SOME_LABEL = calc_label_from_name("some label")
 
 
+TEST_SETTINGS = settings(
+    max_examples=5000,
+    buffer_size=1024,
+    database=None,
+    suppress_health_check=HealthCheck.all(),
+)
+
+
 def run_to_buffer(f):
     with deterministic_PRNG():
-        runner = ConjectureRunner(
-            f,
-            settings=settings(
-                max_examples=5000,
-                buffer_size=1024,
-                database=None,
-                suppress_health_check=HealthCheck.all(),
-            ),
-        )
+        runner = ConjectureRunner(f, settings=TEST_SETTINGS)
         runner.run()
         assert runner.interesting_examples
         last_data, = runner.interesting_examples.values()
@@ -722,15 +727,22 @@ def f(data):
 
 
 def test_detects_too_small_block_starts():
+    call_count = [0]
+
     def f(data):
+        assert call_count[0] == 0
+        call_count[0] += 1
         data.draw_bytes(8)
         data.mark_interesting()
 
     runner = ConjectureRunner(f, settings=settings(database=None))
     r = ConjectureData.for_buffer(hbytes(8))
     runner.test_function(r)
     assert r.status == Status.INTERESTING
-    assert not runner.prescreen_buffer(hbytes([255] * 7))
+    assert call_count[0] == 1
+    r2 = runner.cached_test_function(hbytes([255] * 7))
+    assert r2.status == Status.OVERRUN
+    assert call_count[0] == 1
 
 
 def test_shrinks_both_interesting_examples(monkeypatch):
@@ -1909,3 +1921,23 @@ def test_target_selector_will_eventually_reuse_examples():
     for _ in range(2):
         x = selector.select()
         assert x.global_identifier in seen
+
+
+def test_cached_test_function_does_not_reinvoke_on_prefix():
+    call_count = [0]
+
+    def test_function(data):
+        call_count[0] += 1
+        data.draw_bits(8)
+        data.write(hbytes([7]))
+        data.draw_bits(8)
+
+    with deterministic_PRNG():
+        runner = ConjectureRunner(test_function, settings=TEST_SETTINGS)
+
+        data = runner.cached_test_function(hbytes(3))
+        assert data.status == Status.VALID
+        for n in [2, 1, 0]:
+            prefix_data = runner.cached_test_function(hbytes(n))
+            assert prefix_data is Overrun
+        assert call_count[0] == 1
diff --git a/hypothesis-python/tests/nocover/test_conjecture_engine.py b/hypothesis-python/tests/nocover/test_conjecture_engine.py
@@ -21,12 +21,12 @@
 
 import pytest
 
-from hypothesis import HealthCheck, given, settings, strategies as st
+from hypothesis import given, settings, strategies as st
 from hypothesis.database import InMemoryExampleDatabase
 from hypothesis.internal.compat import hbytes, hrange, int_from_bytes
 from hypothesis.internal.conjecture.data import ConjectureData, Status
 from hypothesis.internal.conjecture.engine import ConjectureRunner, RunIsComplete
-from tests.common.utils import no_shrink, non_covering_examples
+from tests.common.utils import non_covering_examples
 from tests.cover.test_conjecture_engine import run_to_buffer, shrink, shrinking_from
 
 
@@ -70,21 +70,6 @@ def f(data):
     assert in_db == seen
 
 
-@given(st.randoms(), st.random_module())
-@settings(
-    phases=no_shrink, deadline=None, suppress_health_check=[HealthCheck.hung_test]
-)
-def test_maliciously_bad_generator(rnd, seed):
-    @run_to_buffer
-    def x(data):
-        for _ in range(rnd.randint(1, 100)):
-            data.draw_bytes(rnd.randint(1, 10))
-        if rnd.randint(0, 1):
-            data.mark_invalid()
-        else:
-            data.mark_interesting()
-
-
 def test_can_discard(monkeypatch):
     n = 8
 
@@ -201,26 +186,6 @@ def f(data):
     assert f == [10, 0, 90]
 
 
-@given(st.integers(0, 255), st.integers(0, 255))
-def test_prescreen_with_masked_byte_agrees_with_results(byte_a, byte_b):
-    def f(data):
-        data.draw_bits(2)
-
-    runner = ConjectureRunner(f)
-
-    data_a = ConjectureData.for_buffer(hbytes([byte_a]))
-    data_b = ConjectureData.for_buffer(hbytes([byte_b]))
-
-    runner.test_function(data_a)
-    prescreen_b = runner.prescreen_buffer(hbytes([byte_b]))
-    # Always test buffer B, to check whether the prescreen was correct.
-    runner.test_function(data_b)
-
-    # If the prescreen passed, then the buffers should be different.
-    # If it failed, then the buffers should be the same.
-    assert prescreen_b == (data_a.buffer != data_b.buffer)
-
-
 @given(st.integers(0, 255), st.integers(0, 255))
 def test_cached_with_masked_byte_agrees_with_results(byte_a, byte_b):
     def f(data):