Skip to content

Commit 29fb9eb

Browse files
authored
Merge pull request #1745 from HypothesisWorks/DRMacIver/cache-overflows
Detect potential overflows in cached_test_function
2 parents 17b85cc + 44d9de0 commit 29fb9eb

File tree

6 files changed

+103
-135
lines changed

6 files changed

+103
-135
lines changed

hypothesis-python/RELEASE.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
RELEASE_TYPE: patch
2+
3+
This release improves the internal logic that determines whether a test case in Hypothesis's internal representation could lead to a valid test case.
4+
In some circumstances this can lead to a significant speed-up during shrinking.
5+
It may have a minor negative impact on the quality of the final result, because some shrink passes now have access to less information about test cases in certain circumstances, but this should rarely matter.

hypothesis-python/src/hypothesis/internal/conjecture/data.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,15 @@ def trivial(self):
138138
return self.forced or self.all_zero
139139

140140

141+
class _Overrun(object):
142+
status = Status.OVERRUN
143+
144+
def __repr__(self):
145+
return "Overrun"
146+
147+
148+
Overrun = _Overrun()
149+
141150
global_test_counter = 0
142151

143152

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

Lines changed: 37 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from hypothesis.internal.conjecture.data import (
3939
MAX_DEPTH,
4040
ConjectureData,
41+
Overrun,
4142
Status,
4243
StopTest,
4344
)
@@ -811,16 +812,17 @@ def generate_new_examples(self):
811812
HealthCheck.large_base_example,
812813
)
813814

814-
# If the language starts with writes of length >= cap then there is
815-
# only one string in it: Everything after cap is forced to be zero (or
816-
# to be whatever value is written there). That means that once we've
817-
# tried the zero value, there's nothing left for us to do, so we
818-
# exit early here.
819-
for i in hrange(self.cap):
820-
if i not in zero_data.forced_indices:
821-
break
822-
else:
823-
self.exit_with(ExitReason.finished)
815+
if zero_data is not Overrun:
816+
# If the language starts with writes of length >= cap then there is
817+
# only one string in it: Everything after cap is forced to be zero (or
818+
# to be whatever value is written there). That means that once we've
819+
# tried the zero value, there's nothing left for us to do, so we
820+
# exit early here.
821+
for i in hrange(self.cap):
822+
if i not in zero_data.forced_indices:
823+
break
824+
else:
825+
self.exit_with(ExitReason.finished)
824826

825827
self.health_check_state = HealthCheckState()
826828

@@ -1000,70 +1002,18 @@ def shrink(self, example, predicate):
10001002
def new_shrinker(self, example, predicate):
10011003
return Shrinker(self, example, predicate)
10021004

1003-
def prescreen_buffer(self, buffer):
1004-
"""Attempt to rule out buffer as a possible interesting candidate.
1005-
1006-
Returns False if we know for sure that running this buffer will not
1007-
produce an interesting result. Returns True if it might (because it
1008-
explores territory we have not previously tried).
1009-
1010-
This is purely an optimisation to try to reduce the number of tests we
1011-
run. "return True" would be a valid but inefficient implementation.
1012-
"""
1013-
1014-
# Traverse the tree, to see if we have already tried this buffer
1015-
# (or a prefix of it).
1016-
node_index = 0
1017-
n = len(buffer)
1018-
for k, b in enumerate(buffer):
1019-
if node_index in self.dead:
1020-
# This buffer (or a prefix of it) has already been tested,
1021-
# or has already had its descendants fully explored.
1022-
# Testing it again would not be helpful.
1023-
return False
1024-
try:
1025-
# The block size at that point provides a lower bound on how
1026-
# many more bytes are required. If the buffer does not have
1027-
# enough bytes to fulfill that block size then we can rule out
1028-
# this buffer.
1029-
if k + self.block_sizes[node_index] > n:
1030-
return False
1031-
except KeyError:
1032-
pass
1033-
1034-
# If there's a forced value or a mask at this position, then
1035-
# pretend that the buffer already contains a matching value,
1036-
# because the test function is going to do the same.
1037-
try:
1038-
b = self.forced[node_index]
1039-
except KeyError:
1040-
pass
1041-
try:
1042-
b = b & self.masks[node_index]
1043-
except KeyError:
1044-
pass
1045-
1046-
try:
1047-
node_index = self.tree[node_index][b]
1048-
except KeyError:
1049-
# The buffer wasn't in the tree, which means we haven't tried
1050-
# it. That makes it a possible candidate.
1051-
return True
1052-
else:
1053-
# We ran out of buffer before reaching a leaf or a missing node.
1054-
# That means the test function is going to draw beyond the end
1055-
# of this buffer, which makes it a bad candidate.
1056-
return False
1057-
10581005
def cached_test_function(self, buffer):
10591006
"""Checks the tree to see if we've tested this buffer, and returns the
10601007
previous result if we have.
10611008
10621009
Otherwise we call through to ``test_function``, and return a
10631010
fresh result.
10641011
"""
1012+
rewritten = bytearray()
1013+
would_overrun = False
1014+
10651015
node_index = 0
1066-
for c in buffer:
1016+
for i, c in enumerate(buffer):
10671017
# If there's a forced value or a mask at this position, then
10681018
# pretend that the buffer already contains a matching value,
10691019
# because the test function is going to do the same.
@@ -1076,6 +1026,18 @@ def cached_test_function(self, buffer):
10761026
except KeyError:
10771027
pass
10781028

1029+
try:
1030+
# If we know how many bytes are read at this point and
1031+
# there aren't enough, then it doesn't actually matter
1032+
# what the values are, we're definitely going to overrun.
1033+
if i + self.block_sizes[node_index] > len(buffer):
1034+
would_overrun = True
1035+
break
1036+
except KeyError:
1037+
pass
1038+
1039+
rewritten.append(c)
1040+
10791041
try:
10801042
node_index = self.tree[node_index][c]
10811043
except KeyError:
@@ -1087,18 +1049,23 @@ def cached_test_function(self, buffer):
10871049
if isinstance(node, ConjectureData):
10881050
# This buffer (or a prefix of it) has already been tested.
10891051
# Return the stored result instead of trying it again.
1052+
assert node.status != Status.OVERRUN
10901053
return node
10911054
else:
10921055
# Falling off the end of this loop means that we're about to test
1093-
# a prefix of a previously-tested byte stream. The test is going
1094-
# to draw beyond the end of the buffer, and fail due to overrun.
1095-
# Currently there is no special handling for this case.
1096-
pass
1056+
# a prefix of a previously-tested byte stream, so the test would
1057+
# overrun.
1058+
would_overrun = True
1059+
1060+
if would_overrun:
1061+
return Overrun
10971062

10981063
# We didn't find a match in the tree, so we need to run the test
10991064
# function normally.
11001065
result = ConjectureData.for_buffer(buffer)
11011066
self.test_function(result)
1067+
if result.status == Status.OVERRUN:
1068+
return Overrun
11021069
return result
11031070

11041071
def event_to_string(self, event):

hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import attr
2626

2727
from hypothesis.internal.compat import hbytes, hrange, int_from_bytes, int_to_bytes
28-
from hypothesis.internal.conjecture.data import ConjectureData, Status
28+
from hypothesis.internal.conjecture.data import Overrun, Status
2929
from hypothesis.internal.conjecture.shrinking import Integer, Length, Lexical, Ordering
3030
from hypothesis.internal.conjecture.shrinking.common import find_integer
3131

@@ -297,16 +297,13 @@ def incorporate_new_buffer(self, buffer):
297297
if self.shrink_target.buffer.startswith(buffer):
298298
return False
299299

300-
if not self.__engine.prescreen_buffer(buffer):
301-
return False
302-
303-
assert sort_key(buffer) <= sort_key(self.shrink_target.buffer)
304-
data = ConjectureData.for_buffer(buffer)
305-
self.__engine.test_function(data)
306-
self.__test_function_cache[buffer] = data
307-
return self.incorporate_test_data(data)
300+
previous = self.shrink_target
301+
self.cached_test_function(buffer)
302+
return previous is not self.shrink_target
308303

309304
def incorporate_test_data(self, data):
305+
if data is Overrun:
306+
return
310307
self.__test_function_cache[data.buffer] = data
311308
if self.__predicate(data) and sort_key(data.buffer) < sort_key(
312309
self.shrink_target.buffer
@@ -1047,14 +1044,7 @@ def zero_examples(self):
10471044
self.buffer[:u] + hbytes(v - u) + self.buffer[v:]
10481045
)
10491046

1050-
# FIXME: IOU one attempt to debug this - DRMacIver
1051-
# This is a mysterious problem that should be impossible to trigger
1052-
# but isn't. I don't know what's going on, and it defeated my
1053-
# my attempts to reproduce or debug it. I'd *guess* it's related to
1054-
# nondeterminism in the test function. That should be impossible in
1055-
# the cases where I'm seeing it, but I haven't been able to put
1056-
# together a reliable reproduction of it.
1057-
if ex.index >= len(attempt.examples): # pragma: no cover
1047+
if attempt is Overrun:
10581048
continue
10591049

10601050
in_replacement = attempt.examples[ex.index]

hypothesis-python/tests/cover/test_conjecture_engine.py

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,12 @@
3030
from hypothesis.database import ExampleDatabase, InMemoryExampleDatabase
3131
from hypothesis.errors import FailedHealthCheck
3232
from hypothesis.internal.compat import hbytes, hrange, int_from_bytes
33-
from hypothesis.internal.conjecture.data import MAX_DEPTH, ConjectureData, Status
33+
from hypothesis.internal.conjecture.data import (
34+
MAX_DEPTH,
35+
ConjectureData,
36+
Overrun,
37+
Status,
38+
)
3439
from hypothesis.internal.conjecture.engine import (
3540
ConjectureRunner,
3641
ExitReason,
@@ -51,17 +56,17 @@
5156
SOME_LABEL = calc_label_from_name("some label")
5257

5358

59+
TEST_SETTINGS = settings(
60+
max_examples=5000,
61+
buffer_size=1024,
62+
database=None,
63+
suppress_health_check=HealthCheck.all(),
64+
)
65+
66+
5467
def run_to_buffer(f):
5568
with deterministic_PRNG():
56-
runner = ConjectureRunner(
57-
f,
58-
settings=settings(
59-
max_examples=5000,
60-
buffer_size=1024,
61-
database=None,
62-
suppress_health_check=HealthCheck.all(),
63-
),
64-
)
69+
runner = ConjectureRunner(f, settings=TEST_SETTINGS)
6570
runner.run()
6671
assert runner.interesting_examples
6772
last_data, = runner.interesting_examples.values()
@@ -722,15 +727,22 @@ def f(data):
722727

723728

724729
def test_detects_too_small_block_starts():
730+
call_count = [0]
731+
725732
def f(data):
733+
assert call_count[0] == 0
734+
call_count[0] += 1
726735
data.draw_bytes(8)
727736
data.mark_interesting()
728737

729738
runner = ConjectureRunner(f, settings=settings(database=None))
730739
r = ConjectureData.for_buffer(hbytes(8))
731740
runner.test_function(r)
732741
assert r.status == Status.INTERESTING
733-
assert not runner.prescreen_buffer(hbytes([255] * 7))
742+
assert call_count[0] == 1
743+
r2 = runner.cached_test_function(hbytes([255] * 7))
744+
assert r2.status == Status.OVERRUN
745+
assert call_count[0] == 1
734746

735747

736748
def test_shrinks_both_interesting_examples(monkeypatch):
@@ -1909,3 +1921,23 @@ def test_target_selector_will_eventually_reuse_examples():
19091921
for _ in range(2):
19101922
x = selector.select()
19111923
assert x.global_identifier in seen
1924+
1925+
1926+
def test_cached_test_function_does_not_reinvoke_on_prefix():
1927+
call_count = [0]
1928+
1929+
def test_function(data):
1930+
call_count[0] += 1
1931+
data.draw_bits(8)
1932+
data.write(hbytes([7]))
1933+
data.draw_bits(8)
1934+
1935+
with deterministic_PRNG():
1936+
runner = ConjectureRunner(test_function, settings=TEST_SETTINGS)
1937+
1938+
data = runner.cached_test_function(hbytes(3))
1939+
assert data.status == Status.VALID
1940+
for n in [2, 1, 0]:
1941+
prefix_data = runner.cached_test_function(hbytes(n))
1942+
assert prefix_data is Overrun
1943+
assert call_count[0] == 1

hypothesis-python/tests/nocover/test_conjecture_engine.py

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@
2121

2222
import pytest
2323

24-
from hypothesis import HealthCheck, given, settings, strategies as st
24+
from hypothesis import given, settings, strategies as st
2525
from hypothesis.database import InMemoryExampleDatabase
2626
from hypothesis.internal.compat import hbytes, hrange, int_from_bytes
2727
from hypothesis.internal.conjecture.data import ConjectureData, Status
2828
from hypothesis.internal.conjecture.engine import ConjectureRunner, RunIsComplete
29-
from tests.common.utils import no_shrink, non_covering_examples
29+
from tests.common.utils import non_covering_examples
3030
from tests.cover.test_conjecture_engine import run_to_buffer, shrink, shrinking_from
3131

3232

@@ -70,21 +70,6 @@ def f(data):
7070
assert in_db == seen
7171

7272

73-
@given(st.randoms(), st.random_module())
74-
@settings(
75-
phases=no_shrink, deadline=None, suppress_health_check=[HealthCheck.hung_test]
76-
)
77-
def test_maliciously_bad_generator(rnd, seed):
78-
@run_to_buffer
79-
def x(data):
80-
for _ in range(rnd.randint(1, 100)):
81-
data.draw_bytes(rnd.randint(1, 10))
82-
if rnd.randint(0, 1):
83-
data.mark_invalid()
84-
else:
85-
data.mark_interesting()
86-
87-
8873
def test_can_discard(monkeypatch):
8974
n = 8
9075

@@ -201,26 +186,6 @@ def f(data):
201186
assert f == [10, 0, 90]
202187

203188

204-
@given(st.integers(0, 255), st.integers(0, 255))
205-
def test_prescreen_with_masked_byte_agrees_with_results(byte_a, byte_b):
206-
def f(data):
207-
data.draw_bits(2)
208-
209-
runner = ConjectureRunner(f)
210-
211-
data_a = ConjectureData.for_buffer(hbytes([byte_a]))
212-
data_b = ConjectureData.for_buffer(hbytes([byte_b]))
213-
214-
runner.test_function(data_a)
215-
prescreen_b = runner.prescreen_buffer(hbytes([byte_b]))
216-
# Always test buffer B, to check whether the prescreen was correct.
217-
runner.test_function(data_b)
218-
219-
# If the prescreen passed, then the buffers should be different.
220-
# If it failed, then the buffers should be the same.
221-
assert prescreen_b == (data_a.buffer != data_b.buffer)
222-
223-
224189
@given(st.integers(0, 255), st.integers(0, 255))
225190
def test_cached_with_masked_byte_agrees_with_results(byte_a, byte_b):
226191
def f(data):

0 commit comments

Comments
 (0)