Skip to content

Commit 2ce4344

Browse files
authored
Merge pull request #4221 from tybug/atheris-tcs
Add and use `BytestringProvider` in `fuzz_one_input`
2 parents 50d2707 + 703ae81 commit 2ce4344

File tree

7 files changed

+291
-8
lines changed

7 files changed

+291
-8
lines changed

hypothesis-python/RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
RELEASE_TYPE: patch
2+
3+
:ref:`fuzz_one_input <fuzz_one_input>` is now implemented using an :ref:`alternative backend <alternative-backends>`. This brings the interpretation of the fuzzer-provided bytestring closer to the fuzzer mutations, allowing the mutations to work more reliably. We hope to use this backend functionality to improve fuzzing integration (see e.g. https://github.com/google/atheris/issues/20) in the future!

hypothesis-python/src/hypothesis/core.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
ensure_free_stackframes,
8888
gc_cumulative_time,
8989
)
90+
from hypothesis.internal.conjecture.providers import BytestringProvider
9091
from hypothesis.internal.conjecture.shrinker import sort_key_ir
9192
from hypothesis.internal.entropy import deterministic_PRNG
9293
from hypothesis.internal.escalation import (
@@ -1866,7 +1867,13 @@ def fuzz_one_input(
18661867
if isinstance(buffer, io.IOBase):
18671868
buffer = buffer.read(BUFFER_SIZE)
18681869
assert isinstance(buffer, (bytes, bytearray, memoryview))
1869-
data = ConjectureData.for_buffer(buffer)
1870+
data = ConjectureData(
1871+
max_length=BUFFER_SIZE,
1872+
prefix=b"",
1873+
random=None,
1874+
provider=BytestringProvider,
1875+
provider_kw={"bytestring": buffer},
1876+
)
18701877
try:
18711878
state.execute_once(data)
18721879
except (StopTest, UnsatisfiedAssumption):
@@ -1880,7 +1887,8 @@ def fuzz_one_input(
18801887
settings.database.save(database_key, ir_to_bytes(data.choices))
18811888
minimal_failures[data.interesting_origin] = data.ir_nodes
18821889
raise
1883-
return bytes(data.buffer)
1890+
assert isinstance(data.provider, BytestringProvider)
1891+
return bytes(data.provider.drawn)
18841892

18851893
fuzz_one_input.__doc__ = HypothesisHandle.fuzz_one_input.__doc__
18861894
return fuzz_one_input

hypothesis-python/src/hypothesis/internal/conjecture/data.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -1606,11 +1606,20 @@ def __init__(
16061606
provider: Union[type, PrimitiveProvider] = HypothesisProvider,
16071607
ir_prefix: Optional[Sequence[Union[NodeTemplate, ChoiceT]]] = None,
16081608
max_length_ir: Optional[int] = None,
1609+
provider_kw: Optional[dict[str, Any]] = None,
16091610
) -> None:
16101611
from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR
16111612

16121613
if observer is None:
16131614
observer = DataObserver()
1615+
if provider_kw is None:
1616+
provider_kw = {}
1617+
elif not isinstance(provider, type):
1618+
raise InvalidArgument(
1619+
f"Expected {provider=} to be a class since {provider_kw=} was "
1620+
"passed, but got an instance instead."
1621+
)
1622+
16141623
assert isinstance(observer, DataObserver)
16151624
self._bytes_drawn = 0
16161625
self.observer = observer
@@ -1621,9 +1630,6 @@ def __init__(
16211630
self.__prefix = bytes(prefix)
16221631
self.__random = random
16231632

1624-
if ir_prefix is None:
1625-
assert random is not None or max_length <= len(prefix)
1626-
16271633
self.buffer: "Union[bytes, bytearray]" = bytearray()
16281634
self.index = 0
16291635
self.length_ir = 0
@@ -1644,9 +1650,11 @@ def __init__(
16441650
self.has_discards = False
16451651

16461652
self.provider: PrimitiveProvider = (
1647-
provider(self) if isinstance(provider, type) else provider
1653+
provider(self, **provider_kw) if isinstance(provider, type) else provider
16481654
)
16491655
assert isinstance(self.provider, PrimitiveProvider)
1656+
if ir_prefix is None and isinstance(self.provider, HypothesisProvider):
1657+
assert random is not None or max_length <= len(prefix)
16501658

16511659
self.__result: "Optional[ConjectureResult]" = None
16521660

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# This file is part of Hypothesis, which may be found at
2+
# https://github.com/HypothesisWorks/hypothesis/
3+
#
4+
# Copyright the Hypothesis Authors.
5+
# Individual contributors are listed in AUTHORS.rst and the git log.
6+
#
7+
# This Source Code Form is subject to the terms of the Mozilla Public License,
8+
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9+
# obtain one at https://mozilla.org/MPL/2.0/.
10+
11+
import math
12+
from typing import Optional
13+
14+
from hypothesis.internal.compat import int_from_bytes
15+
from hypothesis.internal.conjecture.data import (
16+
BYTE_MASKS,
17+
COLLECTION_DEFAULT_MAX_SIZE,
18+
ConjectureData,
19+
PrimitiveProvider,
20+
bits_to_bytes,
21+
)
22+
from hypothesis.internal.conjecture.floats import lex_to_float
23+
from hypothesis.internal.conjecture.utils import many
24+
from hypothesis.internal.floats import make_float_clamper
25+
from hypothesis.internal.intervalsets import IntervalSet
26+
27+
28+
class BytestringProvider(PrimitiveProvider):
    """Primitive provider that reads every choice from a fixed bytestring.

    The provider consumes ``bytestring`` front to back. Every chunk that is
    actually consumed is appended to ``self.drawn``, so callers can recover
    the exact prefix of the input that was used.
    """

    lifetime = "test_case"

    def __init__(
        self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes
    ):
        super().__init__(conjecturedata)
        # The raw input to interpret, the read cursor into it, and the bytes
        # consumed so far (after masking).
        self.bytestring = bytestring
        self.index = 0
        self.drawn = bytearray()

    def _draw_bits(self, n):
        """Consume enough bytes to hold ``n`` bits and return them as an int.

        If the bytestring does not contain enough remaining bytes, the
        ConjectureData is marked as overrun (presumably aborting the test
        case by raising StopTest -- confirm against ``mark_overrun``).
        """
        if n == 0:  # pragma: no cover
            return 0
        n_bytes = bits_to_bytes(n)
        if self.index + n_bytes > len(self.bytestring):
            self._cd.mark_overrun()
        buf = bytearray(self.bytestring[self.index : self.index + n_bytes])
        self.index += n_bytes

        # Mask the leading byte so the result fits the requested bit width
        # (assumes BYTE_MASKS[k] keeps the low k bits, with index 0 keeping
        # the whole byte -- TODO confirm against its definition).
        buf[0] &= BYTE_MASKS[n % 8]
        buf = bytes(buf)
        self.drawn += buf
        return int_from_bytes(buf)

    def draw_boolean(
        self,
        p: float = 0.5,
        *,
        forced: Optional[bool] = None,
        fake_forced: bool = False,
    ) -> bool:
        """Draw a boolean that is True with probability roughly ``p``.

        Forced values and the degenerate probabilities (``p <= 0`` and
        ``p >= 1``) are answered without consuming any input.
        """
        if forced is not None:
            return forced

        if p <= 0:
            return False
        if p >= 1:
            return True

        # always use one byte for booleans to maintain constant draw size.
        # If a probability requires more than 8 bits to represent precisely,
        # the result will be slightly biased, but not badly.
        bits = 8
        size = 2**bits
        # Always leave at least one value that can be true, even for very
        # small p, and at least one value that can be false, even for p very
        # close to 1. Without the upper clamp, floor(size * (1 - p)) reaches
        # `size` for tiny p and `n >= falsey` could never hold.
        falsey = min(size - 1, max(1, math.floor(size * (1 - p))))
        n = self._draw_bits(bits)
        return n >= falsey

    def draw_integer(
        self,
        min_value: Optional[int] = None,
        max_value: Optional[int] = None,
        *,
        weights: Optional[dict[int, float]] = None,
        shrink_towards: int = 0,
        forced: Optional[int] = None,
        fake_forced: bool = False,
    ) -> int:
        """Draw an integer in ``[min_value, max_value]`` by rejection sampling.

        Missing bounds are replaced by fixed-width defaults: 128 bits when
        both are missing, and a span of 2**64 from the given bound when only
        one is missing.
        """
        if forced is not None:
            return forced

        assert self._cd is not None

        # we explicitly ignore integer weights for now, as they are likely net
        # negative on fuzzer performance.

        if min_value is None and max_value is None:
            min_value = -(2**127)
            max_value = 2**127 - 1
        elif min_value is None:
            assert max_value is not None
            min_value = max_value - 2**64
        elif max_value is None:
            assert min_value is not None
            max_value = min_value + 2**64

        if min_value == max_value:
            return min_value

        bits = (max_value - min_value).bit_length()
        # NOTE(review): the drawn value is a non-negative integer below
        # 2**bits and is compared to the bounds without being offset by
        # min_value. Ranges lying entirely below zero can therefore never be
        # satisfied and will redraw until the input is exhausted. Consider
        # offsetting the draw once the permitted-value checks allow it.
        value = self._draw_bits(bits)
        while not (min_value <= value <= max_value):
            value = self._draw_bits(bits)
        return value

    def draw_float(
        self,
        *,
        min_value: float = -math.inf,
        max_value: float = math.inf,
        allow_nan: bool = True,
        smallest_nonzero_magnitude: float,
        forced: Optional[float] = None,
        fake_forced: bool = False,
    ) -> float:
        """Draw a float and clamp it into the permitted range.

        65 bits are consumed: the low 64 bits are decoded by
        ``lex_to_float`` and the 65th bit selects the sign.
        """
        if forced is not None:
            return forced

        # 64 bits of magnitude plus one sign bit. Drawing only 64 bits would
        # leave `n >> 64` permanently zero, so no negative float could ever
        # be generated.
        n = self._draw_bits(65)
        sign = -1 if n >> 64 else 1
        f = sign * lex_to_float(n & ((1 << 64) - 1))
        clamper = make_float_clamper(
            min_value,
            max_value,
            smallest_nonzero_magnitude=smallest_nonzero_magnitude,
            allow_nan=allow_nan,
        )
        return clamper(f)

    def _draw_collection(self, min_size, max_size, *, alphabet_size):
        """Draw a list of integers in ``[0, alphabet_size - 1]``.

        The list length is controlled by ``many`` using the size bounds and
        an average size derived from them.
        """
        average_size = min(
            max(min_size * 2, min_size + 5),
            0.5 * (min_size + max_size),
        )
        elements = many(
            self._cd,
            min_size=min_size,
            max_size=max_size,
            average_size=average_size,
            observe=False,
        )
        values = []
        while elements.more():
            values.append(self.draw_integer(0, alphabet_size - 1))
        return values

    def draw_string(
        self,
        intervals: IntervalSet,
        *,
        min_size: int = 0,
        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
        forced: Optional[str] = None,
        fake_forced: bool = False,
    ) -> str:
        """Draw a string whose characters are picked by index from ``intervals``."""
        if forced is not None:
            return forced
        values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals))
        return "".join(chr(intervals[v]) for v in values)

    def draw_bytes(
        self,
        min_size: int = 0,
        max_size: int = COLLECTION_DEFAULT_MAX_SIZE,
        *,
        forced: Optional[bytes] = None,
        fake_forced: bool = False,
    ) -> bytes:
        """Draw a bytestring, one integer in ``[0, 255]`` per element."""
        if forced is not None:
            return forced
        values = self._draw_collection(min_size, max_size, alphabet_size=2**8)
        return bytes(values)

hypothesis-python/tests/conjecture/test_alt_backend.py

+11
Original file line numberDiff line numberDiff line change
@@ -595,3 +595,14 @@ def test_function(x):
595595
with pytest.raises(AssertionError) as ctx:
596596
test_function()
597597
assert (msg in ctx.value.__notes__) == (provider is UnsoundVerifierProvider)
598+
599+
600+
def test_invalid_provider_kw():
    """Passing provider_kw together with a provider *instance* is rejected."""
    expected_message = "got an instance instead"
    with pytest.raises(InvalidArgument, match=expected_message):
        # provider_kw only makes sense when ConjectureData instantiates the
        # provider class itself; here an already-built instance is supplied.
        ConjectureData(
            **{
                "max_length": 0,
                "prefix": b"",
                "random": None,
                "provider": TrivialProvider(None),
                "provider_kw": {"one": "two"},
            }
        )
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# This file is part of Hypothesis, which may be found at
2+
# https://github.com/HypothesisWorks/hypothesis/
3+
#
4+
# Copyright the Hypothesis Authors.
5+
# Individual contributors are listed in AUTHORS.rst and the git log.
6+
#
7+
# This Source Code Form is subject to the terms of the Mozilla Public License,
8+
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
9+
# obtain one at https://mozilla.org/MPL/2.0/.
10+
11+
from hypothesis import example, given, strategies as st
12+
from hypothesis.errors import StopTest
13+
from hypothesis.internal.conjecture.choice import (
14+
choice_equal,
15+
choice_from_index,
16+
choice_permitted,
17+
)
18+
from hypothesis.internal.conjecture.data import ConjectureData
19+
from hypothesis.internal.conjecture.engine import BUFFER_SIZE
20+
from hypothesis.internal.conjecture.providers import BytestringProvider
21+
from hypothesis.internal.intervalsets import IntervalSet
22+
23+
from tests.conjecture.common import float_kw, integer_kw, ir_types_and_kwargs, string_kw
24+
25+
26+
@example(b"\x00" * 100, [("integer", integer_kw())])
@example(b"\x00" * 100, [("integer", integer_kw(0, 2))])
@example(b"\x00" * 100, [("integer", integer_kw(0, 0))])
@example(b"\x00" * 100, [("integer", integer_kw(min_value=0))])
@example(b"\x00" * 100, [("integer", integer_kw(max_value=2))])
@example(b"\x00" * 100, [("integer", integer_kw(0, 2, weights={0: 0.1}))])
@example(b"\x00" * 100, [("boolean", {"p": 1.0})])
@example(b"\x00" * 100, [("boolean", {"p": 0.0})])
@example(b"\x00" * 100, [("boolean", {"p": 1e-99})])
@example(b"\x00" * 100, [("string", string_kw(IntervalSet.from_string("a")))])
@example(b"\x00" * 100, [("float", float_kw())])
@example(b"\x00" * 100, [("bytes", {"min_size": 0, "max_size": 10})])
@given(st.binary(min_size=200), st.lists(ir_types_and_kwargs()))
def test_provider_contract_bytestring(bytestring, ir_type_and_kwargs):
    """Check BytestringProvider draws permitted values and honours ``forced``.

    For each requested (type, kwargs) pair, the drawn value must be permitted
    by the kwargs, and a follow-up draw with ``forced`` set must return
    exactly the forced value. The test stops early if the bytestring runs out.
    """
    data = ConjectureData(
        BUFFER_SIZE,
        prefix=b"",
        random=None,
        observer=None,
        provider=BytestringProvider,
        provider_kw={"bytestring": bytestring},
    )

    for ir_type, kwargs in ir_type_and_kwargs:
        draw = getattr(data, f"draw_{ir_type}")
        try:
            value = draw(**kwargs)
        except StopTest:
            # The bytestring was exhausted; nothing further can be checked.
            return

        # ir_value_permitted is currently restricted to what *could* be
        # generated by the buffer. Once we're fully on the TCS, this
        # restriction can be dropped. Until then, the BytestringProvider can
        # theoretically generate values that aren't forcable to a buffer, but
        # this requires an enormous shrink_towards value, and is such an edge
        # case that we bank on nobody hitting it before we're off the
        # bytestring.
        if ir_type == "integer":
            shrink_towards = kwargs["shrink_towards"]
            integer_edge_case = (
                shrink_towards is not None and shrink_towards.bit_length() > 100
            )
        else:
            integer_edge_case = False
        assert choice_permitted(value, kwargs) or integer_edge_case

        # A forced draw must hand back exactly the forced choice.
        kwargs["forced"] = choice_from_index(0, ir_type, kwargs)
        assert choice_equal(kwargs["forced"], draw(**kwargs))

hypothesis-python/tests/cover/test_fuzz_one_input.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_fuzz_one_input(buffer_type):
3838
@settings(database=db, phases=[Phase.reuse, Phase.shrink])
3939
def test(s):
4040
seen.append(s)
41-
assert "\0" not in s, repr(s)
41+
assert len(s) < 5, repr(s)
4242

4343
# Before running fuzz_one_input, there's nothing in `db`, and so the test passes
4444
# (because example generation is disabled by the custom settings)
@@ -67,7 +67,7 @@ def test(s):
6767
# reproduce it, *and shrink to a minimal example*.
6868
with pytest.raises(AssertionError):
6969
test()
70-
assert seen[-1] == "\0"
70+
assert seen[-1] == "0" * 5
7171

7272

7373
def test_can_fuzz_with_database_eq_None():

0 commit comments

Comments
 (0)