Skip to content

Commit ace043f

Browse files
committed
add variable-size width support to the bytes ir node
1 parent eaafdfc commit ace043f

File tree

9 files changed

+202
-83
lines changed

9 files changed

+202
-83
lines changed

hypothesis-python/src/hypothesis/internal/conjecture/data.py

+60-20
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ class StringKWargs(TypedDict):
115115

116116

117117
class BytesKWargs(TypedDict):
118-
size: int
118+
min_size: int
119+
max_size: Optional[int]
119120

120121

121122
class BooleanKWargs(TypedDict):
@@ -206,7 +207,7 @@ def structural_coverage(label: int) -> StructuralCoverageTag:
206207
FLOAT_INIT_LOGIC_CACHE = LRUCache(4096)
207208
POOLED_KWARGS_CACHE = LRUCache(4096)
208209

209-
DRAW_STRING_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
210+
COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
210211

211212

212213
class Example:
@@ -1036,7 +1037,7 @@ def trivial(self):
10361037
return self.value == (minimal_char * self.kwargs["min_size"])
10371038
if self.ir_type == "bytes":
10381039
# smallest size and all-zero value.
1039-
return len(self.value) == self.kwargs["size"] and not any(self.value)
1040+
return len(self.value) == self.kwargs["min_size"] and not any(self.value)
10401041

10411042
raise NotImplementedError(f"unhandled ir_type {self.ir_type}")
10421043

@@ -1095,7 +1096,11 @@ def ir_value_permitted(value, ir_type, kwargs):
10951096
return False
10961097
return all(ord(c) in kwargs["intervals"] for c in value)
10971098
elif ir_type == "bytes":
1098-
return len(value) == kwargs["size"]
1099+
if len(value) < kwargs["min_size"]:
1100+
return False
1101+
if kwargs["max_size"] is not None and len(value) > kwargs["max_size"]:
1102+
return False
1103+
return True
10991104
elif ir_type == "boolean":
11001105
if kwargs["p"] <= 2 ** (-64):
11011106
return value is False
@@ -1322,7 +1327,12 @@ def draw_string(
13221327

13231328
@abc.abstractmethod
13241329
def draw_bytes(
1325-
self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False
1330+
self,
1331+
min_size: int,
1332+
max_size: Optional[int],
1333+
*,
1334+
forced: Optional[bytes] = None,
1335+
fake_forced: bool = False,
13261336
) -> bytes:
13271337
raise NotImplementedError
13281338

@@ -1611,7 +1621,7 @@ def draw_string(
16111621
fake_forced: bool = False,
16121622
) -> str:
16131623
if max_size is None:
1614-
max_size = DRAW_STRING_DEFAULT_MAX_SIZE
1624+
max_size = COLLECTION_DEFAULT_MAX_SIZE
16151625

16161626
assert forced is None or min_size <= len(forced) <= max_size
16171627
assert self._cd is not None
@@ -1663,17 +1673,44 @@ def draw_string(
16631673
return "".join(chars)
16641674

16651675
def draw_bytes(
1666-
self, size: int, *, forced: Optional[bytes] = None, fake_forced: bool = False
1676+
self,
1677+
min_size: int,
1678+
max_size: Optional[int],
1679+
*,
1680+
forced: Optional[bytes] = None,
1681+
fake_forced: bool = False,
16671682
) -> bytes:
1668-
forced_i = None
1669-
if forced is not None:
1670-
forced_i = int_from_bytes(forced)
1671-
size = len(forced)
1683+
if max_size is None:
1684+
max_size = COLLECTION_DEFAULT_MAX_SIZE
16721685

1673-
assert self._cd is not None
1674-
return self._cd.draw_bits(
1675-
8 * size, forced=forced_i, fake_forced=fake_forced
1676-
).to_bytes(size, "big")
1686+
assert forced is None or min_size <= len(forced) <= max_size
1687+
1688+
buf = bytearray()
1689+
average_size = min(
1690+
max(min_size * 2, min_size + 5),
1691+
0.5 * (min_size + max_size),
1692+
)
1693+
elements = many(
1694+
self._cd,
1695+
min_size=min_size,
1696+
max_size=max_size,
1697+
average_size=average_size,
1698+
forced=None if forced is None else len(forced),
1699+
fake_forced=fake_forced,
1700+
observe=False,
1701+
)
1702+
while elements.more():
1703+
forced_i: Optional[int] = None
1704+
if forced is not None:
1705+
# implicit conversion from bytes to int by indexing here
1706+
forced_i = forced[elements.count - 1]
1707+
1708+
assert self._cd is not None
1709+
buf += self._cd.draw_bits(
1710+
8, forced=forced_i, fake_forced=fake_forced
1711+
).to_bytes(1, "big")
1712+
1713+
return bytes(buf)
16771714

16781715
def _draw_float(
16791716
self,
@@ -2222,6 +2259,7 @@ def draw_string(
22222259
observe: bool = True,
22232260
) -> str:
22242261
assert forced is None or min_size <= len(forced)
2262+
assert min_size >= 0
22252263

22262264
kwargs: StringKWargs = self._pooled_kwargs(
22272265
"string",
@@ -2255,17 +2293,19 @@ def draw_string(
22552293

22562294
def draw_bytes(
22572295
self,
2258-
# TODO move to min_size and max_size here.
2259-
size: int,
2296+
min_size: int,
2297+
max_size: Optional[int],
22602298
*,
22612299
forced: Optional[bytes] = None,
22622300
fake_forced: bool = False,
22632301
observe: bool = True,
22642302
) -> bytes:
2265-
assert forced is None or len(forced) == size
2266-
assert size >= 0
2303+
assert forced is None or min_size <= len(forced)
2304+
assert min_size >= 0
22672305

2268-
kwargs: BytesKWargs = self._pooled_kwargs("bytes", {"size": size})
2306+
kwargs: BytesKWargs = self._pooled_kwargs(
2307+
"bytes", {"min_size": min_size, "max_size": max_size}
2308+
)
22692309

22702310
if self.ir_tree_nodes is not None and observe:
22712311
node_value = self._pop_ir_tree_node("bytes", kwargs, forced=forced)

hypothesis-python/src/hypothesis/internal/conjecture/datatree.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def _repr_pretty_(self, p, cycle):
147147

148148

149149
def compute_max_children(ir_type, kwargs):
150-
from hypothesis.internal.conjecture.data import DRAW_STRING_DEFAULT_MAX_SIZE
150+
from hypothesis.internal.conjecture.data import COLLECTION_DEFAULT_MAX_SIZE
151151

152152
if ir_type == "integer":
153153
min_value = kwargs["min_value"]
@@ -178,14 +178,26 @@ def compute_max_children(ir_type, kwargs):
178178
return 1
179179
return 2
180180
elif ir_type == "bytes":
181-
return 2 ** (8 * kwargs["size"])
181+
min_size = kwargs["min_size"]
182+
max_size = kwargs["max_size"]
183+
184+
if max_size is None:
185+
max_size = COLLECTION_DEFAULT_MAX_SIZE
186+
187+
definitely_too_large = max_size * math.log(2**8) > math.log(
188+
MAX_CHILDREN_EFFECTIVELY_INFINITE
189+
)
190+
if definitely_too_large:
191+
return MAX_CHILDREN_EFFECTIVELY_INFINITE
192+
193+
return sum(2 ** (8 * k) for k in range(min_size, max_size + 1))
182194
elif ir_type == "string":
183195
min_size = kwargs["min_size"]
184196
max_size = kwargs["max_size"]
185197
intervals = kwargs["intervals"]
186198

187199
if max_size is None:
188-
max_size = DRAW_STRING_DEFAULT_MAX_SIZE
200+
max_size = COLLECTION_DEFAULT_MAX_SIZE
189201

190202
if len(intervals) == 0:
191203
# Special-case the empty alphabet to avoid an error in math.log(0).
@@ -306,8 +318,13 @@ def all_children(ir_type, kwargs):
306318
else:
307319
yield from [False, True]
308320
if ir_type == "bytes":
309-
size = kwargs["size"]
310-
yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size)))
321+
min_size = kwargs["min_size"]
322+
max_size = kwargs["max_size"]
323+
324+
size = min_size
325+
while size <= max_size:
326+
yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size)))
327+
size += 1
311328
if ir_type == "string":
312329
min_size = kwargs["min_size"]
313330
max_size = kwargs["max_size"]

hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1075,10 +1075,9 @@ def try_shrinking_nodes(self, nodes, n):
10751075
return False # pragma: no cover
10761076

10771077
if node.ir_type in {"string", "bytes"}:
1078-
size_kwarg = "min_size" if node.ir_type == "string" else "size"
10791078
# if the size *increased*, we would have to guess what to pad with
10801079
# in order to try fixing up this attempt. Just give up.
1081-
if node.kwargs[size_kwarg] <= attempt_kwargs[size_kwarg]:
1080+
if node.kwargs["min_size"] <= attempt_kwargs["min_size"]:
10821081
return False
10831082
# the size decreased in our attempt. Try again, but replace with
10841083
# the min_size that we would have gotten, and truncate the value
@@ -1089,7 +1088,7 @@ def try_shrinking_nodes(self, nodes, n):
10891088
initial_attempt[node.index].copy(
10901089
with_kwargs=attempt_kwargs,
10911090
with_value=initial_attempt[node.index].value[
1092-
: attempt_kwargs[size_kwarg]
1091+
: attempt_kwargs["min_size"]
10931092
],
10941093
)
10951094
]

hypothesis-python/src/hypothesis/internal/conjecture/shrinking/bytes.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,19 @@
88
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
99
# obtain one at https://mozilla.org/MPL/2.0/.
1010

11-
from hypothesis.internal.compat import int_from_bytes, int_to_bytes
11+
from hypothesis.internal.conjecture.shrinking.collection import Collection
1212
from hypothesis.internal.conjecture.shrinking.integer import Integer
1313

1414

15-
class Bytes(Integer):
15+
class Bytes(Collection):
1616
def __init__(self, initial, predicate, **kwargs):
1717
# shrink by treating the bytes as a list of integers, one per byte, passing
1818
# Integer as the element shrinker. a single-integer view is not enough for
1919
# variable-size bytes, because b'\x00\x02' could shrink to either b'\x00\x01' or b'\x02'.
2020
super().__init__(
21-
int_from_bytes(initial),
22-
lambda n: predicate(int_to_bytes(n, len(initial))),
21+
# implicit conversion from bytes to list of integers here
22+
list(initial),
23+
lambda val: predicate(bytes(val)),
24+
ElementShrinker=Integer,
2325
**kwargs,
2426
)

hypothesis-python/src/hypothesis/strategies/_internal/strings.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import re
1313
import warnings
1414
from functools import lru_cache, partial
15+
from typing import Optional
1516

1617
from hypothesis.errors import HypothesisWarning, InvalidArgument
1718
from hypothesis.internal import charmap
@@ -20,7 +21,6 @@
2021
from hypothesis.internal.reflection import get_pretty_function_description
2122
from hypothesis.strategies._internal.collections import ListStrategy
2223
from hypothesis.strategies._internal.lazy import unwrap_strategies
23-
from hypothesis.strategies._internal.numbers import IntegersStrategy
2424
from hypothesis.strategies._internal.strategies import (
2525
OneOfStrategy,
2626
SampledFromStrategy,
@@ -224,9 +224,13 @@ def _string_filter_rewrite(self, kind, condition):
224224
stacklevel=2,
225225
)
226226

227-
elems = unwrap_strategies(self.element_strategy)
228227
if (
229-
(kind is bytes or isinstance(elems, OneCharStringStrategy))
228+
(
229+
kind is bytes
230+
or isinstance(
231+
unwrap_strategies(self.element_strategy), OneCharStringStrategy
232+
)
233+
)
230234
and isinstance(pattern := getattr(condition, "__self__", None), re.Pattern)
231235
and isinstance(pattern.pattern, kind)
232236
):
@@ -331,15 +335,13 @@ def _identifier_characters():
331335
return id_start, id_continue
332336

333337

334-
class BytesStrategy(ListStrategy):
335-
def __init__(self, min_size, max_size):
336-
super().__init__(IntegersStrategy(0, 255), min_size=min_size, max_size=max_size)
338+
class BytesStrategy(SearchStrategy):
339+
def __init__(self, min_size: int, max_size: Optional[int]):
340+
self.min_size = min_size
341+
self.max_size = max_size if max_size is not None else float("inf")
337342

338343
def do_draw(self, data):
339-
# TODO: refactor the underlying provider to support variable-length bytes
340-
if self.min_size == self.max_size:
341-
return bytes(data.draw_bytes(self.min_size))
342-
return bytes(super().do_draw(data))
344+
return data.draw_bytes(self.min_size, self.max_size)
343345

344346
_nonempty_filters = (
345347
*ListStrategy._nonempty_filters,
@@ -353,4 +355,4 @@ def do_draw(self, data):
353355
def filter(self, condition):
354356
if (new := _string_filter_rewrite(self, bytes, condition)) is not None:
355357
return new
356-
return super().filter(condition)
358+
return ListStrategy.filter(self, condition)

hypothesis-python/tests/conjecture/common.py

+36-29
Original file line numberDiff line numberDiff line change
@@ -170,49 +170,56 @@ def draw_integer_kwargs(
170170

171171

172172
@st.composite
173-
def draw_string_kwargs(draw, *, use_min_size=True, use_max_size=True, use_forced=False):
174-
# TODO also sample empty intervals, ie remove this min_size, once we handle empty
175-
# pseudo-choices in the ir
176-
interval_set = draw(intervals(min_size=1))
177-
forced = (
178-
draw(TextStrategy(OneCharStringStrategy(interval_set))) if use_forced else None
179-
)
180-
173+
def _collection_kwargs(draw, *, forced, use_min_size=True, use_max_size=True):
181174
min_size = 0
182175
max_size = None
176+
# collections are quite expensive in entropy. cap to avoid overruns.
177+
cap = 50
183178

184179
if use_min_size:
185-
# cap to some reasonable min size to avoid overruns.
186-
n = 100
187-
if forced is not None:
188-
n = min(n, len(forced))
189-
190-
min_size = draw(st.integers(0, n))
180+
min_size = draw(
181+
st.integers(0, min(len(forced), cap) if forced is not None else cap)
182+
)
191183

192184
if use_max_size:
193-
n = min_size if forced is None else max(min_size, len(forced))
194-
max_size = draw(st.integers(min_value=n))
185+
max_size = draw(
186+
st.integers(
187+
min_value=min_size if forced is None else max(min_size, len(forced))
188+
)
189+
)
195190
# cap to some reasonable max size to avoid overruns.
196191
max_size = min(max_size, min_size + 100)
197192

198-
return {
199-
"intervals": interval_set,
200-
"min_size": min_size,
201-
"max_size": max_size,
202-
"forced": forced,
203-
}
193+
return {"min_size": min_size, "max_size": max_size}
204194

205195

206196
@st.composite
207-
def draw_bytes_kwargs(draw, *, use_forced=False):
208-
forced = draw(st.binary()) if use_forced else None
209-
# be reasonable with the number of bytes we ask for. We only have BUFFER_SIZE
210-
# to work with before we overrun.
211-
size = (
212-
draw(st.integers(min_value=0, max_value=100)) if forced is None else len(forced)
197+
def draw_string_kwargs(draw, *, use_min_size=True, use_max_size=True, use_forced=False):
198+
# TODO also sample empty intervals, ie remove this min_size, once we handle empty
199+
# pseudo-choices in the ir
200+
interval_set = draw(intervals(min_size=1))
201+
forced = (
202+
draw(TextStrategy(OneCharStringStrategy(interval_set))) if use_forced else None
203+
)
204+
kwargs = draw(
205+
_collection_kwargs(
206+
forced=forced, use_min_size=use_min_size, use_max_size=use_max_size
207+
)
213208
)
214209

215-
return {"size": size, "forced": forced}
210+
return {"intervals": interval_set, "forced": forced, **kwargs}
211+
212+
213+
@st.composite
214+
def draw_bytes_kwargs(draw, *, use_min_size=True, use_max_size=True, use_forced=False):
215+
forced = draw(st.binary()) if use_forced else None
216+
217+
kwargs = draw(
218+
_collection_kwargs(
219+
forced=forced, use_min_size=use_min_size, use_max_size=use_max_size
220+
)
221+
)
222+
return {"forced": forced, **kwargs}
216223

217224

218225
@st.composite

0 commit comments

Comments
 (0)