Skip to content

Commit cca3c71

Browse files
authored
Merge pull request #4241 from tybug/db-choices
Use typed choice sequence in database
2 parents 94055c5 + 714bc5d commit cca3c71

16 files changed

+287
-143
lines changed

.readthedocs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,5 @@ python:
2121
- path: hypothesis-python/
2222
extra_requirements:
2323
- all
24+
sphinx:
25+
configuration: hypothesis-python/docs/conf.py

hypothesis-python/RELEASE.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
RELEASE_TYPE: minor
2+
3+
The :doc:`Hypothesis example database <database>` now uses a new internal format to store examples. This new format is not compatible with the previous format, so entries stored by earlier versions of Hypothesis will not carry over.
4+
5+
The database is best thought of as a cache that may be invalidated at times. Instead of relying on it for correctness, we recommend using :obj:`@example <hypothesis.example>` to specify explicit examples. When sharing a database across environments (such as connecting a :class:`~hypothesis.database.GitHubArtifactDatabase` in CI to your local environment), we recommend using the same version of Hypothesis in each environment where possible, for maximum reproducibility.

hypothesis-python/src/hypothesis/core.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
ensure_free_stackframes,
8888
gc_cumulative_time,
8989
)
90-
from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
90+
from hypothesis.internal.conjecture.shrinker import sort_key_ir
9191
from hypothesis.internal.entropy import deterministic_PRNG
9292
from hypothesis.internal.escalation import (
9393
InterestingOrigin,
@@ -352,9 +352,8 @@ def decode_failure(blob: bytes) -> Sequence[ChoiceT]:
352352
f"Could not decode blob {blob!r}: Invalid start byte {prefix!r}"
353353
)
354354

355-
try:
356-
choices = ir_from_bytes(decoded)
357-
except Exception:
355+
choices = ir_from_bytes(decoded)
356+
if choices is None:
358357
raise InvalidArgument(f"Invalid serialized choice sequence for blob {blob!r}")
359358

360359
return choices
@@ -1873,13 +1872,13 @@ def fuzz_one_input(
18731872
except (StopTest, UnsatisfiedAssumption):
18741873
return None
18751874
except BaseException:
1876-
buffer = bytes(data.buffer)
18771875
known = minimal_failures.get(data.interesting_origin)
18781876
if settings.database is not None and (
1879-
known is None or sort_key(buffer) <= sort_key(known)
1877+
known is None
1878+
or sort_key_ir(data.ir_nodes) <= sort_key_ir(known)
18801879
):
1881-
settings.database.save(database_key, buffer)
1882-
minimal_failures[data.interesting_origin] = buffer
1880+
settings.database.save(database_key, ir_to_bytes(data.choices))
1881+
minimal_failures[data.interesting_origin] = data.ir_nodes
18831882
raise
18841883
return bytes(data.buffer)
18851884

hypothesis-python/src/hypothesis/database.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -768,8 +768,7 @@ def ir_to_bytes(ir: Iterable[ChoiceT], /) -> bytes:
768768
return b"".join(parts)
769769

770770

771-
def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
772-
"""Deserialize a bytestring to a list of IR elements. Inverts ir_to_bytes."""
771+
def _ir_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...]:
773772
# See above for an explanation of the format.
774773
parts: list[ChoiceT] = []
775774
idx = 0
@@ -797,4 +796,19 @@ def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
797796
else:
798797
assert tag == 4
799798
parts.append(chunk.decode(errors="surrogatepass"))
800-
return parts
799+
return tuple(parts)
800+
801+
802+
def ir_from_bytes(buffer: bytes, /) -> Optional[tuple[ChoiceT, ...]]:
803+
"""
804+
Deserialize a bytestring to a tuple of choices. Inverts ir_to_bytes.
805+
806+
Returns None if the given bytestring is not a valid serialization of choice
807+
sequences.
808+
"""
809+
try:
810+
return _ir_from_bytes(buffer)
811+
except Exception:
812+
# deserialization error, eg because our format changed or someone put junk
813+
# data in the db.
814+
return None

hypothesis-python/src/hypothesis/internal/conjecture/choice.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from hypothesis.errors import ChoiceTooLarge
2525
from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
2626
from hypothesis.internal.conjecture.utils import identity
27-
from hypothesis.internal.floats import make_float_clamper, sign_aware_lte
27+
from hypothesis.internal.floats import float_to_int, make_float_clamper, sign_aware_lte
2828
from hypothesis.internal.intervalsets import IntervalSet
2929

3030
T = TypeVar("T")
@@ -67,6 +67,9 @@ class BooleanKWargs(TypedDict):
6767
IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
6868
]
6969
ChoiceNameT: "TypeAlias" = Literal["integer", "string", "boolean", "float", "bytes"]
70+
ChoiceKeyT: "TypeAlias" = Union[
71+
int, str, bytes, tuple[Literal["bool"], bool], tuple[Literal["float"], int]
72+
]
7073

7174

7275
def _size_to_index(size: int, *, alphabet_size: int) -> int:
@@ -454,3 +457,47 @@ def choice_permitted(choice: ChoiceT, kwargs: ChoiceKwargsT) -> bool:
454457
return True
455458
else:
456459
raise NotImplementedError(f"unhandled type {type(choice)} with value {choice}")
460+
461+
462+
def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]:
463+
return tuple(choice_key(choice) for choice in choices)
464+
465+
466+
def choice_key(choice: ChoiceT) -> ChoiceKeyT:
467+
if isinstance(choice, float):
468+
# float_to_int to distinguish -0.0/0.0, signaling/nonsignaling nans, etc,
469+
# and then add a "float" key to avoid colliding with actual integers.
470+
return ("float", float_to_int(choice))
471+
if isinstance(choice, bool):
472+
# avoid choice_key(0) == choice_key(False)
473+
return ("bool", choice)
474+
return choice
475+
476+
477+
def choice_equal(choice1: ChoiceT, choice2: ChoiceT) -> bool:
478+
assert type(choice1) is type(choice2), (choice1, choice2)
479+
return choice_key(choice1) == choice_key(choice2)
480+
481+
482+
def choice_kwargs_equal(
483+
ir_type: ChoiceNameT, kwargs1: ChoiceKwargsT, kwargs2: ChoiceKwargsT
484+
) -> bool:
485+
return choice_kwargs_key(ir_type, kwargs1) == choice_kwargs_key(ir_type, kwargs2)
486+
487+
488+
def choice_kwargs_key(ir_type, kwargs):
489+
if ir_type == "float":
490+
return (
491+
float_to_int(kwargs["min_value"]),
492+
float_to_int(kwargs["max_value"]),
493+
kwargs["allow_nan"],
494+
kwargs["smallest_nonzero_magnitude"],
495+
)
496+
if ir_type == "integer":
497+
return (
498+
kwargs["min_value"],
499+
kwargs["max_value"],
500+
None if kwargs["weights"] is None else tuple(kwargs["weights"]),
501+
kwargs["shrink_towards"],
502+
)
503+
return tuple(kwargs[key] for key in sorted(kwargs))

hypothesis-python/src/hypothesis/internal/conjecture/data.py

Lines changed: 12 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@
4444
FloatKWargs,
4545
IntegerKWargs,
4646
StringKWargs,
47+
choice_equal,
4748
choice_from_index,
49+
choice_key,
50+
choice_kwargs_equal,
51+
choice_kwargs_key,
4852
choice_permitted,
4953
)
5054
from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
@@ -647,15 +651,15 @@ def trivial(self) -> bool:
647651

648652
if self.ir_type != "float":
649653
zero_value = choice_from_index(0, self.ir_type, self.kwargs)
650-
return ir_value_equal(self.value, zero_value)
654+
return choice_equal(self.value, zero_value)
651655
else:
652656
kwargs = cast(FloatKWargs, self.kwargs)
653657
min_value = kwargs["min_value"]
654658
max_value = kwargs["max_value"]
655659
shrink_towards = 0.0
656660

657661
if min_value == -math.inf and max_value == math.inf:
658-
return ir_value_equal(self.value, shrink_towards)
662+
return choice_equal(self.value, shrink_towards)
659663

660664
if (
661665
not math.isinf(min_value)
@@ -666,7 +670,7 @@ def trivial(self) -> bool:
666670
# one closest to shrink_towards
667671
shrink_towards = max(math.ceil(min_value), shrink_towards)
668672
shrink_towards = min(math.floor(max_value), shrink_towards)
669-
return ir_value_equal(self.value, float(shrink_towards))
673+
return choice_equal(self.value, float(shrink_towards))
670674

671675
# the real answer here is "the value in [min_value, max_value] with
672676
# the lowest denominator when represented as a fraction".
@@ -680,17 +684,17 @@ def __eq__(self, other: object) -> bool:
680684

681685
return (
682686
self.ir_type == other.ir_type
683-
and ir_value_equal(self.value, other.value)
684-
and ir_kwargs_equal(self.ir_type, self.kwargs, other.kwargs)
687+
and choice_equal(self.value, other.value)
688+
and choice_kwargs_equal(self.ir_type, self.kwargs, other.kwargs)
685689
and self.was_forced == other.was_forced
686690
)
687691

688692
def __hash__(self) -> int:
689693
return hash(
690694
(
691695
self.ir_type,
692-
ir_value_key(self.value),
693-
ir_kwargs_key(self.ir_type, self.kwargs),
696+
choice_key(self.value),
697+
choice_kwargs_key(self.ir_type, self.kwargs),
694698
self.was_forced,
695699
)
696700
)
@@ -726,39 +730,6 @@ def ir_size(ir: Iterable[Union[IRNode, NodeTemplate, ChoiceT]]) -> int:
726730
return size
727731

728732

729-
def ir_value_key(v):
730-
if type(v) is float:
731-
return float_to_int(v)
732-
return v
733-
734-
735-
def ir_kwargs_key(ir_type, kwargs):
736-
if ir_type == "float":
737-
return (
738-
float_to_int(kwargs["min_value"]),
739-
float_to_int(kwargs["max_value"]),
740-
kwargs["allow_nan"],
741-
kwargs["smallest_nonzero_magnitude"],
742-
)
743-
if ir_type == "integer":
744-
return (
745-
kwargs["min_value"],
746-
kwargs["max_value"],
747-
None if kwargs["weights"] is None else tuple(kwargs["weights"]),
748-
kwargs["shrink_towards"],
749-
)
750-
return tuple(kwargs[key] for key in sorted(kwargs))
751-
752-
753-
def ir_value_equal(v1, v2):
754-
assert type(v1) is type(v2), (v1, v2)
755-
return ir_value_key(v1) == ir_value_key(v2)
756-
757-
758-
def ir_kwargs_equal(ir_type, kwargs1, kwargs2):
759-
return ir_kwargs_key(ir_type, kwargs1) == ir_kwargs_key(ir_type, kwargs2)
760-
761-
762733
@dataclass_transform()
763734
@attr.s(slots=True)
764735
class ConjectureResult:
@@ -1969,7 +1940,7 @@ def _pooled_kwargs(self, ir_type, kwargs):
19691940
if self.provider.avoid_realization:
19701941
return kwargs
19711942

1972-
key = (ir_type, *ir_kwargs_key(ir_type, kwargs))
1943+
key = (ir_type, *choice_kwargs_key(ir_type, kwargs))
19731944
try:
19741945
return POOLED_KWARGS_CACHE[key]
19751946
except KeyError:

0 commit comments

Comments
 (0)