HypothesisWorks
diff --git a/.readthedocs.yml b/.readthedocs.yml
Lines changed: 2 additions & 0 deletions
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
Lines changed: 5 additions & 0 deletions
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
Lines changed: 7 additions & 8 deletions
diff --git a/hypothesis-python/src/hypothesis/database.py b/hypothesis-python/src/hypothesis/database.py
Lines changed: 17 additions & 3 deletions
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/choice.py b/hypothesis-python/src/hypothesis/internal/conjecture/choice.py
Lines changed: 48 additions & 1 deletion
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
Lines changed: 12 additions & 41 deletions
@@ -21,3 +21,5 @@ python:
       - path: hypothesis-python/
         extra_requirements:
            - all
+sphinx:
+   configuration: hypothesis-python/docs/conf.py
@@ -0,0 +1,5 @@
+RELEASE_TYPE: minor
+
+The :doc:`Hypothesis example database <database>` now uses a new internal format to store examples. This new format is not compatible with the previous format, so stored entries will not carry over.
+
+The database is best thought of as a cache that may be invalidated at times. Instead of relying on it for correctness, we recommend using :obj:`@example <hypothesis.example>` to specify explicit examples. When using databases across environments (such as connecting a :class:`~hypothesis.database.GitHubArtifactDatabase` in CI to your local environment), we recommend using the same version of Hypothesis in each environment where possible, for maximum reproducibility.
@@ -87,7 +87,7 @@
     ensure_free_stackframes,
     gc_cumulative_time,
 )
-from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
+from hypothesis.internal.conjecture.shrinker import sort_key_ir
 from hypothesis.internal.entropy import deterministic_PRNG
 from hypothesis.internal.escalation import (
     InterestingOrigin,
@@ -352,9 +352,8 @@ def decode_failure(blob: bytes) -> Sequence[ChoiceT]:
             f"Could not decode blob {blob!r}: Invalid start byte {prefix!r}"
         )
 
-    try:
-        choices = ir_from_bytes(decoded)
-    except Exception:
+    choices = ir_from_bytes(decoded)
+    if choices is None:
         raise InvalidArgument(f"Invalid serialized choice sequence for blob {blob!r}")
 
     return choices
@@ -1873,13 +1872,13 @@ def fuzz_one_input(
                 except (StopTest, UnsatisfiedAssumption):
                     return None
                 except BaseException:
-                    buffer = bytes(data.buffer)
                     known = minimal_failures.get(data.interesting_origin)
                     if settings.database is not None and (
-                        known is None or sort_key(buffer) <= sort_key(known)
+                        known is None
+                        or sort_key_ir(data.ir_nodes) <= sort_key_ir(known)
                     ):
-                        settings.database.save(database_key, buffer)
-                        minimal_failures[data.interesting_origin] = buffer
+                        settings.database.save(database_key, ir_to_bytes(data.choices))
+                        minimal_failures[data.interesting_origin] = data.ir_nodes
                     raise
                 return bytes(data.buffer)
 
 
@@ -768,8 +768,7 @@ def ir_to_bytes(ir: Iterable[ChoiceT], /) -> bytes:
     return b"".join(parts)
 
 
-def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
-    """Deserialize a bytestring to a list of IR elements. Inverts ir_to_bytes."""
+def _ir_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...]:
     # See above for an explanation of the format.
     parts: list[ChoiceT] = []
     idx = 0
@@ -797,4 +796,19 @@ def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
         else:
             assert tag == 4
             parts.append(chunk.decode(errors="surrogatepass"))
-    return parts
+    return tuple(parts)
+
+
+def ir_from_bytes(buffer: bytes, /) -> Optional[tuple[ChoiceT, ...]]:
+    """
+    Deserialize a bytestring to a tuple of choices. Inverts ir_to_bytes.
+
+    Returns None, rather than raising, if the given bytestring is not a valid
+    serialization of a choice sequence, so callers must check for None.
+    """
+    try:
+        return _ir_from_bytes(buffer)
+    except Exception:
+        # Any exception raised while parsing is treated as a deserialization
+        # error, e.g. because our format changed or someone put junk data in
+        # the db.
+        return None
@@ -24,7 +24,7 @@
 from hypothesis.errors import ChoiceTooLarge
 from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
 from hypothesis.internal.conjecture.utils import identity
-from hypothesis.internal.floats import make_float_clamper, sign_aware_lte
+from hypothesis.internal.floats import float_to_int, make_float_clamper, sign_aware_lte
 from hypothesis.internal.intervalsets import IntervalSet
 
 T = TypeVar("T")
@@ -67,6 +67,9 @@ class BooleanKWargs(TypedDict):
     IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
 ]
 ChoiceNameT: "TypeAlias" = Literal["integer", "string", "boolean", "float", "bytes"]
+ChoiceKeyT: "TypeAlias" = Union[
+    int, str, bytes, tuple[Literal["bool"], bool], tuple[Literal["float"], int]
+]
 
 
 def _size_to_index(size: int, *, alphabet_size: int) -> int:
@@ -454,3 +457,47 @@ def choice_permitted(choice: ChoiceT, kwargs: ChoiceKwargsT) -> bool:
         return True
     else:
         raise NotImplementedError(f"unhandled type {type(choice)} with value {choice}")
+
+
+def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]:
+    """Return a tuple holding choice_key(choice) for each choice, in order."""
+    return tuple(choice_key(choice) for choice in choices)
+
+
+def choice_key(choice: ChoiceT) -> ChoiceKeyT:
+    """
+    Return a key for a single choice, used for comparison by choice_equal.
+
+    Floats become ``("float", <int>)`` and bools become ``("bool", <bool>)``;
+    every other choice is returned unchanged.
+    """
+    if isinstance(choice, float):
+        # float_to_int to distinguish -0.0/0.0, signaling/nonsignaling nans, etc,
+        # and then add a "float" key to avoid colliding with actual integers.
+        return ("float", float_to_int(choice))
+    if isinstance(choice, bool):
+        # avoid choice_key(0) == choice_key(False)
+        return ("bool", choice)
+    return choice
+
+
+def choice_equal(choice1: ChoiceT, choice2: ChoiceT) -> bool:
+    """
+    Return whether two choices have equal choice_key values.
+
+    Both arguments must have exactly the same type; this is asserted.
+    """
+    assert type(choice1) is type(choice2), (choice1, choice2)
+    return choice_key(choice1) == choice_key(choice2)
+
+
+def choice_kwargs_equal(
+    ir_type: ChoiceNameT, kwargs1: ChoiceKwargsT, kwargs2: ChoiceKwargsT
+) -> bool:
+    """
+    Return whether two kwargs dicts for the same ir_type have equal
+    choice_kwargs_key values.
+    """
+    return choice_kwargs_key(ir_type, kwargs1) == choice_kwargs_key(ir_type, kwargs2)
+
+
+def choice_kwargs_key(ir_type, kwargs):
+    """
+    Return a tuple key built from the kwargs of a choice of type ``ir_type``.
+
+    "float" and "integer" kwargs use a fixed field order; any other ir_type
+    yields the kwargs values ordered by sorted key name.
+    """
+    if ir_type == "float":
+        return (
+            # bounds go through float_to_int rather than being compared as
+            # floats -- presumably to keep e.g. -0.0 and 0.0 distinct.
+            float_to_int(kwargs["min_value"]),
+            float_to_int(kwargs["max_value"]),
+            kwargs["allow_nan"],
+            kwargs["smallest_nonzero_magnitude"],
+        )
+    if ir_type == "integer":
+        return (
+            kwargs["min_value"],
+            kwargs["max_value"],
+            # weights is converted to a tuple when present -- presumably so
+            # the resulting key is hashable; TODO confirm the stored type.
+            None if kwargs["weights"] is None else tuple(kwargs["weights"]),
+            kwargs["shrink_towards"],
+        )
+    # remaining ir_types: values in sorted-key order.
+    return tuple(kwargs[key] for key in sorted(kwargs))
@@ -44,7 +44,11 @@
     FloatKWargs,
     IntegerKWargs,
     StringKWargs,
+    choice_equal,
     choice_from_index,
+    choice_key,
+    choice_kwargs_equal,
+    choice_kwargs_key,
     choice_permitted,
 )
 from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
@@ -647,15 +651,15 @@ def trivial(self) -> bool:
 
         if self.ir_type != "float":
             zero_value = choice_from_index(0, self.ir_type, self.kwargs)
-            return ir_value_equal(self.value, zero_value)
+            return choice_equal(self.value, zero_value)
         else:
             kwargs = cast(FloatKWargs, self.kwargs)
             min_value = kwargs["min_value"]
             max_value = kwargs["max_value"]
             shrink_towards = 0.0
 
             if min_value == -math.inf and max_value == math.inf:
-                return ir_value_equal(self.value, shrink_towards)
+                return choice_equal(self.value, shrink_towards)
 
             if (
                 not math.isinf(min_value)
@@ -666,7 +670,7 @@ def trivial(self) -> bool:
                 # one closest to shrink_towards
                 shrink_towards = max(math.ceil(min_value), shrink_towards)
                 shrink_towards = min(math.floor(max_value), shrink_towards)
-                return ir_value_equal(self.value, float(shrink_towards))
+                return choice_equal(self.value, float(shrink_towards))
 
             # the real answer here is "the value in [min_value, max_value] with
             # the lowest denominator when represented as a fraction".
@@ -680,17 +684,17 @@ def __eq__(self, other: object) -> bool:
 
         return (
             self.ir_type == other.ir_type
-            and ir_value_equal(self.value, other.value)
-            and ir_kwargs_equal(self.ir_type, self.kwargs, other.kwargs)
+            and choice_equal(self.value, other.value)
+            and choice_kwargs_equal(self.ir_type, self.kwargs, other.kwargs)
             and self.was_forced == other.was_forced
         )
 
     def __hash__(self) -> int:
         return hash(
             (
                 self.ir_type,
-                ir_value_key(self.value),
-                ir_kwargs_key(self.ir_type, self.kwargs),
+                choice_key(self.value),
+                choice_kwargs_key(self.ir_type, self.kwargs),
                 self.was_forced,
             )
         )
@@ -726,39 +730,6 @@ def ir_size(ir: Iterable[Union[IRNode, NodeTemplate, ChoiceT]]) -> int:
     return size
 
 
-def ir_value_key(v):
-    if type(v) is float:
-        return float_to_int(v)
-    return v
-
-
-def ir_kwargs_key(ir_type, kwargs):
-    if ir_type == "float":
-        return (
-            float_to_int(kwargs["min_value"]),
-            float_to_int(kwargs["max_value"]),
-            kwargs["allow_nan"],
-            kwargs["smallest_nonzero_magnitude"],
-        )
-    if ir_type == "integer":
-        return (
-            kwargs["min_value"],
-            kwargs["max_value"],
-            None if kwargs["weights"] is None else tuple(kwargs["weights"]),
-            kwargs["shrink_towards"],
-        )
-    return tuple(kwargs[key] for key in sorted(kwargs))
-
-
-def ir_value_equal(v1, v2):
-    assert type(v1) is type(v2), (v1, v2)
-    return ir_value_key(v1) == ir_value_key(v2)
-
-
-def ir_kwargs_equal(ir_type, kwargs1, kwargs2):
-    return ir_kwargs_key(ir_type, kwargs1) == ir_kwargs_key(ir_type, kwargs2)
-
-
 @dataclass_transform()
 @attr.s(slots=True)
 class ConjectureResult:
@@ -1969,7 +1940,7 @@ def _pooled_kwargs(self, ir_type, kwargs):
         if self.provider.avoid_realization:
             return kwargs
 
-        key = (ir_type, *ir_kwargs_key(ir_type, kwargs))
+        key = (ir_type, *choice_kwargs_key(ir_type, kwargs))
         try:
             return POOLED_KWARGS_CACHE[key]
         except KeyError: