Skip to content

Commit 2f0981b

Browse files
committed
use typed choice sequence in the database
1 parent 6383100 commit 2f0981b

File tree

10 files changed

+182
-84
lines changed

10 files changed

+182
-84
lines changed

hypothesis-python/RELEASE.rst

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
RELEASE_TYPE: minor
2+
3+
The :doc:`Hypothesis example database <database>` now uses a new internal format to store examples. The new format is not compatible with the previous format, so any old stored counterexamples will be silently discarded.
4+
5+
If you are replaying counterexamples using an external database such as :class:`~hypothesis.database.GitHubArtifactDatabase`, this means a counterexample must have been stored by a Hypothesis version at or after this release in order to replay successfully on a local installation at or after this release. In short, the Hypothesis versions writing to the local and remote databases should both be before, or both be after, this release.

hypothesis-python/src/hypothesis/core.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
ensure_free_stackframes,
8888
gc_cumulative_time,
8989
)
90-
from hypothesis.internal.conjecture.shrinker import sort_key, sort_key_ir
90+
from hypothesis.internal.conjecture.shrinker import sort_key_ir
9191
from hypothesis.internal.entropy import deterministic_PRNG
9292
from hypothesis.internal.escalation import (
9393
InterestingOrigin,
@@ -352,9 +352,8 @@ def decode_failure(blob: bytes) -> Sequence[ChoiceT]:
352352
f"Could not decode blob {blob!r}: Invalid start byte {prefix!r}"
353353
)
354354

355-
try:
356-
choices = ir_from_bytes(decoded)
357-
except Exception:
355+
choices = ir_from_bytes(decoded)
356+
if choices is None:
358357
raise InvalidArgument(f"Invalid serialized choice sequence for blob {blob!r}")
359358

360359
return choices
@@ -1873,13 +1872,13 @@ def fuzz_one_input(
18731872
except (StopTest, UnsatisfiedAssumption):
18741873
return None
18751874
except BaseException:
1876-
buffer = bytes(data.buffer)
18771875
known = minimal_failures.get(data.interesting_origin)
18781876
if settings.database is not None and (
1879-
known is None or sort_key(buffer) <= sort_key(known)
1877+
known is None
1878+
or sort_key_ir(data.ir_nodes) <= sort_key_ir(known)
18801879
):
1881-
settings.database.save(database_key, buffer)
1882-
minimal_failures[data.interesting_origin] = buffer
1880+
settings.database.save(database_key, ir_to_bytes(data.choices))
1881+
minimal_failures[data.interesting_origin] = data.ir_nodes
18831882
raise
18841883
return bytes(data.buffer)
18851884

hypothesis-python/src/hypothesis/database.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -768,8 +768,7 @@ def ir_to_bytes(ir: Iterable[ChoiceT], /) -> bytes:
768768
return b"".join(parts)
769769

770770

771-
def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
772-
"""Deserialize a bytestring to a list of IR elements. Inverts ir_to_bytes."""
771+
def _ir_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...]:
773772
# See above for an explanation of the format.
774773
parts: list[ChoiceT] = []
775774
idx = 0
@@ -797,4 +796,19 @@ def ir_from_bytes(buffer: bytes, /) -> list[ChoiceT]:
797796
else:
798797
assert tag == 4
799798
parts.append(chunk.decode(errors="surrogatepass"))
800-
return parts
799+
return tuple(parts)
800+
801+
802+
def ir_from_bytes(buffer: bytes, /) -> Optional[tuple[ChoiceT, ...]]:
803+
"""
804+
Deserialize a bytestring to a tuple of choices. Inverts ir_to_bytes.
805+
806+
Returns None if the given bytestring is not a valid serialization of choice
807+
sequences.
808+
"""
809+
try:
810+
return _ir_from_bytes(buffer)
811+
except Exception:
812+
# deserialization error, eg because our format changed or someone put junk
813+
# data in the db.
814+
return None

hypothesis-python/src/hypothesis/internal/conjecture/choice.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,8 @@ def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]:
465465

466466
def choice_key(choice: ChoiceT) -> ChoiceKeyT:
467467
if isinstance(choice, float):
468-
# distinguish -0.0/0.0, signaling/nonsignaling nans, etc.
468+
# float_to_int to distinguish -0.0/0.0, signaling/nonsignaling nans, etc,
469+
# and then add a "float" key to avoid colliding with actual integers.
469470
return ("float", float_to_int(choice))
470471
if isinstance(choice, bool):
471472
# avoid choice_key(0) == choice_key(False)

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

+48-25
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
from hypothesis import HealthCheck, Phase, Verbosity, settings as Settings
3636
from hypothesis._settings import local_settings
37-
from hypothesis.database import ExampleDatabase
37+
from hypothesis.database import ExampleDatabase, ir_from_bytes, ir_to_bytes
3838
from hypothesis.errors import (
3939
BackendCannotProceed,
4040
FlakyReplay,
@@ -44,7 +44,12 @@
4444
)
4545
from hypothesis.internal.cache import LRUReusedCache
4646
from hypothesis.internal.compat import NotRequired, TypeAlias, TypedDict, ceil, override
47-
from hypothesis.internal.conjecture.choice import ChoiceKwargsT, ChoiceT, choices_key
47+
from hypothesis.internal.conjecture.choice import (
48+
ChoiceKeyT,
49+
ChoiceKwargsT,
50+
ChoiceT,
51+
choices_key,
52+
)
4853
from hypothesis.internal.conjecture.data import (
4954
AVAILABLE_PROVIDERS,
5055
ConjectureData,
@@ -69,7 +74,7 @@
6974
startswith,
7075
)
7176
from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser
72-
from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key, sort_key_ir
77+
from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key_ir
7378
from hypothesis.internal.escalation import InterestingOrigin
7479
from hypothesis.internal.healthcheck import fail_health_check
7580
from hypothesis.reporting import base_report, report
@@ -91,6 +96,10 @@
9196
Ls: TypeAlias = list["Ls | int"]
9297

9398

99+
def shortlex(s):
100+
return (len(s), s)
101+
102+
94103
@attr.s
95104
class HealthCheckState:
96105
valid_examples: int = attr.ib(default=0)
@@ -467,7 +476,7 @@ def test_function(self, data: ConjectureData) -> None:
467476
data.freeze()
468477
return
469478
except BaseException:
470-
self.save_buffer(data.buffer)
479+
self.save_choices(data.choices)
471480
raise
472481
finally:
473482
# No branch, because if we're interrupted we always raise
@@ -522,7 +531,7 @@ def test_function(self, data: ConjectureData) -> None:
522531
and self.pareto_front is not None
523532
and self.pareto_front.add(data.as_result())
524533
):
525-
self.save_buffer(data.buffer, sub_key=b"pareto")
534+
self.save_choices(data.choices, sub_key=b"pareto")
526535

527536
assert len(data.buffer) <= BUFFER_SIZE
528537

@@ -601,12 +610,12 @@ def test_function(self, data: ConjectureData) -> None:
601610
else:
602611
if sort_key_ir(data.ir_nodes) < sort_key_ir(existing.ir_nodes):
603612
self.shrinks += 1
604-
self.downgrade_buffer(existing.buffer)
613+
self.downgrade_buffer(ir_to_bytes(existing.choices))
605614
self.__data_cache.unpin(existing.buffer)
606615
changed = True
607616

608617
if changed:
609-
self.save_buffer(data.buffer)
618+
self.save_choices(data.choices)
610619
self.interesting_examples[key] = data.as_result() # type: ignore
611620
self.__data_cache.pin(data.buffer, data.as_result())
612621
self.shrunk_examples.discard(key)
@@ -651,7 +660,7 @@ def test_function(self, data: ConjectureData) -> None:
651660
self.record_for_health_check(data)
652661

653662
def on_pareto_evict(self, data: ConjectureData) -> None:
654-
self.settings.database.delete(self.pareto_key, data.buffer)
663+
self.settings.database.delete(self.pareto_key, ir_to_bytes(data.choices))
655664

656665
def generate_novel_prefix(self) -> tuple[ChoiceT, ...]:
657666
"""Uses the tree to proactively generate a starting sequence of bytes
@@ -735,14 +744,14 @@ def record_for_health_check(self, data: ConjectureData) -> None:
735744
HealthCheck.too_slow,
736745
)
737746

738-
def save_buffer(
739-
self, buffer: Union[bytes, bytearray], sub_key: Optional[bytes] = None
747+
def save_choices(
748+
self, choices: Sequence[ChoiceT], sub_key: Optional[bytes] = None
740749
) -> None:
741750
if self.settings.database is not None:
742751
key = self.sub_key(sub_key)
743752
if key is None:
744753
return
745-
self.settings.database.save(key, bytes(buffer))
754+
self.settings.database.save(key, ir_to_bytes(choices))
746755

747756
def downgrade_buffer(self, buffer: Union[bytes, bytearray]) -> None:
748757
if self.settings.database is not None and self.database_key is not None:
@@ -832,7 +841,7 @@ def reuse_existing_examples(self) -> None:
832841
# sample the secondary corpus to a more manageable size.
833842

834843
corpus = sorted(
835-
self.settings.database.fetch(self.database_key), key=sort_key
844+
self.settings.database.fetch(self.database_key), key=shortlex
836845
)
837846
factor = 0.1 if (Phase.generate in self.settings.phases) else 1
838847
desired_size = max(2, ceil(factor * self.settings.max_examples))
@@ -847,7 +856,7 @@ def reuse_existing_examples(self) -> None:
847856
extra = extra_corpus
848857
else:
849858
extra = self.random.sample(extra_corpus, shortfall)
850-
extra.sort(key=sort_key)
859+
extra.sort(key=shortlex)
851860
corpus.extend(extra)
852861

853862
# We want a fast path where every primary entry in the database was
@@ -858,15 +867,20 @@ def reuse_existing_examples(self) -> None:
858867
for i, existing in enumerate(corpus):
859868
if i >= primary_corpus_size and found_interesting_in_primary:
860869
break
861-
data = self.cached_test_function(existing, extend=BUFFER_SIZE)
870+
choices = ir_from_bytes(existing)
871+
if choices is None:
872+
# clear out any keys which fail deserialization
873+
self.settings.database.delete(self.database_key, existing)
874+
continue
875+
data = self.cached_test_function_ir(choices, extend=BUFFER_SIZE)
862876
if data.status != Status.INTERESTING:
863877
self.settings.database.delete(self.database_key, existing)
864878
self.settings.database.delete(self.secondary_key, existing)
865879
else:
866880
if i < primary_corpus_size:
867881
found_interesting_in_primary = True
868882
assert not isinstance(data, _Overrun)
869-
if existing != data.buffer:
883+
if choices_key(choices) != choices_key(data.choices):
870884
all_interesting_in_primary_were_exact = False
871885
if not self.settings.report_multiple_bugs:
872886
break
@@ -886,10 +900,14 @@ def reuse_existing_examples(self) -> None:
886900
pareto_corpus = list(self.settings.database.fetch(self.pareto_key))
887901
if len(pareto_corpus) > desired_extra:
888902
pareto_corpus = self.random.sample(pareto_corpus, desired_extra)
889-
pareto_corpus.sort(key=sort_key)
903+
pareto_corpus.sort(key=shortlex)
890904

891905
for existing in pareto_corpus:
892-
data = self.cached_test_function(existing, extend=BUFFER_SIZE)
906+
choices = ir_from_bytes(existing)
907+
if choices is None:
908+
self.settings.database.delete(self.pareto_key, existing)
909+
continue
910+
data = self.cached_test_function_ir(choices, extend=BUFFER_SIZE)
893911
if data not in self.pareto_front:
894912
self.settings.database.delete(self.pareto_key, existing)
895913
if data.status == Status.INTERESTING:
@@ -1371,9 +1389,9 @@ def shrink_interesting_examples(self) -> None:
13711389
for k, v in self.interesting_examples.items()
13721390
if k not in self.shrunk_examples
13731391
),
1374-
key=lambda kv: (sort_key_ir(kv[1].ir_nodes), sort_key(repr(kv[0]))),
1392+
key=lambda kv: (sort_key_ir(kv[1].ir_nodes), shortlex(repr(kv[0]))),
13751393
)
1376-
self.debug(f"Shrinking {target!r}: {data.choices}")
1394+
self.debug(f"Shrinking {target!r}: {example.choices}")
13771395

13781396
if not self.settings.report_multiple_bugs:
13791397
# If multi-bug reporting is disabled, we shrink our currently-minimal
@@ -1400,17 +1418,22 @@ def clear_secondary_key(self) -> None:
14001418
# It's not worth trying the primary corpus because we already
14011419
# tried all of those in the initial phase.
14021420
corpus = sorted(
1403-
self.settings.database.fetch(self.secondary_key), key=sort_key
1421+
self.settings.database.fetch(self.secondary_key), key=shortlex
14041422
)
14051423
for c in corpus:
1406-
primary = {v.buffer for v in self.interesting_examples.values()}
1407-
1408-
cap = max(map(sort_key, primary))
1424+
choices = ir_from_bytes(c)
1425+
if choices is None:
1426+
self.settings.database.delete(self.secondary_key, c)
1427+
continue
1428+
primary = {
1429+
ir_to_bytes(v.choices) for v in self.interesting_examples.values()
1430+
}
1431+
cap = max(map(shortlex, primary))
14091432

1410-
if sort_key(c) > cap:
1433+
if shortlex(c) > cap:
14111434
break
14121435
else:
1413-
self.cached_test_function(c)
1436+
self.cached_test_function_ir(choices)
14141437
# We unconditionally remove c from the secondary key as it
14151438
# is either now primary or worse than our primary example
14161439
# of this reason for interestingness.

hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]:
8181
result, so it makes sense to prioritise reducing earlier values over
8282
later ones. This makes the lexicographic order the more natural choice.
8383
"""
84-
return (len(buffer), buffer)
84+
return (len(buffer), buffer) # pragma: no cover # removing soon
8585

8686

8787
def sort_key_ir(nodes: Sequence[IRNode]) -> tuple[int, tuple[int, ...]]:

0 commit comments

Comments
 (0)