HypothesisWorks
diff --git a/‎.github/workflows/main.yml
+10-7 b/‎.github/workflows/main.yml
+10-7
diff --git a/‎README.md
+1-1 b/‎README.md
+1-1
diff --git a/‎hypothesis-python/RELEASE.rst
+4 b/‎hypothesis-python/RELEASE.rst
+4
diff --git a/‎hypothesis-python/src/hypothesis/core.py
+17-23 b/‎hypothesis-python/src/hypothesis/core.py
+17-23
diff --git a/‎hypothesis-python/src/hypothesis/internal/conjecture/data.py
+2-1 b/‎hypothesis-python/src/hypothesis/internal/conjecture/data.py
+2-1
diff --git a/‎hypothesis-python/src/hypothesis/strategies/_internal/utils.py
+16-11 b/‎hypothesis-python/src/hypothesis/strategies/_internal/utils.py
+16-11
diff --git a/‎hypothesis-python/src/hypothesis/vendor/pretty.py
+5-1 b/‎hypothesis-python/src/hypothesis/vendor/pretty.py
+5-1
diff --git a/‎hypothesis-python/tests/common/utils.py
+28 b/‎hypothesis-python/tests/common/utils.py
+28
diff --git a/‎hypothesis-python/tests/conftest.py
+8-1 b/‎hypothesis-python/tests/conftest.py
+8-1
diff --git a/‎hypothesis-python/tests/conjecture/common.py
+17-5 b/‎hypothesis-python/tests/conjecture/common.py
+17-5
diff --git a/‎hypothesis-python/tests/conjecture/test_alt_backend.py
+1-1 b/‎hypothesis-python/tests/conjecture/test_alt_backend.py
+1-1
diff --git a/‎hypothesis-python/tests/cover/test_cache_implementation.py
+2-1 b/‎hypothesis-python/tests/cover/test_cache_implementation.py
+2-1
diff --git a/‎hypothesis-python/tests/cover/test_core.py
+4-3 b/‎hypothesis-python/tests/cover/test_core.py
+4-3
diff --git a/‎hypothesis-python/tests/cover/test_database_backend.py
+1 b/‎hypothesis-python/tests/cover/test_database_backend.py
+1
diff --git a/‎hypothesis-python/tests/cover/test_datetimes.py
+2 b/‎hypothesis-python/tests/cover/test_datetimes.py
+2
diff --git a/‎hypothesis-python/tests/cover/test_direct_strategies.py
+2 b/‎hypothesis-python/tests/cover/test_direct_strategies.py
+2
@@ -83,12 +83,15 @@ jobs:
           # - check-py39-pandas12
           # - check-py39-pandas11
           ## `-cover` is too slow under crosshair; use a custom split
-          # - check-crosshair-custom-cover/test_[a-d]*
-          # - check-crosshair-custom-cover/test_[e-i]*
-          # - check-crosshair-custom-cover/test_[j-r]*
-          # - check-crosshair-custom-cover/test_[s-z]*
-          # - check-crosshair-custom-pytest/test_*
-          # - check-crosshair-nocover
+          - check-crosshair-custom-cover/test_[a-d]*
+          - check-crosshair-custom-cover/test_[e-i]*
+          - check-crosshair-custom-cover/test_[j-r]*
+          - check-crosshair-custom-cover/test_[s-z]*
+          - check-crosshair-custom-pytest/test_*
+          - check-crosshair-custom-nocover/test_[a-d]*
+          - check-crosshair-custom-nocover/test_[e-i]*
+          - check-crosshair-custom-nocover/test_[j-r]*
+          - check-crosshair-custom-nocover/test_[s-z]*
           # - check-crosshair-niche
           - check-py39-oldestnumpy
           - check-numpy-nightly
@@ -129,7 +132,7 @@ jobs:
         export TASK=${{ matrix.task }}
         if [[ $TASK == check-crosshair-custom-* ]]; then
           GROUP="${TASK#check-crosshair-custom-}"
-          ./build.sh check-crosshair-custom -- -n auto $(cd hypothesis-python ; echo tests/$GROUP)
+          ./build.sh check-crosshair-custom -- -n auto $(cd hypothesis-python && echo tests/$GROUP | xargs -n1 echo | grep -v "_py312" | xargs)
         else
           ./build.sh
         fi
 
@@ -44,4 +44,4 @@ To install Hypothesis:
 pip install hypothesis
 ```
 
-There are also [optional extras available](https://hypothesis.readthedocs.io/en/latest/packaging.html#other-python-libraries).
+There are also [optional extras available](https://hypothesis.readthedocs.io/en/latest/extras.html).
@@ -0,0 +1,4 @@
+RELEASE_TYPE: patch
+
+This patch improves the interaction between the :pypi:`hypothesis-crosshair`
+:ref:`backend <alternative-backends>` and :ref:`our observability tools <observability>`.
@@ -955,27 +955,20 @@ def run(data):
                     printer.text("Trying example:")
 
                 if self.print_given_args:
-                    if data.provider.avoid_realization and not print_example:
-                        # we can do better here by adding
-                        # avoid_realization: bool = False to repr_call, which
-                        # maintains args/kwargs structure (and comments) but shows
-                        # <symbolic> in place of values. For now, this at least
-                        # avoids realization with verbosity <= verbose.
-                        printer.text(" <symbolics>")
-                    else:
-                        printer.text(" ")
-                        printer.repr_call(
-                            test.__name__,
-                            args,
-                            kwargs,
-                            force_split=True,
-                            arg_slices=argslices,
-                            leading_comment=(
-                                "# " + context.data.slice_comments[(0, 0)]
-                                if (0, 0) in context.data.slice_comments
-                                else None
-                            ),
-                        )
+                    printer.text(" ")
+                    printer.repr_call(
+                        test.__name__,
+                        args,
+                        kwargs,
+                        force_split=True,
+                        arg_slices=argslices,
+                        leading_comment=(
+                            "# " + context.data.slice_comments[(0, 0)]
+                            if (0, 0) in context.data.slice_comments
+                            else None
+                        ),
+                        avoid_realization=data.provider.avoid_realization,
+                    )
                 report(printer.getvalue())
 
             if TESTCASE_CALLBACKS:
@@ -991,11 +984,12 @@ def run(data):
                         if (0, 0) in context.data.slice_comments
                         else None
                     ),
+                    avoid_realization=data.provider.avoid_realization,
                 )
                 self._string_repr = printer.getvalue()
                 data._observability_arguments = {
-                    **dict(enumerate(map(to_jsonable, args))),
-                    **{k: to_jsonable(v) for k, v in kwargs.items()},
+                    k: to_jsonable(v, avoid_realization=data.provider.avoid_realization)
+                    for k, v in [*enumerate(args), *kwargs.items()]
                 }
 
             try:
 
@@ -1136,7 +1136,8 @@ def draw(
                 )
                 raise
             if TESTCASE_CALLBACKS:
-                self._observability_args[key] = to_jsonable(v)
+                avoid = self.provider.avoid_realization
+                self._observability_args[key] = to_jsonable(v, avoid_realization=avoid)
             return v
         finally:
             self.stop_span()
 
@@ -10,6 +10,7 @@
 
 import sys
 import threading
+from functools import partial
 from inspect import signature
 from typing import TYPE_CHECKING, Callable
 
@@ -156,7 +157,7 @@ def accept(*args, **kwargs):
     return decorator
 
 
-def to_jsonable(obj: object) -> object:
+def to_jsonable(obj: object, *, avoid_realization: bool) -> object:
     """Recursively convert an object to json-encodable form.
 
     This is not intended to round-trip, but rather provide an analysis-ready
@@ -165,26 +166,30 @@ def to_jsonable(obj: object) -> object:
     """
     if isinstance(obj, (str, int, float, bool, type(None))):
         if isinstance(obj, int) and abs(obj) >= 2**63:
-            # Silently clamp very large ints to max_float, to avoid
-            # OverflowError when casting to float.
+            # Silently clamp very large ints to max_float, to avoid OverflowError when
+            # casting to float.  (but avoid adding more constraints to symbolic values)
+            if avoid_realization:
+                return "<symbolic>"
             obj = clamp(-sys.float_info.max, obj, sys.float_info.max)
             return float(obj)
         return obj
+    if avoid_realization:
+        return "<symbolic>"
+    recur = partial(to_jsonable, avoid_realization=avoid_realization)
     if isinstance(obj, (list, tuple, set, frozenset)):
         if isinstance(obj, tuple) and hasattr(obj, "_asdict"):
-            return to_jsonable(obj._asdict())  # treat namedtuples as dicts
-        return [to_jsonable(x) for x in obj]
+            return recur(obj._asdict())  # treat namedtuples as dicts
+        return [recur(x) for x in obj]
     if isinstance(obj, dict):
         return {
-            k if isinstance(k, str) else pretty(k): to_jsonable(v)
-            for k, v in obj.items()
+            k if isinstance(k, str) else pretty(k): recur(v) for k, v in obj.items()
         }
 
     # Hey, might as well try calling a .to_json() method - it works for Pandas!
     # We try this before the below general-purpose handlers to give folks a
     # chance to control this behavior on their custom classes.
     try:
-        return to_jsonable(obj.to_json())  # type: ignore
+        return recur(obj.to_json())  # type: ignore
     except Exception:
         pass
 
@@ -194,11 +199,11 @@ def to_jsonable(obj: object) -> object:
         and dcs.is_dataclass(obj)
         and not isinstance(obj, type)
     ):
-        return to_jsonable(dataclass_asdict(obj))
+        return recur(dataclass_asdict(obj))
     if attr.has(type(obj)):
-        return to_jsonable(attr.asdict(obj, recurse=False))  # type: ignore
+        return recur(attr.asdict(obj, recurse=False))  # type: ignore
     if (pyd := sys.modules.get("pydantic")) and isinstance(obj, pyd.BaseModel):
-        return to_jsonable(obj.model_dump())
+        return recur(obj.model_dump())
 
     # If all else fails, we'll just pretty-print as a string.
     return pretty(obj)
@@ -447,6 +447,7 @@ def repr_call(
         force_split: Optional[bool] = None,
         arg_slices: Optional[dict[str, tuple[int, int]]] = None,
         leading_comment: Optional[str] = None,
+        avoid_realization: bool = False,
     ) -> None:
         """Helper function to represent a function call.
 
@@ -494,7 +495,10 @@ def repr_call(
                     self.breakable(" " if i else "")
                 if k:
                     self.text(f"{k}=")
-                self.pretty(v)
+                if avoid_realization:
+                    self.text("<symbolic>")
+                else:
+                    self.pretty(v)
                 if force_split or i + 1 < len(all_args):
                     self.text(",")
                 comment = None
 
@@ -9,6 +9,7 @@
 # obtain one at https://mozilla.org/MPL/2.0/.
 
 import contextlib
+import enum
 import sys
 import warnings
 from io import StringIO
@@ -249,3 +250,30 @@ def capture_observations():
 # config option, so *linking against* something built this way can break us.
 # Everything is terrible
 PYTHON_FTZ = next_down(sys.float_info.min) == 0.0
+
+
+class Why(enum.Enum):
+    # Categorizing known failures, to ease later follow-up investigation.
+    # Some are crosshair issues, some hypothesis issues, others truly ok-to-xfail tests.
+    symbolic_outside_context = "CrosshairInternal error (using value outside context)"
+    nested_given = "nested @given decorators don't work with crosshair"
+    undiscovered = "crosshair may not find the failing input"
+    other = "reasons not elsewhere categorized"
+
+
+def xfail_on_crosshair(why: Why, /, *, strict=True, as_marks=False):
+    # run `pytest -m xf_crosshair` to select these tests!
+    try:
+        import pytest
+    except ImportError:
+        return lambda fn: fn
+
+    current_backend = settings.get_profile(settings._current_profile).backend
+    kw = {
+        "strict": strict and why != Why.undiscovered,
+        "reason": f"Expected failure due to: {why.value}",
+        "condition": current_backend == "crosshair",
+    }
+    if as_marks:  # for use with pytest.param(..., marks=xfail_on_crosshair())
+        return (pytest.mark.xf_crosshair, pytest.mark.xfail(**kw))
+    return lambda fn: pytest.mark.xf_crosshair(pytest.mark.xfail(**kw)(fn))
@@ -54,6 +54,7 @@ def pytest_configure(config):
         "markers",
         "xp_min_version(api_version): run when greater or equal to api_version",
     )
+    config.addinivalue_line("markers", "xf_crosshair: selection for xfailing symbolics")
 
     if config.getoption("--hypothesis-benchmark-shrinks"):
         # we'd like to support xdist here, but a session-scope fixture won't
@@ -75,7 +76,6 @@ def pytest_addoption(parser):
     parser.addoption("--hypothesis-update-outputs", action="store_true")
     parser.addoption("--hypothesis-benchmark-shrinks", type=str, choices=["new", "old"])
     parser.addoption("--hypothesis-benchmark-output", type=str)
-    parser.addoption("--hypothesis-learn-to-normalize", action="store_true")
 
     # New in pytest 6, so we add a shim on old versions to avoid missing-arg errors
     arg = "--durations-min"
@@ -95,6 +95,13 @@ def warns_or_raises(request):
         return pytest.warns
 
 
+# crosshair needs actual time for its path timeouts; load it before patching
+try:
+    import hypothesis_crosshair_provider.crosshair_provider  # noqa: F401
+except ImportError:
+    pass
+
+
 @pytest.fixture(scope="function", autouse=True)
 def _consistently_increment_time(monkeypatch):
     """Rather than rely on real system time we monkey patch time.time so that
 
@@ -14,9 +14,10 @@
 from random import Random
 from typing import Optional
 
+import pytest
+
 from hypothesis import HealthCheck, Phase, assume, settings, strategies as st
-from hypothesis.control import current_build_context
-from hypothesis.errors import InvalidArgument
+from hypothesis.control import current_build_context, currently_in_test_context
 from hypothesis.internal.conjecture import engine as engine_module
 from hypothesis.internal.conjecture.choice import ChoiceNode, ChoiceT
 from hypothesis.internal.conjecture.data import ConjectureData, Status
@@ -103,10 +104,21 @@ def accept(f):
 
 
 def fresh_data(*, random=None, observer=None) -> ConjectureData:
+    context = current_build_context() if currently_in_test_context() else None
+    if context is not None and settings().backend == "crosshair":
+        # we should reexamine fresh_data sometime and see if we can replace it
+        # with nicer and higher level hypothesis idioms.
+        #
+        # For now it doesn't work well with crosshair tests. This is no big
+        # loss, because these tests often rely on hypothesis-provider-specific
+        # things.
+        pytest.skip(
+            "Fresh data is too low level (and too much of a hack) to be "
+            "worth supporting when testing with crosshair"
+        )
+
     if random is None:
-        try:
-            context = current_build_context()
-        except InvalidArgument:
+        if context is None:
             # ensure usage of fresh_data() is not flaky outside of property tests.
             raise ValueError(
                 "must pass a seeded Random instance to fresh_data() when "
 
@@ -444,7 +444,7 @@ def test_function(f):
 
         with capture_out() as out:
             test_function()
-        assert "Trying example: <symbolics>" in out.getvalue()
+        assert "Trying example: test_function(\n    f=<symbolic>,\n)" in out.getvalue()
 
 
 @pytest.mark.parametrize("verbosity", [Verbosity.verbose, Verbosity.debug])
 
@@ -26,7 +26,7 @@
 from hypothesis.errors import InvalidArgument
 from hypothesis.internal.cache import GenericCache, LRUCache, LRUReusedCache
 
-from tests.common.utils import skipif_emscripten
+from tests.common.utils import Why, skipif_emscripten, xfail_on_crosshair
 
 
 class LRUCacheAlternative(GenericCache):
@@ -116,6 +116,7 @@ def test_behaves_like_a_dict_with_losses(implementation, writes, size):
         assert len(target) <= min(len(model), size)
 
 
+@xfail_on_crosshair(Why.symbolic_outside_context)
 @settings(
     suppress_health_check={HealthCheck.too_slow}
     | set(settings.get_profile(settings._current_profile).suppress_health_check),
 
@@ -19,18 +19,19 @@
 
 
 def test_stops_after_max_examples_if_satisfying():
-    tracker = []
+    count = 0
 
     def track(x):
-        tracker.append(x)
+        nonlocal count
+        count += 1
         return False
 
     max_examples = 100
 
     with pytest.raises(NoSuchExample):
         find(s.integers(0, 10000), track, settings=settings(max_examples=max_examples))
 
-    assert len(tracker) == max_examples
+    assert count == max_examples
 
 
 def test_stops_after_ten_times_max_examples_if_not_satisfying():
 
@@ -659,6 +659,7 @@ def test_database_listener_memory():
 
 
 @skipif_emscripten
+@pytest.mark.skipif(settings._current_profile == "crosshair", reason="takes ages")
 def test_database_listener_background_write():
     _database_conforms_to_listener_api(
         lambda path: BackgroundWriteDatabase(InMemoryExampleDatabase()),
 
@@ -16,6 +16,7 @@
 from hypothesis.strategies import dates, datetimes, timedeltas, times
 
 from tests.common.debug import assert_simple_property, find_any, minimal
+from tests.common.utils import Why, xfail_on_crosshair
 
 
 def test_can_find_positive_delta():
@@ -104,6 +105,7 @@ def test_single_date(val):
     assert find_any(dates(val, val)) is val
 
 
+@xfail_on_crosshair(Why.undiscovered)
 def test_can_find_midnight():
     find_any(times(), lambda x: x.hour == x.minute == x.second == 0)
 
 
@@ -23,6 +23,7 @@
 from hypothesis.vendor.pretty import pretty
 
 from tests.common.debug import check_can_generate_examples, minimal
+from tests.common.utils import Why, xfail_on_crosshair
 
 # Use `pretty` instead of `repr` for building test names, so that set and dict
 # parameters print consistently across multiple worker processes with different
@@ -437,6 +438,7 @@ def test_decimals():
     assert minimal(st.decimals(), lambda f: f.is_finite() and f >= 1) == 1
 
 
+@xfail_on_crosshair(Why.undiscovered)
 def test_non_float_decimal():
     minimal(st.decimals(), lambda d: d.is_finite() and decimal.Decimal(float(d)) != d)