Skip to content

Commit e66f7fb

Browse files
authored
Merge pull request #4034 from Zac-HD/crosshair-in-ci
[experimental] Run crosshair in CI
2 parents e014a09 + 159c3b8 commit e66f7fb

File tree

69 files changed

+391
-129
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+391
-129
lines changed

.github/workflows/main.yml

+10-7
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,15 @@ jobs:
8383
# - check-py39-pandas12
8484
# - check-py39-pandas11
8585
## `-cover` is too slow under crosshair; use a custom split
86-
# - check-crosshair-custom-cover/test_[a-d]*
87-
# - check-crosshair-custom-cover/test_[e-i]*
88-
# - check-crosshair-custom-cover/test_[j-r]*
89-
# - check-crosshair-custom-cover/test_[s-z]*
90-
# - check-crosshair-custom-pytest/test_*
91-
# - check-crosshair-nocover
86+
- check-crosshair-custom-cover/test_[a-d]*
87+
- check-crosshair-custom-cover/test_[e-i]*
88+
- check-crosshair-custom-cover/test_[j-r]*
89+
- check-crosshair-custom-cover/test_[s-z]*
90+
- check-crosshair-custom-pytest/test_*
91+
- check-crosshair-custom-nocover/test_[a-d]*
92+
- check-crosshair-custom-nocover/test_[e-i]*
93+
- check-crosshair-custom-nocover/test_[j-r]*
94+
- check-crosshair-custom-nocover/test_[s-z]*
9295
# - check-crosshair-niche
9396
- check-py39-oldestnumpy
9497
- check-numpy-nightly
@@ -129,7 +132,7 @@ jobs:
129132
export TASK=${{ matrix.task }}
130133
if [[ $TASK == check-crosshair-custom-* ]]; then
131134
GROUP="${TASK#check-crosshair-custom-}"
132-
./build.sh check-crosshair-custom -- -n auto $(cd hypothesis-python ; echo tests/$GROUP)
135+
./build.sh check-crosshair-custom -- -n auto $(cd hypothesis-python && echo tests/$GROUP | xargs -n1 echo | grep -v "_py312" | xargs)
133136
else
134137
./build.sh
135138
fi

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,4 @@ To install Hypothesis:
4444
pip install hypothesis
4545
```
4646

47-
There are also [optional extras available](https://hypothesis.readthedocs.io/en/latest/packaging.html#other-python-libraries).
47+
There are also [optional extras available](https://hypothesis.readthedocs.io/en/latest/extras.html).

hypothesis-python/RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch improves the interaction between the :pypi:`hypothesis-crosshair`
4+
:ref:`backend <alternative-backends>` and :ref:`our observability tools <observability>`.

hypothesis-python/src/hypothesis/core.py

+17-23
Original file line numberDiff line numberDiff line change
@@ -955,27 +955,20 @@ def run(data):
955955
printer.text("Trying example:")
956956

957957
if self.print_given_args:
958-
if data.provider.avoid_realization and not print_example:
959-
# we can do better here by adding
960-
# avoid_realization: bool = False to repr_call, which
961-
# maintains args/kwargs structure (and comments) but shows
962-
# <symbolic> in place of values. For now, this at least
963-
# avoids realization with verbosity <= verbose.
964-
printer.text(" <symbolics>")
965-
else:
966-
printer.text(" ")
967-
printer.repr_call(
968-
test.__name__,
969-
args,
970-
kwargs,
971-
force_split=True,
972-
arg_slices=argslices,
973-
leading_comment=(
974-
"# " + context.data.slice_comments[(0, 0)]
975-
if (0, 0) in context.data.slice_comments
976-
else None
977-
),
978-
)
958+
printer.text(" ")
959+
printer.repr_call(
960+
test.__name__,
961+
args,
962+
kwargs,
963+
force_split=True,
964+
arg_slices=argslices,
965+
leading_comment=(
966+
"# " + context.data.slice_comments[(0, 0)]
967+
if (0, 0) in context.data.slice_comments
968+
else None
969+
),
970+
avoid_realization=data.provider.avoid_realization,
971+
)
979972
report(printer.getvalue())
980973

981974
if TESTCASE_CALLBACKS:
@@ -991,11 +984,12 @@ def run(data):
991984
if (0, 0) in context.data.slice_comments
992985
else None
993986
),
987+
avoid_realization=data.provider.avoid_realization,
994988
)
995989
self._string_repr = printer.getvalue()
996990
data._observability_arguments = {
997-
**dict(enumerate(map(to_jsonable, args))),
998-
**{k: to_jsonable(v) for k, v in kwargs.items()},
991+
k: to_jsonable(v, avoid_realization=data.provider.avoid_realization)
992+
for k, v in [*enumerate(args), *kwargs.items()]
999993
}
1000994

1001995
try:

hypothesis-python/src/hypothesis/internal/conjecture/data.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1136,7 +1136,8 @@ def draw(
11361136
)
11371137
raise
11381138
if TESTCASE_CALLBACKS:
1139-
self._observability_args[key] = to_jsonable(v)
1139+
avoid = self.provider.avoid_realization
1140+
self._observability_args[key] = to_jsonable(v, avoid_realization=avoid)
11401141
return v
11411142
finally:
11421143
self.stop_span()

hypothesis-python/src/hypothesis/strategies/_internal/utils.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import sys
1212
import threading
13+
from functools import partial
1314
from inspect import signature
1415
from typing import TYPE_CHECKING, Callable
1516

@@ -156,7 +157,7 @@ def accept(*args, **kwargs):
156157
return decorator
157158

158159

159-
def to_jsonable(obj: object) -> object:
160+
def to_jsonable(obj: object, *, avoid_realization: bool) -> object:
160161
"""Recursively convert an object to json-encodable form.
161162
162163
This is not intended to round-trip, but rather provide an analysis-ready
@@ -165,26 +166,30 @@ def to_jsonable(obj: object) -> object:
165166
"""
166167
if isinstance(obj, (str, int, float, bool, type(None))):
167168
if isinstance(obj, int) and abs(obj) >= 2**63:
168-
# Silently clamp very large ints to max_float, to avoid
169-
# OverflowError when casting to float.
169+
# Silently clamp very large ints to max_float, to avoid OverflowError when
170+
# casting to float. (but avoid adding more constraints to symbolic values)
171+
if avoid_realization:
172+
return "<symbolic>"
170173
obj = clamp(-sys.float_info.max, obj, sys.float_info.max)
171174
return float(obj)
172175
return obj
176+
if avoid_realization:
177+
return "<symbolic>"
178+
recur = partial(to_jsonable, avoid_realization=avoid_realization)
173179
if isinstance(obj, (list, tuple, set, frozenset)):
174180
if isinstance(obj, tuple) and hasattr(obj, "_asdict"):
175-
return to_jsonable(obj._asdict()) # treat namedtuples as dicts
176-
return [to_jsonable(x) for x in obj]
181+
return recur(obj._asdict()) # treat namedtuples as dicts
182+
return [recur(x) for x in obj]
177183
if isinstance(obj, dict):
178184
return {
179-
k if isinstance(k, str) else pretty(k): to_jsonable(v)
180-
for k, v in obj.items()
185+
k if isinstance(k, str) else pretty(k): recur(v) for k, v in obj.items()
181186
}
182187

183188
# Hey, might as well try calling a .to_json() method - it works for Pandas!
184189
# We try this before the below general-purpose handlers to give folks a
185190
# chance to control this behavior on their custom classes.
186191
try:
187-
return to_jsonable(obj.to_json()) # type: ignore
192+
return recur(obj.to_json()) # type: ignore
188193
except Exception:
189194
pass
190195

@@ -194,11 +199,11 @@ def to_jsonable(obj: object) -> object:
194199
and dcs.is_dataclass(obj)
195200
and not isinstance(obj, type)
196201
):
197-
return to_jsonable(dataclass_asdict(obj))
202+
return recur(dataclass_asdict(obj))
198203
if attr.has(type(obj)):
199-
return to_jsonable(attr.asdict(obj, recurse=False)) # type: ignore
204+
return recur(attr.asdict(obj, recurse=False)) # type: ignore
200205
if (pyd := sys.modules.get("pydantic")) and isinstance(obj, pyd.BaseModel):
201-
return to_jsonable(obj.model_dump())
206+
return recur(obj.model_dump())
202207

203208
# If all else fails, we'll just pretty-print as a string.
204209
return pretty(obj)

hypothesis-python/src/hypothesis/vendor/pretty.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ def repr_call(
447447
force_split: Optional[bool] = None,
448448
arg_slices: Optional[dict[str, tuple[int, int]]] = None,
449449
leading_comment: Optional[str] = None,
450+
avoid_realization: bool = False,
450451
) -> None:
451452
"""Helper function to represent a function call.
452453
@@ -494,7 +495,10 @@ def repr_call(
494495
self.breakable(" " if i else "")
495496
if k:
496497
self.text(f"{k}=")
497-
self.pretty(v)
498+
if avoid_realization:
499+
self.text("<symbolic>")
500+
else:
501+
self.pretty(v)
498502
if force_split or i + 1 < len(all_args):
499503
self.text(",")
500504
comment = None

hypothesis-python/tests/common/utils.py

+28
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# obtain one at https://mozilla.org/MPL/2.0/.
1010

1111
import contextlib
12+
import enum
1213
import sys
1314
import warnings
1415
from io import StringIO
@@ -249,3 +250,30 @@ def capture_observations():
249250
# config option, so *linking against* something built this way can break us.
250251
# Everything is terrible
251252
PYTHON_FTZ = next_down(sys.float_info.min) == 0.0
253+
254+
255+
class Why(enum.Enum):
256+
# Categorizing known failures, to ease later follow-up investigation.
257+
# Some are crosshair issues, some hypothesis issues, others truly ok-to-xfail tests.
258+
symbolic_outside_context = "CrosshairInternal error (using value outside context)"
259+
nested_given = "nested @given decorators don't work with crosshair"
260+
undiscovered = "crosshair may not find the failing input"
261+
other = "reasons not elsewhere categorized"
262+
263+
264+
def xfail_on_crosshair(why: Why, /, *, strict=True, as_marks=False):
265+
# run `pytest -m xf_crosshair` to select these tests!
266+
try:
267+
import pytest
268+
except ImportError:
269+
return lambda fn: fn
270+
271+
current_backend = settings.get_profile(settings._current_profile).backend
272+
kw = {
273+
"strict": strict and why != Why.undiscovered,
274+
"reason": f"Expected failure due to: {why.value}",
275+
"condition": current_backend == "crosshair",
276+
}
277+
if as_marks: # for use with pytest.param(..., marks=xfail_on_crosshair())
278+
return (pytest.mark.xf_crosshair, pytest.mark.xfail(**kw))
279+
return lambda fn: pytest.mark.xf_crosshair(pytest.mark.xfail(**kw)(fn))

hypothesis-python/tests/conftest.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def pytest_configure(config):
5454
"markers",
5555
"xp_min_version(api_version): run when greater or equal to api_version",
5656
)
57+
config.addinivalue_line("markers", "xf_crosshair: selection for xfailing symbolics")
5758

5859
if config.getoption("--hypothesis-benchmark-shrinks"):
5960
# we'd like to support xdist here, but a session-scope fixture won't
@@ -75,7 +76,6 @@ def pytest_addoption(parser):
7576
parser.addoption("--hypothesis-update-outputs", action="store_true")
7677
parser.addoption("--hypothesis-benchmark-shrinks", type=str, choices=["new", "old"])
7778
parser.addoption("--hypothesis-benchmark-output", type=str)
78-
parser.addoption("--hypothesis-learn-to-normalize", action="store_true")
7979

8080
# New in pytest 6, so we add a shim on old versions to avoid missing-arg errors
8181
arg = "--durations-min"
@@ -95,6 +95,13 @@ def warns_or_raises(request):
9595
return pytest.warns
9696

9797

98+
# crosshair needs actual time for its path timeouts; load it before patching
99+
try:
100+
import hypothesis_crosshair_provider.crosshair_provider # noqa: F401
101+
except ImportError:
102+
pass
103+
104+
98105
@pytest.fixture(scope="function", autouse=True)
99106
def _consistently_increment_time(monkeypatch):
100107
"""Rather than rely on real system time we monkey patch time.time so that

hypothesis-python/tests/conjecture/common.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@
1414
from random import Random
1515
from typing import Optional
1616

17+
import pytest
18+
1719
from hypothesis import HealthCheck, Phase, assume, settings, strategies as st
18-
from hypothesis.control import current_build_context
19-
from hypothesis.errors import InvalidArgument
20+
from hypothesis.control import current_build_context, currently_in_test_context
2021
from hypothesis.internal.conjecture import engine as engine_module
2122
from hypothesis.internal.conjecture.choice import ChoiceNode, ChoiceT
2223
from hypothesis.internal.conjecture.data import ConjectureData, Status
@@ -103,10 +104,21 @@ def accept(f):
103104

104105

105106
def fresh_data(*, random=None, observer=None) -> ConjectureData:
107+
context = current_build_context() if currently_in_test_context() else None
108+
if context is not None and settings().backend == "crosshair":
109+
# we should reexamine fresh_data sometime and see if we can replace it
110+
# with nicer and higher level hypothesis idioms.
111+
#
112+
# For now it doesn't work well with crosshair tests. This is no big
113+
# loss, because these tests often rely on hypothesis-provider-specific
114+
# things.
115+
pytest.skip(
116+
"Fresh data is too low level (and too much of a hack) to be "
117+
"worth supporting when testing with crosshair"
118+
)
119+
106120
if random is None:
107-
try:
108-
context = current_build_context()
109-
except InvalidArgument:
121+
if context is None:
110122
# ensure usage of fresh_data() is not flaky outside of property tests.
111123
raise ValueError(
112124
"must pass a seeded Random instance to fresh_data() when "

hypothesis-python/tests/conjecture/test_alt_backend.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ def test_function(f):
444444

445445
with capture_out() as out:
446446
test_function()
447-
assert "Trying example: <symbolics>" in out.getvalue()
447+
assert "Trying example: test_function(\n f=<symbolic>,\n)" in out.getvalue()
448448

449449

450450
@pytest.mark.parametrize("verbosity", [Verbosity.verbose, Verbosity.debug])

hypothesis-python/tests/cover/test_cache_implementation.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from hypothesis.errors import InvalidArgument
2727
from hypothesis.internal.cache import GenericCache, LRUCache, LRUReusedCache
2828

29-
from tests.common.utils import skipif_emscripten
29+
from tests.common.utils import Why, skipif_emscripten, xfail_on_crosshair
3030

3131

3232
class LRUCacheAlternative(GenericCache):
@@ -116,6 +116,7 @@ def test_behaves_like_a_dict_with_losses(implementation, writes, size):
116116
assert len(target) <= min(len(model), size)
117117

118118

119+
@xfail_on_crosshair(Why.symbolic_outside_context)
119120
@settings(
120121
suppress_health_check={HealthCheck.too_slow}
121122
| set(settings.get_profile(settings._current_profile).suppress_health_check),

hypothesis-python/tests/cover/test_core.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,19 @@
1919

2020

2121
def test_stops_after_max_examples_if_satisfying():
22-
tracker = []
22+
count = 0
2323

2424
def track(x):
25-
tracker.append(x)
25+
nonlocal count
26+
count += 1
2627
return False
2728

2829
max_examples = 100
2930

3031
with pytest.raises(NoSuchExample):
3132
find(s.integers(0, 10000), track, settings=settings(max_examples=max_examples))
3233

33-
assert len(tracker) == max_examples
34+
assert count == max_examples
3435

3536

3637
def test_stops_after_ten_times_max_examples_if_not_satisfying():

hypothesis-python/tests/cover/test_database_backend.py

+1
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,7 @@ def test_database_listener_memory():
659659

660660

661661
@skipif_emscripten
662+
@pytest.mark.skipif(settings._current_profile == "crosshair", reason="takes ages")
662663
def test_database_listener_background_write():
663664
_database_conforms_to_listener_api(
664665
lambda path: BackgroundWriteDatabase(InMemoryExampleDatabase()),

hypothesis-python/tests/cover/test_datetimes.py

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from hypothesis.strategies import dates, datetimes, timedeltas, times
1717

1818
from tests.common.debug import assert_simple_property, find_any, minimal
19+
from tests.common.utils import Why, xfail_on_crosshair
1920

2021

2122
def test_can_find_positive_delta():
@@ -104,6 +105,7 @@ def test_single_date(val):
104105
assert find_any(dates(val, val)) is val
105106

106107

108+
@xfail_on_crosshair(Why.undiscovered)
107109
def test_can_find_midnight():
108110
find_any(times(), lambda x: x.hour == x.minute == x.second == 0)
109111

hypothesis-python/tests/cover/test_direct_strategies.py

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from hypothesis.vendor.pretty import pretty
2424

2525
from tests.common.debug import check_can_generate_examples, minimal
26+
from tests.common.utils import Why, xfail_on_crosshair
2627

2728
# Use `pretty` instead of `repr` for building test names, so that set and dict
2829
# parameters print consistently across multiple worker processes with different
@@ -437,6 +438,7 @@ def test_decimals():
437438
assert minimal(st.decimals(), lambda f: f.is_finite() and f >= 1) == 1
438439

439440

441+
@xfail_on_crosshair(Why.undiscovered)
440442
def test_non_float_decimal():
441443
minimal(st.decimals(), lambda d: d.is_finite() and decimal.Decimal(float(d)) != d)
442444

0 commit comments

Comments
 (0)