Skip to content

Commit 7b1d106

Browse files
authored
Merge pull request #3979 from jobh/gc-accounting
Account for time spent in garbage collection
2 parents 54d029d + eec66ed commit 7b1d106

File tree

15 files changed

+304
-42
lines changed

15 files changed

+304
-42
lines changed

hypothesis-python/RELEASE-sample.rst

+10-6
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,17 @@ which should:
2121
an internal invariant." (the complete changelog for version 6.99.11)
2222
- use ``double backticks`` for verbatim code,
2323
- use Sphinx cross-references to any functions or classes mentioned:
24-
- :pypi:`package` for links to external packages,
24+
- :pypi:`package` for links to external packages.
2525
- :func:`package.function` for link to functions, where the link text will
26-
be ``package.function``, or :func:`~package.function` to show ``function``,
27-
- :class:`package.class` for link to classes (abbreviated as above),
28-
- :issue:`issue-number` for referencing issues,
29-
- :doc:`link text <chapter#anchor>` for documentation references
30-
(``https://hypothesis.readthedocs.io/en/latest/<chapter>.html#<anchor>``)
26+
be ``package.function``, or :func:`~package.function` to show ``function``.
27+
- :class:`package.class` for link to classes (abbreviated as above).
28+
- :issue:`issue-number` for referencing issues.
29+
- Similarly, :pull:`pr-number` can be used for PRs, but it's usually
30+
preferred to refer to version numbers such as :ref:`version 6.98.9 <v6.98.9>,
31+
as they are meaningful to end users.
32+
- :doc:`link text <chapter#anchor>` for documentation references.
33+
- `link text <https://hypothesis.readthedocs.io/en/latest/chapter.html#anchor>`__
34+
is the same link, for general web addresses.
3135
- finish with a note of thanks from the maintainers. If this is your first
3236
contribution, don't forget to add yourself to AUTHORS.rst!
3337

hypothesis-python/RELEASE.rst

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
RELEASE_TYPE: patch
2+
3+
Account for time spent in garbage collection during tests, to avoid
4+
flaky ``DeadlineExceeded`` errors as seen in :issue:`3975`.
5+
6+
Also fixes overcounting of stateful run times,
7+
a minor observability bug dating to :ref:`version 6.98.9 <v6.98.9>`
8+
(:pull:`3890`).

hypothesis-python/docs/schema_observations.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
},
4747
"timing": {
4848
"type": "object",
49-
"description": "The time in seconds taken by non-overlapping parts of this test case. Hypothesis reports execute:test, and generate:{argname} for each argument.",
49+
"description": "The time in seconds taken by non-overlapping parts of this test case. Hypothesis reports execute:test, overall:gc, and generate:{argname} for each argument.",
5050
"additionalProperties": {
5151
"type": "number",
5252
"minimum": 0
@@ -98,4 +98,4 @@
9898
"additionalProperties": false
9999
}
100100
]
101-
}
101+
}

hypothesis-python/src/hypothesis/core.py

+25-7
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@
7777
)
7878
from hypothesis.internal.conjecture.data import ConjectureData, Status
7979
from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner
80-
from hypothesis.internal.conjecture.junkdrawer import ensure_free_stackframes
80+
from hypothesis.internal.conjecture.junkdrawer import (
81+
ensure_free_stackframes,
82+
gc_cumulative_time,
83+
)
8184
from hypothesis.internal.conjecture.shrinker import sort_key
8285
from hypothesis.internal.entropy import deterministic_PRNG
8386
from hypothesis.internal.escalation import (
@@ -820,30 +823,45 @@ def execute_once(
820823
self._string_repr = ""
821824
text_repr = None
822825
if self.settings.deadline is None and not TESTCASE_CALLBACKS:
823-
test = self.test
826+
827+
@proxies(self.test)
828+
def test(*args, **kwargs):
829+
with ensure_free_stackframes():
830+
return self.test(*args, **kwargs)
831+
824832
else:
825833

826834
@proxies(self.test)
827835
def test(*args, **kwargs):
828836
arg_drawtime = math.fsum(data.draw_times.values())
837+
arg_stateful = math.fsum(data._stateful_run_times.values())
838+
arg_gctime = gc_cumulative_time()
829839
start = time.perf_counter()
830840
try:
831-
result = self.test(*args, **kwargs)
841+
with ensure_free_stackframes():
842+
result = self.test(*args, **kwargs)
832843
finally:
833844
finish = time.perf_counter()
834845
in_drawtime = math.fsum(data.draw_times.values()) - arg_drawtime
835-
runtime = datetime.timedelta(seconds=finish - start - in_drawtime)
846+
in_stateful = (
847+
math.fsum(data._stateful_run_times.values()) - arg_stateful
848+
)
849+
in_gctime = gc_cumulative_time() - arg_gctime
850+
runtime = finish - start - in_drawtime - in_stateful - in_gctime
836851
self._timing_features = {
837-
"execute:test": finish - start - in_drawtime,
852+
"execute:test": runtime,
853+
"overall:gc": in_gctime,
838854
**data.draw_times,
839855
**data._stateful_run_times,
840856
}
841857

842858
if (current_deadline := self.settings.deadline) is not None:
843859
if not is_final:
844860
current_deadline = (current_deadline // 4) * 5
845-
if runtime >= current_deadline:
846-
raise DeadlineExceeded(runtime, self.settings.deadline)
861+
if runtime >= current_deadline.total_seconds():
862+
raise DeadlineExceeded(
863+
datetime.timedelta(seconds=runtime), self.settings.deadline
864+
)
847865
return result
848866

849867
def run(data):

hypothesis-python/src/hypothesis/internal/conjecture/data.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@
4444
from hypothesis.internal.cache import LRUReusedCache
4545
from hypothesis.internal.compat import add_note, floor, int_from_bytes, int_to_bytes
4646
from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
47-
from hypothesis.internal.conjecture.junkdrawer import IntList, uniform
47+
from hypothesis.internal.conjecture.junkdrawer import (
48+
IntList,
49+
gc_cumulative_time,
50+
uniform,
51+
)
4852
from hypothesis.internal.conjecture.utils import (
4953
INT_SIZES,
5054
INT_SIZES_SAMPLER,
@@ -1980,6 +1984,7 @@ def __init__(
19801984
self.testcounter = global_test_counter
19811985
global_test_counter += 1
19821986
self.start_time = time.perf_counter()
1987+
self.gc_start_time = gc_cumulative_time()
19831988
self.events: Dict[str, Union[str, int, float]] = {}
19841989
self.forced_indices: "Set[int]" = set()
19851990
self.interesting_origin: Optional[InterestingOrigin] = None
@@ -2420,6 +2425,7 @@ def draw(
24202425
# where we cache something expensive, this led to Flaky deadline errors!
24212426
# See https://github.com/HypothesisWorks/hypothesis/issues/2108
24222427
start_time = time.perf_counter()
2428+
gc_start_time = gc_cumulative_time()
24232429

24242430
strategy.validate()
24252431

@@ -2443,7 +2449,10 @@ def draw(
24432449
try:
24442450
return strategy.do_draw(self)
24452451
finally:
2446-
self.draw_times[key] = time.perf_counter() - start_time
2452+
# Subtract the time spent in GC to avoid overcounting, as it is
2453+
# accounted for at the overall example level.
2454+
in_gctime = gc_cumulative_time() - gc_start_time
2455+
self.draw_times[key] = time.perf_counter() - start_time - in_gctime
24472456
except Exception as err:
24482457
add_note(err, f"while generating {key[9:]!r} from {strategy!r}")
24492458
raise
@@ -2520,6 +2529,7 @@ def freeze(self) -> None:
25202529
assert isinstance(self.buffer, bytes)
25212530
return
25222531
self.finish_time = time.perf_counter()
2532+
self.gc_finish_time = gc_cumulative_time()
25232533
assert len(self.buffer) == self.index
25242534

25252535
# Always finish by closing all remaining examples so that we have a

hypothesis-python/src/hypothesis/internal/conjecture/engine.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ class CallStats(TypedDict):
168168
status: str
169169
runtime: float
170170
drawtime: float
171+
gctime: float
171172
events: List[str]
172173

173174

@@ -298,7 +299,9 @@ def __stoppable_test_function(self, data: ConjectureData) -> None:
298299
"""
299300
# We ensure that the test has this much stack space remaining, no
300301
# matter the size of the stack when called, to de-flake RecursionErrors
301-
# (#2494, #3671).
302+
# (#2494, #3671). Note, this covers the data generation part of the test;
303+
# the actual test execution is additionally protected at the call site
304+
# in hypothesis.core.execute_once.
302305
with ensure_free_stackframes():
303306
try:
304307
self._test_function(data)
@@ -430,6 +433,7 @@ def test_function(self, data: ConjectureData) -> None:
430433
"status": data.status.name.lower(),
431434
"runtime": data.finish_time - data.start_time,
432435
"drawtime": math.fsum(data.draw_times.values()),
436+
"gctime": data.gc_finish_time - data.gc_start_time,
433437
"events": sorted(
434438
k if v == "" else f"{k}: {v}" for k, v in data.events.items()
435439
),

hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py

+51
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
anything that lives here, please move it."""
1414

1515
import array
16+
import gc
1617
import sys
18+
import time
1719
import warnings
1820
from random import Random
1921
from typing import (
@@ -413,3 +415,52 @@ def find(self, condition: Callable[[T], bool]) -> T:
413415
self.__values.append(value)
414416
return value
415417
raise NotFound("No values satisfying condition")
418+
419+
420+
_gc_initialized = False
421+
_gc_start = 0
422+
_gc_cumulative_time = 0
423+
424+
425+
def gc_cumulative_time() -> float:
426+
global _gc_initialized
427+
if not _gc_initialized:
428+
if hasattr(gc, "callbacks"):
429+
# CPython
430+
def gc_callback(phase, info):
431+
global _gc_start, _gc_cumulative_time
432+
try:
433+
now = time.perf_counter()
434+
if phase == "start":
435+
_gc_start = now
436+
elif phase == "stop" and _gc_start > 0:
437+
_gc_cumulative_time += now - _gc_start # pragma: no cover # ??
438+
except RecursionError: # pragma: no cover
439+
# Avoid flakiness via UnraisableException, which is caught and
440+
# warned by pytest. The actual callback (this function) is
441+
# validated to never trigger a RecursionError itself when
442+
# when called by gc.collect.
443+
# Anyway, we should hit the same error on "start"
444+
# and "stop", but to ensure we don't get out of sync we just
445+
# signal that there is no matching start.
446+
_gc_start = 0
447+
return
448+
449+
gc.callbacks.insert(0, gc_callback)
450+
elif hasattr(gc, "hooks"): # pragma: no cover # pypy only
451+
# PyPy
452+
def hook(stats):
453+
global _gc_cumulative_time
454+
try:
455+
_gc_cumulative_time += stats.duration
456+
except RecursionError:
457+
pass
458+
459+
if gc.hooks.on_gc_minor is None:
460+
gc.hooks.on_gc_minor = hook
461+
if gc.hooks.on_gc_collect_step is None:
462+
gc.hooks.on_gc_collect_step = hook
463+
464+
_gc_initialized = True
465+
466+
return _gc_cumulative_time

hypothesis-python/src/hypothesis/internal/escalation.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,12 @@ def get_trimmed_traceback(exception=None):
8787
else:
8888
tb = exception.__traceback__
8989
# Avoid trimming the traceback if we're in verbose mode, or the error
90-
# was raised inside Hypothesis
90+
# was raised inside Hypothesis. Additionally, the environment variable
91+
# HYPOTHESIS_NO_TRACEBACK_TRIM is respected if nonempty, because verbose
92+
# mode is prohibitively slow when debugging strategy recursion errors.
9193
if (
9294
tb is None
95+
or os.environ.get("HYPOTHESIS_NO_TRACEBACK_TRIM", None)
9396
or hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug
9497
or is_hypothesis_file(traceback.extract_tb(tb)[-1][0])
9598
and not isinstance(exception, _Trimmable)

hypothesis-python/src/hypothesis/internal/scrutineer.py

+43-19
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import functools
1212
import os
13+
import re
1314
import subprocess
1415
import sys
1516
import types
@@ -58,15 +59,18 @@ def __init__(self):
5859
self._previous_location = None
5960

6061
def trace(self, frame, event, arg):
61-
if event == "call":
62-
return self.trace
63-
elif event == "line":
64-
# manual inlining of self.trace_line for performance.
65-
fname = frame.f_code.co_filename
66-
if should_trace_file(fname):
67-
current_location = (fname, frame.f_lineno)
68-
self.branches.add((self._previous_location, current_location))
69-
self._previous_location = current_location
62+
try:
63+
if event == "call":
64+
return self.trace
65+
elif event == "line":
66+
# manual inlining of self.trace_line for performance.
67+
fname = frame.f_code.co_filename
68+
if should_trace_file(fname):
69+
current_location = (fname, frame.f_lineno)
70+
self.branches.add((self._previous_location, current_location))
71+
self._previous_location = current_location
72+
except RecursionError:
73+
pass
7074

7175
def trace_line(self, code: types.CodeType, line_number: int) -> None:
7276
fname = code.co_filename
@@ -104,19 +108,38 @@ def __exit__(self, *args, **kwargs):
104108
# a contextmanager; this is probably after the fault has been triggered.
105109
# Similar reasoning applies to a few other standard-library modules: even
106110
# if the fault was later, these still aren't useful locations to report!
107-
f"{sep}contextlib.py",
108-
f"{sep}inspect.py",
109-
f"{sep}re.py",
110-
f"{sep}re{sep}__init__.py", # refactored in Python 3.11
111-
f"{sep}warnings.py",
111+
# Note: The list is post-processed, so use plain "/" for separator here.
112+
"/contextlib.py",
113+
"/inspect.py",
114+
"/re.py",
115+
"/re/__init__.py", # refactored in Python 3.11
116+
"/warnings.py",
112117
# Quite rarely, the first AFNP line is in Pytest's internals.
113-
f"{sep}_pytest{sep}assertion{sep}__init__.py",
114-
f"{sep}_pytest{sep}assertion{sep}rewrite.py",
115-
f"{sep}_pytest{sep}_io{sep}saferepr.py",
116-
f"{sep}pluggy{sep}_result.py",
118+
"/_pytest/_io/saferepr.py",
119+
"/_pytest/assertion/*.py",
120+
"/_pytest/config/__init__.py",
121+
"/_pytest/pytester.py",
122+
"/pluggy/_*.py",
123+
"/reprlib.py",
124+
"/typing.py",
125+
"/conftest.py",
117126
)
118127

119128

129+
def _glob_to_re(locs):
130+
"""Translate a list of glob patterns to a combined regular expression.
131+
Only the * wildcard is supported, and patterns including special
132+
characters will only work by chance."""
133+
# fnmatch.translate is not an option since its "*" consumes path sep
134+
return "|".join(
135+
loc.replace("*", r"[^/]+")
136+
.replace(".", re.escape("."))
137+
.replace("/", re.escape(sep))
138+
+ r"\Z" # right anchored
139+
for loc in locs
140+
)
141+
142+
120143
def get_explaining_locations(traces):
121144
# Traces is a dict[interesting_origin | None, set[frozenset[tuple[str, int]]]]
122145
# Each trace in the set might later become a Counter instead of frozenset.
@@ -159,8 +182,9 @@ def get_explaining_locations(traces):
159182
# The last step is to filter out explanations that we know would be uninformative.
160183
# When this is the first AFNP location, we conclude that Scrutineer missed the
161184
# real divergence (earlier in the trace) and drop that unhelpful explanation.
185+
filter_regex = re.compile(_glob_to_re(UNHELPFUL_LOCATIONS))
162186
return {
163-
origin: {loc for loc in afnp_locs if not loc[0].endswith(UNHELPFUL_LOCATIONS)}
187+
origin: {loc for loc in afnp_locs if not filter_regex.search(loc[0])}
164188
for origin, afnp_locs in explanations.items()
165189
}
166190

hypothesis-python/src/hypothesis/stateful.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
from hypothesis.internal.compat import add_note
5151
from hypothesis.internal.conjecture import utils as cu
5252
from hypothesis.internal.conjecture.engine import BUFFER_SIZE
53+
from hypothesis.internal.conjecture.junkdrawer import gc_cumulative_time
5354
from hypothesis.internal.healthcheck import fail_health_check
5455
from hypothesis.internal.observability import TESTCASE_CALLBACKS
5556
from hypothesis.internal.reflection import (
@@ -158,6 +159,7 @@ def output(s):
158159
must_stop = True
159160

160161
start_draw = perf_counter()
162+
start_gc = gc_cumulative_time()
161163
if cd.draw_boolean(p=2**-16, forced=must_stop):
162164
break
163165
steps_run += 1
@@ -175,7 +177,8 @@ def output(s):
175177
rule, data = cd.draw(machine._rules_strategy)
176178
draw_label = f"generate:rule:{rule.function.__name__}"
177179
cd.draw_times.setdefault(draw_label, 0.0)
178-
cd.draw_times[draw_label] += perf_counter() - start_draw
180+
in_gctime = gc_cumulative_time() - start_gc
181+
cd.draw_times[draw_label] += perf_counter() - start_draw - in_gctime
179182

180183
# Pretty-print the values this rule was called with *before* calling
181184
# _add_result_to_targets, to avoid printing arguments which are also
@@ -196,8 +199,10 @@ def output(s):
196199

197200
label = f"execute:rule:{rule.function.__name__}"
198201
start = perf_counter()
202+
start_gc = gc_cumulative_time()
199203
result = rule.function(machine, **data)
200-
cd._stateful_run_times[label] += perf_counter() - start
204+
in_gctime = gc_cumulative_time() - start_gc
205+
cd._stateful_run_times[label] += perf_counter() - start - in_gctime
201206

202207
if rule.targets:
203208
if isinstance(result, MultipleResults):

0 commit comments

Comments
 (0)