Skip to content

Commit c3e2fca

Browse files
authored
Merge pull request #4064 from tybug/ir-fast-cache
Use a more performant `LRUCache` for ir kwargs caches
2 parents 8c75b9e + 6a96344 commit c3e2fca

File tree

4 files changed

+84
-7
lines changed

4 files changed

+84
-7
lines changed

hypothesis-python/RELEASE.rst

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
RELEASE_TYPE: patch
2+
3+
This patch improves our example generation performance by adjusting our internal cache implementation.

hypothesis-python/src/hypothesis/internal/cache.py

+62
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# obtain one at https://mozilla.org/MPL/2.0/.
1010

1111
import threading
12+
from collections import OrderedDict
1213

1314
import attr
1415

@@ -282,3 +283,64 @@ def new_entry(self, key, value):
282283

283284
def on_access(self, key, value, score):
284285
return (2, self.tick())
286+
287+
288+
class LRUCache:
    """
    A drop-in replacement for a GenericCache (despite the lack of inheritance)
    in performance-critical environments. It turns out that GenericCache's heap
    balancing for arbitrary scores can be quite expensive compared to the doubly
    linked list approach of lru_cache or OrderedDict.

    This class is a pure LRU and does not provide any sort of affinity towards
    the number of accesses beyond recency. If soft-pinning entries which have
    been accessed at least once is important, use LRUReusedCache.
    """

    # Here are some nice performance references for lru_cache vs OrderedDict:
    # https://github.com/python/cpython/issues/72426#issuecomment-1093727671
    # https://discuss.python.org/t/simplify-lru-cache/18192/6
    #
    # We use OrderedDict here because it is unclear to me that we can provide
    # the same api as GenericCache without messing with @lru_cache internals.
    #
    # Anecdotally, OrderedDict seems quite competitive with lru_cache, but
    # perhaps that is localized to our access patterns.

    def __init__(self, max_size):
        # A zero-sized cache could never store anything; fail loudly instead.
        assert max_size > 0
        self.max_size = max_size
        # One independent OrderedDict per thread, so concurrent threads never
        # mutate the same underlying dict (see issue #2433 for the motivation).
        self._threadlocal = threading.local()

    @property
    def cache(self):
        # Lazily create this thread's cache on first access (EAFP: the common
        # case is that the attribute already exists).
        try:
            return self._threadlocal.cache
        except AttributeError:
            self._threadlocal.cache = OrderedDict()
            return self._threadlocal.cache

    def __setitem__(self, key, value):
        # Insert (or overwrite) and mark the key as most recently used.
        # Plain assignment does not reorder an existing key, hence the
        # explicit move_to_end.
        self.cache[key] = value
        self.cache.move_to_end(key)

        # Evict from the least-recently-used end until within capacity.
        while len(self.cache) > self.max_size:
            self.cache.popitem(last=False)

    def __getitem__(self, key):
        # A read also counts as "use": refresh the key's recency.
        val = self.cache[key]
        self.cache.move_to_end(key)
        return val

    def __iter__(self):
        # Iterates keys from least- to most-recently used.
        return iter(self.cache)

    def __len__(self):
        return len(self.cache)

    def __contains__(self, key):
        # NOTE: membership testing does not refresh recency.
        return key in self.cache

    # implement GenericCache interface, for tests
    def check_valid(self):
        pass

hypothesis-python/src/hypothesis/internal/conjecture/data.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
import attr
4242

4343
from hypothesis.errors import Frozen, InvalidArgument, StopTest
44-
from hypothesis.internal.cache import LRUReusedCache
44+
from hypothesis.internal.cache import LRUCache
4545
from hypothesis.internal.compat import add_note, floor, int_from_bytes, int_to_bytes
4646
from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float
4747
from hypothesis.internal.conjecture.junkdrawer import (
@@ -200,9 +200,11 @@ def structural_coverage(label: int) -> StructuralCoverageTag:
200200
NASTY_FLOATS = list(map(float, NASTY_FLOATS))
201201
NASTY_FLOATS.extend([-x for x in NASTY_FLOATS])
202202

203-
FLOAT_INIT_LOGIC_CACHE = LRUReusedCache(4096)
204-
205-
POOLED_KWARGS_CACHE = LRUReusedCache(4096)
203+
# These caches, especially the kwargs cache, can be quite hot and so we prefer
204+
# LRUCache over LRUReusedCache for performance. We lose scan resistance, but
205+
# that's probably fine here.
206+
FLOAT_INIT_LOGIC_CACHE = LRUCache(4096)
207+
POOLED_KWARGS_CACHE = LRUCache(4096)
206208

207209
DRAW_STRING_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large"
208210

hypothesis-python/tests/cover/test_cache_implementation.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
strategies as st,
2525
)
2626
from hypothesis.errors import InvalidArgument
27-
from hypothesis.internal.cache import GenericCache, LRUReusedCache
27+
from hypothesis.internal.cache import GenericCache, LRUCache, LRUReusedCache
2828

2929
from tests.common.utils import skipif_emscripten
3030

3131

32-
class LRUCache(GenericCache):
32+
class LRUCacheAlternative(GenericCache):
3333
__slots__ = ("__tick",)
3434

3535
def __init__(self, max_size):
@@ -88,7 +88,8 @@ def on_access(self, key, value, score):
8888

8989

9090
@pytest.mark.parametrize(
91-
"implementation", [LRUCache, LFUCache, LRUReusedCache, ValueScored, RandomCache]
91+
"implementation",
92+
[LRUCache, LFUCache, LRUReusedCache, ValueScored, RandomCache, LRUCacheAlternative],
9293
)
9394
@example(writes=[(0, 0), (3, 0), (1, 0), (2, 0), (2, 0), (1, 0)], size=4)
9495
@example(writes=[(0, 0)], size=1)
@@ -308,6 +309,15 @@ def test_iterates_over_remaining_keys():
308309
assert sorted(cache) == [1, 2]
309310

310311

312+
def test_lru_cache_is_actually_lru():
    """Eviction must follow recency of *access*, not mere insertion order."""
    cache = LRUCache(2)
    cache[1] = 1  # order: [1]
    cache[2] = 2  # order: [1, 2]
    # Assert the value rather than using a bare expression statement: this
    # both refreshes key 1's recency and checks the read path. Order: [2, 1].
    assert cache[1] == 1
    cache[3] = 2  # over capacity -> drop least recently used (2) -> [1, 3]
    assert list(cache) == [1, 3]
319+
320+
311321
@skipif_emscripten
312322
def test_cache_is_threadsafe_issue_2433_regression():
313323
errors = []

0 commit comments

Comments
 (0)