Skip to content

Commit a343757

Browse files
committed
Add global settings for caching list_libraries
This allows us to enable and disable the cache for list_libraries on the fly. It also allows us to change the expiry period used when returning cached results.
1 parent 7c4cc4e commit a343757

File tree

4 files changed

+99
-28
lines changed

4 files changed

+99
-28
lines changed

arctic/_cache.py

+55-12
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,79 @@
33

44
from pymongo.errors import OperationFailure
55

6-
from ._config import CACHE_COLL, CACHE_DB
7-
86
logger = logging.getLogger(__name__)
97

8+
CACHE_COLL = 'cache'
9+
CACHE_DB = 'meta_db'
10+
CACHE_SETTINGS = 'cache_settings'
11+
DEFAULT_CACHE_EXPIRY = 3600
12+
1013

1114
class Cache:
12-
def __init__(self, client, cache_expiry=3600, cache_db=CACHE_DB, cache_col=CACHE_COLL):
15+
def __init__(self, client, cache_expiry=DEFAULT_CACHE_EXPIRY, cache_db=CACHE_DB, cache_col=CACHE_COLL):
1316
self._client = client
1417
self._cachedb = client[cache_db]
1518
self._cachecol = None
1619
try:
1720
if cache_col not in self._cachedb.list_collection_names():
1821
self._cachedb.create_collection(cache_col).create_index("date", expireAfterSeconds=cache_expiry)
1922
except OperationFailure as op:
20-
logging.debug("This is fine if you are not admin. The collection should already be created for you: %s", op)
23+
logging.info("This is fine if you are not admin. The collection should already be created for you: %s", op)
2124

2225
self._cachecol = self._cachedb[cache_col]
2326

24-
def get(self, key, newer_than_secs=-1):
27+
def _get_cache_settings(self):
    """
    Fetch the global cache settings document from the cache database.

    :return: The document returned by find_one() on the CACHE_SETTINGS collection (None if there is no
             document), or None if the lookup raises OperationFailure.
    """
    try:
        return self._cachedb[CACHE_SETTINGS].find_one()
    except OperationFailure as op:
        # Pass the values as logging arguments so the message is only built when DEBUG is actually emitted.
        logging.debug("Cannot access %s in db: %s. Error: %s", CACHE_SETTINGS, CACHE_DB, op)
        return None
33+
34+
def set_caching_state(self, enabled):
    """
    Used to enable or disable the caching globally by persisting the flag in the cache settings collection.

    :param enabled: True to enable caching, False to disable it. Any non-boolean value is rejected: an error
                    is logged and the stored settings are left untouched.
    :return: None
    """
    if not isinstance(enabled, bool):
        logging.error("Enabled should be a boolean type.")
        return

    # A single upsert covers every starting state: settings collection missing, present but empty, or already
    # holding a settings document. Checking list_collection_names() and then issuing a plain update_one
    # silently stored nothing when the collection existed without a document.
    result = self._cachedb[CACHE_SETTINGS].update_one(
        {},
        {
            '$set': {'enabled': enabled},
            # Seed the expiry only when the document is first created so an existing value is preserved.
            '$setOnInsert': {'cache_expiry': DEFAULT_CACHE_EXPIRY},
        },
        upsert=True
    )
    if result.upserted_id is not None:
        logging.info("Creating %s collection for cache settings", CACHE_SETTINGS)
    logging.info("Caching set to: %s", enabled)
52+
53+
def _is_not_expired(self, cached_data, newer_than_secs):
    """
    Check whether a cached entry is still fresh enough to be served.

    :param cached_data: Cached document; its 'date' field is compared against datetime.utcnow().
    :param newer_than_secs: Maximum tolerated age in seconds. None means use the expiry period from the cache
                            settings, falling back to DEFAULT_CACHE_EXPIRY.
    :return: True if the entry is younger than the applicable expiry period, False otherwise.
    """
    # Use the expiry period in the settings (or the default) if not overridden by the function argument.
    if newer_than_secs is not None:
        # Compare against None explicitly: 0 is a valid request meaning that no staleness is tolerated.
        expiry_period = newer_than_secs
    else:
        cache_settings = self._get_cache_settings() or {}
        # A settings document without an expiry falls back to the default instead of raising KeyError.
        expiry_period = cache_settings.get('cache_expiry', DEFAULT_CACHE_EXPIRY)

    return datetime.utcnow() < cached_data['date'] + timedelta(seconds=expiry_period)
62+
63+
def get(self, key, newer_than_secs=None):
2564
"""
2665
2766
:param key: Key for the dataset. eg. list_libraries.
28-
:param newer_than_secs: -1 to indicate use cache if available. Used to indicate what level of staleness
67+
:param newer_than_secs: None to indicate use cache if available. Used to indicate what level of staleness
2968
in seconds is tolerable.
3069
:return: None unless if there is non stale data present in the cache.
3170
"""
3271
try:
3372
if not self._cachecol:
3473
# Collection not created or no permissions to read from it.
3574
return None
36-
coll_data = self._cachecol.find_one({"type": key})
75+
cached_data = self._cachecol.find_one({"type": key})
3776
# Check that there is data in cache and it's not stale.
38-
if coll_data and (
39-
newer_than_secs == -1 or
40-
datetime.utcnow() < coll_data['date'] + timedelta(seconds=newer_than_secs)
41-
):
42-
return coll_data['data']
77+
if cached_data and self._is_not_expired(cached_data, newer_than_secs):
78+
return cached_data['data']
4379
except OperationFailure as op:
4480
logging.warning("Could not read from cache due to: %s. Ask your admin to give read permissions on %s:%s",
4581
op, CACHE_DB, CACHE_COLL)
@@ -83,3 +119,10 @@ def update_item_for_key(self, key, old, new):
83119
# This op is not atomic, but given the rarity of renaming a lib, it should not cause issues.
84120
self.delete_item_from_key(key, old)
85121
self.append(key, new)
122+
123+
def is_caching_enabled(self):
    """
    Report whether caching is globally enabled.

    :return: False only when a settings document exists and its 'enabled' flag is falsy; True otherwise.
    """
    # Caching is enabled unless explicitly disabled, so missing settings or a missing flag count as enabled.
    cache_settings = self._get_cache_settings() or {}
    return bool(cache_settings.get('enabled', True))

arctic/_config.py

-7
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,6 @@ class FwPointersCfg(Enum):
9494
ARCTIC_ASYNC_NWORKERS = os.environ.get('ARCTIC_ASYNC_NWORKERS', 4)
9595

9696

97-
# -------------------------------
98-
# Flag used for indicating caching levels. For now just for list_libraries.
99-
# -------------------------------
100-
ENABLE_CACHE = not bool(os.environ.get('DISABLE_CACHE'))
101-
CACHE_COLL = 'cache'
102-
CACHE_DB = 'meta_db'
103-
10497
# -------------------------------
10598
# Flag used to convert byte column/index/column names to unicode when read back.
10699
# -------------------------------

arctic/arctic.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from six import string_types
99

1010
from ._cache import Cache
11-
from ._config import ENABLE_CACHE
1211
from ._util import indent
1312
from .auth import authenticate, get_auth
1413
from .chunkstore import chunkstore
@@ -190,13 +189,20 @@ def __getstate__(self):
190189
def __setstate__(self, state):
191190
return Arctic.__init__(self, **state)
192191

193-
def list_libraries(self, newer_than_secs=-1):
192+
def is_caching_enabled(self):
    """
    Reports whether caching is currently enabled. This method only reads the state; it does not change it.

    Returns
    -------
    The value returned by the underlying cache's is_caching_enabled().
    """
    _ = self._conn  # Ensures the connection exists and cache is initialized with it.
    return self._cache.is_caching_enabled()
198+
199+
def list_libraries(self, newer_than_secs=None):
194200
"""
195201
Returns
196202
-------
197203
list of Arctic library names
198204
"""
199-
return self._list_libraries_cached(newer_than_secs) if ENABLE_CACHE else self._list_libraries()
205+
return self._list_libraries_cached(newer_than_secs) if self.is_caching_enabled() else self._list_libraries()
200206

201207
@mongo_retry
202208
def _list_libraries(self):
@@ -213,7 +219,7 @@ def _list_libraries(self):
213219
return libs
214220

215221
# Better to be pessimistic here and not retry.
216-
def _list_libraries_cached(self, newer_than_secs=-1):
222+
def _list_libraries_cached(self, newer_than_secs=None):
217223
"""
218224
Returns
219225
-------
@@ -222,11 +228,14 @@ def _list_libraries_cached(self, newer_than_secs=-1):
222228
"""
223229
_ = self._conn # Ensures the connection exists and cache is initialized with it.
224230
cache_data = self._cache.get('list_libraries', newer_than_secs)
225-
if cache_data:
226-
logger.debug('Library names are in cache.')
227-
return cache_data
228-
229-
return self._list_libraries()
231+
if not cache_data:
232+
# Try to refresh the cache.
233+
logging.debug("Cache has expired data, fetching from slow path and reloading cache.")
234+
libs = self._list_libraries()
235+
self._cache.set('list_libraries', libs)
236+
return libs
237+
238+
return cache_data
230239

231240
def reload_cache(self):
232241
_ = self._conn # Ensures the connection exists and cache is initialized with it.

tests/integration/test_arctic.py

+26
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,29 @@ def test_deleting_library_removes_it_from_cache(arctic):
315315
arctic.delete_library('test1')
316316

317317
assert arctic._list_libraries_cached() == arctic._list_libraries() == arctic.list_libraries() == ['test2']
318+
319+
320+
def test_disable_cache_by_settings(arctic):
    """list_libraries should follow the global caching flag as it is switched off and back on."""
    lib = 'test1'
    arctic.initialize_library(lib)

    # Should be enabled by default
    assert arctic.is_caching_enabled()
    assert arctic._list_libraries_cached() == arctic._list_libraries()

    arctic._cache.set_caching_state(enabled=False)
    assert not arctic.is_caching_enabled()

    # Should not return cached results now.
    with patch('arctic.arctic.Arctic._list_libraries', return_value=[lib]) as uncached_list_libraries:
        with patch('arctic.arctic.Arctic._list_libraries_cached', return_value=[lib]) as cached_list_libraries:
            arctic.list_libraries()
            uncached_list_libraries.assert_called()
            cached_list_libraries.assert_not_called()

    arctic._cache.set_caching_state(enabled=True)
    assert arctic.is_caching_enabled()

    # Should use cached data again.
    with patch('arctic.arctic.Arctic._list_libraries', return_value=[lib]) as uncached_list_libraries_e:
        with patch('arctic.arctic.Arctic._list_libraries_cached', return_value=[lib]) as cached_list_libraries_e:
            arctic.list_libraries()
            uncached_list_libraries_e.assert_not_called()
            cached_list_libraries_e.assert_called()

0 commit comments

Comments
 (0)