Skip to content

Commit 54c05be

Browse files
authored
Merge pull request pandas-dev#772 from shashank88/cache_fixes
Add global settings for caching list_libraries
2 parents 7c4cc4e + df1681e commit 54c05be

File tree

5 files changed

+118
-32
lines changed

5 files changed

+118
-32
lines changed

arctic/_cache.py

+62-12
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,86 @@
33

44
from pymongo.errors import OperationFailure
55

6-
from ._config import CACHE_COLL, CACHE_DB
7-
86
logger = logging.getLogger(__name__)
97

8+
CACHE_COLL = 'cache'
9+
CACHE_DB = 'meta_db'
10+
CACHE_SETTINGS = 'settings'
11+
CACHE_SETTINGS_KEY = 'cache'
12+
"""
13+
Sample cache_settings collection entry:
14+
meta_db.cache_settings.insertOne({"type": "cache", "enabled": true, "cache_expiry": 600})
15+
meta_db.cache_settings.find(): { "_id" : ObjectId("5cd5388b9fddfbe6e968f11b"), "type": "cache", "enabled" : false, "cache_expiry" : 600 }
16+
"""
17+
DEFAULT_CACHE_EXPIRY = 3600
18+
1019

1120
class Cache:
12-
def __init__(self, client, cache_expiry=3600, cache_db=CACHE_DB, cache_col=CACHE_COLL):
21+
def __init__(self, client, cache_expiry=DEFAULT_CACHE_EXPIRY, cache_db=CACHE_DB, cache_col=CACHE_COLL):
1322
self._client = client
1423
self._cachedb = client[cache_db]
1524
self._cachecol = None
1625
try:
1726
if cache_col not in self._cachedb.list_collection_names():
1827
self._cachedb.create_collection(cache_col).create_index("date", expireAfterSeconds=cache_expiry)
1928
except OperationFailure as op:
20-
logging.debug("This is fine if you are not admin. The collection should already be created for you: %s", op)
29+
logging.info("This is fine if you are not admin. The collection should already be created for you: %s", op)
2130

2231
self._cachecol = self._cachedb[cache_col]
2332

24-
def get(self, key, newer_than_secs=-1):
33+
def _get_cache_settings(self):
34+
try:
35+
return self._cachedb[CACHE_SETTINGS].find_one({'type': CACHE_SETTINGS_KEY})
36+
except OperationFailure as op:
37+
logging.debug("Cannot access %s in db: %s. Error: %s" % (CACHE_SETTINGS, CACHE_DB, op))
38+
return None
39+
40+
def set_caching_state(self, enabled):
41+
"""
42+
Used to enable or disable the caching globally
43+
:return:
44+
"""
45+
if not isinstance(enabled, bool):
46+
logging.error("Enabled should be a boolean type.")
47+
return
48+
49+
if CACHE_SETTINGS not in self._cachedb.list_collection_names():
50+
logging.info("Creating %s collection for cache settings" % CACHE_SETTINGS)
51+
self._cachedb[CACHE_SETTINGS].insert_one({
52+
'type': CACHE_SETTINGS_KEY,
53+
'enabled': enabled,
54+
'cache_expiry': DEFAULT_CACHE_EXPIRY
55+
})
56+
else:
57+
self._cachedb[CACHE_SETTINGS].update_one({'type': CACHE_SETTINGS_KEY}, {'$set': {'enabled': enabled}})
58+
logging.info("Caching set to: %s" % enabled)
59+
60+
def _is_not_expired(self, cached_data, newer_than_secs):
61+
# Use the expiry period in the settings (or the default) if not overriden by the function argument.
62+
if newer_than_secs:
63+
expiry_period = newer_than_secs
64+
else:
65+
cache_settings = self._get_cache_settings()
66+
expiry_period = cache_settings['cache_expiry'] if cache_settings else DEFAULT_CACHE_EXPIRY
67+
68+
return datetime.utcnow() < cached_data['date'] + timedelta(seconds=expiry_period)
69+
70+
def get(self, key, newer_than_secs=None):
2571
"""
2672
2773
:param key: Key for the dataset. eg. list_libraries.
28-
:param newer_than_secs: -1 to indicate use cache if available. Used to indicate what level of staleness
74+
:param newer_than_secs: None to indicate use cache if available. Used to indicate what level of staleness
2975
in seconds is tolerable.
3076
:return: None unless if there is non stale data present in the cache.
3177
"""
3278
try:
3379
if not self._cachecol:
3480
# Collection not created or no permissions to read from it.
3581
return None
36-
coll_data = self._cachecol.find_one({"type": key})
82+
cached_data = self._cachecol.find_one({"type": key})
3783
# Check that there is data in cache and it's not stale.
38-
if coll_data and (
39-
newer_than_secs == -1 or
40-
datetime.utcnow() < coll_data['date'] + timedelta(seconds=newer_than_secs)
41-
):
42-
return coll_data['data']
84+
if cached_data and self._is_not_expired(cached_data, newer_than_secs):
85+
return cached_data['data']
4386
except OperationFailure as op:
4487
logging.warning("Could not read from cache due to: %s. Ask your admin to give read permissions on %s:%s",
4588
op, CACHE_DB, CACHE_COLL)
@@ -83,3 +126,10 @@ def update_item_for_key(self, key, old, new):
83126
# This op is not atomic, but given the rarity of renaming a lib, it should not cause issues.
84127
self.delete_item_from_key(key, old)
85128
self.append(key, new)
129+
130+
def is_caching_enabled(self):
131+
# Caching is enabled unless explicitly disabled.
132+
cache_settings = self._get_cache_settings()
133+
if cache_settings and not cache_settings['enabled']:
134+
return False
135+
return True

arctic/_config.py

-7
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,6 @@ class FwPointersCfg(Enum):
9494
ARCTIC_ASYNC_NWORKERS = os.environ.get('ARCTIC_ASYNC_NWORKERS', 4)
9595

9696

97-
# -------------------------------
98-
# Flag used for indicating caching levels. For now just for list_libraries.
99-
# -------------------------------
100-
ENABLE_CACHE = not bool(os.environ.get('DISABLE_CACHE'))
101-
CACHE_COLL = 'cache'
102-
CACHE_DB = 'meta_db'
103-
10497
# -------------------------------
10598
# Flag used to convert byte column/index/column names to unicode when read back.
10699
# -------------------------------

arctic/arctic.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from six import string_types
99

1010
from ._cache import Cache
11-
from ._config import ENABLE_CACHE
1211
from ._util import indent
1312
from .auth import authenticate, get_auth
1413
from .chunkstore import chunkstore
@@ -190,13 +189,20 @@ def __getstate__(self):
190189
def __setstate__(self, state):
191190
return Arctic.__init__(self, **state)
192191

193-
def list_libraries(self, newer_than_secs=-1):
192+
def is_caching_enabled(self):
193+
"""
194+
Allows people to enable or disable caching for list_libraries globally.
195+
"""
196+
_ = self._conn # Ensures the connection exists and cache is initialized with it.
197+
return self._cache.is_caching_enabled()
198+
199+
def list_libraries(self, newer_than_secs=None):
194200
"""
195201
Returns
196202
-------
197203
list of Arctic library names
198204
"""
199-
return self._list_libraries_cached(newer_than_secs) if ENABLE_CACHE else self._list_libraries()
205+
return self._list_libraries_cached(newer_than_secs) if self.is_caching_enabled() else self._list_libraries()
200206

201207
@mongo_retry
202208
def _list_libraries(self):
@@ -213,7 +219,7 @@ def _list_libraries(self):
213219
return libs
214220

215221
# Better to be pessimistic here and not retry.
216-
def _list_libraries_cached(self, newer_than_secs=-1):
222+
def _list_libraries_cached(self, newer_than_secs=None):
217223
"""
218224
Returns
219225
-------
@@ -222,11 +228,14 @@ def _list_libraries_cached(self, newer_than_secs=-1):
222228
"""
223229
_ = self._conn # Ensures the connection exists and cache is initialized with it.
224230
cache_data = self._cache.get('list_libraries', newer_than_secs)
225-
if cache_data:
226-
logger.debug('Library names are in cache.')
227-
return cache_data
228-
229-
return self._list_libraries()
231+
if not cache_data:
232+
# Try to refresh the cache.
233+
logging.debug("Cache has expired data, fetching from slow path and reloading cache.")
234+
libs = self._list_libraries()
235+
self._cache.set('list_libraries', libs)
236+
return libs
237+
238+
return cache_data
230239

231240
def reload_cache(self):
232241
_ = self._conn # Ensures the connection exists and cache is initialized with it.

tests/integration/test_arctic.py

+26
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,29 @@ def test_deleting_library_removes_it_from_cache(arctic):
315315
arctic.delete_library('test1')
316316

317317
assert arctic._list_libraries_cached() == arctic._list_libraries() == arctic.list_libraries() == ['test2']
318+
319+
320+
def test_disable_cache_by_settings(arctic):
321+
lib = 'test1'
322+
arctic.initialize_library(lib)
323+
324+
# Should be enabled by default
325+
assert arctic._list_libraries_cached() == arctic._list_libraries()
326+
327+
arctic._cache.set_caching_state(enabled=False)
328+
329+
# Should not return cached results now.
330+
with patch('arctic.arctic.Arctic._list_libraries', return_value=[lib]) as uncached_list_libraries:
331+
with patch('arctic.arctic.Arctic._list_libraries_cached', return_value=[lib]) as cached_list_libraries:
332+
arctic.list_libraries()
333+
uncached_list_libraries.assert_called()
334+
cached_list_libraries.assert_not_called()
335+
336+
arctic._cache.set_caching_state(enabled=True)
337+
338+
# Should used cached data again.
339+
with patch('arctic.arctic.Arctic._list_libraries', return_value=[lib]) as uncached_list_libraries_e:
340+
with patch('arctic.arctic.Arctic._list_libraries_cached', return_value=[lib]) as cached_list_libraries_e:
341+
arctic.list_libraries()
342+
uncached_list_libraries_e.assert_not_called()
343+
cached_list_libraries_e.assert_called()

tests/unit/test_arctic.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
def test_arctic_lazy_init():
2020
with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True) as mc, \
2121
patch('arctic.arctic.mongo_retry', side_effect=lambda x: x, autospec=True), \
22+
patch('arctic._cache.Cache._is_not_expired', return_value=True), \
2223
patch('arctic.arctic.get_auth', autospec=True) as ga:
2324
store = Arctic('cluster')
2425
assert not mc.called
@@ -30,6 +31,7 @@ def test_arctic_lazy_init():
3031
def test_arctic_lazy_init_ssl_true():
3132
with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True) as mc, \
3233
patch('arctic.arctic.mongo_retry', side_effect=lambda x: x, autospec=True), \
34+
patch('arctic._cache.Cache._is_not_expired', return_value=True), \
3335
patch('arctic.arctic.get_auth', autospec=True) as ga:
3436
store = Arctic('cluster', ssl=True)
3537
assert not mc.called
@@ -48,6 +50,7 @@ def test_arctic_lazy_init_ssl_true():
4850
def test_connection_passed_warning_raised():
4951
with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True), \
5052
patch('arctic.arctic.mongo_retry', side_effect=lambda x: x, autospec=True), \
53+
patch('arctic._cache.Cache._is_not_expired', return_value=True), \
5154
patch('arctic.arctic.get_auth', autospec=True), \
5255
patch('arctic.arctic.logger') as lg:
5356
magic_mock = MagicMock(nodes={("host", "port")})
@@ -62,7 +65,8 @@ def test_connection_passed_warning_raised():
6265
def test_arctic_auth():
6366
with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True), \
6467
patch('arctic.arctic.mongo_retry', autospec=True), \
65-
patch('arctic.arctic.get_auth', autospec=True) as ga:
68+
patch('arctic._cache.Cache._is_not_expired', return_value=True), \
69+
patch('arctic.arctic.get_auth', autospec=True) as ga:
6670
ga.return_value = Credential('db', 'admin_user', 'admin_pass')
6771
store = Arctic('cluster')
6872
# do something to trigger lazy arctic init
@@ -86,7 +90,8 @@ def test_arctic_auth():
8690
def test_arctic_auth_custom_app_name():
8791
with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True), \
8892
patch('arctic.arctic.mongo_retry', autospec=True), \
89-
patch('arctic.arctic.get_auth', autospec=True) as ga:
93+
patch('arctic._cache.Cache._is_not_expired', return_value=True), \
94+
patch('arctic.arctic.get_auth', autospec=True) as ga:
9095
ga.return_value = Credential('db', 'admin_user', 'admin_pass')
9196
store = Arctic('cluster', app_name=sentinel.app_name)
9297
# do something to trigger lazy arctic init
@@ -108,7 +113,8 @@ def test_arctic_auth_custom_app_name():
108113
def test_arctic_connect_hostname():
109114
with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True) as mc, \
110115
patch('arctic.arctic.mongo_retry', autospec=True) as ar, \
111-
patch('arctic.arctic.get_mongodb_uri', autospec=True) as gmu:
116+
patch('arctic._cache.Cache._is_not_expired', return_value=True), \
117+
patch('arctic.arctic.get_mongodb_uri', autospec=True) as gmu:
112118
store = Arctic('hostname', socketTimeoutMS=sentinel.socket_timeout,
113119
connectTimeoutMS=sentinel.connect_timeout,
114120
serverSelectionTimeoutMS=sentinel.select_timeout)
@@ -124,6 +130,7 @@ def test_arctic_connect_with_environment_name():
124130
with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True) as mc, \
125131
patch('arctic.arctic.mongo_retry', autospec=True) as ar, \
126132
patch('arctic.arctic.get_auth', autospec=True), \
133+
patch('arctic._cache.Cache._is_not_expired', return_value=True), \
127134
patch('arctic.arctic.get_mongodb_uri') as gmfe:
128135
store = Arctic('live', socketTimeoutMS=sentinel.socket_timeout,
129136
connectTimeoutMS=sentinel.connect_timeout,
@@ -453,7 +460,8 @@ def flaky_auth(*args, **kwargs):
453460

454461
def test_reset():
455462
c = MagicMock()
456-
with patch('pymongo.MongoClient', return_value=c, autospec=True) as mc:
463+
with patch('pymongo.MongoClient', return_value=c, autospec=True) as mc, \
464+
patch('arctic._cache.Cache._is_not_expired', return_value=True):
457465
store = Arctic('hostname')
458466
# do something to trigger lazy arctic init
459467
store.list_libraries()

0 commit comments

Comments
 (0)