Skip to content

Commit 1fd3500

Browse files
authored
Allow sharding on keys other than symbol, default BSONStore to shard on _id (pandas-dev#374)
* Allow sharding on keys other than symbol, default BSONStore to shard on _id
1 parent b6f09d9 commit 1fd3500

File tree

3 files changed

+31
-8
lines changed

3 files changed

+31
-8
lines changed

arctic/_util.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,24 @@ def are_equals(o1, o2, **kwargs):
2323
return False
2424

2525

26-
def enable_sharding(arctic, library_name, hashed=True):
26+
def enable_sharding(arctic, library_name, hashed=True, key='symbol'):
27+
"""
28+
Enable sharding on a library
29+
30+
Parameters:
31+
-----------
32+
arctic: `arctic.Arctic` Arctic class
33+
34+
library_name: `basestring` library name
35+
36+
hashed: `bool` if True, use hashed sharding, if False, use range sharding
37+
See https://docs.mongodb.com/manual/core/hashed-sharding/,
38+
as well as https://docs.mongodb.com/manual/core/ranged-sharding/ for details.
39+
40+
key: `basestring` key to be used for sharding. Defaults to 'symbol', applicable to
41+
all of Arctic's built-in stores except for BSONStore, which typically uses '_id'.
42+
See https://docs.mongodb.com/manual/core/sharding-shard-key/ for details.
43+
"""
2744
c = arctic._conn
2845
lib = arctic[library_name]._arctic_lib
2946
dbname = lib._db.name
@@ -34,8 +51,8 @@ def enable_sharding(arctic, library_name, hashed=True):
3451
if not 'already enabled' in str(e):
3552
raise
3653
if not hashed:
37-
logger.info("Range sharding 'symbol' on: " + dbname + '.' + library_name)
38-
c.admin.command('shardCollection', dbname + '.' + library_name, key={'symbol': 1})
54+
logger.info("Range sharding '" + key + "' on: " + dbname + '.' + library_name)
55+
c.admin.command('shardCollection', dbname + '.' + library_name, key={key: 1})
3956
else:
40-
logger.info("Hash sharding 'symbol' on: " + dbname + '.' + library_name)
41-
c.admin.command('shardCollection', dbname + '.' + library_name, key={'symbol': 'hashed'})
57+
logger.info("Hash sharding '" + key + "' on: " + dbname + '.' + library_name)
58+
c.admin.command('shardCollection', dbname + '.' + library_name, key={key: 'hashed'})

arctic/store/bson_store.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ def __init__(self, arctic_lib):
3030
def initialize_library(cls, arctic_lib, hashed=True, **kwargs):
3131
logger.info("Trying to enable sharding...")
3232
try:
33-
enable_sharding(arctic_lib.arctic, arctic_lib.get_name(), hashed=hashed)
33+
if not hashed:
34+
logger.warning("Ignored hashed=False when enabling sharding, only hashed=True "
35+
" makes sense when they key is an ObjectId")
36+
enable_sharding(arctic_lib.arctic, arctic_lib.get_name(), hashed=True, key='_id')
3437
except OperationFailure as exception:
3538
logger.warning(("Library created, but couldn't enable sharding: "
3639
"%s. This is OK if you're not 'admin'"), exception)

tests/unit/store/test_bson_store.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@ def test_initialize_library():
1111
with patch('arctic.store.bson_store.enable_sharding', autospec=True) as enable_sharding:
1212
arctic_lib.get_top_level_collection.return_value.database.create_collection.__name__ = 'some_name'
1313
arctic_lib.get_top_level_collection.return_value.database.collection_names.__name__ = 'some_name'
14-
BSONStore.initialize_library(arctic_lib, hashed=sentinel.hashed)
15-
assert enable_sharding.call_args_list == [call(arctic_lib.arctic, arctic_lib.get_name(), hashed=sentinel.hashed)]
14+
BSONStore.initialize_library(arctic_lib, hashed=True)
15+
BSONStore.initialize_library(arctic_lib, hashed=False)
16+
# Check we always set the sharding to be hashed, regardless of user input
17+
assert enable_sharding.call_args_list == [call(arctic_lib.arctic, arctic_lib.get_name(), hashed=True, key='_id'),
18+
call(arctic_lib.arctic, arctic_lib.get_name(), hashed=True, key='_id')]
1619

1720

1821
def test_find():

0 commit comments

Comments
 (0)