 from ..decorators import mongo_retry
 from ..exceptions import UnhandledDtypeException, DataIntegrityException
-from ._version_store_utils import checksum
+from ._version_store_utils import checksum, version_base_or_id
 from .._compression import compress_array, decompress
 from six.moves import xrange
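For context on the new import: version_base_or_id is not shown in this diff, but the expressions it replaces below suggest it simply resolves the id that segment documents are parented on. A minimal sketch of that behaviour, assuming the helper does no more than the inline expression it supersedes:

    def version_base_or_id(version):
        # An appended ("delta") version carries a 'base_version_id' pointing at the
        # version whose segments it reuses; a version written from scratch does not,
        # so its segments are parented on its own _id.
        return version.get('base_version_id', version['_id'])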
@@ -39,20 +39,25 @@ def _promote(type1, type2):
 def _attempt_update_unchanged(symbol, unchanged_segment_ids, collection, version, previous_version):
     if not unchanged_segment_ids or not collection or not version:
         return
+
+    # Currently this is only called from _concat_and_rewrite, where "base_version_id" is always empty.
+    # Use version_base_or_id() instead, so the method stays safe going forward if it is ever called from anywhere else.
+    parent_id = version_base_or_id(version)
+
     # Update the parent set of the unchanged/compressed segments
     result = collection.update_many({
         'symbol': symbol,  # hit only the right shard
         # update_many is a broadcast query otherwise
         '_id': {'$in': [x['_id'] for x in unchanged_segment_ids]}
         },
-        {'$addToSet': {'parent': version['_id']}})
+        {'$addToSet': {'parent': parent_id}})
     # Fast check for success without extra query
     if result.matched_count == len(unchanged_segment_ids):
         return
     # update_many is tricky sometimes wrt matched_count across replicas when balancer runs. Check based on _id.
     unchanged_ids = set([x['_id'] for x in unchanged_segment_ids])
     spec = {'symbol': symbol,
-            'parent': version['_id'],
+            'parent': parent_id,
             'segment': {'$lte': unchanged_segment_ids[-1]['segment']}}
     matched_segments_ids = set([x['_id'] for x in collection.find(spec)])
     if unchanged_ids != matched_segments_ids:
@@ -240,7 +245,7 @@ def _do_read(self, collection, version, symbol, index_range=None):
         segment_count = None
 
         spec = {'symbol': symbol,
-                'parent': version.get('base_version_id', version['_id']),
+                'parent': version_base_or_id(version),
                 'segment': {'$lt': to_index}
                 }
         if from_index is not None:
@@ -333,7 +338,7 @@ def _do_append(self, collection, version, symbol, item, previous_version, dirty_
         #_CHUNK_SIZE is probably too big if we're only appending single rows of data - perhaps something smaller,
         #or also look at number of appended segments?
         if not dirty_append and version['append_count'] < _APPEND_COUNT and version['append_size'] < _APPEND_SIZE:
-            version['base_version_id'] = previous_version.get('base_version_id', previous_version['_id'])
+            version['base_version_id'] = version_base_or_id(previous_version)
 
             if len(item) > 0:
 
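The assignment above is what keeps a chain of appends anchored to one parent: each appended version copies the resolved id forward, so segments written anywhere in the chain stay parented on the original base version. A toy illustration with plain dicts standing in for version documents (hypothetical ids, not real arctic data):

    def version_base_or_id(version):  # same helper as in the sketch above
        return version.get('base_version_id', version['_id'])

    v1 = {'_id': 'v1'}                                             # initial write: segments parented on 'v1'
    v2 = {'_id': 'v2', 'base_version_id': version_base_or_id(v1)}  # append on top of v1 -> resolves to 'v1'
    v3 = {'_id': 'v3', 'base_version_id': version_base_or_id(v2)}  # append on top of v2 -> still 'v1'
    assert version_base_or_id(v3) == 'v1'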
@@ -375,7 +380,7 @@ def _concat_and_rewrite(self, collection, version, symbol, item, previous_versio
 
         # Figure out which is the last 'full' chunk
         spec = {'symbol': symbol,
-                'parent': previous_version.get('base_version_id', previous_version['_id']),
+                'parent': version_base_or_id(previous_version),
                 'segment': {'$lt': previous_version['up_to']}}
 
         read_index_range = [0, None]
@@ -423,17 +428,21 @@ def _concat_and_rewrite(self, collection, version, symbol, item, previous_versio
         self.check_written(collection, symbol, version)
 
     def check_written(self, collection, symbol, version):
+        # Currently only called from methods which guarantee 'base_version_id' is not populated.
+        # Make it nonetheless safe for the general case.
+        parent_id = version_base_or_id(version)
+
         # Check all the chunks are in place
-        seen_chunks = collection.find({'symbol': symbol, 'parent': version['_id']},
+        seen_chunks = collection.find({'symbol': symbol, 'parent': parent_id},
                                       ).count()
 
         if seen_chunks != version['segment_count']:
-            segments = [x['segment'] for x in collection.find({'symbol': symbol, 'parent': version['_id']},
+            segments = [x['segment'] for x in collection.find({'symbol': symbol, 'parent': parent_id},
                                                               projection={'segment': 1},
                                                               )]
             raise pymongo.errors.OperationFailure("Failed to write all the Chunks. Saw %s expecting %s"
                                                   "Parent: %s \n segments: %s" %
-                                                  (seen_chunks, version['segment_count'], version['_id'], segments))
+                                                  (seen_chunks, version['segment_count'], parent_id, segments))
 
     def checksum(self, item):
         sha = hashlib.sha1()
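The resolution added here is defensive, per the comment in the hunk: if check_written were ever handed a version that carries a base_version_id, counting segments parented on that version's own _id would come up empty and raise OperationFailure even though every chunk is present. A small sketch of the difference, with an in-memory list standing in for the Mongo collection, 'parent' simplified to a single id, and purely hypothetical ids:

    segments = [{'symbol': 'SYM', 'parent': 'v1', 'segment': 0},
                {'symbol': 'SYM', 'parent': 'v1', 'segment': 1}]
    delta = {'_id': 'v2', 'base_version_id': 'v1', 'segment_count': 2}

    by_own_id = sum(1 for s in segments if s['parent'] == delta['_id'])                                # 0 -> would raise
    by_parent = sum(1 for s in segments if s['parent'] == delta.get('base_version_id', delta['_id']))  # 2 -> matches segment_count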