Skip to content

Commit bb6977a

Browse files
author
Adrian Teng
committed
Raise if appending out-of-order items; Add concat flag
1 parent f89de88 commit bb6977a

File tree

3 files changed

+77
-10
lines changed

3 files changed

+77
-10
lines changed

arctic/store/_pandas_ndarray_store.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
import ast
22
import logging
33

4+
import lz4
5+
import numpy as np
46
from bson.binary import Binary
57
from pandas import DataFrame, Series, Panel
6-
import numpy as np
78

9+
from arctic.exceptions import UnorderedDataException
810
from arctic.serialization.numpy_records import SeriesSerializer, DataFrameSerializer
11+
from ._ndarray_store import NdarrayStore
912
from .._compression import compress, decompress
1013
from ..date._util import to_pandas_closed_closed
1114
from ..exceptions import ArcticException
12-
from ._ndarray_store import NdarrayStore
13-
1415

1516
log = logging.getLogger(__name__)
1617

@@ -116,6 +117,24 @@ def get_info(self, version):
116117
ret['dtype'] = ast.literal_eval(version['dtype'])
117118
return ret
118119

120+
def read_segment_last_dt(self, version):
    """Return the last datetime stored in a version's segment index.

    Parameters
    ----------
    version : dict-like
        Version document. Its optional ``'segment_index'`` entry is
        lz4-decompressed and decoded as records of ``INDEX_DTYPE``.

    Returns
    -------
    The final value of the index field chosen by ``self._datetime64_index``,
    or ``None`` when the version has no ``'segment_index'``, the decoded
    index is empty, or no datetime field is reported.
    """
    if 'segment_index' not in version:
        return None

    # np.frombuffer is the supported replacement for the deprecated binary
    # mode of np.fromstring; the resulting array is only read here.
    raw_index = lz4.decompress(version['segment_index'])
    index = np.frombuffer(raw_index, dtype=INDEX_DTYPE)
    if len(index) == 0:
        # Nothing recorded yet: taking [-1] below would raise IndexError.
        return None

    dt_index = self._datetime64_index(index)
    if dt_index:
        return index[dt_index][-1]
    return None
127+
128+
def slice_overlap_item_or_raise(self, item, previous_version, concat):
    """Reconcile a new item with the end of the previous version.

    If the first index value of ``item`` is not strictly after the last
    datetime of ``previous_version``, either keep only the rows that come
    after it (``concat=True``) or raise (``concat=False``).

    Parameters
    ----------
    item : pandas object with an ``index``
        The data about to be appended.
    previous_version : dict-like
        Version document passed to ``self.read_segment_last_dt``.
    concat : bool
        When true, overlapping leading rows are dropped instead of raising.

    Returns
    -------
    ``item`` unchanged when there is no overlap (or nothing to compare
    against), otherwise the subset of ``item`` after the previous end.

    Raises
    ------
    UnorderedDataException
        If the item overlaps the previous version and ``concat`` is false.
    """
    prev_version_last_dt = self.read_segment_last_dt(previous_version)

    # Compare against None explicitly so a valid timestamp can never be
    # mistaken for "no previous data" through scalar truthiness.
    if prev_version_last_dt is None or len(item) == 0:
        return item

    if item.index[0] > prev_version_last_dt:
        return item

    if not concat:
        raise UnorderedDataException(
            "new data {} before to symbol ending {}".format(item.index[0], prev_version_last_dt))

    return item[item.index > prev_version_last_dt]
119138

120139
def _start_end(date_range, dts):
121140
"""
@@ -152,7 +171,8 @@ def write(self, arctic_lib, version, symbol, item, previous_version):
152171
item, md = self.SERIALIZER.serialize(item)
153172
super(PandasSeriesStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)
154173

155-
def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
174+
def append(self, arctic_lib, version, symbol, item, previous_version, concat=False, **kwargs):
    """Append ``item`` to ``symbol`` after checking it against the previous version.

    The item is first passed through ``slice_overlap_item_or_raise`` (which
    trims or rejects data overlapping ``previous_version`` depending on
    ``concat``), then serialized with ``self.SERIALIZER`` and handed to the
    parent class's ``append``. ``concat`` is consumed here and is not
    forwarded to the parent; remaining ``kwargs`` are.
    """
    trimmed = self.slice_overlap_item_or_raise(item, previous_version, concat)
    records, dtype_md = self.SERIALIZER.serialize(trimmed)
    parent = super(PandasSeriesStore, self)
    parent.append(arctic_lib, version, symbol, records, previous_version, dtype=dtype_md, **kwargs)
158178

@@ -176,7 +196,8 @@ def write(self, arctic_lib, version, symbol, item, previous_version):
176196
item, md = self.SERIALIZER.serialize(item)
177197
super(PandasDataFrameStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)
178198

179-
def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
199+
def append(self, arctic_lib, version, symbol, item, previous_version, concat=False, **kwargs):
    """Append ``item`` to ``symbol`` after checking it against the previous version.

    The item is first passed through ``slice_overlap_item_or_raise`` (which
    trims or rejects data overlapping ``previous_version`` depending on
    ``concat``), then serialized with ``self.SERIALIZER`` and handed to the
    parent class's ``append``. ``concat`` is consumed here and is not
    forwarded to the parent; remaining ``kwargs`` are.
    """
    trimmed = self.slice_overlap_item_or_raise(item, previous_version, concat)
    records, dtype_md = self.SERIALIZER.serialize(trimmed)
    parent = super(PandasDataFrameStore, self)
    parent.append(arctic_lib, version, symbol, records, previous_version, dtype=dtype_md, **kwargs)
182203

tests/integration/store/test_pandas_store.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,52 @@ def test_dataframe_append_should_add_new_columns_and_reorder(library):
344344
assert_frame_equal(expected, actual)
345345

346346

347+
def test_series_append_concat(library):
    """Appending an overlapping series with concat=True reads back as the appended series."""
    initial = Series(data=[1.0], index=[dt(2012, 1, 1)])
    appended = Series([1.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)])
    appended.name = 'values'
    appended.index.name = 'index'

    library.write('TEST_1', initial)
    library.append('TEST_1', appended, concat=True)

    assert_series_equal(appended, library.read('TEST_1').data)
356+
357+
358+
def test_series_append_concat_only_appends_end(library):
    """With concat=True the already-stored row is kept and only the later row is added."""
    first_day, second_day = dt(2012, 1, 1), dt(2012, 1, 2)
    stored = Series([1.0], [first_day])
    overlapping = Series([2.0, 2.0], [first_day, second_day])

    library.write('TEST_1', stored)
    library.append('TEST_1', overlapping, concat=True)
    result = library.read('TEST_1').data

    # The value for the first day must still be the originally written 1.0.
    expected = Series([1.0, 2.0], [first_day, second_day])
    expected.name = 'values'
    expected.index.name = 'index'
    assert_series_equal(expected, result)
369+
370+
371+
def test_frame_append_concat(library):
    """Appending an overlapping frame with concat=True reads back as the appended frame."""
    initial = DataFrame(data=[1.0], index=[dt(2012, 1, 1)], columns=['a'])
    appended = DataFrame([1.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)], columns=['a'])
    appended.index.name = 'index'

    library.write('TEST_1', initial)
    library.append('TEST_1', appended, concat=True)

    assert_frame_equal(appended, library.read('TEST_1').data)
379+
380+
381+
def test_frame_append_concat_only_appends_end(library):
    """With concat=True the already-stored row is kept and only the later row is added."""
    first_day, second_day = dt(2012, 1, 1), dt(2012, 1, 2)
    stored = DataFrame([1.0], [first_day], columns=['a'])
    overlapping = DataFrame([2.0, 2.0], [first_day, second_day], columns=['a'])

    library.write('TEST_1', stored)
    library.append('TEST_1', overlapping, concat=True)
    result = library.read('TEST_1').data

    # The value for the first day must still be the originally written 1.0.
    expected = DataFrame([1.0, 2.0], [first_day, second_day], columns=['a'])
    expected.index.name = 'index'
    assert_frame_equal(expected, result)
391+
392+
347393
# -- auto generated tests --- #
348394
def dataframe(columns, length, index):
349395
df = DataFrame(np.ones((length, columns)), columns=list(string.ascii_lowercase[:columns]))

tests/integration/store/test_version_store.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@
3535
2012-11-08 17:06:11.040 | 3.0""")
3636

3737
ts1_append = read_str_as_pandas(""" times | near
38-
2012-09-08 17:06:11.040 | 1.0
39-
2012-10-08 17:06:11.040 | 2.0
40-
2012-10-09 17:06:11.040 | 2.5
41-
2012-11-08 17:06:11.040 | 3.0
42-
2012-11-09 17:06:11.040 | 3.0""")
38+
2012-11-09 17:06:11.040 | 1.0
39+
2012-11-10 17:06:11.040 | 2.0
40+
2012-11-11 17:06:11.040 | 2.5
41+
2012-11-12 17:06:11.040 | 3.0
42+
2012-11-13 17:06:11.040 | 3.0""")
4343

4444

4545
symbol = 'TS1'

0 commit comments

Comments
 (0)