Skip to content

Commit 426a2a0

Browse files
author
Adrian Teng
committed
Revert "Revert "Revert "Merge pull request pandas-dev#363 from manahl/add-concat-flag"""
This reverts commit e4daded.
1 parent 386b098 commit 426a2a0

File tree

4 files changed

+11
-77
lines changed

4 files changed

+11
-77
lines changed

CHANGES.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
* Feature: #365 add generic BSON store
2323

2424
### 1.42 (2017-05-12)
25-
* Bugfix: #346 fixed daterange subsetting error on very large dataframes in version store
25+
* Bugfix: #346 fixed daterange subsetting error on very large dateframes in version store
2626
* Bugfix: #351 $size queries can't use indexes, use alternative queries
2727

2828
### 1.41 (2017-04-20)

arctic/store/_pandas_ndarray_store.py

+5-25
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
import ast
22
import logging
33

4-
import numpy as np
54
from bson.binary import Binary
65
from pandas import DataFrame, Series, Panel
6+
import numpy as np
77

8-
from arctic.exceptions import UnorderedDataException
98
from arctic.serialization.numpy_records import SeriesSerializer, DataFrameSerializer
10-
from ._ndarray_store import NdarrayStore
119
from .._compression import compress, decompress
1210
from ..date._util import to_pandas_closed_closed
1311
from ..exceptions import ArcticException
12+
from ._ndarray_store import NdarrayStore
13+
1414

1515
log = logging.getLogger(__name__)
1616

@@ -116,24 +116,6 @@ def get_info(self, version):
116116
ret['dtype'] = ast.literal_eval(version['dtype'])
117117
return ret
118118

119-
def read_segment_last_dt(self, version):
120-
if 'segment_index' in version:
121-
index = np.fromstring(decompress(version['segment_index']), dtype=INDEX_DTYPE)
122-
dt_index = self._datetime64_index(index)
123-
if dt_index:
124-
return index[dt_index][-1]
125-
return None
126-
127-
def slice_overlap_item_or_raise(self, item, previous_version, concat):
128-
"""If new item has overlap dt with previous version, keep only new bits if concat=True; raise if concat=False"""
129-
prev_version_last_dt = self.read_segment_last_dt(previous_version)
130-
if prev_version_last_dt and len(item) > 0 and item.index[0] <= prev_version_last_dt:
131-
if concat:
132-
item = item[item.index > prev_version_last_dt]
133-
else:
134-
raise UnorderedDataException(
135-
"new data {} before to symbol ending {}".format(item.index[0], prev_version_last_dt))
136-
return item
137119

138120
def _start_end(date_range, dts):
139121
"""
@@ -170,8 +152,7 @@ def write(self, arctic_lib, version, symbol, item, previous_version):
170152
item, md = self.SERIALIZER.serialize(item)
171153
super(PandasSeriesStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)
172154

173-
def append(self, arctic_lib, version, symbol, item, previous_version, concat=False, **kwargs):
174-
item = self.slice_overlap_item_or_raise(item, previous_version, concat)
155+
def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
175156
item, md = self.SERIALIZER.serialize(item)
176157
super(PandasSeriesStore, self).append(arctic_lib, version, symbol, item, previous_version, dtype=md, **kwargs)
177158

@@ -195,8 +176,7 @@ def write(self, arctic_lib, version, symbol, item, previous_version):
195176
item, md = self.SERIALIZER.serialize(item)
196177
super(PandasDataFrameStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)
197178

198-
def append(self, arctic_lib, version, symbol, item, previous_version, concat=False, **kwargs):
199-
item = self.slice_overlap_item_or_raise(item, previous_version, concat)
179+
def append(self, arctic_lib, version, symbol, item, previous_version, **kwargs):
200180
item, md = self.SERIALIZER.serialize(item)
201181
super(PandasDataFrameStore, self).append(arctic_lib, version, symbol, item, previous_version, dtype=md, **kwargs)
202182

tests/integration/store/test_pandas_store.py

-46
Original file line numberDiff line numberDiff line change
@@ -344,52 +344,6 @@ def test_dataframe_append_should_add_new_columns_and_reorder(library):
344344
assert_frame_equal(expected, actual)
345345

346346

347-
def test_series_append_concat(library):
348-
s1 = Series(data=[1.0], index=[dt(2012, 1, 1)])
349-
s2 = Series([1.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)])
350-
s2.index.name = 'index'
351-
s2.name = 'values'
352-
library.write('TEST_1', s1)
353-
library.append('TEST_1', s2, concat=True)
354-
result = library.read('TEST_1').data
355-
assert_series_equal(s2, result)
356-
357-
358-
def test_series_append_concat_only_appends_end(library):
359-
s1 = Series([1.0], [dt(2012, 1, 1)])
360-
s2 = Series([2.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)])
361-
library.write('TEST_1', s1)
362-
library.append('TEST_1', s2, concat=True)
363-
364-
result = library.read('TEST_1').data
365-
expected = Series([1.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)])
366-
expected.index.name = 'index'
367-
expected.name = 'values'
368-
assert_series_equal(expected, result)
369-
370-
371-
def test_frame_append_concat(library):
372-
df1 = DataFrame(data=[1.0], index=[dt(2012, 1, 1)], columns=['a'])
373-
df2 = DataFrame([1.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)], columns=['a'])
374-
df2.index.name = 'index'
375-
library.write('TEST_1', df1)
376-
library.append('TEST_1', df2, concat=True)
377-
result = library.read('TEST_1').data
378-
assert_frame_equal(df2, result)
379-
380-
381-
def test_frame_append_concat_only_appends_end(library):
382-
df1 = DataFrame([1.0], [dt(2012, 1, 1)], columns=['a'])
383-
df2 = DataFrame([2.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)], columns=['a'])
384-
library.write('TEST_1', df1)
385-
library.append('TEST_1', df2, concat=True)
386-
387-
result = library.read('TEST_1').data
388-
expected = DataFrame([1.0, 2.0], [dt(2012, 1, 1), dt(2012, 1, 2)], columns=['a'])
389-
expected.index.name = 'index'
390-
assert_frame_equal(expected, result)
391-
392-
393347
# -- auto generated tests --- #
394348
def dataframe(columns, length, index):
395349
df = DataFrame(np.ones((length, columns)), columns=list(string.ascii_lowercase[:columns]))

tests/integration/store/test_version_store.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@
3535
2012-11-08 17:06:11.040 | 3.0""")
3636

3737
ts1_append = read_str_as_pandas(""" times | near
38-
2012-11-09 17:06:11.040 | 1.0
39-
2012-11-10 17:06:11.040 | 2.0
40-
2012-11-11 17:06:11.040 | 2.5
41-
2012-11-12 17:06:11.040 | 3.0
42-
2012-11-13 17:06:11.040 | 3.0""")
38+
2012-09-08 17:06:11.040 | 1.0
39+
2012-10-08 17:06:11.040 | 2.0
40+
2012-10-09 17:06:11.040 | 2.5
41+
2012-11-08 17:06:11.040 | 3.0
42+
2012-11-09 17:06:11.040 | 3.0""")
4343

4444

4545
symbol = 'TS1'

0 commit comments

Comments
 (0)