
Commit 428a021

Merge pull request pandas-dev#58 from bmoscon/master
Fix issue with new versions of NumPy (v1.10.1) that cause dtype metadata to be lost
2 parents 23722a3 + 1c742a2 commit 428a021
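The underlying behaviour is easy to reproduce outside arctic: on NumPy 1.10.1+ the record array returned by np.rec.fromarrays no longer carries the metadata attached to the dtype it was built from, and (per the new comment in _pandas_ndarray_store.py below) re-assigning the dtype does not bring it back. A minimal sketch with a made-up metadata payload:

    import numpy as np

    meta = {'source': 'example'}  # hypothetical metadata payload
    dtype = np.dtype([('index', '<i8'), ('values', '<f8')], metadata=meta)

    rtn = np.rec.fromarrays([np.arange(3), np.zeros(3)], dtype=dtype, names=['index', 'values'])
    print(dtype.metadata)      # the metadata attached above
    print(rtn.dtype.metadata)  # None on affected NumPy versions - lost by fromarrays
    rtn.dtype = dtype          # the old workaround; reported not to restore metadata on 1.10.1+
    print(rtn.dtype.metadata)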

3 files changed: +42 -43 lines changed

arctic/store/_ndarray_store.py

+22 -17
@@ -210,35 +210,38 @@ def _do_read(self, collection, version, symbol, index_range=None):
         rtn = np.fromstring(data, dtype=dtype).reshape(version.get('shape', (-1)))
         return rtn
 
-    def _promote_types(self, item, dtype_str):
-        if dtype_str == str(item.dtype):
-            return item.dtype
+    def _promote_types(self, dtype, dtype_str):
+        if dtype_str == str(dtype):
+            return dtype
         prev_dtype = self._dtype(dtype_str)
-        if item.dtype.names is None:
-            rtn = np.promote_types(item.dtype, prev_dtype)
+        if dtype.names is None:
+            rtn = np.promote_types(dtype, prev_dtype)
         else:
-            rtn = _promote_struct_dtypes(item.dtype, prev_dtype)
-        rtn = np.dtype(rtn, metadata=dict(item.dtype.metadata or {}))
+            rtn = _promote_struct_dtypes(dtype, prev_dtype)
+        rtn = np.dtype(rtn, metadata=dict(dtype.metadata or {}))
         return rtn
 
-    def append(self, arctic_lib, version, symbol, item, previous_version):
+    def append(self, arctic_lib, version, symbol, item, previous_version, dtype=None):
         collection = arctic_lib.get_top_level_collection()
         if previous_version.get('shape', [-1]) != [-1, ] + list(item.shape)[1:]:
             raise UnhandledDtypeException()
 
-        if previous_version['up_to'] == 0:
+        if not dtype:
             dtype = item.dtype
+
+        if previous_version['up_to'] == 0:
+            dtype = dtype
         elif len(item) == 0:
             dtype = self._dtype(previous_version['dtype'])
         else:
-            dtype = self._promote_types(item, previous_version['dtype'])
+            dtype = self._promote_types(dtype, previous_version['dtype'])
         item = item.astype(dtype)
         if str(dtype) != previous_version['dtype']:
             logger.debug('Converting %s from %s to %s' % (symbol, previous_version['dtype'], str(dtype)))
         if item.dtype.hasobject:
             raise UnhandledDtypeException()
-        version['dtype'] = str(item.dtype)
-        version['dtype_metadata'] = dict(item.dtype.metadata or {})
+        version['dtype'] = str(dtype)
+        version['dtype_metadata'] = dict(dtype.metadata or {})
         version['type'] = self.TYPE
 
         old_arr = self._do_read(collection, previous_version, symbol).astype(dtype)
@@ -368,20 +371,22 @@ def checksum(self, item):
         sha.update(item.tostring())
         return Binary(sha.digest())
 
-    def write(self, arctic_lib, version, symbol, item, previous_version):
+    def write(self, arctic_lib, version, symbol, item, previous_version, dtype=None):
         collection = arctic_lib.get_top_level_collection()
         if item.dtype.hasobject:
             raise UnhandledDtypeException()
 
-        version['dtype'] = str(item.dtype)
+        if not dtype:
+            dtype = item.dtype
+        version['dtype'] = str(dtype)
         version['shape'] = (-1,) + item.shape[1:]
-        version['dtype_metadata'] = dict(item.dtype.metadata or {})
+        version['dtype_metadata'] = dict(dtype.metadata or {})
         version['type'] = self.TYPE
         version['up_to'] = len(item)
         version['sha'] = self.checksum(item)
-
+
         if previous_version:
-            if version['dtype'] == str(item.dtype) \
+            if version['dtype'] == str(dtype) \
                     and 'sha' in previous_version \
                     and self.checksum(item[:previous_version['up_to']]) == previous_version['sha']:
                 #The first n rows are identical to the previous version, so just append.
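Note that _promote_types has to re-attach the metadata by hand because np.promote_types returns a plain dtype without it. A standalone sketch of that re-attachment step, using hypothetical dtypes in place of the stored and incoming ones:

    import numpy as np

    stored = np.dtype('float32')                                    # stands in for self._dtype(dtype_str)
    incoming = np.dtype('float64', metadata={'source': 'example'})  # hypothetical dtype carrying metadata

    promoted = np.promote_types(incoming, stored)                   # promotion result, metadata not carried over
    promoted = np.dtype(promoted, metadata=dict(incoming.metadata or {}))  # re-wrap, as the diff does
    print(promoted, promoted.metadata)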

arctic/store/_pandas_ndarray_store.py

+15 -12
@@ -87,15 +87,18 @@ def to_records(self, df):
         arrays = map(_to_primitive, arrays)
         dtype = np.dtype([(str(x), v.dtype) if len(v.shape) == 1 else (str(x), v.dtype, v.shape[1]) for x, v in zip(names, arrays)],
                          metadata=metadata)
+
         rtn = np.rec.fromarrays(arrays, dtype=dtype, names=names)
-        #For some reason the dtype metadata is lost in the line above.
-        rtn.dtype = dtype
-        return rtn
+        # For some reason the dtype metadata is lost in the line above
+        # and setting rtn.dtype to dtype does not preserve the metadata
+        # see https://github.com/numpy/numpy/issues/6771
+
+        return (rtn, dtype)
 
     def can_convert_to_records_without_objects(self, df, symbol):
         # We can't easily distinguish string columns from objects
         try:
-            arr = self.to_records(df)
+            arr,_ = self.to_records(df)
         except Exception as e:
             # This exception will also occur when we try to write the object so we fall-back to saving using Pickle
             log.info('Pandas dataframe %s caused exception "%s" when attempting to convert to records. Saving as Blob.'
@@ -249,12 +252,12 @@ def can_write(self, version, symbol, data):
         return False
 
     def write(self, arctic_lib, version, symbol, item, previous_version):
-        item = self.to_records(item)
-        super(PandasSeriesStore, self).write(arctic_lib, version, symbol, item, previous_version)
+        item, md = self.to_records(item)
+        super(PandasSeriesStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)
 
     def append(self, arctic_lib, version, symbol, item, previous_version):
-        item = self.to_records(item)
-        super(PandasSeriesStore, self).append(arctic_lib, version, symbol, item, previous_version)
+        item, md = self.to_records(item)
+        super(PandasSeriesStore, self).append(arctic_lib, version, symbol, item, previous_version, dtype=md)
 
     def read(self, arctic_lib, version, symbol, **kwargs):
         item = super(PandasSeriesStore, self).read(arctic_lib, version, symbol, **kwargs)
@@ -287,12 +290,12 @@ def can_write(self, version, symbol, data):
         return False
 
     def write(self, arctic_lib, version, symbol, item, previous_version):
-        item = self.to_records(item)
-        super(PandasDataFrameStore, self).write(arctic_lib, version, symbol, item, previous_version)
+        item, md = self.to_records(item)
+        super(PandasDataFrameStore, self).write(arctic_lib, version, symbol, item, previous_version, dtype=md)
 
     def append(self, arctic_lib, version, symbol, item, previous_version):
-        item = self.to_records(item)
-        super(PandasDataFrameStore, self).append(arctic_lib, version, symbol, item, previous_version)
+        item, md = self.to_records(item)
+        super(PandasDataFrameStore, self).append(arctic_lib, version, symbol, item, previous_version, dtype=md)
 
     def read(self, arctic_lib, version, symbol, **kwargs):
         item = super(PandasDataFrameStore, self).read(arctic_lib, version, symbol, **kwargs)
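Because the record array can no longer be relied on to carry the metadata, to_records now returns the dtype alongside the array and the Pandas stores pass it through to write/append via the new dtype= argument. A self-contained sketch of the same pattern (a hypothetical helper, not the arctic API):

    import numpy as np
    import pandas as pd

    def to_records_with_dtype(df, metadata):
        # build the structured dtype (with metadata) separately from the record
        # array, since np.rec.fromarrays may drop the dtype metadata
        arrays = [df.index.values] + [df[c].values for c in df.columns]
        names = ['index'] + [str(c) for c in df.columns]
        dtype = np.dtype([(n, a.dtype) for n, a in zip(names, arrays)], metadata=metadata)
        rtn = np.rec.fromarrays(arrays, dtype=dtype, names=names)
        return rtn, dtype

    df = pd.DataFrame({'A': [1.0, 2.0]})
    recs, dt = to_records_with_dtype(df, {'source': 'example'})
    print(recs.dtype.metadata)  # may be None on affected NumPy versions
    print(dt.metadata)          # survives on the separately returned dtype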

tests/unit/store/test_pandas_ndarray_store.py

+5 -14
@@ -21,7 +21,7 @@ def test_can_convert_to_records_without_objects_returns_false_on_exception_in_to
 
 def test_can_convert_to_records_without_objects_returns_false_when_records_have_object_dtype():
     store = PandasStore()
-    store.to_records = Mock(return_value=np.array(['a', 'b', None, 'd']))
+    store.to_records = Mock(return_value=(np.array(['a', 'b', None, 'd']), None))
 
     with patch('arctic.store._pandas_ndarray_store.log') as mock_log:
         assert store.can_convert_to_records_without_objects(sentinel.df, 'my_symbol') is False
@@ -32,8 +32,8 @@ def test_can_convert_to_records_without_objects_returns_false_when_records_have_
 
 def test_can_convert_to_records_without_objects_returns_false_when_records_have_arrays_in_them():
     store = PandasStore()
-    store.to_records = Mock(return_value=np.rec.array([(1356998400000000000L, ['A', 'BC'])],
-                                                      dtype=[('index', '<M8[ns]'), ('values', 'S2', (2,))]))
+    store.to_records = Mock(return_value=(np.rec.array([(1356998400000000000L, ['A', 'BC'])],
+                                                       dtype=[('index', '<M8[ns]'), ('values', 'S2', (2,))]), None))
 
     with patch('arctic.store._pandas_ndarray_store.log') as mock_log:
         assert store.can_convert_to_records_without_objects(sentinel.df, 'my_symbol') is False
@@ -44,8 +44,8 @@ def test_can_convert_to_records_without_objects_returns_true_otherwise():
 
 def test_can_convert_to_records_without_objects_returns_true_otherwise():
     store = PandasStore()
-    store.to_records = Mock(return_value=np.rec.array([(1356998400000000000L, 'a')],
-                                                      dtype=[('index', '<M8[ns]'), ('values', 'S2')]))
+    store.to_records = Mock(return_value=(np.rec.array([(1356998400000000000L, 'a')],
+                                                       dtype=[('index', '<M8[ns]'), ('values', 'S2')]), None))
 
     with patch('arctic.store._pandas_ndarray_store.log') as mock_log:
         assert store.can_convert_to_records_without_objects(sentinel.df, 'my_symbol') is True
@@ -54,15 +54,6 @@ def test_can_convert_to_records_without_objects_returns_true_otherwise():
     store.to_records.assert_called_once_with(sentinel.df)
 
 
-def test_to_records_raises_when_object_dtypes_present():
-    store = PandasDataFrameStore()
-    df = pd.DataFrame(data=dict(A=['a', 'b', None, 'c'], B=[1., 2., 3., 4.]), index=range(4))
-    with raises(TypeError) as e:
-        store.to_records(df)
-
-    assert "Cannot change data-type for object array." in str(e)
-
-
 def test_panel_converted_to_dataframe_and_stacked_to_write():
     store = PandasPanelStore()
     panel = Mock(shape=(1, 2, 3), axes=[Mock(names=['n%d' % i]) for i in range(3)])
