Skip to content

Commit 271ae44

Browse files
Kevin Sheppard, bashtage
Kevin Sheppard
authored and committed
BUG: Enable complex values to be written to HDF
Enable table format to be used to store complex values in DataFrames, Panels and Panel4Ds. Add tests for both fixed and panel. Add exception when attempting to write Series with complex values. closes #10447
1 parent 83b2320 commit 271ae44

File tree

3 files changed

+166
-5
lines changed

3 files changed

+166
-5
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ New features
2727
~~~~~~~~~~~~
2828

2929
- SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`)
30+
- Enable writing complex values to HDF stores when using table format (:issue:`10447`)
3031

3132
.. _whatsnew_0170.enhancements.other:
3233

@@ -147,3 +148,4 @@ Bug Fixes
147148
- Bug in `groupby.var` which caused variance to be inaccurate for small float values (:issue:`10448`)
148149

149150
- Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`)
151+

pandas/io/pytables.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -1773,6 +1773,8 @@ def set_kind(self):
17731773
self.kind = 'string'
17741774
elif dtype.startswith(u('float')):
17751775
self.kind = 'float'
1776+
elif dtype.startswith(u('complex')):
1777+
self.kind = 'complex'
17761778
elif dtype.startswith(u('int')) or dtype.startswith(u('uint')):
17771779
self.kind = 'integer'
17781780
elif dtype.startswith(u('date')):
@@ -1802,6 +1804,8 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
18021804
return self.set_atom_datetime64(block)
18031805
elif block.is_timedelta:
18041806
return self.set_atom_timedelta64(block)
1807+
elif block.is_complex:
1808+
return self.set_atom_complex(block)
18051809

18061810
dtype = block.dtype.name
18071811
inferred_type = lib.infer_dtype(block.values)
@@ -1936,6 +1940,12 @@ def get_atom_coltype(self, kind=None):
19361940
def get_atom_data(self, block, kind=None):
    """Build the column atom used to store ``block``.

    Looks up the column type for ``kind`` via ``get_atom_coltype`` and
    instantiates it with the block's leading dimension as its shape.
    """
    col_factory = self.get_atom_coltype(kind=kind)
    return col_factory(shape=block.shape[0])
19381942

1943+
def set_atom_complex(self, block):
    """Configure this column to hold a block of complex values.

    Records the block's dtype name as ``kind``, creates a PyTables
    ``ComplexCol`` sized from the bit width embedded in that name, and
    stores the block's values cast to the column's type.
    """
    self.kind = block.dtype.name
    # The dtype name ends with its width in bits (e.g. 'complex128');
    # dividing by 8 gives the item size passed to ComplexCol.
    n_bits = int(self.kind.split('complex')[-1])
    n_bytes = n_bits // 8
    self.typ = _tables().ComplexCol(itemsize=n_bytes,
                                    shape=block.shape[0])
    converted = block.values.astype(self.typ.type, copy=False)
    self.set_data(converted)
1948+
19391949
def set_atom_data(self, block):
19401950
self.kind = block.dtype.name
19411951
self.typ = self.get_atom_data(block)
@@ -3147,8 +3157,8 @@ def f(i, c):
31473157
def create_index(self, columns=None, optlevel=None, kind=None):
31483158
"""
31493159
Create a pytables index on the specified columns
3150-
note: cannot index Time64Col() currently; PyTables must be >= 2.3
3151-
3160+
note: cannot index Time64Col() or ComplexCol currently;
3161+
PyTables must be >= 3.0
31523162
31533163
Parameters
31543164
----------
@@ -3203,6 +3213,12 @@ def create_index(self, columns=None, optlevel=None, kind=None):
32033213

32043214
# create the index
32053215
if not v.is_indexed:
3216+
if v.type.startswith('complex'):
3217+
raise TypeError('Columns containing complex values can be stored but cannot'
3218+
' be indexed when using table format. Either use fixed '
3219+
'format, set index=False, or do not include the columns '
3220+
'containing complex values to data_columns when '
3221+
'initializing the table.')
32063222
v.create_index(**kw)
32073223

32083224
def read_axes(self, where, **kwargs):

pandas/io/tests/test_pytables.py

+146-3
Original file line numberDiff line numberDiff line change
@@ -131,18 +131,18 @@ def compat_assert_produces_warning(w,f):
131131
f()
132132

133133

134-
class TestHDFStore(tm.TestCase):
134+
class Base(tm.TestCase):
135135

136136
@classmethod
137137
def setUpClass(cls):
138-
super(TestHDFStore, cls).setUpClass()
138+
super(Base, cls).setUpClass()
139139

140140
# Pytables 3.0.0 deprecates lots of things
141141
tm.reset_testing_mode()
142142

143143
@classmethod
144144
def tearDownClass(cls):
145-
super(TestHDFStore, cls).tearDownClass()
145+
super(Base, cls).tearDownClass()
146146

147147
# Pytables 3.0.0 deprecates lots of things
148148
tm.set_testing_mode()
@@ -155,6 +155,9 @@ def setUp(self):
155155
def tearDown(self):
156156
pass
157157

158+
159+
class TestHDFStore(Base):
160+
158161
def test_factory_fun(self):
159162
path = create_tempfile(self.path)
160163
try:
@@ -4743,6 +4746,146 @@ def test_read_nokey(self):
47434746
df.to_hdf(path, 'df2', mode='a')
47444747
self.assertRaises(ValueError, read_hdf, path)
47454748

4749+
4750+
class TestHDFComplexValues(Base):
    """Round-trip tests for complex-valued objects in HDF files (GH10447)."""

    def _roundtrip(self, obj, key, compare, **kwargs):
        """Write ``obj`` under ``key`` to a clean file, read it back, compare.

        Extra keyword arguments are forwarded to ``to_hdf``.
        """
        with ensure_clean_path(self.path) as path:
            obj.to_hdf(path, key, **kwargs)
            reread = read_hdf(path, key)
            compare(obj, reread)

    @staticmethod
    def _mixed_frame():
        """Frame with int, string, complex64, complex128 and float columns."""
        ones = [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]
        return DataFrame({'A': [1, 2, 3, 4],
                          'B': ['a', 'b', 'c', 'd'],
                          'C': np.array(ones, dtype=np.complex64),
                          'D': np.array(ones, dtype=np.complex128),
                          'E': [1.0, 2.0, 3.0, 4.0]},
                         index=list('abcd'))

    def test_complex_fixed(self):
        """complex64 and complex128 frames round-trip with the default format."""
        for dtype in (np.complex64, np.complex128):
            values = np.random.rand(4, 5).astype(dtype)
            frame = DataFrame(values,
                              index=list('abcd'),
                              columns=list('ABCDE'))
            self._roundtrip(frame, 'df', assert_frame_equal)

    def test_complex_table(self):
        """complex64 and complex128 frames round-trip with table format."""
        # The second write additionally passes mode='w', as in the
        # original sequence of calls.
        cases = [(np.complex64, {}),
                 (np.complex128, {'mode': 'w'})]
        for dtype, extra in cases:
            values = np.random.rand(4, 5).astype(dtype)
            frame = DataFrame(values,
                              index=list('abcd'),
                              columns=list('ABCDE'))
            self._roundtrip(frame, 'df', assert_frame_equal,
                            format='table', **extra)

    def test_complex_mixed_fixed(self):
        """A mixed-dtype frame with complex columns round-trips (default format)."""
        self._roundtrip(self._mixed_frame(), 'df', assert_frame_equal)

    def test_complex_mixed_table(self):
        """Mixed-dtype frame: query on non-complex data columns, then round-trip."""
        frame = self._mixed_frame()

        with ensure_clean_store(self.path) as store:
            store.append('df', frame, data_columns=['A', 'B'])
            selected = store.select('df', where=Term('A>2'))
            assert_frame_equal(frame.loc[frame.A > 2], selected)

        self._roundtrip(frame, 'df', assert_frame_equal, format='table')

    def test_complex_across_dimensions_fixed(self):
        """Series, DataFrame and Panel of complex values round-trip (fixed)."""
        values = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(values, index=list('abcd'))
        df = DataFrame({'A': s, 'B': s})
        p = Panel({'One': df, 'Two': df})

        checks = [(s, tm.assert_series_equal),
                  (df, tm.assert_frame_equal),
                  (p, tm.assert_panel_equal)]
        for obj, compare in checks:
            self._roundtrip(obj, 'obj', compare, format='fixed')

    def test_complex_across_dimensions(self):
        """DataFrame, Panel and Panel4D of complex values round-trip (table)."""
        values = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(values, index=list('abcd'))
        df = DataFrame({'A': s, 'B': s})
        p = Panel({'One': df, 'Two': df})
        p4d = pd.Panel4D({'i': p, 'ii': p})

        checks = [(df, tm.assert_frame_equal),
                  (p, tm.assert_panel_equal),
                  (p4d, tm.assert_panel4d_equal)]
        for obj, compare in checks:
            self._roundtrip(obj, 'obj', compare, format='table')

    def test_complex_indexing_error(self):
        """Appending with a complex column as a data column raises TypeError."""
        values = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                          dtype=np.complex128)
        frame = DataFrame({'A': [1, 2, 3, 4],
                           'B': ['a', 'b', 'c', 'd'],
                           'C': values},
                          index=list('abcd'))
        with ensure_clean_store(self.path) as store:
            self.assertRaises(TypeError, store.append, 'df', frame,
                              data_columns=['C'])

    def test_complex_series_error(self):
        """Complex Series in table format: TypeError by default, ok with index=False."""
        values = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(values, index=list('abcd'))

        with ensure_clean_path(self.path) as path:
            self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t')

        self._roundtrip(s, 'obj', tm.assert_series_equal,
                        format='t', index=False)

    def test_complex_append(self):
        """Appending a complex/float frame twice yields the concatenation."""
        frame = DataFrame({'a': np.random.randn(100).astype(np.complex128),
                           'b': np.random.randn(100)})

        with ensure_clean_store(self.path) as store:
            store.append('df', frame, data_columns=['b'])
            store.append('df', frame)
            stored = store.select('df')
            expected = pd.concat([frame, frame], 0)
            assert_frame_equal(expected, stored)
4887+
4888+
47464889
def _test_sort(obj):
47474890
if isinstance(obj, DataFrame):
47484891
return obj.reindex(sorted(obj.index))

0 commit comments

Comments
 (0)