Skip to content

Commit 98961c5

Browse files
committed
Merge pull request #10473 from bashtage/hdf-complex
BUG: Enable complex values to be written to HDF
2 parents 83b2320 + 271ae44 commit 98961c5

File tree

3 files changed

+166
-5
lines changed

3 files changed

+166
-5
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ New features
2727
~~~~~~~~~~~~
2828

2929
- SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`)
30+
- Enable writing complex values to HDF stores when using table format (:issue:`10447`)
3031

3132
.. _whatsnew_0170.enhancements.other:
3233

@@ -147,3 +148,4 @@ Bug Fixes
147148
- Bug in ``groupby.var`` which caused variance to be inaccurate for small float values (:issue:`10448`)
148149

149150
- Bug in ``Series.plot(kind='hist')`` where the Y label was not informative (:issue:`10485`)
151+

pandas/io/pytables.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -1773,6 +1773,8 @@ def set_kind(self):
17731773
self.kind = 'string'
17741774
elif dtype.startswith(u('float')):
17751775
self.kind = 'float'
1776+
elif dtype.startswith(u('complex')):
1777+
self.kind = 'complex'
17761778
elif dtype.startswith(u('int')) or dtype.startswith(u('uint')):
17771779
self.kind = 'integer'
17781780
elif dtype.startswith(u('date')):
@@ -1802,6 +1804,8 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
18021804
return self.set_atom_datetime64(block)
18031805
elif block.is_timedelta:
18041806
return self.set_atom_timedelta64(block)
1807+
elif block.is_complex:
1808+
return self.set_atom_complex(block)
18051809

18061810
dtype = block.dtype.name
18071811
inferred_type = lib.infer_dtype(block.values)
@@ -1936,6 +1940,12 @@ def get_atom_coltype(self, kind=None):
19361940
def get_atom_data(self, block, kind=None):
19371941
return self.get_atom_coltype(kind=kind)(shape=block.shape[0])
19381942

1943+
def set_atom_complex(self, block):
1944+
self.kind = block.dtype.name
1945+
itemsize = int(self.kind.split('complex')[-1]) // 8
1946+
self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0])
1947+
self.set_data(block.values.astype(self.typ.type, copy=False))
1948+
19391949
def set_atom_data(self, block):
19401950
self.kind = block.dtype.name
19411951
self.typ = self.get_atom_data(block)
@@ -3147,8 +3157,8 @@ def f(i, c):
31473157
def create_index(self, columns=None, optlevel=None, kind=None):
31483158
"""
31493159
Create a pytables index on the specified columns
3150-
note: cannot index Time64Col() currently; PyTables must be >= 2.3
3151-
3160+
note: cannot index Time64Col() or ComplexCol() currently;
3161+
PyTables must be >= 3.0
31523162
31533163
Parameters
31543164
----------
@@ -3203,6 +3213,12 @@ def create_index(self, columns=None, optlevel=None, kind=None):
32033213

32043214
# create the index
32053215
if not v.is_indexed:
3216+
if v.type.startswith('complex'):
3217+
raise TypeError('Columns containing complex values can be stored but cannot'
3218+
' be indexed when using table format. Either use fixed '
3219+
'format, set index=False, or do not include the columns '
3220+
'containing complex values to data_columns when '
3221+
'initializing the table.')
32063222
v.create_index(**kw)
32073223

32083224
def read_axes(self, where, **kwargs):

pandas/io/tests/test_pytables.py

+146-3
Original file line numberDiff line numberDiff line change
@@ -131,18 +131,18 @@ def compat_assert_produces_warning(w,f):
131131
f()
132132

133133

134-
class TestHDFStore(tm.TestCase):
134+
class Base(tm.TestCase):
135135

136136
@classmethod
137137
def setUpClass(cls):
138-
super(TestHDFStore, cls).setUpClass()
138+
super(Base, cls).setUpClass()
139139

140140
# Pytables 3.0.0 deprecates lots of things
141141
tm.reset_testing_mode()
142142

143143
@classmethod
144144
def tearDownClass(cls):
145-
super(TestHDFStore, cls).tearDownClass()
145+
super(Base, cls).tearDownClass()
146146

147147
# Pytables 3.0.0 deprecates lots of things
148148
tm.set_testing_mode()
@@ -155,6 +155,9 @@ def setUp(self):
155155
def tearDown(self):
156156
pass
157157

158+
159+
class TestHDFStore(Base):
160+
158161
def test_factory_fun(self):
159162
path = create_tempfile(self.path)
160163
try:
@@ -4743,6 +4746,146 @@ def test_read_nokey(self):
47434746
df.to_hdf(path, 'df2', mode='a')
47444747
self.assertRaises(ValueError, read_hdf, path)
47454748

4749+
4750+
class TestHDFComplexValues(Base):
4751+
# GH10447
4752+
def test_complex_fixed(self):
4753+
df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
4754+
index=list('abcd'),
4755+
columns=list('ABCDE'))
4756+
4757+
with ensure_clean_path(self.path) as path:
4758+
df.to_hdf(path, 'df')
4759+
reread = read_hdf(path, 'df')
4760+
assert_frame_equal(df, reread)
4761+
4762+
df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
4763+
index=list('abcd'),
4764+
columns=list('ABCDE'))
4765+
with ensure_clean_path(self.path) as path:
4766+
df.to_hdf(path, 'df')
4767+
reread = read_hdf(path, 'df')
4768+
assert_frame_equal(df, reread)
4769+
4770+
def test_complex_table(self):
4771+
df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
4772+
index=list('abcd'),
4773+
columns=list('ABCDE'))
4774+
4775+
with ensure_clean_path(self.path) as path:
4776+
df.to_hdf(path, 'df', format='table')
4777+
reread = read_hdf(path, 'df')
4778+
assert_frame_equal(df, reread)
4779+
4780+
df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
4781+
index=list('abcd'),
4782+
columns=list('ABCDE'))
4783+
4784+
with ensure_clean_path(self.path) as path:
4785+
df.to_hdf(path, 'df', format='table', mode='w')
4786+
reread = read_hdf(path, 'df')
4787+
assert_frame_equal(df, reread)
4788+
4789+
def test_complex_mixed_fixed(self):
4790+
complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
4791+
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
4792+
dtype=np.complex128)
4793+
df = DataFrame({'A': [1, 2, 3, 4],
4794+
'B': ['a', 'b', 'c', 'd'],
4795+
'C': complex64,
4796+
'D': complex128,
4797+
'E': [1.0, 2.0, 3.0, 4.0]},
4798+
index=list('abcd'))
4799+
with ensure_clean_path(self.path) as path:
4800+
df.to_hdf(path, 'df')
4801+
reread = read_hdf(path, 'df')
4802+
assert_frame_equal(df, reread)
4803+
4804+
def test_complex_mixed_table(self):
4805+
complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
4806+
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
4807+
dtype=np.complex128)
4808+
df = DataFrame({'A': [1, 2, 3, 4],
4809+
'B': ['a', 'b', 'c', 'd'],
4810+
'C': complex64,
4811+
'D': complex128,
4812+
'E': [1.0, 2.0, 3.0, 4.0]},
4813+
index=list('abcd'))
4814+
4815+
with ensure_clean_store(self.path) as store:
4816+
store.append('df', df, data_columns=['A', 'B'])
4817+
result = store.select('df', where=Term('A>2'))
4818+
assert_frame_equal(df.loc[df.A > 2], result)
4819+
4820+
with ensure_clean_path(self.path) as path:
4821+
df.to_hdf(path, 'df', format='table')
4822+
reread = read_hdf(path, 'df')
4823+
assert_frame_equal(df, reread)
4824+
4825+
def test_complex_across_dimensions_fixed(self):
4826+
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
4827+
s = Series(complex128, index=list('abcd'))
4828+
df = DataFrame({'A': s, 'B': s})
4829+
p = Panel({'One': df, 'Two': df})
4830+
4831+
objs = [s, df, p]
4832+
comps = [tm.assert_series_equal, tm.assert_frame_equal,
4833+
tm.assert_panel_equal]
4834+
for obj, comp in zip(objs, comps):
4835+
with ensure_clean_path(self.path) as path:
4836+
obj.to_hdf(path, 'obj', format='fixed')
4837+
reread = read_hdf(path, 'obj')
4838+
comp(obj, reread)
4839+
4840+
def test_complex_across_dimensions(self):
4841+
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
4842+
s = Series(complex128, index=list('abcd'))
4843+
df = DataFrame({'A': s, 'B': s})
4844+
p = Panel({'One': df, 'Two': df})
4845+
p4d = pd.Panel4D({'i': p, 'ii': p})
4846+
4847+
objs = [df, p, p4d]
4848+
comps = [tm.assert_frame_equal, tm.assert_panel_equal,
4849+
tm.assert_panel4d_equal]
4850+
for obj, comp in zip(objs, comps):
4851+
with ensure_clean_path(self.path) as path:
4852+
obj.to_hdf(path, 'obj', format='table')
4853+
reread = read_hdf(path, 'obj')
4854+
comp(obj, reread)
4855+
4856+
def test_complex_indexing_error(self):
4857+
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
4858+
dtype=np.complex128)
4859+
df = DataFrame({'A': [1, 2, 3, 4],
4860+
'B': ['a', 'b', 'c', 'd'],
4861+
'C': complex128},
4862+
index=list('abcd'))
4863+
with ensure_clean_store(self.path) as store:
4864+
self.assertRaises(TypeError, store.append, 'df', df, data_columns=['C'])
4865+
4866+
def test_complex_series_error(self):
4867+
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
4868+
s = Series(complex128, index=list('abcd'))
4869+
4870+
with ensure_clean_path(self.path) as path:
4871+
self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t')
4872+
4873+
with ensure_clean_path(self.path) as path:
4874+
s.to_hdf(path, 'obj', format='t', index=False)
4875+
reread = read_hdf(path, 'obj')
4876+
tm.assert_series_equal(s, reread)
4877+
4878+
def test_complex_append(self):
4879+
df = DataFrame({'a': np.random.randn(100).astype(np.complex128),
4880+
'b': np.random.randn(100)})
4881+
4882+
with ensure_clean_store(self.path) as store:
4883+
store.append('df', df, data_columns=['b'])
4884+
store.append('df', df)
4885+
result = store.select('df')
4886+
assert_frame_equal(pd.concat([df, df], 0), result)
4887+
4888+
47464889
def _test_sort(obj):
47474890
if isinstance(obj, DataFrame):
47484891
return obj.reindex(sorted(obj.index))

0 commit comments

Comments
 (0)