BUG: Enable complex values to be written to HDF #10473


Merged 1 commit on Jul 13, 2015
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
@@ -27,6 +27,7 @@ New features
~~~~~~~~~~~~

- SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`)
- Enable writing complex values to HDF stores when using table format (:issue:`10447`)

.. _whatsnew_0170.enhancements.other:

@@ -147,3 +148,4 @@ Bug Fixes
- Bug in `groupby.var` which caused variance to be inaccurate for small float values (:issue:`10448`)

- Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`)
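
A minimal usage sketch of the complex-HDF entry above; the file name, key, and values are illustrative, and PyTables is assumed to be installed:

import numpy as np
import pandas as pd

# DataFrame with one complex-valued column (complex128 chosen for illustration).
df = pd.DataFrame({'A': [1.0, 2.0, 3.0],
                   'C': np.array([1 + 1j, 2 - 2j, 3 + 0j], dtype=np.complex128)})

# With this change, table format round-trips complex values as well.
df.to_hdf('example.h5', 'df', format='table')
roundtrip = pd.read_hdf('example.h5', 'df')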

20 changes: 18 additions & 2 deletions pandas/io/pytables.py
@@ -1773,6 +1773,8 @@ def set_kind(self):
self.kind = 'string'
elif dtype.startswith(u('float')):
self.kind = 'float'
elif dtype.startswith(u('complex')):
self.kind = 'complex'
elif dtype.startswith(u('int')) or dtype.startswith(u('uint')):
self.kind = 'integer'
elif dtype.startswith(u('date')):
@@ -1802,6 +1804,8 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
return self.set_atom_datetime64(block)
elif block.is_timedelta:
return self.set_atom_timedelta64(block)
elif block.is_complex:
return self.set_atom_complex(block)

dtype = block.dtype.name
inferred_type = lib.infer_dtype(block.values)
@@ -1936,6 +1940,12 @@ def get_atom_coltype(self, kind=None):
def get_atom_data(self, block, kind=None):
return self.get_atom_coltype(kind=kind)(shape=block.shape[0])

def set_atom_complex(self, block):
self.kind = block.dtype.name
itemsize = int(self.kind.split('complex')[-1]) // 8
self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0])
self.set_data(block.values.astype(self.typ.type, copy=False))

def set_atom_data(self, block):
self.kind = block.dtype.name
self.typ = self.get_atom_data(block)
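
For reference, the itemsize arithmetic in set_atom_complex maps the dtype name to a per-element byte width; a standalone illustration (not part of the diff):

kind = 'complex128'
itemsize = int(kind.split('complex')[-1]) // 8   # 128 bits // 8 -> 16 bytes per element
assert itemsize == 16
assert int('complex64'.split('complex')[-1]) // 8 == 8   # complex64 -> 8 bytes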
@@ -3147,8 +3157,8 @@ def f(i, c):
def create_index(self, columns=None, optlevel=None, kind=None):
"""
Create a pytables index on the specified columns
note: cannot index Time64Col() currently; PyTables must be >= 2.3

note: cannot index Time64Col() or ComplexCol currently;
PyTables must be >= 3.0

Parameters
----------
@@ -3203,6 +3213,12 @@ def create_index(self, columns=None, optlevel=None, kind=None):

# create the index
if not v.is_indexed:
if v.type.startswith('complex'):
raise TypeError('Columns containing complex values can be stored but cannot'
' be indexed when using table format. Either use fixed '
'format, set index=False, or do not include the columns '
'containing complex values to data_columns when '
'initializing the table.')
v.create_index(**kw)

def read_axes(self, where, **kwargs):
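
To make the TypeError above concrete, a hedged sketch of two of the workarounds it names, continuing the illustrative df from the earlier sketch (complex column 'C', float column 'A'):

# Option 1: fixed format never builds a PyTables index, so complex columns are fine.
df.to_hdf('example.h5', 'df_fixed')                      # default format='fixed'

# Option 2: table format, but keep the complex column out of data_columns.
with pd.HDFStore('example.h5') as store:
    store.append('df_table', df, data_columns=['A'])     # only the float column is a data_column

(The index=False route is sketched alongside the review discussion below.)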
149 changes: 146 additions & 3 deletions pandas/io/tests/test_pytables.py
@@ -131,18 +131,18 @@ def compat_assert_produces_warning(w,f):
f()


class TestHDFStore(tm.TestCase):
class Base(tm.TestCase):

@classmethod
def setUpClass(cls):
super(TestHDFStore, cls).setUpClass()
super(Base, cls).setUpClass()

# Pytables 3.0.0 deprecates lots of things
tm.reset_testing_mode()

@classmethod
def tearDownClass(cls):
super(TestHDFStore, cls).tearDownClass()
super(Base, cls).tearDownClass()

# Pytables 3.0.0 deprecates lots of things
tm.set_testing_mode()
@@ -155,6 +155,9 @@ def setUp(self):
def tearDown(self):
pass


class TestHDFStore(Base):

def test_factory_fun(self):
path = create_tempfile(self.path)
try:
@@ -4743,6 +4746,146 @@ def test_read_nokey(self):
df.to_hdf(path, 'df2', mode='a')
self.assertRaises(ValueError, read_hdf, path)


class TestHDFComplexValues(Base):
# GH10447
def test_complex_fixed(self):
df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
index=list('abcd'),
columns=list('ABCDE'))

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
index=list('abcd'),
columns=list('ABCDE'))
with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_table(self):
df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
index=list('abcd'),
columns=list('ABCDE'))

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df', format='table')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
index=list('abcd'),
columns=list('ABCDE'))

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df', format='table', mode='w')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_mixed_fixed(self):
complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
dtype=np.complex128)
df = DataFrame({'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'd'],
'C': complex64,
'D': complex128,
'E': [1.0, 2.0, 3.0, 4.0]},
index=list('abcd'))
with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_mixed_table(self):
complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
dtype=np.complex128)
df = DataFrame({'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'd'],
'C': complex64,
'D': complex128,
'E': [1.0, 2.0, 3.0, 4.0]},
index=list('abcd'))

with ensure_clean_store(self.path) as store:
store.append('df', df, data_columns=['A', 'B'])
result = store.select('df', where=Term('A>2'))
assert_frame_equal(df.loc[df.A > 2], result)

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df', format='table')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_across_dimensions_fixed(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list('abcd'))
df = DataFrame({'A': s, 'B': s})
p = Panel({'One': df, 'Two': df})

objs = [s, df, p]
comps = [tm.assert_series_equal, tm.assert_frame_equal,
tm.assert_panel_equal]
for obj, comp in zip(objs, comps):
with ensure_clean_path(self.path) as path:
obj.to_hdf(path, 'obj', format='fixed')
reread = read_hdf(path, 'obj')
comp(obj, reread)

def test_complex_across_dimensions(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list('abcd'))
df = DataFrame({'A': s, 'B': s})
p = Panel({'One': df, 'Two': df})
p4d = pd.Panel4D({'i': p, 'ii': p})

objs = [df, p, p4d]
comps = [tm.assert_frame_equal, tm.assert_panel_equal,
tm.assert_panel4d_equal]
for obj, comp in zip(objs, comps):
with ensure_clean_path(self.path) as path:
obj.to_hdf(path, 'obj', format='table')
reread = read_hdf(path, 'obj')
comp(obj, reread)

def test_complex_indexing_error(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
dtype=np.complex128)
df = DataFrame({'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'd'],
'C': complex128},
index=list('abcd'))
with ensure_clean_store(self.path) as store:
self.assertRaises(TypeError, store.append, 'df', df, data_columns=['C'])
Contributor:

So if index=False this will not raise. So either we need to not allow data_columns for complex at all (e.g. you can check when it's created), or allow it but have a check when selecting (as you already check on the indexing of it). I think the latter is better, as it's not a 'problem' to create them, just to select from them / index with them (e.g. you might want to export the data, and data_columns is the only way to specify that you need non-aggregated columns).

Contributor Author:

I agree with the latter as a single point to catch this issue and provide an explanation.
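
A short sketch of the behavior discussed in this thread, assuming an open HDFStore named store and a DataFrame df with a complex column 'C' (names are illustrative):

# Allowed: 'C' is stored non-aggregated as a data_column, and index=False means
# create_index is never reached, so no error is raised (per the comment above).
store.append('df_dc', df, data_columns=['C'], index=False)

# The default index=True would reach create_index and raise the TypeError added in this PR:
# store.append('df_dc', df, data_columns=['C'])   # -> TypeError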


def test_complex_series_error(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list('abcd'))

with ensure_clean_path(self.path) as path:
self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t')

with ensure_clean_path(self.path) as path:
s.to_hdf(path, 'obj', format='t', index=False)
reread = read_hdf(path, 'obj')
tm.assert_series_equal(s, reread)

def test_complex_append(self):
df = DataFrame({'a': np.random.randn(100).astype(np.complex128),
'b': np.random.randn(100)})

with ensure_clean_store(self.path) as store:
store.append('df', df, data_columns=['b'])
store.append('df', df)
result = store.select('df')
assert_frame_equal(pd.concat([df, df], 0), result)


def _test_sort(obj):
if isinstance(obj, DataFrame):
return obj.reindex(sorted(obj.index))