From 271ae441c1d0256b9c8b61449b21644e39c70cbf Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 29 Jun 2015 15:17:55 -0400 Subject: [PATCH] BUG: Enable complex values to be written to HDF Enable table format to be used to store complex values in DataFrames, Panels and Panel4Ds. Add tests for both fixed and panel. Add exception when attempting to write Series with complex values. closes #10447 --- doc/source/whatsnew/v0.17.0.txt | 2 + pandas/io/pytables.py | 20 ++++- pandas/io/tests/test_pytables.py | 149 ++++++++++++++++++++++++++++++- 3 files changed, 166 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 28ec828b81c34..a3ec13439fe76 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -27,6 +27,7 @@ New features ~~~~~~~~~~~~ - SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`) +- Enable writing complex values to HDF stores when using table format (:issue:`10447`) .. _whatsnew_0170.enhancements.other: @@ -147,3 +148,4 @@ Bug Fixes - Bug in `groupby.var` which caused variance to be inaccurate for small float values (:issue:`10448`) - Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`) + diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index eb800c37db98f..9e1a272ec5621 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1773,6 +1773,8 @@ def set_kind(self): self.kind = 'string' elif dtype.startswith(u('float')): self.kind = 'float' + elif dtype.startswith(u('complex')): + self.kind = 'complex' elif dtype.startswith(u('int')) or dtype.startswith(u('uint')): self.kind = 'integer' elif dtype.startswith(u('date')): @@ -1802,6 +1804,8 @@ def set_atom(self, block, block_items, existing_col, min_itemsize, return self.set_atom_datetime64(block) elif block.is_timedelta: return self.set_atom_timedelta64(block) + elif block.is_complex: + return self.set_atom_complex(block) dtype = block.dtype.name inferred_type = lib.infer_dtype(block.values) @@ -1936,6 +1940,12 @@ def get_atom_coltype(self, kind=None): def get_atom_data(self, block, kind=None): return self.get_atom_coltype(kind=kind)(shape=block.shape[0]) + def set_atom_complex(self, block): + self.kind = block.dtype.name + itemsize = int(self.kind.split('complex')[-1]) // 8 + self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0]) + self.set_data(block.values.astype(self.typ.type, copy=False)) + def set_atom_data(self, block): self.kind = block.dtype.name self.typ = self.get_atom_data(block) @@ -3147,8 +3157,8 @@ def f(i, c): def create_index(self, columns=None, optlevel=None, kind=None): """ Create a pytables index on the specified columns - note: cannot index Time64Col() currently; PyTables must be >= 2.3 - + note: cannot index Time64Col() or ComplexCol currently; + PyTables must be >= 3.0 Paramaters ---------- @@ -3203,6 +3213,12 @@ def create_index(self, columns=None, optlevel=None, kind=None): # create the index if not v.is_indexed: + if v.type.startswith('complex'): + raise TypeError('Columns containing complex values can be stored but cannot' + ' be indexed when using table format. Either use fixed ' + 'format, set index=False, or do not include the columns ' + 'containing complex values to data_columns when ' + 'initializing the table.') v.create_index(**kw) def read_axes(self, where, **kwargs): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 4ae2c331f5a65..1b932fb3759e5 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -131,18 +131,18 @@ def compat_assert_produces_warning(w,f): f() -class TestHDFStore(tm.TestCase): +class Base(tm.TestCase): @classmethod def setUpClass(cls): - super(TestHDFStore, cls).setUpClass() + super(Base, cls).setUpClass() # Pytables 3.0.0 deprecates lots of things tm.reset_testing_mode() @classmethod def tearDownClass(cls): - super(TestHDFStore, cls).tearDownClass() + super(Base, cls).tearDownClass() # Pytables 3.0.0 deprecates lots of things tm.set_testing_mode() @@ -155,6 +155,9 @@ def setUp(self): def tearDown(self): pass + +class TestHDFStore(Base): + def test_factory_fun(self): path = create_tempfile(self.path) try: @@ -4743,6 +4746,146 @@ def test_read_nokey(self): df.to_hdf(path, 'df2', mode='a') self.assertRaises(ValueError, read_hdf, path) + +class TestHDFComplexValues(Base): + # GH10447 + def test_complex_fixed(self): + df = DataFrame(np.random.rand(4, 5).astype(np.complex64), + index=list('abcd'), + columns=list('ABCDE')) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + df = DataFrame(np.random.rand(4, 5).astype(np.complex128), + index=list('abcd'), + columns=list('ABCDE')) + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + def test_complex_table(self): + df = DataFrame(np.random.rand(4, 5).astype(np.complex64), + index=list('abcd'), + columns=list('ABCDE')) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + df = DataFrame(np.random.rand(4, 5).astype(np.complex128), + index=list('abcd'), + columns=list('ABCDE')) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table', mode='w') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + def test_complex_mixed_fixed(self): + complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64) + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], + dtype=np.complex128) + df = DataFrame({'A': [1, 2, 3, 4], + 'B': ['a', 'b', 'c', 'd'], + 'C': complex64, + 'D': complex128, + 'E': [1.0, 2.0, 3.0, 4.0]}, + index=list('abcd')) + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + def test_complex_mixed_table(self): + complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64) + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], + dtype=np.complex128) + df = DataFrame({'A': [1, 2, 3, 4], + 'B': ['a', 'b', 'c', 'd'], + 'C': complex64, + 'D': complex128, + 'E': [1.0, 2.0, 3.0, 4.0]}, + index=list('abcd')) + + with ensure_clean_store(self.path) as store: + store.append('df', df, data_columns=['A', 'B']) + result = store.select('df', where=Term('A>2')) + assert_frame_equal(df.loc[df.A > 2], result) + + with ensure_clean_path(self.path) as path: + df.to_hdf(path, 'df', format='table') + reread = read_hdf(path, 'df') + assert_frame_equal(df, reread) + + def test_complex_across_dimensions_fixed(self): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list('abcd')) + df = DataFrame({'A': s, 'B': s}) + p = Panel({'One': df, 'Two': df}) + + objs = [s, df, p] + comps = [tm.assert_series_equal, tm.assert_frame_equal, + tm.assert_panel_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(self.path) as path: + obj.to_hdf(path, 'obj', format='fixed') + reread = read_hdf(path, 'obj') + comp(obj, reread) + + def test_complex_across_dimensions(self): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list('abcd')) + df = DataFrame({'A': s, 'B': s}) + p = Panel({'One': df, 'Two': df}) + p4d = pd.Panel4D({'i': p, 'ii': p}) + + objs = [df, p, p4d] + comps = [tm.assert_frame_equal, tm.assert_panel_equal, + tm.assert_panel4d_equal] + for obj, comp in zip(objs, comps): + with ensure_clean_path(self.path) as path: + obj.to_hdf(path, 'obj', format='table') + reread = read_hdf(path, 'obj') + comp(obj, reread) + + def test_complex_indexing_error(self): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], + dtype=np.complex128) + df = DataFrame({'A': [1, 2, 3, 4], + 'B': ['a', 'b', 'c', 'd'], + 'C': complex128}, + index=list('abcd')) + with ensure_clean_store(self.path) as store: + self.assertRaises(TypeError, store.append, 'df', df, data_columns=['C']) + + def test_complex_series_error(self): + complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) + s = Series(complex128, index=list('abcd')) + + with ensure_clean_path(self.path) as path: + self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t') + + with ensure_clean_path(self.path) as path: + s.to_hdf(path, 'obj', format='t', index=False) + reread = read_hdf(path, 'obj') + tm.assert_series_equal(s, reread) + + def test_complex_append(self): + df = DataFrame({'a': np.random.randn(100).astype(np.complex128), + 'b': np.random.randn(100)}) + + with ensure_clean_store(self.path) as store: + store.append('df', df, data_columns=['b']) + store.append('df', df) + result = store.select('df') + assert_frame_equal(pd.concat([df, df], 0), result) + + def _test_sort(obj): if isinstance(obj, DataFrame): return obj.reindex(sorted(obj.index))