diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 28ec828b81c34..a3ec13439fe76 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -27,6 +27,7 @@ New features
 ~~~~~~~~~~~~
 
 - SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`)
+- Enable writing complex values to HDF stores when using table format (:issue:`10447`)
 
 .. _whatsnew_0170.enhancements.other:
 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index eb800c37db98f..9e1a272ec5621 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -1773,6 +1773,8 @@ def set_kind(self):
                 self.kind = 'string'
             elif dtype.startswith(u('float')):
                 self.kind = 'float'
+            elif dtype.startswith(u('complex')):
+                self.kind = 'complex'
             elif dtype.startswith(u('int')) or dtype.startswith(u('uint')):
                 self.kind = 'integer'
             elif dtype.startswith(u('date')):
@@ -1802,6 +1804,8 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
             return self.set_atom_datetime64(block)
         elif block.is_timedelta:
             return self.set_atom_timedelta64(block)
+        elif block.is_complex:
+            return self.set_atom_complex(block)
 
         dtype = block.dtype.name
         inferred_type = lib.infer_dtype(block.values)
@@ -1936,6 +1940,14 @@ def get_atom_coltype(self, kind=None):
     def get_atom_data(self, block, kind=None):
         return self.get_atom_coltype(kind=kind)(shape=block.shape[0])
 
+    def set_atom_complex(self, block):
+        """ set my kind, ComplexCol atom and data from a complex block """
+        self.kind = block.dtype.name
+        # the dtype name ends with the width in bits; itemsize is in bytes
+        itemsize = int(self.kind.split('complex')[-1]) // 8
+        self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0])
+        self.set_data(block.values.astype(self.typ.type, copy=False))
+
     def set_atom_data(self, block):
         self.kind = block.dtype.name
         self.typ = self.get_atom_data(block)
@@ -3147,8 +3159,8 @@ def f(i, c):
     def create_index(self, columns=None, optlevel=None, kind=None):
         """
         Create a pytables index on the specified columns
-          note: cannot index Time64Col() currently; PyTables must be >= 2.3
-
+          note: cannot index Time64Col() or ComplexCol currently;
+          PyTables must be >= 3.0
 
         Paramaters
         ----------
@@ -3203,6 +3215,12 @@ def create_index(self, columns=None, optlevel=None, kind=None):
 
                     # create the index
                     if not v.is_indexed:
+                        if v.type.startswith('complex'):
+                            raise TypeError('Columns containing complex values can be stored but cannot'
+                                            ' be indexed when using table format. Either use fixed '
+                                            'format, set index=False, or do not include the columns '
+                                            'containing complex values in data_columns when '
+                                            'initializing the table.')
                         v.create_index(**kw)
 
     def read_axes(self, where, **kwargs):
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 4ae2c331f5a65..1b932fb3759e5 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -131,18 +131,18 @@ def compat_assert_produces_warning(w,f):
             f()
 
 
-class TestHDFStore(tm.TestCase):
+class Base(tm.TestCase):
 
     @classmethod
     def setUpClass(cls):
-        super(TestHDFStore, cls).setUpClass()
+        super(Base, cls).setUpClass()
 
         # Pytables 3.0.0 deprecates lots of things
         tm.reset_testing_mode()
 
     @classmethod
     def tearDownClass(cls):
-        super(TestHDFStore, cls).tearDownClass()
+        super(Base, cls).tearDownClass()
 
         # Pytables 3.0.0 deprecates lots of things
         tm.set_testing_mode()
@@ -155,6 +155,9 @@ def setUp(self):
     def tearDown(self):
         pass
 
+
+class TestHDFStore(Base):
+
     def test_factory_fun(self):
         path = create_tempfile(self.path)
         try:
@@ -4743,6 +4746,146 @@ def test_read_nokey(self):
             df.to_hdf(path, 'df2', mode='a')
             self.assertRaises(ValueError, read_hdf, path)
 
+
+class TestHDFComplexValues(Base):
+    # GH10447
+    def test_complex_fixed(self):
+        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
+                       index=list('abcd'),
+                       columns=list('ABCDE'))
+
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df')
+            reread = read_hdf(path, 'df')
+            assert_frame_equal(df, reread)
+
+        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
+                       index=list('abcd'),
+                       columns=list('ABCDE'))
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df')
+            reread = read_hdf(path, 'df')
+            assert_frame_equal(df, reread)
+
+    def test_complex_table(self):
+        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
+                       index=list('abcd'),
+                       columns=list('ABCDE'))
+
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', format='table')
+            reread = read_hdf(path, 'df')
+            assert_frame_equal(df, reread)
+
+        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
+                       index=list('abcd'),
+                       columns=list('ABCDE'))
+
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', format='table', mode='w')
+            reread = read_hdf(path, 'df')
+            assert_frame_equal(df, reread)
+
+    def test_complex_mixed_fixed(self):
+        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
+        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
+                              dtype=np.complex128)
+        df = DataFrame({'A': [1, 2, 3, 4],
+                        'B': ['a', 'b', 'c', 'd'],
+                        'C': complex64,
+                        'D': complex128,
+                        'E': [1.0, 2.0, 3.0, 4.0]},
+                       index=list('abcd'))
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df')
+            reread = read_hdf(path, 'df')
+            assert_frame_equal(df, reread)
+
+    def test_complex_mixed_table(self):
+        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
+        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
+                              dtype=np.complex128)
+        df = DataFrame({'A': [1, 2, 3, 4],
+                        'B': ['a', 'b', 'c', 'd'],
+                        'C': complex64,
+                        'D': complex128,
+                        'E': [1.0, 2.0, 3.0, 4.0]},
+                       index=list('abcd'))
+
+        with ensure_clean_store(self.path) as store:
+            store.append('df', df, data_columns=['A', 'B'])
+            result = store.select('df', where=Term('A>2'))
+            assert_frame_equal(df.loc[df.A > 2], result)
+
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', format='table')
+            reread = read_hdf(path, 'df')
+            assert_frame_equal(df, reread)
+
+    def test_complex_across_dimensions_fixed(self):
+        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
+        s = Series(complex128, index=list('abcd'))
+        df = DataFrame({'A': s, 'B': s})
+        p = Panel({'One': df, 'Two': df})
+
+        objs = [s, df, p]
+        comps = [tm.assert_series_equal, tm.assert_frame_equal,
+                 tm.assert_panel_equal]
+        for obj, comp in zip(objs, comps):
+            with ensure_clean_path(self.path) as path:
+                obj.to_hdf(path, 'obj', format='fixed')
+                reread = read_hdf(path, 'obj')
+                comp(obj, reread)
+
+    def test_complex_across_dimensions(self):
+        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
+        s = Series(complex128, index=list('abcd'))
+        df = DataFrame({'A': s, 'B': s})
+        p = Panel({'One': df, 'Two': df})
+        p4d = pd.Panel4D({'i': p, 'ii': p})
+
+        objs = [df, p, p4d]
+        comps = [tm.assert_frame_equal, tm.assert_panel_equal,
+                 tm.assert_panel4d_equal]
+        for obj, comp in zip(objs, comps):
+            with ensure_clean_path(self.path) as path:
+                obj.to_hdf(path, 'obj', format='table')
+                reread = read_hdf(path, 'obj')
+                comp(obj, reread)
+
+    def test_complex_indexing_error(self):
+        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
+                              dtype=np.complex128)
+        df = DataFrame({'A': [1, 2, 3, 4],
+                        'B': ['a', 'b', 'c', 'd'],
+                        'C': complex128},
+                       index=list('abcd'))
+        with ensure_clean_store(self.path) as store:
+            self.assertRaises(TypeError, store.append, 'df', df, data_columns=['C'])
+
+    def test_complex_series_error(self):
+        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
+        s = Series(complex128, index=list('abcd'))
+
+        with ensure_clean_path(self.path) as path:
+            self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t')
+
+        with ensure_clean_path(self.path) as path:
+            s.to_hdf(path, 'obj', format='t', index=False)
+            reread = read_hdf(path, 'obj')
+            tm.assert_series_equal(s, reread)
+
+    def test_complex_append(self):
+        df = DataFrame({'a': np.random.randn(100).astype(np.complex128),
+                        'b': np.random.randn(100)})
+
+        with ensure_clean_store(self.path) as store:
+            store.append('df', df, data_columns=['b'])
+            store.append('df', df)
+            result = store.select('df')
+            assert_frame_equal(pd.concat([df, df], 0), result)
+
+
 def _test_sort(obj):
     if isinstance(obj, DataFrame):
         return obj.reindex(sorted(obj.index))