BUG: Enable complex values to be written to HDF #10473


Merged 1 commit on Jul 13, 2015
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
@@ -27,6 +27,7 @@ New features
~~~~~~~~~~~~

- SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`)
- Enable writing complex values to HDF stores when using table format (:issue:`10447`)

.. _whatsnew_0170.enhancements.other:

@@ -147,3 +148,4 @@ Bug Fixes
- Bug in `groupby.var` which caused variance to be inaccurate for small float values (:issue:`10448`)

- Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`)
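
A minimal usage sketch of the complex-HDF entry above; the file name, key, and values are illustrative, and PyTables is assumed to be installed:

import numpy as np
import pandas as pd

# DataFrame with one complex-valued column (complex128 chosen for illustration).
df = pd.DataFrame({'A': [1.0, 2.0, 3.0],
                   'C': np.array([1 + 1j, 2 - 2j, 3 + 0j], dtype=np.complex128)})

# With this change, table format round-trips complex values as well.
df.to_hdf('example.h5', 'df', format='table')
roundtrip = pd.read_hdf('example.h5', 'df')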

20 changes: 18 additions & 2 deletions pandas/io/pytables.py
@@ -1773,6 +1773,8 @@ def set_kind(self):
self.kind = 'string'
elif dtype.startswith(u('float')):
self.kind = 'float'
elif dtype.startswith(u('complex')):
self.kind = 'complex'
elif dtype.startswith(u('int')) or dtype.startswith(u('uint')):
self.kind = 'integer'
elif dtype.startswith(u('date')):
@@ -1802,6 +1804,8 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
return self.set_atom_datetime64(block)
elif block.is_timedelta:
return self.set_atom_timedelta64(block)
elif block.is_complex:
return self.set_atom_complex(block)

dtype = block.dtype.name
inferred_type = lib.infer_dtype(block.values)
@@ -1936,6 +1940,12 @@ def get_atom_coltype(self, kind=None):
def get_atom_data(self, block, kind=None):
return self.get_atom_coltype(kind=kind)(shape=block.shape[0])

def set_atom_complex(self, block):
self.kind = block.dtype.name
itemsize = int(self.kind.split('complex')[-1]) // 8
self.typ = _tables().ComplexCol(itemsize=itemsize, shape=block.shape[0])
self.set_data(block.values.astype(self.typ.type, copy=False))

def set_atom_data(self, block):
self.kind = block.dtype.name
self.typ = self.get_atom_data(block)
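
For reference, the itemsize arithmetic in set_atom_complex maps the dtype name to a per-element byte width; a standalone illustration (not part of the diff):

kind = 'complex128'
itemsize = int(kind.split('complex')[-1]) // 8   # 128 bits // 8 -> 16 bytes per element
assert itemsize == 16
assert int('complex64'.split('complex')[-1]) // 8 == 8   # complex64 -> 8 bytes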
@@ -3147,8 +3157,8 @@ def f(i, c):
def create_index(self, columns=None, optlevel=None, kind=None):
"""
Create a pytables index on the specified columns
note: cannot index Time64Col() currently; PyTables must be >= 2.3

note: cannot index Time64Col() or ComplexCol currently;
PyTables must be >= 3.0

Parameters
----------
@@ -3203,6 +3213,12 @@ def create_index(self, columns=None, optlevel=None, kind=None):

# create the index
if not v.is_indexed:
if v.type.startswith('complex'):
raise TypeError('Columns containing complex values can be stored but cannot'
' be indexed when using table format. Either use fixed '
'format, set index=False, or do not include the columns '
'containing complex values to data_columns when '
'initializing the table.')
v.create_index(**kw)

def read_axes(self, where, **kwargs):
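
To make the TypeError above concrete, a hedged sketch of two of the workarounds it names, continuing the illustrative df from the earlier sketch (complex column 'C', float column 'A'):

# Option 1: fixed format never builds a PyTables index, so complex columns are fine.
df.to_hdf('example.h5', 'df_fixed')                      # default format='fixed'

# Option 2: table format, but keep the complex column out of data_columns.
with pd.HDFStore('example.h5') as store:
    store.append('df_table', df, data_columns=['A'])     # only the float column is a data_column

(The index=False route is sketched alongside the review discussion below.)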
149 changes: 146 additions & 3 deletions pandas/io/tests/test_pytables.py
@@ -131,18 +131,18 @@ def compat_assert_produces_warning(w,f):
f()


class TestHDFStore(tm.TestCase):
class Base(tm.TestCase):

@classmethod
def setUpClass(cls):
super(TestHDFStore, cls).setUpClass()
super(Base, cls).setUpClass()

# Pytables 3.0.0 deprecates lots of things
tm.reset_testing_mode()

@classmethod
def tearDownClass(cls):
super(TestHDFStore, cls).tearDownClass()
super(Base, cls).tearDownClass()

# Pytables 3.0.0 deprecates lots of things
tm.set_testing_mode()
@@ -155,6 +155,9 @@ def setUp(self):
def tearDown(self):
pass


class TestHDFStore(Base):

def test_factory_fun(self):
path = create_tempfile(self.path)
try:
@@ -4743,6 +4746,146 @@ def test_read_nokey(self):
df.to_hdf(path, 'df2', mode='a')
self.assertRaises(ValueError, read_hdf, path)


class TestHDFComplexValues(Base):
# GH10447
def test_complex_fixed(self):
df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
index=list('abcd'),
columns=list('ABCDE'))

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
index=list('abcd'),
columns=list('ABCDE'))
with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_table(self):
df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
index=list('abcd'),
columns=list('ABCDE'))

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df', format='table')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
index=list('abcd'),
columns=list('ABCDE'))

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df', format='table', mode='w')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_mixed_fixed(self):
complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
dtype=np.complex128)
df = DataFrame({'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'd'],
'C': complex64,
'D': complex128,
'E': [1.0, 2.0, 3.0, 4.0]},
index=list('abcd'))
with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_mixed_table(self):
complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
dtype=np.complex128)
df = DataFrame({'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'd'],
'C': complex64,
'D': complex128,
'E': [1.0, 2.0, 3.0, 4.0]},
index=list('abcd'))

with ensure_clean_store(self.path) as store:
store.append('df', df, data_columns=['A', 'B'])
result = store.select('df', where=Term('A>2'))
assert_frame_equal(df.loc[df.A > 2], result)

with ensure_clean_path(self.path) as path:
df.to_hdf(path, 'df', format='table')
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)

def test_complex_across_dimensions_fixed(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list('abcd'))
df = DataFrame({'A': s, 'B': s})
p = Panel({'One': df, 'Two': df})

objs = [s, df, p]
comps = [tm.assert_series_equal, tm.assert_frame_equal,
tm.assert_panel_equal]
for obj, comp in zip(objs, comps):
with ensure_clean_path(self.path) as path:
obj.to_hdf(path, 'obj', format='fixed')
reread = read_hdf(path, 'obj')
comp(obj, reread)

def test_complex_across_dimensions(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list('abcd'))
df = DataFrame({'A': s, 'B': s})
p = Panel({'One': df, 'Two': df})
p4d = pd.Panel4D({'i': p, 'ii': p})

objs = [df, p, p4d]
comps = [tm.assert_frame_equal, tm.assert_panel_equal,
tm.assert_panel4d_equal]
for obj, comp in zip(objs, comps):
with ensure_clean_path(self.path) as path:
obj.to_hdf(path, 'obj', format='table')
reread = read_hdf(path, 'obj')
comp(obj, reread)

def test_complex_indexing_error(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
dtype=np.complex128)
df = DataFrame({'A': [1, 2, 3, 4],
'B': ['a', 'b', 'c', 'd'],
'C': complex128},
index=list('abcd'))
with ensure_clean_store(self.path) as store:
self.assertRaises(TypeError, store.append, 'df', df, data_columns=['C'])
Contributor:

So if index=False this will not raise. So either we need to not allow data_columns for complex at all (e.g. you can check when it's created), or allow it but have a check when selecting (as you already check on the indexing of it). I think the latter is better, as it's not a 'problem' to create them, just to select from them / index with them (e.g. you might want to export the data, and data_columns is the only way to specify that you need non-aggregated columns).

Contributor Author:

I agree with the latter as a single point to catch this issue and provide an explanation.
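
A short sketch of the behavior discussed in this thread, assuming an open HDFStore named store and a DataFrame df with a complex column 'C' (names are illustrative):

# Allowed: 'C' is stored non-aggregated as a data_column, and index=False means
# create_index is never reached, so no error is raised (per the comment above).
store.append('df_dc', df, data_columns=['C'], index=False)

# The default index=True would reach create_index and raise the TypeError added in this PR:
# store.append('df_dc', df, data_columns=['C'])   # -> TypeError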


def test_complex_series_error(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list('abcd'))

with ensure_clean_path(self.path) as path:
self.assertRaises(TypeError, s.to_hdf, path, 'obj', format='t')

with ensure_clean_path(self.path) as path:
s.to_hdf(path, 'obj', format='t', index=False)
reread = read_hdf(path, 'obj')
tm.assert_series_equal(s, reread)

def test_complex_append(self):
df = DataFrame({'a': np.random.randn(100).astype(np.complex128),
'b': np.random.randn(100)})

with ensure_clean_store(self.path) as store:
store.append('df', df, data_columns=['b'])
store.append('df', df)
result = store.select('df')
assert_frame_equal(pd.concat([df, df], 0), result)


def _test_sort(obj):
if isinstance(obj, DataFrame):
return obj.reindex(sorted(obj.index))