diff --git a/doc/source/io.rst b/doc/source/io.rst index a363d144b2ba1..91ffb5091e927 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2311,7 +2311,8 @@ Fixed Format The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called the ``fixed`` format. These types of stores are are **not** appendable once written (though you can simply remove them and rewrite). Nor are they **queryable**; they must be -retrieved in their entirety. These offer very fast writing and slightly faster reading than ``table`` stores. +retrieved in their entirety. They also do not support dataframes with non-unique column names. +The ``fixed`` format stores offer very fast writing and slightly faster reading than ``table`` stores. This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'`` .. warning:: diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index ca24eb3f910ed..9fbe718b3fc64 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -187,6 +187,7 @@ Bug Fixes - Bug in Series 0-division with a float and integer operand dtypes (:issue:`7785`) - Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`) - Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`) +- Raise a ``ValueError`` in ``df.to_hdf`` if ``df`` has non-unique columns as the resulting file will be broken (:issue:`7761`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0e6c41a25bbe5..cecbb407d0bd1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2680,6 +2680,9 @@ def write(self, obj, **kwargs): self.attrs.ndim = data.ndim for i, ax in enumerate(data.axes): + if i == 0: + if not ax.is_unique: + raise ValueError("Columns index has to be unique for fixed format") self.write_index('axis%d' % i, ax) # Supporting mixed-type DataFrame objects...nontrivial diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 6a944284035c8..c602e8ff1a888 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4370,6 +4370,17 @@ def test_categorical(self): # FIXME: TypeError: cannot pass a where specification when reading from a Fixed format store. this store must be selected in its entirety #result = store.select('df', where = ['index>2']) #tm.assert_frame_equal(df[df.index>2],result) + + def test_duplicate_column_name(self): + df = DataFrame(columns=["a", "a"], data=[[0, 0]]) + + with ensure_clean_path(self.path) as path: + self.assertRaises(ValueError, df.to_hdf, path, 'df', format='fixed') + + df.to_hdf(path, 'df', format='table') + other = read_hdf(path, 'df') + tm.assert_frame_equal(df, other) + def _test_sort(obj): if isinstance(obj, DataFrame):