diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index a9897f389fe12..231297df3fb8f 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -60,6 +60,7 @@ Bug Fixes - Bug in ``HDFStore`` when writing a ``MultiIndex`` when using ``data_columns=True`` (:issue:`14435`) - Bug in ``HDFStore.append()`` when writing a ``Series`` and passing a ``min_itemsize`` argument containing a value for the ``index`` (:issue:`11412`) +- Bug in ``HDFStore`` when writing in ``table`` format with a ``min_itemsize`` value for the ``index`` without appending (:issue:`10381`) - Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`) - Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 693606fdd1d32..e474aeab1f6ca 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3429,9 +3429,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, j = len(self.index_axes) # check for column conflicts - if validate: - for a in self.axes: - a.maybe_set_size(min_itemsize=min_itemsize) + for a in self.axes: + a.maybe_set_size(min_itemsize=min_itemsize) # reindex by our non_index_axes & compute data_columns for a in self.non_index_axes: diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index d621797558c8f..b23d0b89fe850 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1372,6 +1372,22 @@ def check_col(key, name, size): min_itemsize={'index': 4}) tm.assert_series_equal(store.select('ss2'), df['B']) + # min_itemsize in index without appending (GH 10381) + store.put('ss3', df, format='table', + min_itemsize={'index': 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C='longer').set_index('C') + store.append('ss3', df2) + 
tm.assert_frame_equal(store.select('ss3'), + pd.concat([df, df2])) + + # same as above, with a Series + store.put('ss4', df['B'], format='table', + min_itemsize={'index': 6}) + store.append('ss4', df2['B']) + tm.assert_series_equal(store.select('ss4'), + pd.concat([df['B'], df2['B']])) + # with nans _maybe_remove(store, 'df') df = tm.makeTimeDataFrame() @@ -1426,6 +1442,26 @@ def check_col(key, name, size): self.assertRaises(ValueError, store.append, 'df', df, min_itemsize={'foo': 20, 'foobar': 20}) + def test_to_hdf_with_min_itemsize(self): + + with ensure_clean_path(self.path) as path: + + # min_itemsize in index with to_hdf (GH 10381) + df = tm.makeMixedDataFrame().set_index('C') + df.to_hdf(path, 'ss3', format='table', min_itemsize={'index': 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C='longer').set_index('C') + df2.to_hdf(path, 'ss3', append=True, format='table') + tm.assert_frame_equal(pd.read_hdf(path, 'ss3'), + pd.concat([df, df2])) + + # same as above, with a Series + df['B'].to_hdf(path, 'ss4', format='table', + min_itemsize={'index': 6}) + df2['B'].to_hdf(path, 'ss4', append=True, format='table') + tm.assert_series_equal(pd.read_hdf(path, 'ss4'), + pd.concat([df['B'], df2['B']])) + def test_append_with_data_columns(self): with ensure_clean_store(self.path) as store: