Skip to content

Apply min_itemsize to index even when not appending #14812

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Bug Fixes

- Bug in ``HDFStore`` when writing a ``MultiIndex`` when using ``data_columns=True`` (:issue:`14435`)
- Bug in ``HDFStore.append()`` when writing a ``Series`` and passing a ``min_itemsize`` argument containing a value for the ``index`` (:issue:`11412`)
- Bug when writing to a ``HDFStore`` in ``table`` format with a ``min_itemsize`` value for the ``index`` and without asking to append (:issue:`10381`)
- Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`)
- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`)

Expand Down
5 changes: 2 additions & 3 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3429,9 +3429,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
j = len(self.index_axes)

# check for column conflicts
if validate:
for a in self.axes:
a.maybe_set_size(min_itemsize=min_itemsize)
for a in self.axes:
a.maybe_set_size(min_itemsize=min_itemsize)

# reindex by our non_index_axes & compute data_columns
for a in self.non_index_axes:
Expand Down
36 changes: 36 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,22 @@ def check_col(key, name, size):
min_itemsize={'index': 4})
tm.assert_series_equal(store.select('ss2'), df['B'])

# min_itemsize in index without appending (GH 10381)
store.put('ss3', df, format='table',
min_itemsize={'index': 6})
# just make sure there is a longer string:
df2 = df.copy().reset_index().assign(C='longer').set_index('C')
store.append('ss3', df2)
tm.assert_frame_equal(store.select('ss3'),
pd.concat([df, df2]))

# same as above, with a Series
store.put('ss4', df['B'], format='table',
min_itemsize={'index': 6})
store.append('ss4', df2['B'])
tm.assert_series_equal(store.select('ss4'),
pd.concat([df['B'], df2['B']]))

# with nans
_maybe_remove(store, 'df')
df = tm.makeTimeDataFrame()
Expand Down Expand Up @@ -1426,6 +1442,26 @@ def check_col(key, name, size):
self.assertRaises(ValueError, store.append, 'df',
df, min_itemsize={'foo': 20, 'foobar': 20})

def test_to_hdf_with_min_itemsize(self):

with ensure_clean_path(self.path) as path:

# min_itemsize in index with to_hdf (GH 10381)
df = tm.makeMixedDataFrame().set_index('C')
df.to_hdf(path, 'ss3', format='table', min_itemsize={'index': 6})
# just make sure there is a longer string:
df2 = df.copy().reset_index().assign(C='longer').set_index('C')
df2.to_hdf(path, 'ss3', append=True, format='table')
tm.assert_frame_equal(pd.read_hdf(path, 'ss3'),
pd.concat([df, df2]))

# same as above, with a Series
df['B'].to_hdf(path, 'ss4', format='table',
min_itemsize={'index': 6})
df2['B'].to_hdf(path, 'ss4', append=True, format='table')
tm.assert_series_equal(pd.read_hdf(path, 'ss4'),
pd.concat([df['B'], df2['B']]))

def test_append_with_data_columns(self):

with ensure_clean_store(self.path) as store:
Expand Down