Skip to content

Commit 9529ec0

Browse files
toobaz authored and ischurov committed
BUG: Apply min_itemsize to index even when not appending
closes pandas-dev#10381

Author: Pietro Battiston <[email protected]>

Closes pandas-dev#14812 from toobaz/to_hdf_min_itemsize and squashes the following commits:

c07f1e4 [Pietro Battiston] Whatsnew
38b8fcc [Pietro Battiston] Tests for previous commit
c838afa [Pietro Battiston] BUG: set min_itemsize even when there is no need to validate (pandas-dev#10381)
1 parent 923cb8d commit 9529ec0

File tree

3 files changed

+39
-3
lines changed

3 files changed

+39
-3
lines changed

doc/source/whatsnew/v0.19.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ Bug Fixes
6060

6161
- Bug in ``HDFStore`` when writing a ``MultiIndex`` when using ``data_columns=True`` (:issue:`14435`)
6262
- Bug in ``HDFStore.append()`` when writing a ``Series`` and passing a ``min_itemsize`` argument containing a value for the ``index`` (:issue:`11412`)
63+
- Bug when writing to an ``HDFStore`` in ``table`` format without appending, where a ``min_itemsize`` value given for the ``index`` was not applied (:issue:`10381`)
6364
- Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`)
6465
- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`)
6566

pandas/io/pytables.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -3429,9 +3429,8 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
34293429
j = len(self.index_axes)
34303430

34313431
# check for column conflicts
3432-
if validate:
3433-
for a in self.axes:
3434-
a.maybe_set_size(min_itemsize=min_itemsize)
3432+
for a in self.axes:
3433+
a.maybe_set_size(min_itemsize=min_itemsize)
34353434

34363435
# reindex by our non_index_axes & compute data_columns
34373436
for a in self.non_index_axes:

pandas/io/tests/test_pytables.py

+36
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,22 @@ def check_col(key, name, size):
13721372
min_itemsize={'index': 4})
13731373
tm.assert_series_equal(store.select('ss2'), df['B'])
13741374

1375+
# min_itemsize in index without appending (GH 10381)
1376+
store.put('ss3', df, format='table',
1377+
min_itemsize={'index': 6})
1378+
# just make sure there is a longer string:
1379+
df2 = df.copy().reset_index().assign(C='longer').set_index('C')
1380+
store.append('ss3', df2)
1381+
tm.assert_frame_equal(store.select('ss3'),
1382+
pd.concat([df, df2]))
1383+
1384+
# same as above, with a Series
1385+
store.put('ss4', df['B'], format='table',
1386+
min_itemsize={'index': 6})
1387+
store.append('ss4', df2['B'])
1388+
tm.assert_series_equal(store.select('ss4'),
1389+
pd.concat([df['B'], df2['B']]))
1390+
13751391
# with nans
13761392
_maybe_remove(store, 'df')
13771393
df = tm.makeTimeDataFrame()
@@ -1426,6 +1442,26 @@ def check_col(key, name, size):
14261442
self.assertRaises(ValueError, store.append, 'df',
14271443
df, min_itemsize={'foo': 20, 'foobar': 20})
14281444

1445+
def test_to_hdf_with_min_itemsize(self):
1446+
1447+
with ensure_clean_path(self.path) as path:
1448+
1449+
# min_itemsize in index with to_hdf (GH 10381)
1450+
df = tm.makeMixedDataFrame().set_index('C')
1451+
df.to_hdf(path, 'ss3', format='table', min_itemsize={'index': 6})
1452+
# just make sure there is a longer string:
1453+
df2 = df.copy().reset_index().assign(C='longer').set_index('C')
1454+
df2.to_hdf(path, 'ss3', append=True, format='table')
1455+
tm.assert_frame_equal(pd.read_hdf(path, 'ss3'),
1456+
pd.concat([df, df2]))
1457+
1458+
# same as above, with a Series
1459+
df['B'].to_hdf(path, 'ss4', format='table',
1460+
min_itemsize={'index': 6})
1461+
df2['B'].to_hdf(path, 'ss4', append=True, format='table')
1462+
tm.assert_series_equal(pd.read_hdf(path, 'ss4'),
1463+
pd.concat([df['B'], df2['B']]))
1464+
14291465
def test_append_with_data_columns(self):
14301466

14311467
with ensure_clean_store(self.path) as store:

0 commit comments

Comments
 (0)