Skip to content

Commit 370ddc6

Browse files
authored
HDFStore append_to_multiple with min_itemsize (#34939)
1 parent 2b25640 commit 370ddc6

File tree

3 files changed

+36
-1
lines changed

3 files changed

+36
-1
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,7 @@ I/O
10281028
- ``TypeError`` exceptions raised by :meth:`read_csv` and :meth:`read_table` were showing as ``parser_f`` when an unexpected keyword argument was passed (:issue:`25648`)
10291029
- Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
10301030
- Bug in :meth:`ujson.encode` was raising an ``OverflowError`` with numbers larger than ``sys.maxsize`` (:issue:`34395`)
1031+
- Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the ``min_itemsize`` parameter was set (:issue:`11238`)
10311032

10321033
Plotting
10331034
^^^^^^^^

pandas/io/pytables.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1303,14 +1303,21 @@ def append_to_multiple(
13031303
valid_index = valid_index.intersection(index)
13041304
value = value.loc[valid_index]
13051305

1306+
min_itemsize = kwargs.pop("min_itemsize", None)
1307+
13061308
# append
13071309
for k, v in d.items():
13081310
dc = data_columns if k == selector else None
13091311

13101312
# compute the val
13111313
val = value.reindex(v, axis=axis)
13121314

1313-
self.append(k, val, data_columns=dc, **kwargs)
1315+
filtered = (
1316+
{key: value for (key, value) in min_itemsize.items() if key in v}
1317+
if min_itemsize is not None
1318+
else None
1319+
)
1320+
self.append(k, val, data_columns=dc, min_itemsize=filtered, **kwargs)
13141321

13151322
def create_table_index(
13161323
self,

pandas/tests/io/pytables/test_store.py

+27
Original file line numberDiff line numberDiff line change
@@ -3697,6 +3697,33 @@ def test_append_to_multiple_dropna_false(self, setup_path):
36973697

36983698
assert not store.select("df1a").index.equals(store.select("df2a").index)
36993699

3700+
def test_append_to_multiple_min_itemsize(self, setup_path):
3701+
# GH 11238
3702+
df = pd.DataFrame(
3703+
{
3704+
"IX": np.arange(1, 21),
3705+
"Num": np.arange(1, 21),
3706+
"BigNum": np.arange(1, 21) * 88,
3707+
"Str": ["a" for _ in range(20)],
3708+
"LongStr": ["abcde" for _ in range(20)],
3709+
}
3710+
)
3711+
expected = df.iloc[[0]]
3712+
3713+
with ensure_clean_store(setup_path) as store:
3714+
store.append_to_multiple(
3715+
{
3716+
"index": ["IX"],
3717+
"nums": ["Num", "BigNum"],
3718+
"strs": ["Str", "LongStr"],
3719+
},
3720+
df.iloc[[0]],
3721+
"index",
3722+
min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},
3723+
)
3724+
result = store.select_as_multiple(["index", "nums", "strs"])
3725+
tm.assert_frame_equal(result, expected)
3726+
37003727
def test_select_as_multiple(self, setup_path):
37013728

37023729
df1 = tm.makeTimeDataFrame()

0 commit comments

Comments
 (0)