Skip to content

Commit 1db5946

Browse files
ivanovmgluckyvs1
authored and committed
BUG: to_hdf and HDFStore for subclasses (pandas-dev#38262)
1 parent 64a2263 commit 1db5946

File tree

5 files changed

+60
-3
lines changed

5 files changed

+60
-3
lines changed

doc/source/reference/io.rst

+5
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ HDFStore: PyTables (HDF5)
8383
HDFStore.groups
8484
HDFStore.walk
8585

86+
.. warning::
87+
88+
One can store a subclass of ``DataFrame`` or ``Series`` to HDF5,
89+
but the type of the subclass is lost upon storing.
90+
8691
Feather
8792
~~~~~~~
8893
.. autosummary::

doc/source/whatsnew/v1.3.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,8 @@ I/O
241241
- Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
242242
- Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`)
243243
- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
244-
-
244+
- Bug in :func:`to_hdf` raising ``KeyError`` when trying to store
245+
subclasses of ``DataFrame`` or ``Series`` (:issue:`33748`).
245246

246247
Period
247248
^^^^^^

pandas/core/generic.py

+5
Original file line numberDiff line numberDiff line change
@@ -2505,6 +2505,11 @@ def to_hdf(
25052505
In order to add another DataFrame or Series to an existing HDF file
25062506
please use append mode and a different key.
25072507
2508+
.. warning::
2509+
2510+
One can store a subclass of ``DataFrame`` or ``Series`` to HDF5,
2511+
but the type of the subclass is lost upon storing.
2512+
25082513
For more information see the :ref:`user guide <io.hdf5>`.
25092514
25102515
Parameters

pandas/io/pytables.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1646,8 +1646,10 @@ def error(t):
16461646
"nor a value are passed"
16471647
)
16481648
else:
1649-
_TYPE_MAP = {Series: "series", DataFrame: "frame"}
1650-
pt = _TYPE_MAP[type(value)]
1649+
if isinstance(value, Series):
1650+
pt = "series"
1651+
else:
1652+
pt = "frame"
16511653

16521654
# we are actually a table
16531655
if format == "table":
+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import numpy as np
2+
3+
from pandas import DataFrame, Series
4+
import pandas._testing as tm
5+
from pandas.tests.io.pytables.common import ensure_clean_path
6+
7+
from pandas.io.pytables import HDFStore, read_hdf
8+
9+
10+
class TestHDFStoreSubclass:
11+
# GH 33748
12+
def test_supported_for_subclass_dataframe(self):
13+
data = {"a": [1, 2], "b": [3, 4]}
14+
sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
15+
16+
expected = DataFrame(data, dtype=np.intp)
17+
18+
with ensure_clean_path("temp.h5") as path:
19+
sdf.to_hdf(path, "df")
20+
result = read_hdf(path, "df")
21+
tm.assert_frame_equal(result, expected)
22+
23+
with ensure_clean_path("temp.h5") as path:
24+
with HDFStore(path) as store:
25+
store.put("df", sdf)
26+
result = read_hdf(path, "df")
27+
tm.assert_frame_equal(result, expected)
28+
29+
def test_supported_for_subclass_series(self):
30+
data = [1, 2, 3]
31+
sser = tm.SubclassedSeries(data, dtype=np.intp)
32+
33+
expected = Series(data, dtype=np.intp)
34+
35+
with ensure_clean_path("temp.h5") as path:
36+
sser.to_hdf(path, "ser")
37+
result = read_hdf(path, "ser")
38+
tm.assert_series_equal(result, expected)
39+
40+
with ensure_clean_path("temp.h5") as path:
41+
with HDFStore(path) as store:
42+
store.put("ser", sser)
43+
result = read_hdf(path, "ser")
44+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)