Skip to content

Commit d5283ea

Browse files
faulaire and jreback
authored and committed
Fix ValueError when reading a Dataframe with HDFStore in Python 3 from fixed format written in Python 2 (#24510)
1 parent 50470d5 commit d5283ea

File tree

4 files changed

+17
-2
lines changed

4 files changed

+17
-2
lines changed

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1519,7 +1519,6 @@ MultiIndex
15191519
I/O
15201520
^^^
15211521

1522-
15231522
.. _whatsnew_0240.bug_fixes.nan_with_str_dtype:
15241523

15251524
Proper handling of `np.NaN` in a string data-typed column with the Python engine
@@ -1591,6 +1590,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
15911590
- Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`)
15921591
- Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where an exception would leave a partially written and invalid dta file (:issue:`23573`)
15931592
- Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`)
1593+
- Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a DataFrame in Python 3 from a fixed format written in Python 2 (:issue:`24510`)
15941594

15951595
Plotting
15961596
^^^^^^^^

pandas/io/pytables.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2501,7 +2501,7 @@ def set_attrs(self):
25012501
def get_attrs(self):
25022502
""" retrieve our attributes """
25032503
self.encoding = _ensure_encoding(getattr(self.attrs, 'encoding', None))
2504-
self.errors = getattr(self.attrs, 'errors', 'strict')
2504+
self.errors = _ensure_decoded(getattr(self.attrs, 'errors', 'strict'))
25052505
for n in self.attributes:
25062506
setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None)))
25072507

@@ -2661,6 +2661,7 @@ def read_index_node(self, node, start=None, stop=None):
26612661

26622662
if 'name' in node._v_attrs:
26632663
name = _ensure_str(node._v_attrs.name)
2664+
name = _ensure_decoded(name)
26642665

26652666
index_class = self._alias_to_class(_ensure_decoded(
26662667
getattr(node._v_attrs, 'index_class', '')))
Binary file not shown.

pandas/tests/io/test_pytables.py

+14
Original file line numberDiff line numberDiff line change
@@ -4540,6 +4540,20 @@ def test_pytables_native2_read(self, datapath):
45404540
d1 = store['detector']
45414541
assert isinstance(d1, DataFrame)
45424542

4543+
def test_legacy_table_fixed_format_read_py2(self, datapath):
4544+
# GH 24510
4545+
# legacy table with fixed format written in Python 2
4546+
with ensure_clean_store(
4547+
datapath('io', 'data', 'legacy_hdf',
4548+
'legacy_table_fixed_py2.h5'),
4549+
mode='r') as store:
4550+
result = store.select('df')
4551+
expected = pd.DataFrame([[1, 2, 3, 'D']],
4552+
columns=['A', 'B', 'C', 'D'],
4553+
index=pd.Index(['ABC'],
4554+
name='INDEX_NAME'))
4555+
assert_frame_equal(expected, result)
4556+
45434557
def test_legacy_table_read(self, datapath):
45444558
# legacy table types
45454559
with ensure_clean_store(

0 commit comments

Comments
 (0)