diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index b4331aab3085f..78667b0e3e39b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1515,7 +1515,6 @@ MultiIndex I/O ^^^ - .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine @@ -1587,6 +1586,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`) - Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`) - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) +- Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`24510`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5b76b4bb3d6ab..a894b8788f8d8 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2501,7 +2501,7 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ self.encoding = _ensure_encoding(getattr(self.attrs, 'encoding', None)) - self.errors = getattr(self.attrs, 'errors', 'strict') + self.errors = _ensure_decoded(getattr(self.attrs, 'errors', 'strict')) for n in self.attributes: setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None))) @@ -2661,6 +2661,7 @@ def read_index_node(self, node, start=None, stop=None): if 'name' in node._v_attrs: name = _ensure_str(node._v_attrs.name) + name = _ensure_decoded(name) index_class = self._alias_to_class(_ensure_decoded( getattr(node._v_attrs, 'index_class', ''))) 
diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 new file mode 100644 index 0000000000000..540251d9fae86 Binary files /dev/null and b/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 differ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 1c4d00c8b3e15..4179e81d02042 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4540,6 +4540,20 @@ def test_pytables_native2_read(self, datapath): d1 = store['detector'] assert isinstance(d1, DataFrame) + def test_legacy_table_fixed_format_read_py2(self, datapath): + # GH 24510 + # legacy table with fixed format written in Python 2 + with ensure_clean_store( + datapath('io', 'data', 'legacy_hdf', + 'legacy_table_fixed_py2.h5'), + mode='r') as store: + result = store.select('df') + expected = pd.DataFrame([[1, 2, 3, 'D']], + columns=['A', 'B', 'C', 'D'], + index=pd.Index(['ABC'], + name='INDEX_NAME')) + assert_frame_equal(expected, result) + def test_legacy_table_read(self, datapath): # legacy table types with ensure_clean_store(