Skip to content

Commit 05e41e4

Browse files
makmanalp authored and stangirala committed
BUG: convert numpy strings in index names in HDF pandas-dev#13492 (pandas-dev#16444)
* BUG: Handle numpy strings in index names in HDF5 pandas-dev#13492
* REF: refactor to _ensure_str
1 parent ff9d73d commit 05e41e4

File tree

3 files changed

+36
-2
lines changed

3 files changed

+36
-2
lines changed

doc/source/whatsnew/v0.20.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ I/O
7777
- Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`)
7878
- Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`)
7979
- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`)
80+
- Bug where ``pd.read_hdf()`` returned numpy strings instead of Python strings for index names (:issue:`13492`)
8081

8182
- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`)
8283

pandas/io/pytables.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,18 @@ def _ensure_encoding(encoding):
7373
return encoding
7474

7575

76+
def _ensure_str(name):
    """Coerce a string-like index / column name to the native text type.

    Names read back from an HDF5 node may be numpy string objects rather
    than a plain ``str`` (Python 3) or ``unicode`` (Python 2).  Any value
    that is an instance of ``compat.string_types`` is converted with
    ``compat.text_type``; every other value is returned unchanged.

    https://github.com/pandas-dev/pandas/issues/13492
    """
    # Guard clause: non-string names (None, ints, tuples, ...) pass through.
    if not isinstance(name, compat.string_types):
        return name
    return compat.text_type(name)
86+
87+
7688
Term = Expr
7789

7890

@@ -2567,7 +2579,7 @@ def read_index_node(self, node, start=None, stop=None):
25672579
name = None
25682580

25692581
if 'name' in node._v_attrs:
2570-
name = node._v_attrs.name
2582+
name = _ensure_str(node._v_attrs.name)
25712583

25722584
index_class = self._alias_to_class(getattr(node._v_attrs,
25732585
'index_class', ''))

pandas/tests/io/test_pytables.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
date_range, timedelta_range, Index, DatetimeIndex,
1717
isnull)
1818

19-
from pandas.compat import is_platform_windows, PY3, PY35, BytesIO
19+
from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type
2020
from pandas.io.formats.printing import pprint_thing
2121

2222
tables = pytest.importorskip('tables')
@@ -2920,6 +2920,27 @@ def test_store_index_name_with_tz(self):
29202920
recons = store['frame']
29212921
tm.assert_frame_equal(recons, df)
29222922

2923+
@pytest.mark.parametrize('table_format', ['table', 'fixed'])
def test_store_index_name_numpy_str(self, table_format):
    # GH #13492
    # Build a small frame whose row and column indexes both carry
    # non-ASCII unicode names.
    col_dates = [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]
    row_dates = [datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]
    columns = pd.Index(pd.to_datetime(col_dates), name=u('cols\u05d2'))
    index = pd.Index(pd.to_datetime(row_dates), name=u('rows\u05d0'))
    expected = pd.DataFrame(np.arange(4).reshape(2, 2),
                            columns=columns, index=index)

    # Round-trip through HDF5.  This used to fail, returning numpy
    # strings instead of python strings for the index names.
    with ensure_clean_path(self.path) as path:
        expected.to_hdf(path, 'df', format=table_format)
        result = read_hdf(path, 'df')

        assert_frame_equal(expected, result, check_names=True)

        # Exact type comparison (not isinstance) is kept as in the
        # original; presumably so string subclasses are rejected.
        assert type(result.index.name) == text_type
        assert type(result.columns.name) == text_type
2943+
29232944
def test_store_series_name(self):
29242945
df = tm.makeDataFrame()
29252946
series = df['A']

0 commit comments

Comments
 (0)