Skip to content

Commit 25dcff5

Browse files
AlexisMignon authored and jreback committed
BUG: Fix a bug occurring when using DataFrame.to_records with unicode
column names in Python 2. closes #11879 closes #13462
1 parent 6c17f67 commit 25dcff5

File tree

3 files changed

+25
-5
lines changed

3 files changed

+25
-5
lines changed

doc/source/whatsnew/v0.20.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,8 @@ Bug Fixes
615615
- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`)
616616
- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`)
617617

618-
- Bug in ``pd.read_msgpack`` when deserializing a ``CategoricalIndex`` (:issue:`15487`)
618+
- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`)
619+
- Bug in ``pd.DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`)
619620

620621

621622
- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`)

pandas/core/frame.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -1105,13 +1105,17 @@ def to_records(self, index=True, convert_datetime64=True):
11051105
count += 1
11061106
elif index_names[0] is None:
11071107
index_names = ['index']
1108-
names = lmap(str, index_names) + lmap(str, self.columns)
1108+
names = (lmap(compat.text_type, index_names) +
1109+
lmap(compat.text_type, self.columns))
11091110
else:
11101111
arrays = [self[c].get_values() for c in self.columns]
1111-
names = lmap(str, self.columns)
1112+
names = lmap(compat.text_type, self.columns)
11121113

1113-
dtype = np.dtype([(x, v.dtype) for x, v in zip(names, arrays)])
1114-
return np.rec.fromarrays(arrays, dtype=dtype, names=names)
1114+
formats = [v.dtype for v in arrays]
1115+
return np.rec.fromarrays(
1116+
arrays,
1117+
dtype={'names': names, 'formats': formats}
1118+
)
11151119

11161120
@classmethod
11171121
def from_items(cls, items, columns=None, orient='columns'):

pandas/tests/frame/test_convert_to.py

+15
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,18 @@ def test_to_records_with_unicode_index(self):
177177
.to_records()
178178
expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')])
179179
tm.assert_almost_equal(result, expected)
180+
181+
def test_to_records_with_unicode_column_names(self):
182+
# xref issue: https://github.com/numpy/numpy/issues/2407
183+
# Issue #11879. to_records used to raise an exception when used
184+
# with column names containing non-ASCII characters in Python 2
185+
result = DataFrame(data={u"accented_name_é": [1.0]}).to_records()
186+
187+
# Note that numpy allows for unicode field names but dtypes need
188+
# to be specified using a dictionary instead of a list of tuples.
189+
expected = np.rec.array(
190+
[(0, 1.0)],
191+
dtype={"names": ["index", u"accented_name_é"],
192+
"formats": ['<i8', '<f8']}
193+
)
194+
tm.assert_almost_equal(result, expected)

0 commit comments

Comments
 (0)