Skip to content

Commit ba1a979

Browse files
Fix segfault on dir of a DataFrame with a Unicode surrogate character in the column name
Return a repr() version if a string is not printable
1 parent e734449 commit ba1a979

File tree

3 files changed

+13
-6
lines changed

3 files changed

+13
-6
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ Other
389389
- Set operations on an object-dtype :class:`Index` now always return object-dtype results (:issue:`31401`)
390390
- Bug in :meth:`AbstractHolidayCalendar.holidays` when no rules were defined (:issue:`31415`)
391391
- Bug in :meth:`DataFrame.to_records` incorrectly losing timezone information in timezone-aware ``datetime64`` columns (:issue:`32535`)
392+
- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)
392393

393394
.. ---------------------------------------------------------------------------
394395

pandas/_libs/tslibs/util.pxd

+4-6
Original file line numberDiff line numberDiff line change
@@ -236,13 +236,11 @@ cdef inline const char* get_c_string_buf_and_size(str py_string,
236236
237237
Returns
238238
-------
239-
buf : const char*
239+
c_string_buf : const char*
240240
"""
241-
cdef:
242-
const char *buf
243-
244-
buf = PyUnicode_AsUTF8AndSize(py_string, length)
245-
return buf
241+
if not py_string.isprintable():
242+
return PyUnicode_AsUTF8AndSize(repr(py_string), length)
243+
return PyUnicode_AsUTF8AndSize(py_string, length)
246244

247245

248246
cdef inline const char* get_c_string(str py_string):

pandas/tests/frame/test_api.py

+8
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,14 @@ def test_not_hashable(self):
127127
with pytest.raises(TypeError, match=msg):
128128
hash(empty_frame)
129129

130+
def test_column_name_contains_unicode_surrogate(self):
131+
# GH 25509
132+
colname = "\ud83d"
133+
df = DataFrame({colname: []})
134+
# this should not crash
135+
assert colname not in dir(df)
136+
assert df.columns[0] == colname
137+
130138
def test_new_empty_index(self):
131139
df1 = DataFrame(np.random.randn(0, 3))
132140
df2 = DataFrame(np.random.randn(0, 3))

0 commit comments

Comments
 (0)