Skip to content

Commit 5959ee3

Browse files
jschendel authored and jreback committed
ERR: Fix segfault with .astype('category') on empty DataFrame (#18015)
1 parent b2d0d1b commit 5959ee3

File tree

4 files changed

+17
-4
lines changed

4 files changed

+17
-4
lines changed

doc/source/whatsnew/v0.21.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ Numeric
113113
Categorical
114114
^^^^^^^^^^^
115115

116-
-
116+
- Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`)
117117
-
118118
-
119119

pandas/_libs/src/inference.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -349,13 +349,13 @@ def infer_dtype(object value, bint skipna=False):
349349
if values.dtype != np.object_:
350350
values = values.astype('O')
351351

352+
# make contiguous
353+
values = values.ravel()
354+
352355
n = len(values)
353356
if n == 0:
354357
return 'empty'
355358

356-
# make contiguous
357-
values = values.ravel()
358-
359359
# try to use a valid value
360360
for i in range(n):
361361
val = util.get_value_1d(values, i)

pandas/tests/dtypes/test_inference.py

+6
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,12 @@ def test_length_zero(self):
416416
result = lib.infer_dtype([])
417417
assert result == 'empty'
418418

419+
# GH 18004
420+
arr = np.array([np.array([], dtype=object),
421+
np.array([], dtype=object)])
422+
result = lib.infer_dtype(arr)
423+
assert result == 'empty'
424+
419425
def test_integers(self):
420426
arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O')
421427
result = lib.infer_dtype(arr)

pandas/tests/test_categorical.py

+7
Original file line numberDiff line numberDiff line change
@@ -2124,6 +2124,13 @@ def test_creation_astype(self):
21242124
res = s.astype(CategoricalDtype(list('abcdef'), ordered=True))
21252125
tm.assert_series_equal(res, exp)
21262126

2127+
@pytest.mark.parametrize('columns', [['x'], ['x', 'y'], ['x', 'y', 'z']])
2128+
def test_empty_astype(self, columns):
2129+
# GH 18004
2130+
msg = '> 1 ndim Categorical are not supported at this time'
2131+
with tm.assert_raises_regex(NotImplementedError, msg):
2132+
DataFrame(columns=columns).astype('category')
2133+
21272134
def test_construction_series(self):
21282135

21292136
l = [1, 2, 3, 1]

0 commit comments

Comments
 (0)