From b6745ae3fa728f7a4708188852d142124c417802 Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 28 Oct 2017 12:52:58 -0600 Subject: [PATCH] ERR: Fix segfault with .astype('category') on empty DataFrame --- doc/source/whatsnew/v0.21.1.txt | 2 +- pandas/_libs/src/inference.pyx | 6 +++--- pandas/tests/dtypes/test_inference.py | 6 ++++++ pandas/tests/test_categorical.py | 7 +++++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 64b662c38d39e..03a8e83a713f0 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -113,7 +113,7 @@ Numeric Categorical ^^^^^^^^^^^ -- +- Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`) - - diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index b0a64e1ccc225..c340e870e9722 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -349,13 +349,13 @@ def infer_dtype(object value, bint skipna=False): if values.dtype != np.object_: values = values.astype('O') + # make contiguous + values = values.ravel() + n = len(values) if n == 0: return 'empty' - # make contiguous - values = values.ravel() - # try to use a valid value for i in range(n): val = util.get_value_1d(values, i) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 70273f9e999cf..7195cb43a70dc 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -416,6 +416,12 @@ def test_length_zero(self): result = lib.infer_dtype([]) assert result == 'empty' + # GH 18004 + arr = np.array([np.array([], dtype=object), + np.array([], dtype=object)]) + result = lib.infer_dtype(arr) + assert result == 'empty' + def test_integers(self): arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') result = lib.infer_dtype(arr) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 272ba25bf8f8a..6366aae8ccdf6 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -2124,6 +2124,13 @@ def test_creation_astype(self): res = s.astype(CategoricalDtype(list('abcdef'), ordered=True)) tm.assert_series_equal(res, exp) + @pytest.mark.parametrize('columns', [['x'], ['x', 'y'], ['x', 'y', 'z']]) + def test_empty_astype(self, columns): + # GH 18004 + msg = '> 1 ndim Categorical are not supported at this time' + with tm.assert_raises_regex(NotImplementedError, msg): + DataFrame(columns=columns).astype('category') + def test_construction_series(self): l = [1, 2, 3, 1]