diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 719178a67459d..e7b79bed148dd 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -480,6 +480,7 @@ Categorical - :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`) - Bug where :meth:`Categorical.replace` would replace with ``NaN`` whenever the new value and replacement value were equal (:issue:`33288`) - Bug where an ordered :class:`Categorical` containing only ``NaN`` values would raise rather than returning ``NaN`` when taking the minimum or maximum (:issue:`33450`) +- Bug where :meth:`Series.isna` and :meth:`DataFrame.isna` would raise for categorical dtype when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33594`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index d329f4337de2e..92e1b17c41694 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -134,13 +134,13 @@ def _isna_new(obj): elif isinstance(obj, type): return False elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)): - return _isna_ndarraylike(obj) + return _isna_ndarraylike(obj, old=False) elif isinstance(obj, ABCDataFrame): return obj.isna() elif isinstance(obj, list): - return _isna_ndarraylike(np.asarray(obj, dtype=object)) + return _isna_ndarraylike(np.asarray(obj, dtype=object), old=False) elif hasattr(obj, "__array__"): - return _isna_ndarraylike(np.asarray(obj)) + return _isna_ndarraylike(np.asarray(obj), old=False) else: return False @@ -165,13 +165,13 @@ def _isna_old(obj): elif isinstance(obj, type): return False elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)): - return _isna_ndarraylike_old(obj) + return _isna_ndarraylike(obj, old=True) elif isinstance(obj, ABCDataFrame): return obj.isna() elif isinstance(obj, list): - return _isna_ndarraylike_old(np.asarray(obj, dtype=object)) + return _isna_ndarraylike(np.asarray(obj, dtype=object), old=True) elif hasattr(obj, "__array__"): - return _isna_ndarraylike_old(np.asarray(obj)) + return _isna_ndarraylike(np.asarray(obj), old=True) else: return False @@ -207,40 +207,40 @@ def _use_inf_as_na(key): globals()["_isna"] = _isna_new -def _isna_ndarraylike(obj): - values = getattr(obj, "_values", obj) - dtype = values.dtype - - if is_extension_array_dtype(dtype): - result = values.isna() - elif is_string_dtype(dtype): - result = _isna_string_dtype(values, dtype, old=False) - - elif needs_i8_conversion(dtype): - # this is the NaT pattern - result = values.view("i8") == iNaT - else: - result = np.isnan(values) - - # box - if isinstance(obj, ABCSeries): - result = obj._constructor(result, index=obj.index, name=obj.name, copy=False) - - return result +def _isna_ndarraylike(obj, old: bool = False): + """ + Return an array indicating which values of the input array are NaN / NA. + Parameters + ---------- + obj: array-like + The input array whose elements are to be checked. + old: bool + Whether or not to treat infinite values as NA. -def _isna_ndarraylike_old(obj): + Returns + ------- + array-like + Array of boolean values denoting the NA status of each element. + """ values = getattr(obj, "_values", obj) dtype = values.dtype - if is_string_dtype(dtype): - result = _isna_string_dtype(values, dtype, old=True) - + if is_extension_array_dtype(dtype): + if old: + result = values.isna() | (values == -np.inf) | (values == np.inf) + else: + result = values.isna() + elif is_string_dtype(dtype): + result = _isna_string_dtype(values, dtype, old=old) elif needs_i8_conversion(dtype): # this is the NaT pattern result = values.view("i8") == iNaT else: - result = ~np.isfinite(values) + if old: + result = ~np.isfinite(values) + else: + result = np.isnan(values) # box if isinstance(obj, ABCSeries): diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 9eb3c8b3a8c48..5309b8827e3f0 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -5,7 +5,8 @@ from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import Categorical, Index, Series, isna +import pandas as pd +from pandas import Categorical, DataFrame, Index, Series, isna import pandas._testing as tm @@ -97,3 +98,53 @@ def test_fillna_array(self): expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype) tm.assert_categorical_equal(result, expected) assert isna(cat[-1]) # didnt modify original inplace + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na(self, values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + with pd.option_context("mode.use_inf_as_na", True): + cat = Categorical(values) + result = cat.isna() + tm.assert_numpy_array_equal(result, expected) + + result = Series(cat).isna() + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = DataFrame(cat).isna() + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na_outside_context(self, values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + # Using isna directly for Categorical will fail in general here + cat = Categorical(values) + + with pd.option_context("mode.use_inf_as_na", True): + result = pd.isna(cat) + tm.assert_numpy_array_equal(result, expected) + + result = pd.isna(Series(cat)) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = pd.isna(DataFrame(cat)) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected)