Skip to content

Commit 91150d9

Browse files
authored
BUG: Fix Categorical use_inf_as_na bug (#33629)
1 parent 08f9bd2 commit 91150d9

File tree

3 files changed

+84
-32
lines changed

3 files changed

+84
-32
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ Categorical
526526
- :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`)
527527
- Bug where :meth:`Categorical.replace` would replace with ``NaN`` whenever the new value and replacement value were equal (:issue:`33288`)
528528
- Bug where an ordered :class:`Categorical` containing only ``NaN`` values would raise rather than returning ``NaN`` when taking the minimum or maximum (:issue:`33450`)
529+
- Bug where :meth:`Series.isna` and :meth:`DataFrame.isna` would raise for categorical dtype when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33594`)
529530

530531
Datetimelike
531532
^^^^^^^^^^^^

pandas/core/dtypes/missing.py

+31-31
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,13 @@ def _isna_new(obj):
134134
elif isinstance(obj, type):
135135
return False
136136
elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)):
137-
return _isna_ndarraylike(obj)
137+
return _isna_ndarraylike(obj, old=False)
138138
elif isinstance(obj, ABCDataFrame):
139139
return obj.isna()
140140
elif isinstance(obj, list):
141-
return _isna_ndarraylike(np.asarray(obj, dtype=object))
141+
return _isna_ndarraylike(np.asarray(obj, dtype=object), old=False)
142142
elif hasattr(obj, "__array__"):
143-
return _isna_ndarraylike(np.asarray(obj))
143+
return _isna_ndarraylike(np.asarray(obj), old=False)
144144
else:
145145
return False
146146

@@ -165,13 +165,13 @@ def _isna_old(obj):
165165
elif isinstance(obj, type):
166166
return False
167167
elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)):
168-
return _isna_ndarraylike_old(obj)
168+
return _isna_ndarraylike(obj, old=True)
169169
elif isinstance(obj, ABCDataFrame):
170170
return obj.isna()
171171
elif isinstance(obj, list):
172-
return _isna_ndarraylike_old(np.asarray(obj, dtype=object))
172+
return _isna_ndarraylike(np.asarray(obj, dtype=object), old=True)
173173
elif hasattr(obj, "__array__"):
174-
return _isna_ndarraylike_old(np.asarray(obj))
174+
return _isna_ndarraylike(np.asarray(obj), old=True)
175175
else:
176176
return False
177177

@@ -207,40 +207,40 @@ def _use_inf_as_na(key):
207207
globals()["_isna"] = _isna_new
208208

209209

210-
def _isna_ndarraylike(obj):
211-
values = getattr(obj, "_values", obj)
212-
dtype = values.dtype
213-
214-
if is_extension_array_dtype(dtype):
215-
result = values.isna()
216-
elif is_string_dtype(dtype):
217-
result = _isna_string_dtype(values, dtype, old=False)
218-
219-
elif needs_i8_conversion(dtype):
220-
# this is the NaT pattern
221-
result = values.view("i8") == iNaT
222-
else:
223-
result = np.isnan(values)
224-
225-
# box
226-
if isinstance(obj, ABCSeries):
227-
result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)
228-
229-
return result
210+
def _isna_ndarraylike(obj, old: bool = False):
211+
"""
212+
Return an array indicating which values of the input array are NaN / NA.
230213
214+
Parameters
215+
----------
216+
obj : array-like
217+
The input array whose elements are to be checked.
218+
old : bool, default False
219+
Whether or not to treat infinite values as NA.
231220
232-
def _isna_ndarraylike_old(obj):
221+
Returns
222+
-------
223+
array-like
224+
Array of boolean values denoting the NA status of each element.
225+
"""
233226
values = getattr(obj, "_values", obj)
234227
dtype = values.dtype
235228

236-
if is_string_dtype(dtype):
237-
result = _isna_string_dtype(values, dtype, old=True)
238-
229+
if is_extension_array_dtype(dtype):
230+
if old:
231+
result = values.isna() | (values == -np.inf) | (values == np.inf)
232+
else:
233+
result = values.isna()
234+
elif is_string_dtype(dtype):
235+
result = _isna_string_dtype(values, dtype, old=old)
239236
elif needs_i8_conversion(dtype):
240237
# this is the NaT pattern
241238
result = values.view("i8") == iNaT
242239
else:
243-
result = ~np.isfinite(values)
240+
if old:
241+
result = ~np.isfinite(values)
242+
else:
243+
result = np.isnan(values)
244244

245245
# box
246246
if isinstance(obj, ABCSeries):

pandas/tests/arrays/categorical/test_missing.py

+52-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
from pandas.core.dtypes.dtypes import CategoricalDtype
77

8-
from pandas import Categorical, Index, Series, isna
8+
import pandas as pd
9+
from pandas import Categorical, DataFrame, Index, Series, isna
910
import pandas._testing as tm
1011

1112

@@ -97,3 +98,53 @@ def test_fillna_array(self):
9798
expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
9899
tm.assert_categorical_equal(result, expected)
99100
assert isna(cat[-1]) # didn't modify original in place
101+
102+
@pytest.mark.parametrize(
103+
"values, expected",
104+
[
105+
([1, 2, 3], np.array([False, False, False])),
106+
([1, 2, np.nan], np.array([False, False, True])),
107+
([1, 2, np.inf], np.array([False, False, True])),
108+
([1, 2, pd.NA], np.array([False, False, True])),
109+
],
110+
)
111+
def test_use_inf_as_na(self, values, expected):
112+
# https://github.com/pandas-dev/pandas/issues/33594
113+
with pd.option_context("mode.use_inf_as_na", True):
114+
cat = Categorical(values)
115+
result = cat.isna()
116+
tm.assert_numpy_array_equal(result, expected)
117+
118+
result = Series(cat).isna()
119+
expected = Series(expected)
120+
tm.assert_series_equal(result, expected)
121+
122+
result = DataFrame(cat).isna()
123+
expected = DataFrame(expected)
124+
tm.assert_frame_equal(result, expected)
125+
126+
@pytest.mark.parametrize(
127+
"values, expected",
128+
[
129+
([1, 2, 3], np.array([False, False, False])),
130+
([1, 2, np.nan], np.array([False, False, True])),
131+
([1, 2, np.inf], np.array([False, False, True])),
132+
([1, 2, pd.NA], np.array([False, False, True])),
133+
],
134+
)
135+
def test_use_inf_as_na_outside_context(self, values, expected):
136+
# https://github.com/pandas-dev/pandas/issues/33594
137+
# The Categorical is built before the option is enabled, so Categorical.isna() alone would not flag inf here; go through pd.isna instead
138+
cat = Categorical(values)
139+
140+
with pd.option_context("mode.use_inf_as_na", True):
141+
result = pd.isna(cat)
142+
tm.assert_numpy_array_equal(result, expected)
143+
144+
result = pd.isna(Series(cat))
145+
expected = Series(expected)
146+
tm.assert_series_equal(result, expected)
147+
148+
result = pd.isna(DataFrame(cat))
149+
expected = DataFrame(expected)
150+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)