Skip to content

Commit 678a9ac

Browse files
authored
BUG: Fix StringArray use_inf_as_na bug (#33656)
1 parent 1818c28 commit 678a9ac

File tree

6 files changed

+38
-12
lines changed

6 files changed

+38
-12
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -842,13 +842,13 @@ ExtensionArray
842842
^^^^^^^^^^^^^^
843843

844844
- Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`)
845+
- Fixed bug where :meth:`StringArray.isna` would return ``False`` for NA values when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33655`)
845846
- Fixed bug where :class:`Series` construction with EA dtype and index but no data or scalar data would fail (:issue:`26469`)
846847
- Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).
847848
- Fixed bug where :meth:`Series.update` would raise a ``ValueError`` for ``ExtensionArray`` dtypes with missing values (:issue:`33980`)
848849
- Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`)
849850
- Fixed bug where ``DataFrame(columns=.., dtype='string')`` would fail (:issue:`27953`, :issue:`33623`)
850851

851-
852852
Other
853853
^^^^^
854854
- Appending a dictionary to a :class:`DataFrame` without passing ``ignore_index=True`` will raise ``TypeError: Can only append a dict if ignore_index=True``

pandas/_libs/missing.pyx

+2-6
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,6 @@ cpdef bint checknull_old(object val):
9090
return False
9191

9292

93-
cdef inline bint _check_none_nan_inf_neginf(object val):
94-
return val is None or (isinstance(val, float) and
95-
(val != val or val == INF or val == NEGINF))
96-
97-
9893
@cython.wraparound(False)
9994
@cython.boundscheck(False)
10095
cpdef ndarray[uint8_t] isnaobj(ndarray arr):
@@ -141,6 +136,7 @@ def isnaobj_old(arr: ndarray) -> ndarray:
141136
- INF
142137
- NEGINF
143138
- NaT
139+
- NA
144140

145141
Parameters
146142
----------
@@ -161,7 +157,7 @@ def isnaobj_old(arr: ndarray) -> ndarray:
161157
result = np.zeros(n, dtype=np.uint8)
162158
for i in range(n):
163159
val = arr[i]
164-
result[i] = val is NaT or _check_none_nan_inf_neginf(val)
160+
result[i] = checknull(val) or val == INF or val == NEGINF
165161
return result.view(np.bool_)
166162

167163

pandas/core/dtypes/missing.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
TD64NS_DTYPE,
1818
ensure_object,
1919
is_bool_dtype,
20+
is_categorical_dtype,
2021
is_complex_dtype,
2122
is_datetimelike_v_numeric,
2223
is_dtype_equal,
@@ -209,8 +210,8 @@ def _isna_ndarraylike(obj, inf_as_na: bool = False):
209210
dtype = values.dtype
210211

211212
if is_extension_array_dtype(dtype):
212-
if inf_as_na:
213-
result = values.isna() | (values == -np.inf) | (values == np.inf)
213+
if inf_as_na and is_categorical_dtype(dtype):
214+
result = libmissing.isnaobj_old(values.to_numpy())
214215
else:
215216
result = values.isna()
216217
elif is_string_dtype(dtype):

pandas/tests/arrays/string_/test_string.py

+22
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,28 @@ def test_value_counts_na():
305305
tm.assert_series_equal(result, expected)
306306

307307

308+
@pytest.mark.parametrize(
309+
"values, expected",
310+
[
311+
(pd.array(["a", "b", "c"]), np.array([False, False, False])),
312+
(pd.array(["a", "b", None]), np.array([False, False, True])),
313+
],
314+
)
315+
def test_use_inf_as_na(values, expected):
316+
# https://github.com/pandas-dev/pandas/issues/33655
317+
with pd.option_context("mode.use_inf_as_na", True):
318+
result = values.isna()
319+
tm.assert_numpy_array_equal(result, expected)
320+
321+
result = pd.Series(values).isna()
322+
expected = pd.Series(expected)
323+
tm.assert_series_equal(result, expected)
324+
325+
result = pd.DataFrame(values).isna()
326+
expected = pd.DataFrame(expected)
327+
tm.assert_frame_equal(result, expected)
328+
329+
308330
def test_memory_usage():
309331
# GH 33963
310332
series = pd.Series(["a", "b", "c"], dtype="string")

pandas/tests/extension/base/missing.py

+7
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,10 @@ def test_fillna_fill_other(self, data):
127127
expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})
128128

129129
self.assert_frame_equal(result, expected)
130+
131+
def test_use_inf_as_na_no_effect(self, data_missing):
132+
ser = pd.Series(data_missing)
133+
expected = ser.isna()
134+
with pd.option_context("mode.use_inf_as_na", True):
135+
result = ser.isna()
136+
self.assert_series_equal(result, expected)

pandas/tests/series/test_missing.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -509,12 +509,12 @@ def test_fillna_nat(self):
509509
tm.assert_frame_equal(filled2, expected)
510510

511511
def test_isna_for_inf(self):
512-
s = Series(["a", np.inf, np.nan, 1.0])
512+
s = Series(["a", np.inf, np.nan, pd.NA, 1.0])
513513
with pd.option_context("mode.use_inf_as_na", True):
514514
r = s.isna()
515515
dr = s.dropna()
516-
e = Series([False, True, True, False])
517-
de = Series(["a", 1.0], index=[0, 3])
516+
e = Series([False, True, True, True, False])
517+
de = Series(["a", 1.0], index=[0, 4])
518518
tm.assert_series_equal(r, e)
519519
tm.assert_series_equal(dr, de)
520520

0 commit comments

Comments
 (0)