Skip to content

Commit 589d0d3

Browse files
committed
BUG: membership checks on ExtensionArray containing NA values
1 parent 8d1b8ab commit 589d0d3

File tree

4 files changed

+40
-1
lines changed

4 files changed

+40
-1
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ ExtensionArray
690690
- Fixed bug when applying a NumPy ufunc with multiple outputs to a :class:`pandas.arrays.IntegerArray` returning None (:issue:`36913`)
691691
- Fixed an inconsistency in :class:`PeriodArray`'s ``__init__`` signature to those of :class:`DatetimeArray` and :class:`TimedeltaArray` (:issue:`37289`)
692692
- Reductions for :class:`BooleanArray`, :class:`Categorical`, :class:`DatetimeArray`, :class:`FloatingArray`, :class:`IntegerArray`, :class:`PeriodArray`, :class:`TimedeltaArray`, and :class:`PandasArray` are now keyword-only methods (:issue:`37541`)
693+
- Fixed bug where a ``TypeError`` was wrongly raised when a membership check was made on an ``ExtensionArray`` that contains :class:`NA` values and does not define a custom ``__contains__`` method (:issue:`xxxxx`)
693694

694695
Other
695696
^^^^^

pandas/core/arrays/base.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
)
4040
from pandas.core.dtypes.dtypes import ExtensionDtype
4141
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
42-
from pandas.core.dtypes.missing import isna
42+
from pandas.core.dtypes.missing import isna, notna
4343

4444
from pandas.core import ops
4545
from pandas.core.algorithms import factorize_array, unique
@@ -351,6 +351,18 @@ def __iter__(self):
351351
for i in range(len(self)):
352352
yield self[i]
353353

354+
def __contains__(self, item) -> bool:
    """
    Return for `item in self`.

    Parameters
    ----------
    item : object
        Value whose membership in the array is tested.

    Returns
    -------
    bool
        For a missing-value scalar: whether the array holds any missing
        value (always False when the array cannot hold missing values).
        For anything else: whether a non-missing element equals `item`.
    """
    # Function-scope import so the block brings its own dependency.
    from pandas.core.dtypes.common import is_scalar

    # Comparisons of any item to pd.NA always return pd.NA, whose truth
    # value is ambiguous, so e.g. "a" in [pd.NA] raises a TypeError.
    # Missing values are therefore handled separately from the search.
    # Only scalars are checked with isna: for a list-like item isna returns
    # an array, and using that as an `if` condition can itself raise.
    if is_scalar(item) and isna(item):
        if not self._can_hold_na:
            return False
        # .any() may hand back a NumPy bool; the signature promises bool.
        return bool(isna(self).any())

    # Drop missing entries so no comparison below involves a missing value.
    arr = self[notna(self)] if self._can_hold_na else self
    return item in iter(arr)
365+
354366
def __eq__(self, other: Any) -> ArrayLike:
355367
"""
356368
Return for `self == other` (element-wise equality).

pandas/tests/arrays/categorical/test_operators.py

+12
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,15 @@ def test_numeric_like_ops(self):
395395
msg = "Object with dtype category cannot perform the numpy op log"
396396
with pytest.raises(TypeError, match=msg):
397397
np.log(s)
398+
399+
def test_contains(self, ordered):
    """Check ``in`` on a Categorical, first without and then with a missing value."""
    # GH-xxxxx
    complete = Categorical(["a", "b"], ordered=ordered)
    with_missing = Categorical([np.nan, "a"], ordered=ordered)

    # No missing value stored: pd.NA must not be reported as a member.
    assert "a" in complete
    assert "x" not in complete
    assert pd.NA not in complete

    # A stored np.nan is expected to make pd.NA count as a member.
    assert "a" in with_missing
    assert "x" not in with_missing
    assert pd.NA in with_missing

pandas/tests/arrays/string_/test_string.py

+14
Original file line numberDiff line numberDiff line change
@@ -345,3 +345,17 @@ def test_astype_from_float_dtype(dtype):
345345
result = s.astype("string")
346346
expected = pd.Series(["0.1"], dtype="string")
347347
tm.assert_series_equal(result, expected)
348+
349+
350+
def test_contains():
    """Check ``in`` on a StringArray, first without and then with a pd.NA entry."""
    # GH-xxxxx
    arr = pd.arrays.StringArray(np.array(["a", "b"], dtype=object))

    assert "a" in arr
    assert "x" not in arr
    assert pd.NA not in arr

    # dtype=object is spelled out rather than left to NumPy's inference,
    # so both arrays in this test are built the same way.
    arr = pd.arrays.StringArray(np.array(["a", pd.NA], dtype=object))
    assert "a" in arr
    assert "x" not in arr
    assert pd.NA in arr

0 commit comments

Comments
 (0)