Skip to content

Commit 1e7a3fc

Browse files
jorisvandenbossche authored and proost committed
API: BooleanArray any/all with NA logic (pandas-dev#30062)
1 parent 723c7ee commit 1e7a3fc

File tree

3 files changed

+174
-5
lines changed

3 files changed

+174
-5
lines changed

ci/code_checks.sh

+4
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
281281
pytest -q --doctest-modules pandas/core/arrays/string_.py
282282
RET=$(($RET + $?)) ; echo $MSG "DONE"
283283

284+
MSG='Doctests arrays/boolean.py' ; echo $MSG
285+
pytest -q --doctest-modules pandas/core/arrays/boolean.py
286+
RET=$(($RET + $?)) ; echo $MSG "DONE"
287+
284288
fi
285289

286290
### DOCSTRINGS ###

pandas/core/arrays/boolean.py

+143-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from pandas._libs import lib, missing as libmissing
88
from pandas.compat import set_function_name
9+
from pandas.compat.numpy import function as nv
910

1011
from pandas.core.dtypes.base import ExtensionDtype
1112
from pandas.core.dtypes.cast import astype_nansafe
@@ -571,6 +572,143 @@ def _values_for_argsort(self) -> np.ndarray:
571572
data[self._mask] = -1
572573
return data
573574

575+
def any(self, skipna: bool = True, **kwargs):
    """
    Return whether any element is True.

    Returns False unless there is at least one element that is True.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be False, as for an empty array.
        If `skipna` is False, the result will still be True if there is
        at least one element that is True, otherwise NA will be returned
        if there are NA's present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.any : Numpy version of this method.
    BooleanArray.all : Return whether all elements are True.

    Examples
    --------

    The result indicates whether any element is True (and by default
    skips NAs):

    >>> pd.array([True, False, True]).any()
    True
    >>> pd.array([True, False, pd.NA]).any()
    True
    >>> pd.array([False, False, pd.NA]).any()
    False
    >>> pd.array([], dtype="boolean").any()
    False
    >>> pd.array([pd.NA], dtype="boolean").any()
    False

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, False, pd.NA]).any(skipna=False)
    True
    >>> pd.array([False, False, pd.NA]).any(skipna=False)
    NA

    Without any missing values, ``skipna=False`` never produces NA:

    >>> pd.array([False, False], dtype="boolean").any(skipna=False)
    False
    """
    # ``axis`` is dropped before validation so that NumPy-style calls
    # passing it are accepted; it has no effect on the result.
    kwargs.pop("axis", None)
    nv.validate_any((), kwargs)

    # Work on a copy so the underlying data is not modified; masked
    # positions are filled with False so they cannot make the result True.
    values = self._data.copy()
    np.putmask(values, self._mask, False)
    result = values.any()
    if skipna:
        return result

    # Kleene logic: a True element decides the outcome regardless of NAs.
    # Otherwise the result is only unknown if a missing value is actually
    # present; an array without NAs (including an empty one, whose mask is
    # empty) keeps its plain boolean result.
    if result or not self._mask.any():
        return result
    return self.dtype.na_value
643+
644+
def all(self, skipna: bool = True, **kwargs):
    """
    Return whether all elements are True.

    Returns True unless there is at least one element that is False.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be True, as for an empty array.
        If `skipna` is False, the result will still be False if there is
        at least one element that is False, otherwise NA will be returned
        if there are NA's present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.all : Numpy version of this method.
    BooleanArray.any : Return whether any element is True.

    Examples
    --------

    The result indicates whether all elements are True (and by default
    skips NAs):

    >>> pd.array([True, True, pd.NA]).all()
    True
    >>> pd.array([True, False, pd.NA]).all()
    False
    >>> pd.array([], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="boolean").all()
    True

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, True, pd.NA]).all(skipna=False)
    NA
    >>> pd.array([True, False, pd.NA]).all(skipna=False)
    False

    Without any missing values, ``skipna=False`` never produces NA:

    >>> pd.array([True, True], dtype="boolean").all(skipna=False)
    True
    """
    # ``axis`` is dropped before validation so that NumPy-style calls
    # passing it are accepted; it has no effect on the result.
    kwargs.pop("axis", None)
    nv.validate_all((), kwargs)

    # Work on a copy so the underlying data is not modified; masked
    # positions are filled with True so they cannot make the result False.
    values = self._data.copy()
    np.putmask(values, self._mask, True)
    result = values.all()
    if skipna:
        return result

    # Kleene logic: a False element decides the outcome regardless of NAs.
    # Otherwise the result is only unknown if a missing value is actually
    # present; an array without NAs (including an empty one, whose mask is
    # empty) keeps its plain boolean result.
    if not result or not self._mask.any():
        return result
    return self.dtype.na_value
711+
574712
@classmethod
575713
def _create_logical_method(cls, op):
576714
def logical_method(self, other):
@@ -667,6 +805,10 @@ def cmp_method(self, other):
667805
return set_function_name(cmp_method, name, cls)
668806

669807
def _reduce(self, name, skipna=True, **kwargs):
808+
809+
if name in {"any", "all"}:
810+
return getattr(self, name)(skipna=skipna, **kwargs)
811+
670812
data = self._data
671813
mask = self._mask
672814

@@ -678,12 +820,8 @@ def _reduce(self, name, skipna=True, **kwargs):
678820
op = getattr(nanops, "nan" + name)
679821
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)
680822

681-
# if we have a boolean op, don't coerce
682-
if name in ["any", "all"]:
683-
pass
684-
685823
# if we have numeric op that would result in an int, coerce to int if possible
686-
elif name in ["sum", "prod"] and notna(result):
824+
if name in ["sum", "prod"] and notna(result):
687825
int_result = np.int64(result)
688826
if int_result == result:
689827
result = int_result

pandas/tests/arrays/test_boolean.py

+27
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,33 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
715715
assert isinstance(getattr(s, op)(), np.float64)
716716

717717

718+
@pytest.mark.parametrize(
    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
    [
        ([True, pd.NA], True, True, True, pd.NA),
        ([False, pd.NA], False, False, pd.NA, False),
        ([pd.NA], False, True, pd.NA, pd.NA),
        ([], False, True, False, True),
    ],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
    """
    Check ``any``/``all`` on boolean data, with and without ``skipna``.

    Each parametrized case gives the input values followed by the expected
    results of ``any()``, ``all()``, ``any(skipna=False)`` and
    ``all(skipna=False)``. The same assertions are run for both
    ``pd.array`` and ``pd.Series`` built with ``dtype="boolean"``.
    """

    def as_returned(expected):
        # the methods return numpy scalars, so plain bools are converted to
        # ``np.bool_`` for the identity checks below; NA is left untouched
        if expected is pd.NA:
            return pd.NA
        return np.bool_(expected)

    exp_any = as_returned(exp_any)
    exp_all = as_returned(exp_all)
    exp_any_noskip = as_returned(exp_any_noskip)
    exp_all_noskip = as_returned(exp_all_noskip)

    for constructor in (pd.array, pd.Series):
        arr = constructor(values, dtype="boolean")

        # direct method calls, default skipna and skipna=False
        assert arr.any() is exp_any
        assert arr.all() is exp_all
        assert arr.any(skipna=False) is exp_any_noskip
        assert arr.all(skipna=False) is exp_all_noskip

        # the NumPy functions applied to the scalar results
        assert np.any(arr.any()) is exp_any
        assert np.all(arr.all()) is exp_all
743+
744+
718745
# TODO when BooleanArray coerces to object dtype numpy array, need to do conversion
719746
# manually in the indexing code
720747
# def test_indexing_boolean_mask():

0 commit comments

Comments
 (0)