Skip to content

Commit 0524ef8

Browse files
authored
ENH: nullables use Kleene logic for any/all reductions (#41970)
1 parent a5264b8 commit 0524ef8

File tree

9 files changed

+183
-152
lines changed

9 files changed

+183
-152
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ Other enhancements
123123
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
124124
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
125125
- Attempting to write into a file in missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions missing parent directory, the same is true for :class:`Series` counterparts (:issue:`24306`)
126+
- :meth:`IntegerArray.all`, :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
126127
- Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
127128
-
128129

pandas/core/arrays/boolean.py

+3-143
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
npt,
2222
type_t,
2323
)
24-
from pandas.compat.numpy import function as nv
2524

2625
from pandas.core.dtypes.common import (
2726
is_bool_dtype,
@@ -310,6 +309,9 @@ class BooleanArray(BaseMaskedArray):
310309

311310
# The value used to fill '_data' to avoid upcasting
312311
_internal_fill_value = False
312+
# Fill values used for any/all
313+
_truthy_value = True
314+
_falsey_value = False
313315
_TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
314316
_FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
315317

@@ -490,141 +492,6 @@ def _values_for_argsort(self) -> np.ndarray:
490492
data[self._mask] = -1
491493
return data
492494

493-
def any(self, *, skipna: bool = True, **kwargs):
494-
"""
495-
Return whether any element is True.
496-
497-
Returns False unless there is at least one element that is True.
498-
By default, NAs are skipped. If ``skipna=False`` is specified and
499-
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
500-
is used as for logical operations.
501-
502-
Parameters
503-
----------
504-
skipna : bool, default True
505-
Exclude NA values. If the entire array is NA and `skipna` is
506-
True, then the result will be False, as for an empty array.
507-
If `skipna` is False, the result will still be True if there is
508-
at least one element that is True, otherwise NA will be returned
509-
if there are NA's present.
510-
**kwargs : any, default None
511-
Additional keywords have no effect but might be accepted for
512-
compatibility with NumPy.
513-
514-
Returns
515-
-------
516-
bool or :attr:`pandas.NA`
517-
518-
See Also
519-
--------
520-
numpy.any : Numpy version of this method.
521-
BooleanArray.all : Return whether all elements are True.
522-
523-
Examples
524-
--------
525-
The result indicates whether any element is True (and by default
526-
skips NAs):
527-
528-
>>> pd.array([True, False, True]).any()
529-
True
530-
>>> pd.array([True, False, pd.NA]).any()
531-
True
532-
>>> pd.array([False, False, pd.NA]).any()
533-
False
534-
>>> pd.array([], dtype="boolean").any()
535-
False
536-
>>> pd.array([pd.NA], dtype="boolean").any()
537-
False
538-
539-
With ``skipna=False``, the result can be NA if this is logically
540-
required (whether ``pd.NA`` is True or False influences the result):
541-
542-
>>> pd.array([True, False, pd.NA]).any(skipna=False)
543-
True
544-
>>> pd.array([False, False, pd.NA]).any(skipna=False)
545-
<NA>
546-
"""
547-
kwargs.pop("axis", None)
548-
nv.validate_any((), kwargs)
549-
550-
values = self._data.copy()
551-
np.putmask(values, self._mask, False)
552-
result = values.any()
553-
if skipna:
554-
return result
555-
else:
556-
if result or len(self) == 0 or not self._mask.any():
557-
return result
558-
else:
559-
return self.dtype.na_value
560-
561-
def all(self, *, skipna: bool = True, **kwargs):
562-
"""
563-
Return whether all elements are True.
564-
565-
Returns True unless there is at least one element that is False.
566-
By default, NAs are skipped. If ``skipna=False`` is specified and
567-
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
568-
is used as for logical operations.
569-
570-
Parameters
571-
----------
572-
skipna : bool, default True
573-
Exclude NA values. If the entire array is NA and `skipna` is
574-
True, then the result will be True, as for an empty array.
575-
If `skipna` is False, the result will still be False if there is
576-
at least one element that is False, otherwise NA will be returned
577-
if there are NA's present.
578-
**kwargs : any, default None
579-
Additional keywords have no effect but might be accepted for
580-
compatibility with NumPy.
581-
582-
Returns
583-
-------
584-
bool or :attr:`pandas.NA`
585-
586-
See Also
587-
--------
588-
numpy.all : Numpy version of this method.
589-
BooleanArray.any : Return whether any element is True.
590-
591-
Examples
592-
--------
593-
The result indicates whether any element is True (and by default
594-
skips NAs):
595-
596-
>>> pd.array([True, True, pd.NA]).all()
597-
True
598-
>>> pd.array([True, False, pd.NA]).all()
599-
False
600-
>>> pd.array([], dtype="boolean").all()
601-
True
602-
>>> pd.array([pd.NA], dtype="boolean").all()
603-
True
604-
605-
With ``skipna=False``, the result can be NA if this is logically
606-
required (whether ``pd.NA`` is True or False influences the result):
607-
608-
>>> pd.array([True, True, pd.NA]).all(skipna=False)
609-
<NA>
610-
>>> pd.array([True, False, pd.NA]).all(skipna=False)
611-
False
612-
"""
613-
kwargs.pop("axis", None)
614-
nv.validate_all((), kwargs)
615-
616-
values = self._data.copy()
617-
np.putmask(values, self._mask, True)
618-
result = values.all()
619-
620-
if skipna:
621-
return result
622-
else:
623-
if not result or len(self) == 0 or not self._mask.any():
624-
return result
625-
else:
626-
return self.dtype.na_value
627-
628495
def _logical_method(self, other, op):
629496

630497
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
@@ -753,13 +620,6 @@ def _arith_method(self, other, op):
753620

754621
return self._maybe_mask_result(result, mask, other, op_name)
755622

756-
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
757-
758-
if name in {"any", "all"}:
759-
return getattr(self, name)(skipna=skipna, **kwargs)
760-
761-
return super()._reduce(name, skipna=skipna, **kwargs)
762-
763623
def _maybe_mask_result(self, result, mask, other, op_name: str):
764624
"""
765625
Parameters

pandas/core/arrays/floating.py

+3
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ class FloatingArray(NumericArray):
245245

246246
# The value used to fill '_data' to avoid upcasting
247247
_internal_fill_value = 0.0
248+
# Fill values used for any/all
249+
_truthy_value = 1.0
250+
_falsey_value = 0.0
248251

249252
@cache_readonly
250253
def dtype(self) -> FloatingDtype:

pandas/core/arrays/integer.py

+3
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,9 @@ class IntegerArray(NumericArray):
307307

308308
# The value used to fill '_data' to avoid upcasting
309309
_internal_fill_value = 1
310+
# Fill values used for any/all
311+
_truthy_value = 1
312+
_falsey_value = 0
310313

311314
@cache_readonly
312315
def dtype(self) -> _IntegerDtype:

pandas/core/arrays/masked.py

+160-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
if TYPE_CHECKING:
6565
from pandas import Series
6666
from pandas.core.arrays import BooleanArray
67-
67+
from pandas.compat.numpy import function as nv
6868

6969
BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")
7070

@@ -115,6 +115,9 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
115115

116116
# The value used to fill '_data' to avoid upcasting
117117
_internal_fill_value: Scalar
118+
# Fill values used for any/all
119+
_truthy_value: Scalar  # annotation only; concrete subclasses set a value with bool(_truthy_value) = True
120+
_falsey_value: Scalar  # annotation only; concrete subclasses set a value with bool(_falsey_value) = False
118121

119122
def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
120123
# values is supposed to already be validated in the subclass
@@ -518,6 +521,9 @@ def value_counts(self, dropna: bool = True) -> Series:
518521
return Series(counts, index=index)
519522

520523
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
524+
if name in {"any", "all"}:
525+
return getattr(self, name)(skipna=skipna, **kwargs)
526+
521527
data = self._data
522528
mask = self._mask
523529

@@ -537,3 +543,156 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
537543
return libmissing.NA
538544

539545
return result
546+
547+
def any(self, *, skipna: bool = True, **kwargs):
548+
"""
549+
Return whether any element is truthy.
550+
551+
Returns False unless there is at least one element that is truthy.
552+
By default, NAs are skipped. If ``skipna=False`` is specified and
553+
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
554+
is used as for logical operations.
555+
556+
.. versionchanged:: 1.4.0
557+
558+
Parameters
559+
----------
560+
skipna : bool, default True
561+
Exclude NA values. If the entire array is NA and `skipna` is
562+
True, then the result will be False, as for an empty array.
563+
If `skipna` is False, the result will still be True if there is
564+
at least one element that is truthy, otherwise NA will be returned
565+
if there are NA's present.
566+
**kwargs : any, default None
567+
Additional keywords have no effect but might be accepted for
568+
compatibility with NumPy.
569+
570+
Returns
571+
-------
572+
bool or :attr:`pandas.NA`
573+
574+
See Also
575+
--------
576+
numpy.any : Numpy version of this method.
577+
BaseMaskedArray.all : Return whether all elements are truthy.
578+
579+
Examples
580+
--------
581+
The result indicates whether any element is truthy (and by default
582+
skips NAs):
583+
584+
>>> pd.array([True, False, True]).any()
585+
True
586+
>>> pd.array([True, False, pd.NA]).any()
587+
True
588+
>>> pd.array([False, False, pd.NA]).any()
589+
False
590+
>>> pd.array([], dtype="boolean").any()
591+
False
592+
>>> pd.array([pd.NA], dtype="boolean").any()
593+
False
594+
>>> pd.array([pd.NA], dtype="Float64").any()
595+
False
596+
597+
With ``skipna=False``, the result can be NA if this is logically
598+
required (whether ``pd.NA`` is True or False influences the result):
599+
600+
>>> pd.array([True, False, pd.NA]).any(skipna=False)
601+
True
602+
>>> pd.array([1, 0, pd.NA]).any(skipna=False)
603+
True
604+
>>> pd.array([False, False, pd.NA]).any(skipna=False)
605+
<NA>
606+
>>> pd.array([0, 0, pd.NA]).any(skipna=False)
607+
<NA>
608+
"""
609+
kwargs.pop("axis", None)
610+
nv.validate_any((), kwargs)
611+
612+
values = self._data.copy()
613+
np.putmask(values, self._mask, self._falsey_value)
614+
result = values.any()
615+
if skipna:
616+
return result
617+
else:
618+
if result or len(self) == 0 or not self._mask.any():
619+
return result
620+
else:
621+
return self.dtype.na_value
622+
623+
def all(self, *, skipna: bool = True, **kwargs):
624+
"""
625+
Return whether all elements are truthy.
626+
627+
Returns True unless there is at least one element that is falsey.
628+
By default, NAs are skipped. If ``skipna=False`` is specified and
629+
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
630+
is used as for logical operations.
631+
632+
.. versionchanged:: 1.4.0
633+
634+
Parameters
635+
----------
636+
skipna : bool, default True
637+
Exclude NA values. If the entire array is NA and `skipna` is
638+
True, then the result will be True, as for an empty array.
639+
If `skipna` is False, the result will still be False if there is
640+
at least one element that is falsey, otherwise NA will be returned
641+
if there are NA's present.
642+
**kwargs : any, default None
643+
Additional keywords have no effect but might be accepted for
644+
compatibility with NumPy.
645+
646+
Returns
647+
-------
648+
bool or :attr:`pandas.NA`
649+
650+
See Also
651+
--------
652+
numpy.all : Numpy version of this method.
653+
BaseMaskedArray.any : Return whether any element is truthy.
654+
655+
Examples
656+
--------
657+
The result indicates whether all elements are truthy (and by default
658+
skips NAs):
659+
660+
>>> pd.array([True, True, pd.NA]).all()
661+
True
662+
>>> pd.array([1, 1, pd.NA]).all()
663+
True
664+
>>> pd.array([True, False, pd.NA]).all()
665+
False
666+
>>> pd.array([], dtype="boolean").all()
667+
True
668+
>>> pd.array([pd.NA], dtype="boolean").all()
669+
True
670+
>>> pd.array([pd.NA], dtype="Float64").all()
671+
True
672+
673+
With ``skipna=False``, the result can be NA if this is logically
674+
required (whether ``pd.NA`` is True or False influences the result):
675+
676+
>>> pd.array([True, True, pd.NA]).all(skipna=False)
677+
<NA>
678+
>>> pd.array([1, 1, pd.NA]).all(skipna=False)
679+
<NA>
680+
>>> pd.array([True, False, pd.NA]).all(skipna=False)
681+
False
682+
>>> pd.array([1, 0, pd.NA]).all(skipna=False)
683+
False
684+
"""
685+
kwargs.pop("axis", None)
686+
nv.validate_all((), kwargs)
687+
688+
values = self._data.copy()
689+
np.putmask(values, self._mask, self._truthy_value)
690+
result = values.all()
691+
692+
if skipna:
693+
return result
694+
else:
695+
if not result or len(self) == 0 or not self._mask.any():
696+
return result
697+
else:
698+
return self.dtype.na_value

pandas/tests/extension/test_boolean.py

+1
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ def check_reduce(self, s, op_name, skipna):
381381
tm.assert_almost_equal(result, expected)
382382

383383

384+
@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")
384385
class TestBooleanReduce(base.BaseBooleanReduceTests):
385386
pass
386387

0 commit comments

Comments
 (0)