Skip to content

ENH: nullables use Kleene logic for any/all reductions #41970

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ Other enhancements
- :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
- :meth:`IntegerArray.all` , :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
-

.. ---------------------------------------------------------------------------

Expand Down
146 changes: 3 additions & 143 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
npt,
type_t,
)
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.common import (
is_bool_dtype,
Expand Down Expand Up @@ -297,6 +296,9 @@ class BooleanArray(BaseMaskedArray):

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = False
# Fill values used for any/all
_truthy_value = True
_falsey_value = False
_TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
_FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}

Expand Down Expand Up @@ -477,141 +479,6 @@ def _values_for_argsort(self) -> np.ndarray:
data[self._mask] = -1
return data

def any(self, *, skipna: bool = True, **kwargs):
"""
Return whether any element is True.

Returns False unless there is at least one element that is True.
By default, NAs are skipped. If ``skipna=False`` is specified and
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
is used as for logical operations.

Parameters
----------
skipna : bool, default True
Exclude NA values. If the entire array is NA and `skipna` is
True, then the result will be False, as for an empty array.
If `skipna` is False, the result will still be True if there is
at least one element that is True, otherwise NA will be returned
if there are NA's present.
**kwargs : any, default None
Additional keywords have no effect but might be accepted for
compatibility with NumPy.

Returns
-------
bool or :attr:`pandas.NA`

See Also
--------
numpy.any : Numpy version of this method.
BooleanArray.all : Return whether all elements are True.

Examples
--------
The result indicates whether any element is True (and by default
skips NAs):

>>> pd.array([True, False, True]).any()
True
>>> pd.array([True, False, pd.NA]).any()
True
>>> pd.array([False, False, pd.NA]).any()
False
>>> pd.array([], dtype="boolean").any()
False
>>> pd.array([pd.NA], dtype="boolean").any()
False

With ``skipna=False``, the result can be NA if this is logically
required (whether ``pd.NA`` is True or False influences the result):

>>> pd.array([True, False, pd.NA]).any(skipna=False)
True
>>> pd.array([False, False, pd.NA]).any(skipna=False)
<NA>
"""
kwargs.pop("axis", None)
nv.validate_any((), kwargs)

values = self._data.copy()
np.putmask(values, self._mask, False)
result = values.any()
if skipna:
return result
else:
if result or len(self) == 0 or not self._mask.any():
return result
else:
return self.dtype.na_value

def all(self, *, skipna: bool = True, **kwargs):
"""
Return whether all elements are True.

Returns True unless there is at least one element that is False.
By default, NAs are skipped. If ``skipna=False`` is specified and
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
is used as for logical operations.

Parameters
----------
skipna : bool, default True
Exclude NA values. If the entire array is NA and `skipna` is
True, then the result will be True, as for an empty array.
If `skipna` is False, the result will still be False if there is
at least one element that is False, otherwise NA will be returned
if there are NA's present.
**kwargs : any, default None
Additional keywords have no effect but might be accepted for
compatibility with NumPy.

Returns
-------
bool or :attr:`pandas.NA`

See Also
--------
numpy.all : Numpy version of this method.
BooleanArray.any : Return whether any element is True.

Examples
--------
The result indicates whether any element is True (and by default
skips NAs):

>>> pd.array([True, True, pd.NA]).all()
True
>>> pd.array([True, False, pd.NA]).all()
False
>>> pd.array([], dtype="boolean").all()
True
>>> pd.array([pd.NA], dtype="boolean").all()
True

With ``skipna=False``, the result can be NA if this is logically
required (whether ``pd.NA`` is True or False influences the result):

>>> pd.array([True, True, pd.NA]).all(skipna=False)
<NA>
>>> pd.array([True, False, pd.NA]).all(skipna=False)
False
"""
kwargs.pop("axis", None)
nv.validate_all((), kwargs)

values = self._data.copy()
np.putmask(values, self._mask, True)
result = values.all()

if skipna:
return result
else:
if not result or len(self) == 0 or not self._mask.any():
return result
else:
return self.dtype.na_value

def _logical_method(self, other, op):

assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
Expand Down Expand Up @@ -740,13 +607,6 @@ def _arith_method(self, other, op):

return self._maybe_mask_result(result, mask, other, op_name)

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):

if name in {"any", "all"}:
return getattr(self, name)(skipna=skipna, **kwargs)

return super()._reduce(name, skipna=skipna, **kwargs)

def _maybe_mask_result(self, result, mask, other, op_name: str):
"""
Parameters
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ class FloatingArray(NumericArray):

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 0.0
# Fill values used for any/all
_truthy_value = 1.0
_falsey_value = 0.0

@cache_readonly
def dtype(self) -> FloatingDtype:
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,9 @@ class IntegerArray(NumericArray):

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 1
# Fill values used for any/all
_truthy_value = 1
_falsey_value = 0

@cache_readonly
def dtype(self) -> _IntegerDtype:
Expand Down
161 changes: 160 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
if TYPE_CHECKING:
from pandas import Series
from pandas.core.arrays import BooleanArray

from pandas.compat.numpy import function as nv

BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")

Expand Down Expand Up @@ -113,6 +113,9 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):

# The value used to fill '_data' to avoid upcasting
_internal_fill_value: Scalar
# Fill values used for any/all
_truthy_value = Scalar # bool(_truthy_value) = True
_falsey_value = Scalar # bool(_falsey_value) = False

def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
# values is supposed to already be validated in the subclass
Expand Down Expand Up @@ -509,6 +512,9 @@ def value_counts(self, dropna: bool = True) -> Series:
return Series(counts, index=index)

def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
if name in {"any", "all"}:
return getattr(self, name)(skipna=skipna, **kwargs)

data = self._data
mask = self._mask

Expand All @@ -528,3 +534,156 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
return libmissing.NA

return result

def any(self, *, skipna: bool = True, **kwargs):
"""
Return whether any element is truthy.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a versionchanged 1.4

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


Returns False unless there is at least one element that is truthy.
By default, NAs are skipped. If ``skipna=False`` is specified and
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
is used as for logical operations.

.. versionchanged:: 1.4.0

Parameters
----------
skipna : bool, default True
Exclude NA values. If the entire array is NA and `skipna` is
True, then the result will be False, as for an empty array.
If `skipna` is False, the result will still be True if there is
at least one element that is truthy, otherwise NA will be returned
if there are NA's present.
**kwargs : any, default None
Additional keywords have no effect but might be accepted for
compatibility with NumPy.

Returns
-------
bool or :attr:`pandas.NA`

See Also
--------
numpy.any : Numpy version of this method.
BaseMaskedArray.all : Return whether all elements are truthy.

Examples
--------
The result indicates whether any element is truthy (and by default
skips NAs):

>>> pd.array([True, False, True]).any()
True
>>> pd.array([True, False, pd.NA]).any()
True
>>> pd.array([False, False, pd.NA]).any()
False
>>> pd.array([], dtype="boolean").any()
False
>>> pd.array([pd.NA], dtype="boolean").any()
False
>>> pd.array([pd.NA], dtype="Float64").any()
False

With ``skipna=False``, the result can be NA if this is logically
required (whether ``pd.NA`` is True or False influences the result):

>>> pd.array([True, False, pd.NA]).any(skipna=False)
True
>>> pd.array([1, 0, pd.NA]).any(skipna=False)
True
>>> pd.array([False, False, pd.NA]).any(skipna=False)
<NA>
>>> pd.array([0, 0, pd.NA]).any(skipna=False)
<NA>
"""
kwargs.pop("axis", None)
nv.validate_any((), kwargs)

values = self._data.copy()
np.putmask(values, self._mask, self._falsey_value)
result = values.any()
if skipna:
return result
else:
if result or len(self) == 0 or not self._mask.any():
return result
else:
return self.dtype.na_value

def all(self, *, skipna: bool = True, **kwargs):
"""
Return whether all elements are truthy.

Returns True unless there is at least one element that is falsey.
By default, NAs are skipped. If ``skipna=False`` is specified and
missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
is used as for logical operations.

.. versionchanged:: 1.4.0

Parameters
----------
skipna : bool, default True
Exclude NA values. If the entire array is NA and `skipna` is
True, then the result will be True, as for an empty array.
If `skipna` is False, the result will still be False if there is
at least one element that is falsey, otherwise NA will be returned
if there are NA's present.
**kwargs : any, default None
Additional keywords have no effect but might be accepted for
compatibility with NumPy.

Returns
-------
bool or :attr:`pandas.NA`

See Also
--------
numpy.all : Numpy version of this method.
BooleanArray.any : Return whether any element is truthy.

Examples
--------
The result indicates whether all elements are truthy (and by default
skips NAs):

>>> pd.array([True, True, pd.NA]).all()
True
>>> pd.array([1, 1, pd.NA]).all()
True
>>> pd.array([True, False, pd.NA]).all()
False
>>> pd.array([], dtype="boolean").all()
True
>>> pd.array([pd.NA], dtype="boolean").all()
True
>>> pd.array([pd.NA], dtype="Float64").all()
True

With ``skipna=False``, the result can be NA if this is logically
required (whether ``pd.NA`` is True or False influences the result):

>>> pd.array([True, True, pd.NA]).all(skipna=False)
<NA>
>>> pd.array([1, 1, pd.NA]).all(skipna=False)
<NA>
>>> pd.array([True, False, pd.NA]).all(skipna=False)
False
>>> pd.array([1, 0, pd.NA]).all(skipna=False)
False
"""
kwargs.pop("axis", None)
nv.validate_all((), kwargs)

values = self._data.copy()
np.putmask(values, self._mask, self._truthy_value)
result = values.all()

if skipna:
return result
else:
if not result or len(self) == 0 or not self._mask.any():
return result
else:
return self.dtype.na_value
1 change: 1 addition & 0 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ def check_reduce(self, s, op_name, skipna):
tm.assert_almost_equal(result, expected)


@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")
class TestBooleanReduce(base.BaseBooleanReduceTests):
pass

Expand Down
Loading