-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API: BooleanArray any/all with NA logic #30062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
0bf654e
043f257
12d2729
15471d8
6ca6945
e59e91f
24797d4
ec7d072
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
|
||
from pandas._libs import lib, missing as libmissing | ||
from pandas.compat import set_function_name | ||
from pandas.compat.numpy import function as nv | ||
|
||
from pandas.core.dtypes.base import ExtensionDtype | ||
from pandas.core.dtypes.cast import astype_nansafe | ||
|
@@ -560,6 +561,143 @@ def _values_for_argsort(self) -> np.ndarray: | |
data[self._mask] = -1 | ||
return data | ||
|
||
def any(self, skipna=True, **kwargs): | ||
""" | ||
Return whether any element is True. | ||
|
||
Returns False unless there is at least one element that is True. | ||
By default, NAs are skipped. If ``skipna=False`` is specified and | ||
missing values are present, similar :ref:`Kleene logic <boolean.kleene>` | ||
is used as for logical operations. | ||
|
||
Parameters | ||
---------- | ||
skipna : bool, default True | ||
Exclude NA values. If the entire array is NA and `skipna` is | ||
True, then the result will be False, as for an empty array. | ||
If `skipna` is False, the result will still be True if there is | ||
at least one element that is True, otherwise NA will be returned | ||
if there are NA's present. | ||
**kwargs : any, default None | ||
Additional keywords have no effect but might be accepted for | ||
compatibility with NumPy. | ||
|
||
Returns | ||
------- | ||
bool or :attr:`pandas.NA` | ||
|
||
See Also | ||
-------- | ||
numpy.any : Numpy version of this method. | ||
BooleanArray.all : Return whether all elements are True. | ||
|
||
Examples | ||
-------- | ||
|
||
The result indicates whether any element is True (and by default | ||
skips NAs): | ||
|
||
>>> pd.array([True, False, True]).any() | ||
True | ||
>>> pd.array([True, False, pd.NA]).any() | ||
True | ||
>>> pd.array([False, False, pd.NA]).any() | ||
False | ||
>>> pd.array([], dtype="boolean").any() | ||
False | ||
>>> pd.array([pd.NA], dtype="boolean").any() | ||
False | ||
|
||
With ``skipna=False``, the result can be NA if this is logically | ||
required (whether ``pd.NA`` is True or False influences the result): | ||
|
||
>>> pd.array([True, False, pd.NA]).any(skipna=False) | ||
True | ||
>>> pd.array([False, False, pd.NA]).any(skipna=False) | ||
NA | ||
""" | ||
kwargs.pop("axis", None) | ||
nv.validate_any((), kwargs) | ||
|
||
values = self._data.copy() | ||
np.putmask(values, self._mask, False) | ||
result = values.any() | ||
if skipna: | ||
return result | ||
else: | ||
if result or len(self) == 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use `not len(self)`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In pandas/core, we actually use the [inline code lost in extraction] (the typical pythonic idiom recommendation is about doing [remainder of comment lost in extraction] |
||
return result | ||
else: | ||
return self.dtype.na_value | ||
|
||
def all(self, skipna=True, **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. type There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done |
||
""" | ||
Return whether all elements are True. | ||
|
||
Returns True unless there is at least one element that is False. | ||
By default, NAs are skipped. If ``skipna=False`` is specified and | ||
missing values are present, similar :ref:`Kleene logic <boolean.kleene>` | ||
is used as for logical operations. | ||
|
||
Parameters | ||
---------- | ||
skipna : bool, default True | ||
Exclude NA values. If the entire array is NA and `skipna` is | ||
True, then the result will be True, as for an empty array. | ||
If `skipna` is False, the result will still be False if there is | ||
at least one element that is False, otherwise NA will be returned | ||
if there are NA's present. | ||
**kwargs : any, default None | ||
Additional keywords have no effect but might be accepted for | ||
compatibility with NumPy. | ||
|
||
Returns | ||
------- | ||
bool or :attr:`pandas.NA` | ||
|
||
See Also | ||
-------- | ||
numpy.all : Numpy version of this method. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might want to add a link for Kleene logic here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the See Also section, we can only add links to other API pages. But in the long description of the docstring a bit above, I already included a link about the Kleene logic. |
||
BooleanArray.any : Return whether any element is True. | ||
|
||
Examples | ||
-------- | ||
|
||
The result indicates whether all elements are True (and by default | ||
skips NAs): | ||
|
||
>>> pd.array([True, True, pd.NA]).all() | ||
True | ||
>>> pd.array([True, False, pd.NA]).all() | ||
False | ||
>>> pd.array([], dtype="boolean").all() | ||
True | ||
>>> pd.array([pd.NA], dtype="boolean").all() | ||
True | ||
|
||
With ``skipna=False``, the result can be NA if this is logically | ||
required (whether ``pd.NA`` is True or False influences the result): | ||
|
||
>>> pd.array([True, True, pd.NA]).all(skipna=False) | ||
NA | ||
>>> pd.array([True, False, pd.NA]).all(skipna=False) | ||
False | ||
""" | ||
kwargs.pop("axis", None) | ||
nv.validate_all((), kwargs) | ||
|
||
values = self._data.copy() | ||
np.putmask(values, self._mask, True) | ||
result = values.all() | ||
|
||
if skipna: | ||
return result | ||
else: | ||
if not result or len(self) == 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. same as above |
||
return result | ||
else: | ||
return self.dtype.na_value | ||
|
||
@classmethod | ||
def _create_logical_method(cls, op): | ||
def logical_method(self, other): | ||
|
@@ -656,6 +794,10 @@ def cmp_method(self, other): | |
return set_function_name(cmp_method, name, cls) | ||
|
||
def _reduce(self, name, skipna=True, **kwargs): | ||
|
||
if name in {"any", "all"}: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We usually use lists for these checks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In this file we actually use more sets. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Heh, I'm probably to blame for the sets :) I like them more for membership tests, though it doesn't matter for small sets. |
||
return getattr(self, name)(skipna=skipna, **kwargs) | ||
|
||
data = self._data | ||
mask = self._mask | ||
|
||
|
@@ -667,12 +809,8 @@ def _reduce(self, name, skipna=True, **kwargs): | |
op = getattr(nanops, "nan" + name) | ||
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) | ||
|
||
# if we have a boolean op, don't coerce | ||
if name in ["any", "all"]: | ||
pass | ||
|
||
# if we have numeric op that would result in an int, coerce to int if possible | ||
elif name in ["sum", "prod"] and notna(result): | ||
if name in ["sum", "prod"] and notna(result): | ||
int_result = np.int64(result) | ||
if int_result == result: | ||
result = int_result | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you type
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done