From 830f5517e229be5bc85d97719fa29636ea162ed8 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 12 Jun 2021 15:04:11 -0400 Subject: [PATCH 1/5] add initial tests --- pandas/core/arrays/boolean.py | 146 +-------------------- pandas/core/arrays/floating.py | 3 + pandas/core/arrays/integer.py | 3 + pandas/core/arrays/masked.py | 143 +++++++++++++++++++- pandas/tests/reductions/test_reductions.py | 7 +- 5 files changed, 155 insertions(+), 147 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 14d059c04b7c0..ed023b5b5cf7a 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -15,7 +15,6 @@ Dtype, type_t, ) -from pandas.compat.numpy import function as nv from pandas.core.dtypes.common import ( is_bool_dtype, @@ -291,6 +290,9 @@ class BooleanArray(BaseMaskedArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value = False + # Fill values used for any/all + _truthy_value = True + _falsey_value = False _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} @@ -458,141 +460,6 @@ def _values_for_argsort(self) -> np.ndarray: data[self._mask] = -1 return data - def any(self, *, skipna: bool = True, **kwargs): - """ - Return whether any element is True. - - Returns False unless there is at least one element that is True. - By default, NAs are skipped. If ``skipna=False`` is specified and - missing values are present, similar :ref:`Kleene logic <boolean.kleene>` - is used as for logical operations. - - Parameters - ---------- - skipna : bool, default True - Exclude NA values. If the entire array is NA and `skipna` is - True, then the result will be False, as for an empty array. - If `skipna` is False, the result will still be True if there is - at least one element that is True, otherwise NA will be returned - if there are NA's present.
- **kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - bool or :attr:`pandas.NA` - - See Also - -------- - numpy.any : Numpy version of this method. - BooleanArray.all : Return whether all elements are True. - - Examples - -------- - The result indicates whether any element is True (and by default - skips NAs): - - >>> pd.array([True, False, True]).any() - True - >>> pd.array([True, False, pd.NA]).any() - True - >>> pd.array([False, False, pd.NA]).any() - False - >>> pd.array([], dtype="boolean").any() - False - >>> pd.array([pd.NA], dtype="boolean").any() - False - - With ``skipna=False``, the result can be NA if this is logically - required (whether ``pd.NA`` is True or False influences the result): - - >>> pd.array([True, False, pd.NA]).any(skipna=False) - True - >>> pd.array([False, False, pd.NA]).any(skipna=False) - <NA> - """ - kwargs.pop("axis", None) - nv.validate_any((), kwargs) - - values = self._data.copy() - np.putmask(values, self._mask, False) - result = values.any() - if skipna: - return result - else: - if result or len(self) == 0 or not self._mask.any(): - return result - else: - return self.dtype.na_value - - def all(self, *, skipna: bool = True, **kwargs): - """ - Return whether all elements are True. - - Returns True unless there is at least one element that is False. - By default, NAs are skipped. If ``skipna=False`` is specified and - missing values are present, similar :ref:`Kleene logic <boolean.kleene>` - is used as for logical operations. - - Parameters - ---------- - skipna : bool, default True - Exclude NA values. If the entire array is NA and `skipna` is - True, then the result will be True, as for an empty array. - If `skipna` is False, the result will still be False if there is - at least one element that is False, otherwise NA will be returned - if there are NA's present.
- **kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - bool or :attr:`pandas.NA` - - See Also - -------- - numpy.all : Numpy version of this method. - BooleanArray.any : Return whether any element is True. - - Examples - -------- - The result indicates whether any element is True (and by default - skips NAs): - - >>> pd.array([True, True, pd.NA]).all() - True - >>> pd.array([True, False, pd.NA]).all() - False - >>> pd.array([], dtype="boolean").all() - True - >>> pd.array([pd.NA], dtype="boolean").all() - True - - With ``skipna=False``, the result can be NA if this is logically - required (whether ``pd.NA`` is True or False influences the result): - - >>> pd.array([True, True, pd.NA]).all(skipna=False) - <NA> - >>> pd.array([True, False, pd.NA]).all(skipna=False) - False - """ - kwargs.pop("axis", None) - nv.validate_all((), kwargs) - - values = self._data.copy() - np.putmask(values, self._mask, True) - result = values.all() - - if skipna: - return result - else: - if not result or len(self) == 0 or not self._mask.any(): - return result - else: - return self.dtype.na_value - def _logical_method(self, other, op): assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} @@ -721,13 +588,6 @@ def _arith_method(self, other, op): return self._maybe_mask_result(result, mask, other, op_name) - def _reduce(self, name: str, *, skipna: bool = True, **kwargs): - - if name in {"any", "all"}: - return getattr(self, name)(skipna=skipna, **kwargs) - - return super()._reduce(name, skipna=skipna, **kwargs) - def _maybe_mask_result(self, result, mask, other, op_name: str): """ Parameters diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 1acbcf17dfffd..f5048cd203f85 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -241,6 +241,9 @@ class FloatingArray(NumericArray): # The value used to fill '_data' to avoid upcasting
_internal_fill_value = 0.0 + # Fill values used for any/all + _truthy_value = 1.0 + _falsey_value = 0.0 @cache_readonly def dtype(self) -> FloatingDtype: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index c9ba762a271bd..8b0bfd15872ce 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -303,6 +303,9 @@ class IntegerArray(NumericArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value = 1 + # Fill values used for any/all + _truthy_value = 1 + _falsey_value = 0 @cache_readonly def dtype(self) -> _IntegerDtype: diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index d274501143916..3c4eb59893d85 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -60,7 +60,7 @@ if TYPE_CHECKING: from pandas import Series from pandas.core.arrays import BooleanArray - +from pandas.compat.numpy import function as nv BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray") @@ -111,6 +111,9 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value: Scalar + # Fill values used for any/all + _truthy_value: Scalar # bool(_truthy_value) = True + _falsey_value: Scalar # bool(_falsey_value) = False def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): # values is supposed to already be validated in the subclass @@ -489,6 +492,9 @@ def value_counts(self, dropna: bool = True) -> Series: return Series(counts, index=index) def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + if name in {"any", "all"}: + return getattr(self, name)(skipna=skipna, **kwargs) + data = self._data mask = self._mask @@ -508,3 +514,138 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): return libmissing.NA return result + + def any(self, *, skipna: bool = True, **kwargs): + """ + Return whether any element is True.
+ + Returns False unless there is at least one element that is True. + By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic <boolean.kleene>` + is used as for logical operations. + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be False, as for an empty array. + If `skipna` is False, the result will still be True if there is + at least one element that is True, otherwise NA will be returned + if there are NA's present. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.any : Numpy version of this method. + BooleanArray.all : Return whether all elements are True. + + Examples + -------- + The result indicates whether any element is True (and by default + skips NAs): + + >>> pd.array([True, False, True]).any() + True + >>> pd.array([True, False, pd.NA]).any() + True + >>> pd.array([False, False, pd.NA]).any() + False + >>> pd.array([], dtype="boolean").any() + False + >>> pd.array([pd.NA], dtype="boolean").any() + False + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, False, pd.NA]).any(skipna=False) + True + >>> pd.array([False, False, pd.NA]).any(skipna=False) + <NA> + """ + kwargs.pop("axis", None) + nv.validate_any((), kwargs) + + values = self._data.copy() + np.putmask(values, self._mask, self._falsey_value) + result = values.any() + if skipna: + return result + else: + if result or len(self) == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value + + def all(self, *, skipna: bool = True, **kwargs): + """ + Return whether all elements are True. + + Returns True unless there is at least one element that is False.
+ By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic <boolean.kleene>` + is used as for logical operations. + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be True, as for an empty array. + If `skipna` is False, the result will still be False if there is + at least one element that is False, otherwise NA will be returned + if there are NA's present. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.all : Numpy version of this method. + BooleanArray.any : Return whether any element is True. + + Examples + -------- + The result indicates whether any element is True (and by default + skips NAs): + + >>> pd.array([True, True, pd.NA]).all() + True + >>> pd.array([True, False, pd.NA]).all() + False + >>> pd.array([], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="boolean").all() + True + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, True, pd.NA]).all(skipna=False) + <NA> + >>> pd.array([True, False, pd.NA]).all(skipna=False) + False + """ + kwargs.pop("axis", None) + nv.validate_all((), kwargs) + + values = self._data.copy() + np.putmask(values, self._mask, self._truthy_value) + result = values.all() + + if skipna: + return result + else: + if not result or len(self) == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 2f698a82bac49..2e78de95b4361 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -964,6 +964,7 @@ def
test_any_all_object_dtype_missing(self, data, bool_agg_func): expected = bool_agg_func == "any" and None not in data assert result == expected + @pytest.mark.parametrize("dtype", ["boolean", "Int64", "UInt64", "Float64"]) @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize( @@ -979,10 +980,10 @@ def test_any_all_object_dtype_missing(self, data, bool_agg_func): ([True, pd.NA, False], [[True, False], [True, False]]), ], ) - def test_any_all_boolean_kleene_logic( - self, bool_agg_func, skipna, data, expected_data + def test_any_all_nullable_kleene_logic( + self, bool_agg_func, skipna, data, dtype, expected_data ): - ser = Series(data, dtype="boolean") + ser = Series(data, dtype="boolean").astype(dtype) expected = expected_data[skipna][bool_agg_func == "all"] result = getattr(ser, bool_agg_func)(skipna=skipna) From edb07f4744dac668d27ae7e3511bb51406bc4dfe Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 12 Jun 2021 15:15:18 -0400 Subject: [PATCH 2/5] Add whatsnew and fixup docstring --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/arrays/masked.py | 34 ++++++++++++++++++++++++---------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 166ea2f0d4164..5ca200ec278cd 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -29,7 +29,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- :meth:`IntegerArray.all()`, :meth:`IntegerArray.any()`, :meth:`FloatingArray.any()`, and :meth:`FloatingArray.all()` use Kleene logic (:issue:`41967`) - ..
--------------------------------------------------------------------------- diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 3c4eb59893d85..63d7e8849579d 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -517,9 +517,9 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): def any(self, *, skipna: bool = True, **kwargs): """ - Return whether any element is True. + Return whether any element is truthy. - Returns False unless there is at least one element that is True. + Returns False unless there is at least one element that is truthy. By default, NAs are skipped. If ``skipna=False`` is specified and missing values are present, similar :ref:`Kleene logic <boolean.kleene>` is used as for logical operations. @@ -530,7 +530,7 @@ def any(self, *, skipna: bool = True, **kwargs): Exclude NA values. If the entire array is NA and `skipna` is True, then the result will be False, as for an empty array. If `skipna` is False, the result will still be True if there is - at least one element that is True, otherwise NA will be returned + at least one element that is truthy, otherwise NA will be returned if there are NA's present. **kwargs : any, default None Additional keywords have no effect but might be accepted for @@ -543,11 +543,11 @@ def any(self, *, skipna: bool = True, **kwargs): See Also -------- numpy.any : Numpy version of this method. - BooleanArray.all : Return whether all elements are True. + BaseMaskedArray.all : Return whether all elements are truthy.
Examples -------- - The result indicates whether any element is True (and by default + The result indicates whether any element is truthy (and by default skips NAs): >>> pd.array([True, False, True]).any() @@ -560,14 +560,20 @@ def any(self, *, skipna: bool = True, **kwargs): False >>> pd.array([pd.NA], dtype="boolean").any() False + >>> pd.array([pd.NA], dtype="Float64").any() + False With ``skipna=False``, the result can be NA if this is logically required (whether ``pd.NA`` is True or False influences the result): >>> pd.array([True, False, pd.NA]).any(skipna=False) True + >>> pd.array([1, 0, pd.NA]).any(skipna=False) + True >>> pd.array([False, False, pd.NA]).any(skipna=False) <NA> + >>> pd.array([0, 0, pd.NA]).any(skipna=False) + <NA> """ kwargs.pop("axis", None) nv.validate_any((), kwargs) @@ -585,9 +591,9 @@ def any(self, *, skipna: bool = True, **kwargs): def all(self, *, skipna: bool = True, **kwargs): """ - Return whether all elements are True. + Return whether all elements are truthy. - Returns True unless there is at least one element that is False. + Returns True unless there is at least one element that is falsey. By default, NAs are skipped. If ``skipna=False`` is specified and missing values are present, similar :ref:`Kleene logic <boolean.kleene>` is used as for logical operations. @@ -598,7 +604,7 @@ def all(self, *, skipna: bool = True, **kwargs): Exclude NA values. If the entire array is NA and `skipna` is True, then the result will be True, as for an empty array. If `skipna` is False, the result will still be False if there is - at least one element that is False, otherwise NA will be returned + at least one element that is falsey, otherwise NA will be returned if there are NA's present. **kwargs : any, default None Additional keywords have no effect but might be accepted for @@ -611,29 +617,37 @@ def all(self, *, skipna: bool = True, **kwargs): See Also -------- numpy.all : Numpy version of this method. - BooleanArray.any : Return whether any element is True.
+ BooleanArray.any : Return whether any element is truthy. Examples -------- - The result indicates whether any element is True (and by default + The result indicates whether all elements are truthy (and by default skips NAs): >>> pd.array([True, True, pd.NA]).all() True + >>> pd.array([1, 1, pd.NA]).all() + True >>> pd.array([True, False, pd.NA]).all() False >>> pd.array([], dtype="boolean").all() True >>> pd.array([pd.NA], dtype="boolean").all() True + >>> pd.array([pd.NA], dtype="Float64").all() + True With ``skipna=False``, the result can be NA if this is logically required (whether ``pd.NA`` is True or False influences the result): >>> pd.array([True, True, pd.NA]).all(skipna=False) <NA> + >>> pd.array([1, 1, pd.NA]).all(skipna=False) + <NA> >>> pd.array([True, False, pd.NA]).all(skipna=False) False + >>> pd.array([1, 0, pd.NA]).all(skipna=False) + False """ kwargs.pop("axis", None) nv.validate_all((), kwargs) From 6619d940e7191eea51454773e67204ce8074ab89 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 12 Jun 2021 16:56:51 -0400 Subject: [PATCH 3/5] Skip repeated tests --- pandas/tests/extension/test_boolean.py | 1 + pandas/tests/extension/test_floating.py | 1 + pandas/tests/extension/test_integer.py | 1 + 3 files changed, 3 insertions(+) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 172137ff3a5a2..b29d104bd2470 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -381,6 +381,7 @@ def check_reduce(self, s, op_name, skipna): tm.assert_almost_equal(result, expected) +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") class TestBooleanReduce(base.BaseBooleanReduceTests): pass diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index 617dfc694741e..f4d3243b5129f 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -211,6 +211,7 @@ def check_reduce(self,
s, op_name, skipna): tm.assert_almost_equal(result, expected) +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") class TestBooleanReduce(base.BaseBooleanReduceTests): pass diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 2305edc1e1327..2cf4f8e415770 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -243,6 +243,7 @@ def check_reduce(self, s, op_name, skipna): tm.assert_almost_equal(result, expected) +@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") class TestBooleanReduce(base.BaseBooleanReduceTests): pass From c16979aa3f88173dce583b256d1adab1b533a73e Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Fri, 18 Jun 2021 10:13:29 -0700 Subject: [PATCH 4/5] Some comments, adjust parameterization --- pandas/core/arrays/masked.py | 4 ++++ pandas/tests/reductions/test_reductions.py | 15 ++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 63d7e8849579d..b862795a207a2 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -524,6 +524,8 @@ def any(self, *, skipna: bool = True, **kwargs): missing values are present, similar :ref:`Kleene logic <boolean.kleene>` is used as for logical operations. + .. versionchanged:: 1.4.0 + Parameters ---------- skipna : bool, default True @@ -598,6 +600,8 @@ def all(self, *, skipna: bool = True, **kwargs): missing values are present, similar :ref:`Kleene logic <boolean.kleene>` is used as for logical operations. + ..
versionchanged:: 1.4.0 + Parameters ---------- skipna : bool, default True diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 2e78de95b4361..845fb4b2d897a 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -972,18 +972,19 @@ def test_any_all_object_dtype_missing(self, data, bool_agg_func): # [skipna=True/any, skipna=True/all]] "data,expected_data", [ - ([False, False, False], [[False, False], [False, False]]), - ([True, True, True], [[True, True], [True, True]]), - ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]), - ([False, pd.NA, False], [[pd.NA, False], [False, False]]), - ([True, pd.NA, True], [[True, pd.NA], [True, True]]), - ([True, pd.NA, False], [[True, False], [True, False]]), + ([0, 0, 0], [[0, 0], [0, 0]]), + ([1, 1, 1], [[1, 1], [1, 1]]), + ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [0, 1]]), + ([0, pd.NA, 0], [[pd.NA, 0], [0, 0]]), + ([1, pd.NA, 1], [[1, pd.NA], [1, 1]]), + ([1, pd.NA, 0], [[1, 0], [1, 0]]), ], ) def test_any_all_nullable_kleene_logic( self, bool_agg_func, skipna, data, dtype, expected_data ): - ser = Series(data, dtype="boolean").astype(dtype) + # GH-37506, GH-41967 + ser = Series(data, dtype=dtype) expected = expected_data[skipna][bool_agg_func == "all"] result = getattr(ser, bool_agg_func)(skipna=skipna) From 1490dc5afa56710e4dbdfb6f28c5dec8a2de68da Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 6 Sep 2021 22:42:13 -0400 Subject: [PATCH 5/5] Fix expected type of result --- pandas/tests/reductions/test_reductions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 31ca1581c216c..62aae33134f60 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -972,12 +972,12 @@ def test_any_all_object_dtype_missing(self, data, bool_agg_func): # 
[skipna=True/any, skipna=True/all]] "data,expected_data", [ - ([0, 0, 0], [[0, 0], [0, 0]]), - ([1, 1, 1], [[1, 1], [1, 1]]), - ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [0, 1]]), - ([0, pd.NA, 0], [[pd.NA, 0], [0, 0]]), - ([1, pd.NA, 1], [[1, pd.NA], [1, 1]]), - ([1, pd.NA, 0], [[1, 0], [1, 0]]), + ([0, 0, 0], [[False, False], [False, False]]), + ([1, 1, 1], [[True, True], [True, True]]), + ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]), + ([0, pd.NA, 0], [[pd.NA, False], [False, False]]), + ([1, pd.NA, 1], [[True, pd.NA], [True, True]]), + ([1, pd.NA, 0], [[True, False], [True, False]]), ], ) def test_any_all_nullable_kleene_logic(