From 315065d1a653d077dae65a909629c7ba12ae8b9a Mon Sep 17 00:00:00 2001 From: Linxiao Wu Date: Sat, 4 Apr 2020 10:27:58 -0400 Subject: [PATCH 1/7] BUG: fix boolean array skipna=False for .any() and .all() --- pandas/core/arrays/boolean.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 442d4ca8cef6d..86d5107f97c53 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -520,7 +520,7 @@ def any(self, skipna: bool = True, **kwargs): if skipna: return result else: - if result or len(self) == 0: + if result or len(self) == 0 or not self._mask.any(): return result else: return self.dtype.na_value @@ -587,7 +587,7 @@ def all(self, skipna: bool = True, **kwargs): if skipna: return result else: - if not result or len(self) == 0: + if not result or len(self) == 0 or not self._mask.any(): return result else: return self.dtype.na_value From 61010150029c4ca877336c58e880ce38925f1688 Mon Sep 17 00:00:00 2001 From: Linxiao Wu Date: Sat, 4 Apr 2020 13:05:38 -0400 Subject: [PATCH 2/7] add test in pandas/tests/arrays/boolean/test_reduction.py related to #33253 --- pandas/tests/arrays/boolean/test_reduction.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index ce50266c756a8..77563bd12a13e 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -19,6 +19,8 @@ def data(): ([False, pd.NA], False, False, pd.NA, False), ([pd.NA], False, True, pd.NA, pd.NA), ([], False, True, False, True), + ([True, True], True, True, True, True), + ([False, False], False, False, False, False) ], ) def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): From c84de13234ce9760e6e53bf70878134d2b011266 Mon Sep 17 00:00:00 2001 From: Linxiao Wu Date: Sat, 4 Apr 2020 16:06:41 -0400 Subject: [PATCH 3/7] running black to 
reformat in lint related to issue #33253 --- pandas/tests/arrays/boolean/test_reduction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index 77563bd12a13e..532db0b3601f6 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -20,7 +20,7 @@ def data(): ([pd.NA], False, True, pd.NA, pd.NA), ([], False, True, False, True), ([True, True], True, True, True, True), - ([False, False], False, False, False, False) + ([False, False], False, False, False, False), ], ) def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): From 1a08e617d710f0bfb3faee68d15d9a58eaa78b2a Mon Sep 17 00:00:00 2001 From: Linxiao Wu Date: Sun, 5 Apr 2020 17:52:27 -0400 Subject: [PATCH 4/7] add whatsnew and add comments to pandas/tests/arrays/boolean/test_reduction.py related to issue #33253 --- doc/source/whatsnew/v1.1.0.rst | 3 ++- pandas/tests/arrays/boolean/test_reduction.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8bff34dbdadad..bb9a26f7462ce 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -368,7 +368,8 @@ Missing ^^^^^^^ - Calling :meth:`fillna` on an empty Series now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`). - +- Bug in :meth:`array.any` incorrectly returns ```` for pandas.array of all ``False`` value, e.g. ``pd.array([False, False], dtype="boolean")``. Now it returns ``False`` (:issue:`33253`) +- Bug in :meth:`array.all` incorrectly returns ```` for pandas.array of all ``True`` value, e.g. ``pd.array([True, True], dtype="boolean")``. 
Now it returns ``True``(:issue:`33253`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index 532db0b3601f6..ad600ce022525 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -12,6 +12,7 @@ def data(): ) +# .any(), .all() returns for [False, False], [True, True], GH-33253. @pytest.mark.parametrize( "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", [ From 5d980ce6f3961e7e0b0e23dc2a39b632f367ad48 Mon Sep 17 00:00:00 2001 From: Linxiao Wu Date: Mon, 6 Apr 2020 12:54:56 -0400 Subject: [PATCH 5/7] Update: add whatsnew entry and comments in pandas/tests/arrays/boolean/test_reduction.py based on change suggestions related to issue #33253 --- doc/source/whatsnew/v1.1.0.rst | 3 +-- pandas/tests/arrays/boolean/test_reduction.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index fcdd2fada2d62..da21b181217e3 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -369,8 +369,7 @@ Missing ^^^^^^^ - Calling :meth:`fillna` on an empty Series now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`). -- Bug in :meth:`array.any` incorrectly returns ```` for pandas.array of all ``False`` value, e.g. ``pd.array([False, False], dtype="boolean")``. Now it returns ``False`` (:issue:`33253`) -- Bug in :meth:`array.all` incorrectly returns ```` for pandas.array of all ``True`` value, e.g. ``pd.array([True, True], dtype="boolean")``. 
Now it returns ``True``(:issue:`33253`) +- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nullable boolean dtype and with ``skipna=False`` (:issue:`33253`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index ad600ce022525..5dd5620162a8a 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -12,7 +12,6 @@ def data(): ) -# .any(), .all() returns for [False, False], [True, True], GH-33253. @pytest.mark.parametrize( "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", [ @@ -20,6 +19,7 @@ def data(): ([False, pd.NA], False, False, pd.NA, False), ([pd.NA], False, True, pd.NA, pd.NA), ([], False, True, False, True), + # GH-33253: all True / all False values buggy with skipna=False ([True, True], True, True, True, True), ([False, False], False, False, False, False), ], From 0b93cd4996282763307c6e11bd0a9b7e8d420b5f Mon Sep 17 00:00:00 2001 From: Linxiao Wu Date: Mon, 6 Apr 2020 22:53:43 -0400 Subject: [PATCH 6/7] add testcase similar to test_all_any_params related to issue #33253 --- pandas/tests/reductions/test_reductions.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 962b105d1e8fc..0549f06103a28 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -863,6 +863,20 @@ def test_all_any_params(self): with pytest.raises(NotImplementedError): s.all(bool_only=True) + def test_all_any_boolean(self): + # Check skipna, with boolean type + s1 = Series([pd.NA, True], dtype="boolean") + s2 = Series([pd.NA, False], dtype="boolean") + assert pd.isna(s1.all(skipna=False)) # NA && True => NA + assert s1.all(skipna=True) + assert pd.isna(s2.any(skipna=False)) # NA || False => NA + assert not 
s2.any(skipna=True) + # GH-33253: all True / all False values buggy with skipna=False + s3 = Series([True, True], dtype="boolean") + s4 = Series([False, False], dtype="boolean") + assert s3.all(skipna=False) + assert not s4.any(skipna=False) + def test_timedelta64_analytics(self): # index min/max From c7b76dfa4980b3b05f4b7cff1895f099a0a894d0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Apr 2020 09:52:52 +0200 Subject: [PATCH 7/7] add level test --- pandas/tests/reductions/test_reductions.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index fc916456fa6c0..fa62d5d8c4983 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -895,16 +895,26 @@ def test_all_any_boolean(self): # Check skipna, with boolean type s1 = Series([pd.NA, True], dtype="boolean") s2 = Series([pd.NA, False], dtype="boolean") - assert pd.isna(s1.all(skipna=False)) # NA && True => NA + assert s1.all(skipna=False) is pd.NA # NA && True => NA assert s1.all(skipna=True) - assert pd.isna(s2.any(skipna=False)) # NA || False => NA + assert s2.any(skipna=False) is pd.NA # NA || False => NA assert not s2.any(skipna=True) + # GH-33253: all True / all False values buggy with skipna=False s3 = Series([True, True], dtype="boolean") s4 = Series([False, False], dtype="boolean") assert s3.all(skipna=False) assert not s4.any(skipna=False) + # Check level TODO(GH-33449) result should also be boolean + s = pd.Series( + [False, False, True, True, False, True], + index=[0, 0, 1, 1, 2, 2], + dtype="boolean", + ) + tm.assert_series_equal(s.all(level=0), Series([False, True, False])) + tm.assert_series_equal(s.any(level=0), Series([False, True, True])) + def test_timedelta64_analytics(self): # index min/max