From 77ee0fec5d10af68c6bed19cd5d668a6d605eb2b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 12 Nov 2020 15:47:32 -0800
Subject: [PATCH 1/3] BUG: DataFrame.all(bool_only=True) inconsistency with
 object dtype

---
 doc/source/whatsnew/v1.2.0.rst           | 56 ++++++++++++++++++++++++
 pandas/core/internals/blocks.py          | 14 ++++++
 pandas/core/internals/managers.py        | 19 +++++++-
 pandas/tests/frame/test_reductions.py    | 31 +++++++++++++
 pandas/tests/internals/test_internals.py |  2 +-
 5 files changed, 119 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index f751a91cecf19..619bbaf926c99 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -235,6 +235,62 @@ Other enhancements
 - Improve numerical stability for :meth:`Rolling.skew()`, :meth:`Rolling.kurt()`, :meth:`Expanding.skew()` and :meth:`Expanding.kurt()` through implementation of Kahan summation (:issue:`6929`)
 - Improved error reporting for subsetting columns of a :class:`DataFrameGroupBy` with ``axis=1`` (:issue:`37725`)
 
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_120.notable_bug_fixes:
+
+Notable bug fixes
+~~~~~~~~~~~~~~~~~
+
+These are bug fixes that might have notable behavior changes.
+
+Consistency of DataFrame Reductions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+:meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True`` now
+determines whether to exclude object-dtype columns on a column-by-column basis,
+instead of checking if *all* object-dtype columns can be considered boolean.
+
+This prevents pathological behavior where applying the reduction on a subset
+of columns could result in a larger :class:`Series` result.
+
+.. ipython:: python
+
+    df = pd.DataFrame({"A": ["foo", "bar"], "B": [True, False]}, dtype=object)
+    df["C"] = pd.Series([True, True])
+
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+    In [5]: df.all(bool_only=True)
+    Out[5]:
+    C    True
+    dtype: bool
+
+    In [6]: df[["B", "C"]].all(bool_only=True)
+    Out[6]:
+    B    False
+    C    True
+    dtype: bool
+
+*New behavior*:
+
+.. code-block:: ipython
+
+    In [5]: df.all(bool_only=True)
+    Out[5]:
+    B    False
+    C     True
+    dtype: bool
+
+    In [6]: df[["B", "C"]].all(bool_only=True)
+    Out[6]:
+    B    False
+    C     True
+    dtype: bool
+
+
 .. _whatsnew_120.api_breaking.python:
 
 Increased minimum version for Python
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index ed77a210b6913..d3f629f985b98 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -448,6 +448,20 @@ def f(mask, val, idx):
 
         return self.split_and_operate(None, f, inplace)
 
+    def _split(self) -> List["Block"]:
+        """
+        Split a block into a list of single-column blocks.
+        """
+        assert self.ndim == 2
+
+        new_blocks = []
+        for i, ref_loc in enumerate(self.mgr_locs):
+            vals = self.values[[i]]
+
+            nb = self.make_block(vals, [ref_loc])
+            new_blocks.append(nb)
+        return new_blocks
+
     def split_and_operate(self, mask, f, inplace: bool) -> List["Block"]:
         """
         split the block per-column, and apply the callable f
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 767c653f8a404..155d88d6ec2d9 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -713,13 +713,28 @@ def is_view(self) -> bool:
 
     def get_bool_data(self, copy: bool = False) -> "BlockManager":
         """
+        Select blocks that are bool-dtype and columns from object-dtype blocks
+        that are all-bool.
+
         Parameters
         ----------
         copy : bool, default False
             Whether to copy the blocks
         """
-        self._consolidate_inplace()
-        return self._combine([b for b in self.blocks if b.is_bool], copy)
+
+        new_blocks = []
+
+        for blk in self.blocks:
+            if blk.dtype == bool:
+                new_blocks.append(blk)
+
+            elif blk.is_object:
+                nbs = blk._split()
+                for nb in nbs:
+                    if nb.is_bool:
+                        new_blocks.append(nb)
+
+        return self._combine(new_blocks, copy)
 
     def get_numeric_data(self, copy: bool = False) -> "BlockManager":
         """
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 374d185f45844..95cb770a11408 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -1118,6 +1118,37 @@ def test_any_all_object(self):
         result = np.any(DataFrame(columns=["a", "b"])).item()
         assert result is False
 
+    def test_any_all_object_bool_only(self):
+        df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
+        df._consolidate_inplace()
+        df["C"] = Series([True, True])
+
+        # The underlying bug is in DataFrame._get_bool_data, so we check
+        #  that while we're here
+        res = df._get_bool_data()
+        expected = df[["B", "C"]]
+        tm.assert_frame_equal(res, expected)
+
+        res = df.all(bool_only=True, axis=0)
+        expected = Series([False, True], index=["B", "C"])
+        tm.assert_series_equal(res, expected)
+
+        # operating on a subset of columns should not produce a _larger_ Series
+        res = df[["B", "C"]].all(bool_only=True, axis=0)
+        tm.assert_series_equal(res, expected)
+
+        assert not df.all(bool_only=True, axis=None)
+
+        res = df.any(bool_only=True, axis=0)
+        expected = Series([True, True], index=["B", "C"])
+        tm.assert_series_equal(res, expected)
+
+        # operating on a subset of columns should not produce a _larger_ Series
+        res = df[["B", "C"]].any(bool_only=True, axis=0)
+        tm.assert_series_equal(res, expected)
+
+        assert df.any(bool_only=True, axis=None)
+
     @pytest.mark.parametrize("method", ["any", "all"])
     def test_any_all_level_axis_none_raises(self, method):
         df = DataFrame(
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 88b91ecc79060..842298b3cdc7f 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -667,7 +667,7 @@ def test_get_bool_data(self):
         mgr.iset(6, np.array([True, False, True], dtype=np.object_))
 
         bools = mgr.get_bool_data()
-        tm.assert_index_equal(bools.items, Index(["bool"]))
+        tm.assert_index_equal(bools.items, Index(["bool", "dt"]))
         tm.assert_almost_equal(
             mgr.iget(mgr.items.get_loc("bool")).internal_values(),
             bools.iget(bools.items.get_loc("bool")).internal_values(),

From 359abd987884163b0f8bf4201001c7a24c0f6f8a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 12 Nov 2020 21:11:05 -0800
Subject: [PATCH 2/3] GH ref in whatsnew

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 1b32be873aaaf..ef3e936fa4ae9 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -251,7 +251,7 @@ determines whether to exclude object-dtype columns on a column-by-column basis,
 instead of checking if *all* object-dtype columns can be considered boolean.
 
 This prevents pathological behavior where applying the reduction on a subset
-of columns could result in a larger :class:`Series` result.
+of columns could result in a larger :class:`Series` result. See (:issue:`37799`).
 
 .. ipython:: python
 

From 76a97eb8d408801d586f3f22c48c3baf51e32f78 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 13 Nov 2020 08:34:56 -0800
Subject: [PATCH 3/3] whatsnew edit, test

---
 doc/source/whatsnew/v1.2.0.rst           | 10 +---------
 pandas/core/internals/blocks.py          |  2 +-
 pandas/tests/internals/test_internals.py | 19 +++++++++++++++++++
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 3c6d8b55c0001..4dedf9771955d 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -277,19 +277,11 @@ of columns could result in a larger :class:`Series` result. See (:issue:`37799`)
 
 *New behavior*:
 
-.. code-block:: ipython
+.. ipython:: python
 
     In [5]: df.all(bool_only=True)
-    Out[5]:
-    B    False
-    C     True
-    dtype: bool
 
     In [6]: df[["B", "C"]].all(bool_only=True)
-    Out[6]:
-    B    False
-    C     True
-    dtype: bool
 
 
 .. _whatsnew_120.api_breaking.python:
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 581f7f273f0f4..92f6bb6f1cbdd 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -458,7 +458,7 @@ def _split(self) -> List["Block"]:
 
         new_blocks = []
         for i, ref_loc in enumerate(self.mgr_locs):
-            vals = self.values[[i]]
+            vals = self.values[slice(i, i + 1)]
 
             nb = self.make_block(vals, [ref_loc])
             new_blocks.append(nb)
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 842298b3cdc7f..d069b5aa08e22 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -264,6 +264,25 @@ def test_delete(self):
         with pytest.raises(IndexError, match=None):
             newb.delete(3)
 
+    def test_split(self):
+        # GH#37799
+        values = np.random.randn(3, 4)
+        blk = make_block(values, placement=[3, 1, 6])
+        result = blk._split()
+
+        # check that we get views, not copies
+        values[:] = -9999
+        assert (blk.values == -9999).all()
+
+        assert len(result) == 3
+        expected = [
+            make_block(values[[0]], placement=[3]),
+            make_block(values[[1]], placement=[1]),
+            make_block(values[[2]], placement=[6]),
+        ]
+        for res, exp in zip(result, expected):
+            assert_block_equal(res, exp)
+
 
 class TestBlockManager:
     def test_attrs(self):