From 6e1514af147fa78fe39e5d08bcf0eb6d32b1c636 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 7 Feb 2021 20:29:59 +0100
Subject: [PATCH 1/9] Revert "REF: simplify _cython_agg_blocks (#35841)"

This reverts commit 0e199f3d490045d592715b3e95a0203729f17a0f.
---
 pandas/core/groupby/generic.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 7b6eb4c8fe2f9..02723bc99dc6d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1100,13 +1100,20 @@ def py_fallback(bvalues: ArrayLike) -> ArrayLike:
             # in the operation. We un-split here.
             result = result._consolidate()
             assert isinstance(result, (Series, DataFrame))  # for mypy
-            mgr = result._mgr
-            assert isinstance(mgr, BlockManager)
-            assert len(mgr.blocks) == 1
 
             # unwrap DataFrame to get array
-            result = mgr.blocks[0].values
-            return result
+            mgr = result._mgr
+            assert isinstance(mgr, BlockManager)
+            if len(mgr.blocks) != 1:
+                # We've split an object block! Everything we've assumed
+                # about a single block input returning a single block output
+                # is a lie. To keep the code-path for the typical non-split case
+                # clean, we choose to clean up this mess later on.
+                return mgr.as_array()
+            else:
+                assert len(mgr.blocks) == 1
+                result = mgr.blocks[0].values
+                return result
 
         def blk_func(bvalues: ArrayLike) -> ArrayLike:
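Context for the branch restored above: the regression in GH-39329 surfaces exactly at the removed `assert len(mgr.blocks) == 1`. The per-column fallback aggregation re-infers an all-NaN object column as float, so the consolidated result no longer fits in a single block and the assert fails. The sketch below is not part of the patch series; it reuses the simplified input from the tests added later in the series and assumes pandas 1.2.x internals (`DataFrame._mgr`, `BlockManager.blocks`, `BlockManager.as_array`).

    import numpy as np
    import pandas as pd

    # Simplified input from the tests added later in this series: two object
    # columns, one of them all-NaN, stored in a single object block.
    df = pd.DataFrame(
        {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4},
        dtype=object,
    )

    # A frame shaped like the per-column aggregation result: the all-NaN column
    # is re-inferred as float64 and can no longer share a block with the
    # remaining object column -- the "split" the comment in the diff refers to.
    res = pd.DataFrame({"col1": np.array(["a", "c"], dtype=object), "col2": [np.nan, np.nan]})
    print(res.dtypes)            # col1 object, col2 float64
    print(len(res._mgr.blocks))  # 2 -> mgr.blocks[0].values alone would drop a column
    print(res._mgr.as_array())   # a single 2-D array, which the new branch returns instead

    # With the guarded branch in place, the aggregation itself runs through:
    print(df.groupby("key").min())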
From 62cc348ee3aa1ca2027adb202d706e591d483e75 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 7 Feb 2021 20:34:57 +0100
Subject: [PATCH 2/9] add test case from issue

---
 .../tests/groupby/aggregate/test_aggregate.py | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index de4ef0996ad49..e55a63c0a52a3 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1187,3 +1187,31 @@ def test_aggregate_datetime_objects():
     result = df.groupby("A").B.max()
     expected = df.set_index("A")["B"]
     tm.assert_series_equal(result, expected)
+
+
+def test_aggregate_object_dtype_all_nan():
+    # https://github.com/pandas-dev/pandas/issues/39329
+
+    dates = pd.date_range("2020-01-01", periods=15, freq="D")
+    df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
+    df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
+    df = pd.concat([df1, df2], ignore_index=True)
+
+    result = df.groupby(["key"]).resample("W", on="date").min()
+    idx = MultiIndex.from_arrays(
+        [
+            ["A"] * 3 + ["B"] * 3,
+            pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2),
+        ],
+        names=["key", "date"],
+    )
+    expected = DataFrame(
+        {
+            "key": ["A"] * 3 + ["B"] * 3,
+            "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
+            "col1": [0, 5, 12] * 2,
+            "col_object": ["val"] * 3 + [np.nan] * 3,
+        },
+        index=idx,
+    )
+    tm.assert_frame_equal(result, expected)

From 9d0ea88fbd274173e86e466124337e83928d70bc Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 7 Feb 2021 20:52:09 +0100
Subject: [PATCH 3/9] add simpler groupby-only test case

---
 .../tests/groupby/aggregate/test_aggregate.py | 29 +++++--------------
 .../tests/resample/test_resampler_grouper.py  | 28 ++++++++++++++++++
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index e55a63c0a52a3..349267b741956 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1191,27 +1191,14 @@ def test_aggregate_datetime_objects():
 
 def test_aggregate_object_dtype_all_nan():
     # https://github.com/pandas-dev/pandas/issues/39329
-
-    dates = pd.date_range("2020-01-01", periods=15, freq="D")
-    df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
-    df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
-    df = pd.concat([df1, df2], ignore_index=True)
-
-    result = df.groupby(["key"]).resample("W", on="date").min()
-    idx = MultiIndex.from_arrays(
-        [
-            ["A"] * 3 + ["B"] * 3,
-            pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2),
-        ],
-        names=["key", "date"],
+    # simplified case: multiple object columns where one is all-NaN
+    # -> gets split as the all-NaN is inferred as float
+    df = DataFrame(
+        {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4},
+        dtype=object,
     )
+    result = df.groupby("key").min()
     expected = DataFrame(
-        {
-            "key": ["A"] * 3 + ["B"] * 3,
-            "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
-            "col1": [0, 5, 12] * 2,
-            "col_object": ["val"] * 3 + [np.nan] * 3,
-        },
-        index=idx,
-    )
+        {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]}
+    ).set_index("key")
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 1a10255a81a8c..30e00ac8afc6a 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -392,3 +392,31 @@ def test_resample_groupby_agg():
 
     result = resampled.agg({"num": "sum"})
     tm.assert_frame_equal(result, expected)
+
+
+def test_resample_groupby_agg_object_dtype_all_nan():
+    # https://github.com/pandas-dev/pandas/issues/39329
+
+    dates = pd.date_range("2020-01-01", periods=15, freq="D")
+    df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
+    df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
+    df = pd.concat([df1, df2], ignore_index=True)
+
+    result = df.groupby(["key"]).resample("W", on="date").min()
+    idx = pd.MultiIndex.from_arrays(
+        [
+            ["A"] * 3 + ["B"] * 3,
+            pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2),
+        ],
+        names=["key", "date"],
+    )
+    expected = DataFrame(
+        {
+            "key": ["A"] * 3 + ["B"] * 3,
+            "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
+            "col1": [0, 5, 12] * 2,
+            "col_object": ["val"] * 3 + [np.nan] * 3,
+        },
+        index=idx,
+    )
+    tm.assert_frame_equal(result, expected)
From 383bf50b3a0edfbe62b383491314c772cf779c66 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 7 Feb 2021 21:05:21 +0100
Subject: [PATCH 4/9] add whatsnew

---
 doc/source/whatsnew/v1.2.2.rst | 1 +
 pandas/core/groupby/generic.py | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst
index b6e81e44e6f08..ed77cbd411546 100644
--- a/doc/source/whatsnew/v1.2.2.rst
+++ b/doc/source/whatsnew/v1.2.2.rst
@@ -23,6 +23,7 @@ Fixed regressions
 - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
 - Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`)
 - Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`)
+- Fixed regression in :meth:`~DataFrame.groupby` or :meth:`~DataFrame.resample` when aggregating an all-NaN object dtype column (:issue:`39329`)
 - Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
 - Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was a non-path and non-buffer and the ``engine`` argument was specified (:issue:`39528`)
 -
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 02723bc99dc6d..12703a979b0b8 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1107,8 +1107,7 @@ def py_fallback(bvalues: ArrayLike) -> ArrayLike:
             if len(mgr.blocks) != 1:
                 # We've split an object block! Everything we've assumed
                 # about a single block input returning a single block output
-                # is a lie. To keep the code-path for the typical non-split case
-                # clean, we choose to clean up this mess later on.
+                # is a lie. See eg GH-39329
                 return mgr.as_array()
             else:
                 assert len(mgr.blocks) == 1

From a21c4eda75ff018caed09f072404503cd91da7cf Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 7 Feb 2021 21:08:57 +0100
Subject: [PATCH 5/9] add more generic numeric test case

---
 pandas/tests/groupby/aggregate/test_aggregate.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 349267b741956..48527de6b2047 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1189,16 +1189,25 @@ def test_aggregate_datetime_objects():
     tm.assert_series_equal(result, expected)
 
 
-def test_aggregate_object_dtype_all_nan():
+def test_aggregate_numeric_object_dtype():
     # https://github.com/pandas-dev/pandas/issues/39329
     # simplified case: multiple object columns where one is all-NaN
     # -> gets split as the all-NaN is inferred as float
     df = DataFrame(
         {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4},
-        dtype=object,
-    )
+    ).astype(object)
     result = df.groupby("key").min()
     expected = DataFrame(
         {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]}
     ).set_index("key")
     tm.assert_frame_equal(result, expected)
+
+    # same but with numbers
+    df = DataFrame(
+        {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)},
+    ).astype(object)
+    result = df.groupby("key").min()
+    expected = DataFrame(
+        {"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]}
+    ).set_index("key")
+    tm.assert_frame_equal(result, expected)
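The second half of this test motivates the whatsnew wording change in the next patch: an object column holding plain numbers is also re-inferred to a numeric dtype by the aggregation and splits off from its object neighbours, just like the all-NaN case. A short end-to-end illustration, not part of the patch series and assuming a pandas build with this fix applied:

    import pandas as pd

    df = pd.DataFrame(
        {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)},
    ).astype(object)

    # col1 stays object while col2 comes back numeric; before this fix the
    # mixed-dtype result hit the same single-block assumption in py_fallback.
    result = df.groupby("key").min()
    print(result.dtypes)
    print(result)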
From 9ae380ad376dfe0612c021c620167b2abbbcfa7e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 7 Feb 2021 21:10:12 +0100
Subject: [PATCH 6/9] update whatsnew message

---
 doc/source/whatsnew/v1.2.2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst
index ed77cbd411546..3cabe59744788 100644
--- a/doc/source/whatsnew/v1.2.2.rst
+++ b/doc/source/whatsnew/v1.2.2.rst
@@ -23,7 +23,7 @@ Fixed regressions
 - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
 - Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`)
 - Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`)
-- Fixed regression in :meth:`~DataFrame.groupby` or :meth:`~DataFrame.resample` when aggregating an all-NaN object dtype column (:issue:`39329`)
+- Fixed regression in :meth:`~DataFrame.groupby` or :meth:`~DataFrame.resample` when aggregating an all-NaN or numeric object dtype column (:issue:`39329`)
 - Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
 - Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was a non-path and non-buffer and the ``engine`` argument was specified (:issue:`39528`)
 -

From 7fb7108cbb04aafa225878428a1ed834eebbe03c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 7 Feb 2021 21:20:33 +0100
Subject: [PATCH 7/9] other whatsnew fix

---
 doc/source/whatsnew/v1.2.2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst
index 3cabe59744788..c9cbf33ff2ad9 100644
--- a/doc/source/whatsnew/v1.2.2.rst
+++ b/doc/source/whatsnew/v1.2.2.rst
@@ -24,7 +24,7 @@ Fixed regressions
 - Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`)
 - Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`)
 - Fixed regression in :meth:`~DataFrame.groupby` or :meth:`~DataFrame.resample` when aggregating an all-NaN or numeric object dtype column (:issue:`39329`)
-- Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
+- Fixed regression in :meth:`.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
 - Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was a non-path and non-buffer and the ``engine`` argument was specified (:issue:`39528`)
 -

From baae4a38019d786f444aa72e00f9de3b12cf20d0 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Mon, 8 Feb 2021 09:08:18 +0100
Subject: [PATCH 8/9] parametrize with consolidation

---
 pandas/tests/resample/test_resampler_grouper.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 30e00ac8afc6a..39d4533ca08dc 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -394,13 +394,16 @@ def test_resample_groupby_agg():
     tm.assert_frame_equal(result, expected)
 
 
-def test_resample_groupby_agg_object_dtype_all_nan():
+@pytest.mark.parametrize("consolidate", [True, False])
+def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
     # https://github.com/pandas-dev/pandas/issues/39329
 
     dates = pd.date_range("2020-01-01", periods=15, freq="D")
     df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
     df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
     df = pd.concat([df1, df2], ignore_index=True)
+    if consolidate:
+        df = df._consolidate()
 
     result = df.groupby(["key"]).resample("W", on="date").min()
     idx = pd.MultiIndex.from_arrays(
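A note on the parametrization added above: `pd.concat` can leave the fixture frame with several blocks of the same dtype, while `DataFrame._consolidate()` returns a copy with those blocks merged, so the two parametrizations exercise different internal block layouts. A small illustration, not part of the patch series and assuming pandas 1.2.x internals (`DataFrame._mgr`):

    import pandas as pd

    dates = pd.date_range("2020-01-01", periods=15, freq="D")
    df1 = pd.DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
    df2 = pd.DataFrame({"key": "B", "date": dates, "col1": range(15)})
    df = pd.concat([df1, df2], ignore_index=True)

    # concat does not necessarily merge same-dtype blocks; _consolidate()
    # returns a copy in which they are merged.
    print(len(df._mgr.blocks), len(df._consolidate()._mgr.blocks))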
From 277c0fede334876c96cd81004a536f5054c1815e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Mon, 8 Feb 2021 09:09:46 +0100
Subject: [PATCH 9/9] remove unnecessary assert

---
 pandas/core/groupby/generic.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 12703a979b0b8..04e9eb039c249 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1100,17 +1100,16 @@ def py_fallback(bvalues: ArrayLike) -> ArrayLike:
             # in the operation. We un-split here.
             result = result._consolidate()
             assert isinstance(result, (Series, DataFrame))  # for mypy
-
-            # unwrap DataFrame to get array
             mgr = result._mgr
             assert isinstance(mgr, BlockManager)
+
+            # unwrap DataFrame to get array
             if len(mgr.blocks) != 1:
                 # We've split an object block! Everything we've assumed
                 # about a single block input returning a single block output
                 # is a lie. See eg GH-39329
                 return mgr.as_array()
             else:
-                assert len(mgr.blocks) == 1
                 result = mgr.blocks[0].values
                 return result