REGR: Fixed AssertionError in groupby

TomAugspurger · TomAugspurger · commit f868874e86f0 · 2020-02-03T08:10:16.000-06:00
Closes pandas-dev#31522
diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst
@@ -19,6 +19,7 @@ Fixed regressions
 - Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containing a :class:`datetime.date` (:issue:`31501`)
 - Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`)
 - Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`)
+- Fixed regression in ``.groupby().agg()`` raising an ``AssertionError`` for some reductions like ``min`` on object-dtype columns (:issue:`31522`)
 - Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`)
 - Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`)
 - Fixed regression where setting :attr:`pd.options.display.max_colwidth` was not accepting a negative integer. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`)
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1061,8 +1061,16 @@ def _cython_agg_blocks(
                 else:
                     result = cast(DataFrame, result)
                     # unwrap DataFrame to get array
-                    assert len(result._data.blocks) == 1
-                    result = result._data.blocks[0].values
+                    if len(result._data.blocks) != 1:
+                        # An input (P, N)-shape block was split into P
+                        # (1, n_groups) blocks. This is problematic since it breaks
+                        # the assumption that one input block is aggregated
+                        # to one output block. We should be OK as long as
+                        # the split output can be put back into a single block below
+                        assert len(result._data.blocks) == result.shape[1]
+                        result = np.asarray(result)
+                    else:
+                        result = result._data.blocks[0].values
                     if isinstance(result, np.ndarray) and result.ndim == 1:
                         result = result.reshape(1, -1)
 
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -377,6 +377,30 @@ def test_agg_index_has_complex_internals(index):
     tm.assert_frame_equal(result, expected)
 
 
+def test_agg_split_block():
+    # https://github.com/pandas-dev/pandas/issues/31522
+    df = pd.DataFrame(
+        {
+            "key1": ["a", "a", "b", "b", "a"],
+            "key2": ["one", "two", "one", "two", "one"],
+            "key3": ["three", "three", "three", "six", "six"],
+            "data1": [0.0, 1, 2, 3, 4],
+            "data2": [0.0, 1, 2, 3, 4],
+        }
+    )
+    result = df.groupby("key1").min()
+    expected = pd.DataFrame(
+        {
+            "key2": ["one", "six"],
+            "key3": ["one", "six"],
+            "data1": [0.0, 2.0],
+            "data2": [0.0, 2.0],
+        },
+        index=pd.Index(["a", "b"], name="key1"),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 class TestNamedAggregationSeries:
     def test_series_named_agg(self):
         df = pd.Series([1, 2, 3, 4])