
REF: _cython_agg_blocks #31752


Closed
wants to merge 4 commits

88 changes: 43 additions & 45 deletions pandas/core/groupby/generic.py
@@ -1022,9 +1022,32 @@ def _cython_agg_blocks(
        agg_blocks: List[Block] = []
        new_items: List[np.ndarray] = []
        deleted_items: List[np.ndarray] = []
        # Some object-dtype blocks might be split into List[Block[T], Block[U]]
        split_items: List[np.ndarray] = []
        split_frames: List[DataFrame] = []

        def _recast_result(result, values):
Contributor: Can you move this somewhere else? pandas.core.dtypes.cast, maybe.

            # see if we can cast the block back to the original dtype
            assert not isinstance(result, DataFrame)
            assert result is not no_result

            result = maybe_downcast_numeric(result, values.dtype)

            if not isinstance(values, np.ndarray) and isinstance(result, np.ndarray):
                # e.g. block.values was an IntegerArray
                # (1, N) case can occur if block.values was Categorical
                # and result is ndarray[object]
                assert result.ndim == 1 or result.shape[0] == 1
                try:
                    # Cast back if feasible
                    result = type(values)._from_sequence(
                        result.ravel(), dtype=values.dtype
                    )
                except ValueError:
                    # reshape to be valid for non-Extension Block
                    result = result.reshape(1, -1)

            elif isinstance(result, np.ndarray) and result.ndim == 1:
                result = result.reshape(1, -1)

            return result
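
For context, a minimal sketch of the round trip this helper performs (the `values`/`result` names below are illustrative, not part of the PR): a cython aggregation over an extension-array block typically hands back a plain ndarray, and `_from_sequence` is used to get back to the block's original array type.

```python
import numpy as np
import pandas as pd

# Illustrative only: the block originally held a nullable-integer array.
values = pd.array([1, 2, 3, 4], dtype="Int64")

# A cython aggregation usually returns a plain float ndarray.
result = np.array([3.0, 7.0])

# Mimic the cast-back step: reconstruct the original ExtensionArray type.
recast = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
print(recast.dtype)  # Int64
```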

        no_result = object()
        for block in data.blocks:
@@ -1048,6 +1071,7 @@ def _cython_agg_blocks(
                    continue

                # call our grouper again with only this block
                # TODO: will this mess up if we have duplicate columns?
                obj = self.obj[data.items[locs]]
                if obj.shape[1] == 1:
                    # Avoid call to self.values that can occur in DataFrame
@@ -1063,58 +1087,32 @@ def _cython_agg_blocks(
                    deleted_items.append(locs)
                    continue
                else:
                    if isinstance(result, Series):
Member: Hmm, why is this required here?

Member (author): We have test cases that get here with a Series, and without this the cast just below would be inaccurate.

Member: Hmm, that's unfortunate. I don't want to go too far down the typing road on this one, but maybe we should be using assert ... instead of cast.
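
For illustration, a minimal sketch of the assert-vs-cast distinction being discussed (the function names are made up, not from the PR):

```python
from typing import cast

import pandas as pd


def with_cast(result: object) -> int:
    # typing.cast is a no-op at runtime; it only tells the type checker
    # to trust the annotation, so a wrong assumption passes silently.
    frame = cast(pd.DataFrame, result)
    return frame.shape[1]


def with_assert(result: object) -> int:
    # assert isinstance(...) narrows the type for mypy and also fails
    # loudly at runtime if the assumption is wrong.
    assert isinstance(result, pd.DataFrame)
    return result.shape[1]
```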

Member: Hmm, reveal_type(result) entering this block is object. We probably need to take control of the type of result before this. Does adding type annotations to _recast_result help?

Member: It looks like, since aggregate in SelectionMixin does not have type annotations, the return type of s.aggregate(lambda x: alt(x, axis=self.axis)) is Any, and hence, as result is object, no type narrowing occurs. reveal_type(s) is 'pandas.core.groupby.groupby.GroupBy', so it's just aggregate that needs the annotations.
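
A small sketch of the typing issue described above; agg_untyped and agg_typed are made-up stand-ins for an un-annotated vs. annotated SelectionMixin.aggregate:

```python
from typing import Union

import pandas as pd


def agg_untyped(obj):
    # No annotations: mypy treats calls to this function as returning Any,
    # so downstream isinstance checks narrow nothing useful.
    return obj.sum()


def agg_typed(obj: pd.DataFrame) -> Union[pd.Series, pd.DataFrame]:
    # With a declared return type, mypy can narrow the result via
    # isinstance(result, pd.Series) / isinstance(result, pd.DataFrame).
    return obj.sum()


result = agg_typed(pd.DataFrame({"a": [1, 2], "b": [3, 4]}))
if isinstance(result, pd.Series):
    print(result.index)  # mypy knows this branch sees a Series
```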

                        result = result.to_frame()

                    result = cast(DataFrame, result)
                    # unwrap DataFrame to get array
                    if len(result._data.blocks) != 1:
                        # We've split an object block! Everything we've assumed
                        # about a single block input returning a single block output
                        # is a lie. To keep the code-path for the typical non-split case
                        # clean, we choose to clean up this mess later on.
                        split_items.append(locs)
                        split_frames.append(result)
                        continue

                    assert len(result._data.blocks) == 1
                    result = result._data.blocks[0].values
                    if isinstance(result, np.ndarray) and result.ndim == 1:
                        result = result.reshape(1, -1)

            assert not isinstance(result, DataFrame)

            if result is not no_result:
                # see if we can cast the block back to the original dtype
                result = maybe_downcast_numeric(result, block.dtype)

                if block.is_extension and isinstance(result, np.ndarray):
                    # e.g. block.values was an IntegerArray
                    # (1, N) case can occur if block.values was Categorical
                    # and result is ndarray[object]
                    assert result.ndim == 1 or result.shape[0] == 1
                    try:
                        # Cast back if feasible
                        result = type(block.values)._from_sequence(
                            result.ravel(), dtype=block.values.dtype
                        )
                    except ValueError:
                        # reshape to be valid for non-Extension Block
                        result = result.reshape(1, -1)
                    for i, col in enumerate(result.columns):
Member: Perhaps I'm misinterpreting, but doesn't this have a huge performance penalty? IIUC column iteration was tried back in #28782 and performance took a pretty big hit.

Member (author): The actual aggregation isn't being done column-wise; this is just re-assembling new blocks.
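
One rough way to check this concern locally (the frame shape and the use of %timeit are only a suggestion, not from the PR): a wide, single-block float frame is the shape where per-column aggregation in #28782 was reported to be expensive, whereas here the cython aggregation still runs blockwise and only DataFrame results from the fallback path are re-assembled column by column.

```python
import numpy as np
import pandas as pd

# Wide, single-block float frame plus a grouping key.
df = pd.DataFrame(np.random.randn(100_000, 200))
df["key"] = np.random.randint(0, 100, len(df))

# In IPython, compare before/after this branch:
# %timeit df.groupby("key").mean()
```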

                        nb = result.iloc[:, [i]]._data.blocks[0]
                        loc = data.items.get_loc(col)
                        # FIXME: requires unique? GH#31735
                        res = _recast_result(nb.values, data.iget(loc).blocks[0].values)
                        nb2 = make_block(res, placement=[loc], ndim=2)
                        agg_blocks.append(nb2)

            else:
                assert not isinstance(result, DataFrame)
                assert result is not no_result
                result = _recast_result(result, block.values)
                agg_block: Block = block.make_block(result)
                agg_blocks.append(agg_block)

            new_items.append(locs)
            agg_blocks.append(agg_block)

        if not (agg_blocks or split_frames):
        if not agg_blocks:
            raise DataError("No numeric types to aggregate")

        if split_items:
            # Clean up the mess left over from split blocks.
            for locs, result in zip(split_items, split_frames):
                assert len(locs) == result.shape[1]
                for i, loc in enumerate(locs):
                    new_items.append(np.array([loc], dtype=locs.dtype))
                    agg_blocks.append(result.iloc[:, [i]]._data.blocks[0])

        # reset the locs in the blocks to correspond to our
        # current ordering
        indexer = np.concatenate(new_items)
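
For reference, a minimal sketch of the kind of input that reaches the split-block path cleaned up above (the column names are arbitrary): two object-dtype columns share one Block, "min" has no object-dtype cython implementation, so the per-block fallback aggregates through a DataFrame that can come back holding more than one block.

```python
import pandas as pd

# Two object-dtype columns consolidated into a single Block.
df = pd.DataFrame(
    {
        "key": ["a", "a", "b"],
        "x": ["foo", "bar", "baz"],
        "y": ["one", "two", "two"],
    }
)

# "min" falls back to a python-level aggregation for object dtype; the
# intermediate DataFrame result may hold more than one block, exercising
# the split_items/split_frames cleanup.
print(df.groupby("key").min())
```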