REF: Groupby._get_cythonized_result operate blockwise in axis==1 case (#43435)

jbrockmendel · AlexeyGy · commit d0f4646baa1c · 2021-09-13T16:32:10.000Z
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -3077,7 +3077,6 @@ def _get_cythonized_result(
         grouper = self.grouper
 
         ids, _, ngroups = grouper.group_info
-        output: dict[base.OutputKey, ArrayLike] = {}
 
         how = base_func.__name__
         base_func = partial(base_func, labels=ids)
@@ -3133,48 +3132,52 @@ def blk_func(values: ArrayLike) -> ArrayLike:
             return result.T
 
         obj = self._obj_with_exclusions
-        if obj.ndim == 2 and self.axis == 0:
-            # Operate block-wise instead of column-by-column
-            mgr = obj._mgr
-            if numeric_only:
-                mgr = mgr.get_numeric_data()
-
-            # setting ignore_failures=False for troubleshooting
-            res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=False)
-            output = type(obj)(res_mgr)
-            return self._wrap_aggregated_output(output)
-
-        error_msg = ""
-        for idx, obj in enumerate(self._iterate_slices()):
-            values = obj._values
-
-            if numeric_only and not is_numeric_dtype(values.dtype):
-                continue
 
-            try:
-                result = blk_func(values)
-            except TypeError as err:
-                error_msg = str(err)
-                howstr = how.replace("group_", "")
-                warnings.warn(
-                    "Dropping invalid columns in "
-                    f"{type(self).__name__}.{howstr} is deprecated. "
-                    "In a future version, a TypeError will be raised. "
-                    f"Before calling .{howstr}, select only columns which "
-                    "should be valid for the function.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                continue
+        # Operate block-wise instead of column-by-column
+        orig_ndim = obj.ndim
+        if orig_ndim == 1:
+            # Operate on DataFrame, then squeeze below
+            obj = obj.to_frame()
 
-            key = base.OutputKey(label=obj.name, position=idx)
-            output[key] = result
+        mgr = obj._mgr
+        if self.axis == 1:
+            mgr = obj.T._mgr
 
-        # error_msg is "" on an frame/series with no rows or columns
-        if not output and error_msg != "":
-            raise TypeError(error_msg)
+        if numeric_only:
+            mgr = mgr.get_numeric_data()
 
-        return self._wrap_aggregated_output(output)
+        res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)
+        if len(res_mgr.items) != len(mgr.items):
+            howstr = how.replace("group_", "")
+            warnings.warn(
+                "Dropping invalid columns in "
+                f"{type(self).__name__}.{howstr} is deprecated. "
+                "In a future version, a TypeError will be raised. "
+                f"Before calling .{howstr}, select only columns which "
+                "should be valid for the function.",
+                FutureWarning,
+                stacklevel=3,
+            )
+            if len(res_mgr.items) == 0:
+                # We re-call grouped_reduce to get the right exception message
+                try:
+                    mgr.grouped_reduce(blk_func, ignore_failures=False)
+                except Exception as err:
+                    error_msg = str(err)
+                    raise TypeError(error_msg)
+                # We should never get here
+                raise TypeError("All columns were dropped in grouped_reduce")
+
+        out = type(obj)(res_mgr)
+
+        if orig_ndim == 1:
+            assert out.ndim == 2
+            assert out.shape[1] == 1
+            out = out.iloc[:, 0]
+            # restore name=None in case to_frame set columns to [0]
+            out.name = self.obj.name
+
+        return self._wrap_aggregated_output(out)
 
     @final
     @Substitution(name="groupby")