pandas-dev · jreback · May 10, 2021 · May 6, 2021
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -1136,19 +1136,24 @@ def group_rank(float64_t[:, ::1] out,
     This method modifies the `out` parameter rather than returning an object
     """
     cdef:
+        Py_ssize_t i, k, N
         ndarray[float64_t, ndim=1] result
 
-    result = rank_1d(
-        values=values[:, 0],
-        labels=labels,
-        is_datetimelike=is_datetimelike,
-        ties_method=ties_method,
-        ascending=ascending,
-        pct=pct,
-        na_option=na_option
-    )
-    for i in range(len(result)):
-        out[i, 0] = result[i]
+    N = values.shape[1]
+
+    for k in range(N):
+        result = rank_1d(
+            values=values[:, k],
+            labels=labels,
+            is_datetimelike=is_datetimelike,
+            ties_method=ties_method,
+            ascending=ascending,
+            pct=pct,
+            na_option=na_option
+        )
+        for i in range(len(result)):
+            # TODO: why can't we do out[:, k] = result?
+            out[i, k] = result[i]
 
 
 # ----------------------------------------------------------------------

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -530,6 +530,26 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
             func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
         )
 
+    def _cython_transform(
+        self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
+    ) -> Series:
+        assert axis == 0  # handled by caller
+
+        obj = self._selected_obj
+
+        is_numeric = is_numeric_dtype(obj.dtype)
+        if numeric_only and not is_numeric:
+            raise DataError("No numeric types to aggregate")
+
+        try:
+            result = self.grouper._cython_operation(
+                "transform", obj._values, how, axis, **kwargs
+            )
+        except (NotImplementedError, TypeError) as err:  # chain cause into DataError
+            raise DataError("No numeric types to aggregate") from err
+
+        return obj._constructor(result, index=self.obj.index, name=obj.name)
+
     def _transform_general(self, func: Callable, *args, **kwargs) -> Series:
         """
         Transform with a callable func`.
@@ -1258,6 +1278,36 @@ def _wrap_applied_output_series(
 
         return self._reindex_output(result)
 
+    def _cython_transform(
+        self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
+    ) -> DataFrame:
+        assert axis == 0  # handled by caller
+        # TODO: no tests with self.ndim == 1 for DataFrameGroupBy
+
+        # With self.axis == 0, we have multi-block tests
+        #  e.g. test_rank_min_int, test_cython_transform_frame
+        #  test_transform_numeric_ret
+        # With self.axis == 1, _get_data_to_aggregate does a transpose
+        #  so we always have a single block.
+        mgr: Manager2D = self._get_data_to_aggregate()
+        if numeric_only:
+            mgr = mgr.get_numeric_data(copy=False)
+
+        def arr_func(bvalues: ArrayLike) -> ArrayLike:
+            return self.grouper._cython_operation(
+                "transform", bvalues, how, 1, **kwargs
+            )
+
+        # We could use `mgr.apply` here and not have to set_axis, but
+        #  we would have to do shape gymnastics for ArrayManager compat
+        res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True)
+        res_mgr.set_axis(1, mgr.axes[1])
+
+        res_df = self.obj._constructor(res_mgr)
+        if self.axis == 1:
+            res_df = res_df.T  # undo the transpose from _get_data_to_aggregate
+        return res_df
+
     def _transform_general(self, func, *args, **kwargs):
         from pandas.core.reshape.concat import concat
 

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1361,32 +1361,10 @@ def _cython_agg_general(
     ):
         raise AbstractMethodError(self)
 
-    @final
     def _cython_transform(
         self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
     ):
-        output: dict[base.OutputKey, ArrayLike] = {}
-
-        for idx, obj in enumerate(self._iterate_slices()):
-            name = obj.name
-            is_numeric = is_numeric_dtype(obj.dtype)
-            if numeric_only and not is_numeric:
-                continue
-
-            try:
-                result = self.grouper._cython_operation(
-                    "transform", obj._values, how, axis, **kwargs
-                )
-            except (NotImplementedError, TypeError):
-                continue
-
-            key = base.OutputKey(label=name, position=idx)
-            output[key] = result
-
-        if not output:
-            raise DataError("No numeric types to aggregate")
-
-        return self._wrap_transformed_output(output)
+        raise AbstractMethodError(self)
 
     @final
     def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):

diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py
@@ -51,6 +51,19 @@ def test_transform_groupby_kernel(axis, float_frame, op, request):
     result = float_frame.transform(op, axis, *args)
     tm.assert_frame_equal(result, expected)
 
+    # same thing, but ensuring we have multiple blocks
+    assert "E" not in float_frame.columns
+    float_frame["E"] = float_frame["A"].copy()
+    assert len(float_frame._mgr.arrays) > 1
+
+    if axis == 0 or axis == "index":
+        ones = np.ones(float_frame.shape[0])
+    else:
+        ones = np.ones(float_frame.shape[1])
+    expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args)
+    result2 = float_frame.transform(op, axis, *args)
+    tm.assert_frame_equal(result2, expected2)
+
 
 @pytest.mark.parametrize(
     "ops, names",

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
@@ -584,21 +584,23 @@ def test_rank_multiindex():
     # GH27721
     df = concat(
         {
-            "a": DataFrame({"col1": [1, 2], "col2": [3, 4]}),
+            "a": DataFrame({"col1": [3, 4], "col2": [1, 2]}),
             "b": DataFrame({"col3": [5, 6], "col4": [7, 8]}),
         },
         axis=1,
     )
 
-    result = df.groupby(level=0, axis=1).rank(axis=1, ascending=False, method="first")
+    gb = df.groupby(level=0, axis=1)
+    result = gb.rank(axis=1)
+
     expected = concat(
-        {
-            "a": DataFrame({"col1": [2.0, 2.0], "col2": [1.0, 1.0]}),
-            "b": DataFrame({"col3": [2.0, 2.0], "col4": [1.0, 1.0]}),
-        },
+        [
+            df["a"].rank(axis=1),
+            df["b"].rank(axis=1),
+        ],
         axis=1,
+        keys=["a", "b"],
     )
-
     tm.assert_frame_equal(result, expected)
 
 
@@ -615,3 +617,24 @@ def test_groupby_axis0_rank_axis1():
     # This should match what we get when "manually" operating group-by-group
     expected = concat([df.loc["a"].rank(axis=1), df.loc["b"].rank(axis=1)], axis=0)
     tm.assert_frame_equal(res, expected)
+
+    # check that we haven't accidentally written a case that coincidentally
+    # matches rank(axis=0)
+    alt = gb.rank(axis=0)
+    assert not alt.equals(expected)
+
+
+def test_groupby_axis0_cummax_axis1():
+    # case where groupby axis is 0 and axis keyword in transform is 1
+
+    # df has mixed dtype -> multiple blocks
+    df = DataFrame(
+        {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]},
+        index=["a", "a", "b", "b"],
+    )
+    gb = df.groupby(level=0, axis=0)
+
+    cmax = gb.cummax(axis=1)
+    expected = df[[0, 1]].astype(np.float64)  # col 0 < col 1: values unchanged
+    expected[2] = expected[1]  # col 2 < col 1, so cummax carries col 1 over
+    tm.assert_frame_equal(cmax, expected)