diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 8637d50745195..7a286188c4e74 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1136,19 +1136,24 @@ def group_rank(float64_t[:, ::1] out, This method modifies the `out` parameter rather than returning an object """ cdef: + Py_ssize_t i, k, N ndarray[float64_t, ndim=1] result - result = rank_1d( - values=values[:, 0], - labels=labels, - is_datetimelike=is_datetimelike, - ties_method=ties_method, - ascending=ascending, - pct=pct, - na_option=na_option - ) - for i in range(len(result)): - out[i, 0] = result[i] + N = values.shape[1] + + for k in range(N): + result = rank_1d( + values=values[:, k], + labels=labels, + is_datetimelike=is_datetimelike, + ties_method=ties_method, + ascending=ascending, + pct=pct, + na_option=na_option + ) + for i in range(len(result)): + # TODO: why cant we do out[:, k] = result? + out[i, k] = result[i] # ---------------------------------------------------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 18506b871bda6..c394390f051de 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -530,6 +530,26 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs ) + def _cython_transform( + self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + ): + assert axis == 0 # handled by caller + + obj = self._selected_obj + + is_numeric = is_numeric_dtype(obj.dtype) + if numeric_only and not is_numeric: + raise DataError("No numeric types to aggregate") + + try: + result = self.grouper._cython_operation( + "transform", obj._values, how, axis, **kwargs + ) + except (NotImplementedError, TypeError): + raise DataError("No numeric types to aggregate") + + return obj._constructor(result, index=self.obj.index, name=obj.name) + def _transform_general(self, func: Callable, *args, **kwargs) -> Series: """ Transform with a callable func`. @@ -1258,6 +1278,36 @@ def _wrap_applied_output_series( return self._reindex_output(result) + def _cython_transform( + self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs + ) -> DataFrame: + assert axis == 0 # handled by caller + # TODO: no tests with self.ndim == 1 for DataFrameGroupBy + + # With self.axis == 0, we have multi-block tests + # e.g. test_rank_min_int, test_cython_transform_frame + # test_transform_numeric_ret + # With self.axis == 1, _get_data_to_aggregate does a transpose + # so we always have a single block. + mgr: Manager2D = self._get_data_to_aggregate() + if numeric_only: + mgr = mgr.get_numeric_data(copy=False) + + def arr_func(bvalues: ArrayLike) -> ArrayLike: + return self.grouper._cython_operation( + "transform", bvalues, how, 1, **kwargs + ) + + # We could use `mgr.apply` here and not have to set_axis, but + # we would have to do shape gymnastics for ArrayManager compat + res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=True) + res_mgr.set_axis(1, mgr.axes[1]) + + res_df = self.obj._constructor(res_mgr) + if self.axis == 1: + res_df = res_df.T + return res_df + def _transform_general(self, func, *args, **kwargs): from pandas.core.reshape.concat import concat diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c5ef18c51a533..0d2be53dc3e0e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1361,32 +1361,10 @@ def _cython_agg_general( ): raise AbstractMethodError(self) - @final def _cython_transform( self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs ): - output: dict[base.OutputKey, ArrayLike] = {} - - for idx, obj in enumerate(self._iterate_slices()): - name = obj.name - is_numeric = is_numeric_dtype(obj.dtype) - if numeric_only and not is_numeric: - continue - - try: - result = self.grouper._cython_operation( - "transform", obj._values, how, axis, **kwargs - ) - except (NotImplementedError, TypeError): - continue - - key = base.OutputKey(label=name, position=idx) - output[key] = result - - if not output: - raise DataError("No numeric types to aggregate") - - return self._wrap_transformed_output(output) + raise AbstractMethodError(self) @final def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index d56c8c1e83ab4..18c36e4096b2b 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -51,6 +51,19 @@ def test_transform_groupby_kernel(axis, float_frame, op, request): result = float_frame.transform(op, axis, *args) tm.assert_frame_equal(result, expected) + # same thing, but ensuring we have multiple blocks + assert "E" not in float_frame.columns + float_frame["E"] = float_frame["A"].copy() + assert len(float_frame._mgr.arrays) > 1 + + if axis == 0 or axis == "index": + ones = np.ones(float_frame.shape[0]) + else: + ones = np.ones(float_frame.shape[1]) + expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args) + result2 = float_frame.transform(op, axis, *args) + tm.assert_frame_equal(result2, expected2) + @pytest.mark.parametrize( "ops, names", diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 20edf03c5b96c..aafdffba43388 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -584,21 +584,23 @@ def test_rank_multiindex(): # GH27721 df = concat( { - "a": DataFrame({"col1": [1, 2], "col2": [3, 4]}), + "a": DataFrame({"col1": [3, 4], "col2": [1, 2]}), "b": DataFrame({"col3": [5, 6], "col4": [7, 8]}), }, axis=1, ) - result = df.groupby(level=0, axis=1).rank(axis=1, ascending=False, method="first") + gb = df.groupby(level=0, axis=1) + result = gb.rank(axis=1) + expected = concat( - { - "a": DataFrame({"col1": [2.0, 2.0], "col2": [1.0, 1.0]}), - "b": DataFrame({"col3": [2.0, 2.0], "col4": [1.0, 1.0]}), - }, + [ + df["a"].rank(axis=1), + df["b"].rank(axis=1), + ], axis=1, + keys=["a", "b"], ) - tm.assert_frame_equal(result, expected) @@ -615,3 +617,24 @@ def test_groupby_axis0_rank_axis1(): # This should match what we get when "manually" operating group-by-group expected = concat([df.loc["a"].rank(axis=1), df.loc["b"].rank(axis=1)], axis=0) tm.assert_frame_equal(res, expected) + + # check that we haven't accidentally written a case that coincidentally + # matches rank(axis=0) + alt = gb.rank(axis=0) + assert not alt.equals(expected) + + +def test_groupby_axis0_cummax_axis1(): + # case where groupby axis is 0 and axis keyword in transform is 1 + + # df has mixed dtype -> multiple blocks + df = DataFrame( + {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]}, + index=["a", "a", "b", "b"], + ) + gb = df.groupby(level=0, axis=0) + + cmax = gb.cummax(axis=1) + expected = df[[0, 1]].astype(np.float64) + expected[2] = expected[1] + tm.assert_frame_equal(cmax, expected)