From d72f1be64f920248599d0b8ed6187d6fc228147c Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 24 May 2023 00:47:57 +0100 Subject: [PATCH 1/4] REF: Apply.apply_multiple --- pandas/core/apply.py | 37 ++++++++++++++++++++++++++++++++++++- pandas/core/frame.py | 21 ++------------------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 020aa4e8916da..a19f518aa2407 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -91,6 +91,8 @@ def frame_apply( elif axis == 1: klass = FrameColumnApply + _, func, _, _ = reconstruct_func(func, **kwargs) + return klass( obj, func, @@ -554,7 +556,20 @@ def apply_multiple(self) -> DataFrame | Series: result: Series, DataFrame, or None Result when self.f is a list-like or dict-like, None otherwise. """ - return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs) + if self.axis == 1 and isinstance(self.obj, ABCDataFrame): + return self.obj.T.apply(self.f, 0, *self.args, **self.kwargs).T + + func = self.f + kwargs = self.kwargs + + if is_dict_like(func): + result = self.agg_dict_like() + else: + result = self.agg_list_like() + + result = reconstruct_and_relabel_result(result, func, **kwargs) + + return result def normalize_dictlike_arg( self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict @@ -1445,6 +1460,26 @@ def relabel_result( return reordered_result_in_dict +def reconstruct_and_relabel_result(result, func, **kwargs) -> DataFrame | Series: + from pandas import DataFrame + + relabeling, func, columns, order = reconstruct_func(func, **kwargs) + + if relabeling: + # This is to keep the order to columns occurrence unchanged, and also + # keep the order of new columns occurrence unchanged + + # For the return values of reconstruct_func, if relabeling is + # False, columns and order will be None. + assert columns is not None + assert order is not None + + result_in_dict = relabel_result(result, func, columns, order) + result = DataFrame(result_in_dict, index=columns) + + return result + + # TODO: Can't use, because mypy doesn't like us setting __name__ # error: "partial[Any]" has no attribute "__name__" # the type is: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8afb3ee96ba94..ac7667afbea5f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -122,10 +122,7 @@ roperator, ) from pandas.core.accessor import CachedAccessor -from pandas.core.apply import ( - reconstruct_func, - relabel_result, -) +from pandas.core.apply import reconstruct_and_relabel_result from pandas.core.array_algos.take import take_2d_multi from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ( @@ -9579,23 +9576,9 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): axis = self._get_axis_number(axis) - relabeling, func, columns, order = reconstruct_func(func, **kwargs) - op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) result = op.agg() - - if relabeling: - # This is to keep the order to columns occurrence unchanged, and also - # keep the order of new columns occurrence unchanged - - # For the return values of reconstruct_func, if relabeling is - # False, columns and order will be None. - assert columns is not None - assert order is not None - - result_in_dict = relabel_result(result, func, columns, order) - result = DataFrame(result_in_dict, index=columns) - + result = reconstruct_and_relabel_result(result, func, **kwargs) return result agg = aggregate From e25b1ddd85174f5853a20ead03bf724ba5d1f6a5 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 24 May 2023 07:13:54 +0100 Subject: [PATCH 2/4] fix typing --- pandas/core/apply.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a19f518aa2407..4408c9e92d4e3 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -92,6 +92,7 @@ def frame_apply( klass = FrameColumnApply _, func, _, _ = reconstruct_func(func, **kwargs) + assert func is not None return klass( obj, @@ -105,11 +106,13 @@ def frame_apply( class Apply(metaclass=abc.ABCMeta): axis: AxisInt + orig_f: AggFuncType + f: AggFuncType def __init__( self, obj: AggObjType, - func, + func: AggFuncType, raw: bool, result_type: str | None, *, @@ -129,6 +132,7 @@ def __init__( self.result_type = result_type + f: AggFuncType # curry if needed if ( (kwargs or args) @@ -137,13 +141,14 @@ def __init__( ): def f(x): + assert callable(func) return func(x, *args, **kwargs) else: f = func - self.orig_f: AggFuncType = func - self.f: AggFuncType = f + self.orig_f = func + self.f = f @abc.abstractmethod def apply(self) -> DataFrame | Series: @@ -1216,7 +1221,7 @@ def transform(self): def reconstruct_func( func: AggFuncType | None, **kwargs -) -> tuple[bool, AggFuncType | None, list[str] | None, npt.NDArray[np.intp] | None]: +) -> tuple[bool, AggFuncType, list[str] | None, npt.NDArray[np.intp] | None]: """ This is the internal function to reconstruct func given if there is relabeling or not and also normalize the keyword to get new order of columns. @@ -1271,6 +1276,7 @@ def reconstruct_func( if relabeling: func, columns, order = normalize_keyword_aggregation(kwargs) + assert func is not None return relabeling, func, columns, order From d5fb4c476cb4e348874e2945f9c206556777ed6d Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 24 May 2023 16:11:23 +0100 Subject: [PATCH 3/4] add test --- pandas/core/apply.py | 2 +- pandas/tests/apply/test_frame_apply.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 4408c9e92d4e3..dfbe97430eeef 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -562,7 +562,7 @@ def apply_multiple(self) -> DataFrame | Series: Result when self.f is a list-like or dict-like, None otherwise. """ if self.axis == 1 and isinstance(self.obj, ABCDataFrame): - return self.obj.T.apply(self.f, 0, *self.args, **self.kwargs).T + return self.obj.T.apply(self.f, 0, args=self.args, **self.kwargs).T func = self.f kwargs = self.kwargs diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 80292ac6641ac..29850c747d3bc 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -36,6 +36,12 @@ def test_apply(float_frame): assert result[d] == expected assert result.index is float_frame.index +@pytest.mark.parametrize("axis", [0, 1]) +def test_apply_args(float_frame, axis): + result = float_frame.apply(lambda x, y: x + y, axis, args=(1,)) + expected = float_frame + 1 + tm.assert_frame_equal(result, expected) + def test_apply_categorical_func(): # GH 9573 From 999256af84ec8bff76f57a7fa5fe88356d10b8b8 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 24 May 2023 17:48:59 +0100 Subject: [PATCH 4/4] fix pre-commit --- pandas/tests/apply/test_frame_apply.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 29850c747d3bc..fc8b57d26a5be 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -36,9 +36,10 @@ def test_apply(float_frame): assert result[d] == expected assert result.index is float_frame.index + @pytest.mark.parametrize("axis", [0, 1]) def test_apply_args(float_frame, axis): - result = float_frame.apply(lambda x, y: x + y, axis, args=(1,)) + result = float_frame.apply(lambda x, y: x + y, axis, args=(1,)) expected = float_frame + 1 tm.assert_frame_equal(result, expected)