Skip to content

REF: Apply.apply_multiple #53362

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 46 additions & 5 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ def frame_apply(
elif axis == 1:
klass = FrameColumnApply

_, func, _, _ = reconstruct_func(func, **kwargs)
assert func is not None

return klass(
obj,
func,
Expand All @@ -103,11 +106,13 @@ def frame_apply(

class Apply(metaclass=abc.ABCMeta):
axis: AxisInt
orig_f: AggFuncType
f: AggFuncType

def __init__(
self,
obj: AggObjType,
func,
func: AggFuncType,
raw: bool,
result_type: str | None,
*,
Expand All @@ -127,6 +132,7 @@ def __init__(

self.result_type = result_type

f: AggFuncType
# curry if needed
if (
(kwargs or args)
Expand All @@ -135,13 +141,14 @@ def __init__(
):

def f(x):
assert callable(func)
return func(x, *args, **kwargs)

else:
f = func

self.orig_f: AggFuncType = func
self.f: AggFuncType = f
self.orig_f = func
self.f = f

@abc.abstractmethod
def apply(self) -> DataFrame | Series:
Expand Down Expand Up @@ -554,7 +561,20 @@ def apply_multiple(self) -> DataFrame | Series:
result: Series, DataFrame, or None
Result when self.f is a list-like or dict-like, None otherwise.
"""
return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs)
if self.axis == 1 and isinstance(self.obj, ABCDataFrame):
return self.obj.T.apply(self.f, 0, *self.args, **self.kwargs).T
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs to be args=self.args, not *self.args. Can you add a test here?

Copy link
Contributor Author

@topper-123 topper-123 May 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, good catch. Done.


func = self.f
kwargs = self.kwargs

if is_dict_like(func):
result = self.agg_dict_like()
else:
result = self.agg_list_like()

result = reconstruct_and_relabel_result(result, func, **kwargs)

return result

def normalize_dictlike_arg(
self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict
Expand Down Expand Up @@ -1201,7 +1221,7 @@ def transform(self):

def reconstruct_func(
func: AggFuncType | None, **kwargs
) -> tuple[bool, AggFuncType | None, list[str] | None, npt.NDArray[np.intp] | None]:
) -> tuple[bool, AggFuncType, list[str] | None, npt.NDArray[np.intp] | None]:
"""
This is the internal function to reconstruct func given if there is relabeling
or not and also normalize the keyword to get new order of columns.
Expand Down Expand Up @@ -1256,6 +1276,7 @@ def reconstruct_func(

if relabeling:
func, columns, order = normalize_keyword_aggregation(kwargs)
assert func is not None

return relabeling, func, columns, order

Expand Down Expand Up @@ -1445,6 +1466,26 @@ def relabel_result(
return reordered_result_in_dict


def reconstruct_and_relabel_result(result, func, **kwargs) -> DataFrame | Series:
    """
    Relabel an aggregation result if ``reconstruct_func`` reports relabeling.

    ``func`` and ``kwargs`` are handed to ``reconstruct_func``. When it signals
    relabeling, ``result`` is passed through ``relabel_result`` and wrapped in
    a DataFrame indexed by the returned columns; otherwise ``result`` is
    returned untouched.
    """
    from pandas import DataFrame

    relabeling, func, columns, order = reconstruct_func(func, **kwargs)

    if not relabeling:
        # Nothing to relabel: hand the aggregation result back as-is.
        return result

    # Relabeling preserves both the order in which the original columns occur
    # and the order in which the new columns occur.

    # reconstruct_func only returns None for columns and order when
    # relabeling is False, so both must be set on this path.
    assert columns is not None
    assert order is not None

    relabeled = relabel_result(result, func, columns, order)
    return DataFrame(relabeled, index=columns)


# TODO: Can't use, because mypy doesn't like us setting __name__
# error: "partial[Any]" has no attribute "__name__"
# the type is:
Expand Down
21 changes: 2 additions & 19 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,7 @@
roperator,
)
from pandas.core.accessor import CachedAccessor
from pandas.core.apply import (
reconstruct_func,
relabel_result,
)
from pandas.core.apply import reconstruct_and_relabel_result
from pandas.core.array_algos.take import take_2d_multi
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import (
Expand Down Expand Up @@ -9579,23 +9576,9 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):

axis = self._get_axis_number(axis)

relabeling, func, columns, order = reconstruct_func(func, **kwargs)

op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
result = op.agg()

if relabeling:
# This is to keep the order to columns occurrence unchanged, and also
# keep the order of new columns occurrence unchanged

# For the return values of reconstruct_func, if relabeling is
# False, columns and order will be None.
assert columns is not None
assert order is not None

result_in_dict = relabel_result(result, func, columns, order)
result = DataFrame(result_in_dict, index=columns)

result = reconstruct_and_relabel_result(result, func, **kwargs)
return result

agg = aggregate
Expand Down