Skip to content

REF/TYP: implement NDFrameApply #41067

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 28, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 38 additions & 31 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,6 @@ def f(x):
self.orig_f: AggFuncType = func
self.f: AggFuncType = f

@property
def index(self) -> Index:
return self.obj.index

@property
def agg_axis(self) -> Index:
return self.obj._get_agg_axis(self.axis)

@abc.abstractmethod
def apply(self) -> FrameOrSeriesUnion:
pass
Expand All @@ -163,9 +155,8 @@ def agg(self) -> FrameOrSeriesUnion | None:
args = self.args
kwargs = self.kwargs

result = self.maybe_apply_str()
if result is not None:
return result
if isinstance(arg, str):
return self.maybe_apply_str()

if is_dict_like(arg):
return self.agg_dict_like()
Expand Down Expand Up @@ -465,27 +456,20 @@ def agg_dict_like(self) -> FrameOrSeriesUnion:

return result

def maybe_apply_str(self) -> FrameOrSeriesUnion | None:
def maybe_apply_str(self) -> FrameOrSeriesUnion:
"""
Compute apply in case of a string.

Returns
-------
result: Series, DataFrame, or None
Result when self.f is a string, None otherwise.
result: Series or DataFrame
"""
# Caller is responsible for checking isinstance(self.f, str)
f = self.f
if not isinstance(f, str):
return None
Comment on lines +467 to -479
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessarily opposed, but why this change? My original thinking was to have each method determine whether or not it would take action rather than the caller - this way that logic is easily shared between the subclasses.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when going through these and seeing

result = self.foo()
if result is not None:
    return result

[...]

it isnt obvious what cases lead to result = None. e.g. before i checked, i imagined it could have been something like

def foo(self):
    try:
        [...]
    except Bar:
        return None

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Looking at this now, I see my usage of maybe_* is a bit different than other places in code, where maybe an operation is applied to data but the data is always returned regardless. The downside is that the condition then needs repeated, but it is simple enough (at least at this point) that I don't find that concerning at all.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this change should be applied to maybe_apply_multiple, and the maybe prefix should be dropped. I'm happy to take care of those in a followup.

f = cast(str, f)

obj = self.obj

# TODO: GH 39993 - Avoid special-casing by replacing with lambda
if f == "size" and isinstance(obj, ABCDataFrame):
# Special-cased because DataFrame.size returns a single scalar
value = obj.shape[self.axis]
return obj._constructor_sliced(value, index=self.agg_axis, name="size")

# Support for `frame.transform('method')`
# Some methods (shift, etc.) require the axis argument, others
# don't, so inspect and insert if necessary.
Expand Down Expand Up @@ -587,7 +571,22 @@ def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs):
)


class FrameApply(Apply):
class NDFrameApply(Apply):
"""
Methods shared by FrameApply and SeriesApply but
not GroupByApply or ResamplerWindowApply
"""

@property
def index(self) -> Index:
return self.obj.index

@property
def agg_axis(self) -> Index:
return self.obj._get_agg_axis(self.axis)


class FrameApply(NDFrameApply):
obj: DataFrame

# ---------------------------------------------------------------
Expand Down Expand Up @@ -644,9 +643,8 @@ def apply(self) -> FrameOrSeriesUnion:
return self.apply_empty_result()

# string dispatch
result = self.maybe_apply_str()
if result is not None:
return result
if isinstance(self.f, str):
return self.maybe_apply_str()

# ufunc
elif isinstance(self.f, np.ufunc):
Expand Down Expand Up @@ -831,6 +829,16 @@ def wrap_results(self, results: ResType, res_index: Index) -> FrameOrSeriesUnion

return result

def maybe_apply_str(self) -> FrameOrSeriesUnion:
# Caller is responsible for checking isinstance(self.f, str)
# TODO: GH#39993 - Avoid special-casing by replacing with lambda
if self.f == "size":
# Special-cased because DataFrame.size returns a single scalar
obj = self.obj
value = obj.shape[self.axis]
return obj._constructor_sliced(value, index=self.agg_axis, name="size")
return super().maybe_apply_str()


class FrameRowApply(FrameApply):
axis = 0
Expand Down Expand Up @@ -967,7 +975,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
return result


class SeriesApply(Apply):
class SeriesApply(NDFrameApply):
obj: Series
axis = 0

Expand Down Expand Up @@ -1001,10 +1009,9 @@ def apply(self) -> FrameOrSeriesUnion:
if result is not None:
return result

# if we are a string, try to dispatch
result = self.maybe_apply_str()
if result is not None:
return result
if isinstance(self.f, str):
# if we are a string, try to dispatch
return self.maybe_apply_str()

return self.apply_standard()

Expand Down