Skip to content

Commit 3414542

Browse files
rhshadrach authored and luckyvs1 committed
REF: Move Series.apply/agg into apply (pandas-dev#39061)
1 parent 84bfabd commit 3414542

File tree

2 files changed

+157
-90
lines changed

2 files changed

+157
-90
lines changed

pandas/core/apply.py

+151-45
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from pandas._config import option_context
1010

11+
from pandas._libs import lib
1112
from pandas._typing import (
1213
AggFuncType,
1314
AggFuncTypeBase,
@@ -26,7 +27,10 @@
2627
from pandas.core.dtypes.generic import ABCSeries
2728

2829
from pandas.core.aggregation import agg_dict_like, agg_list_like
29-
from pandas.core.construction import create_series_with_explicit_dtype
30+
from pandas.core.construction import (
31+
array as pd_array,
32+
create_series_with_explicit_dtype,
33+
)
3034

3135
if TYPE_CHECKING:
3236
from pandas import DataFrame, Index, Series
@@ -63,12 +67,30 @@ def frame_apply(
6367
)
6468

6569

70+
def series_apply(
    obj: Series,
    how: str,
    func: AggFuncType,
    convert_dtype: bool = True,
    args=None,
    kwds=None,
) -> SeriesApply:
    """
    Build the ``SeriesApply`` operation object for ``obj``.

    Every argument is forwarded unchanged to the ``SeriesApply``
    constructor; nothing is computed here. Call ``get_result()`` on the
    returned object to run the operation.
    """
    # Keyword form makes the one-to-one forwarding explicit.
    return SeriesApply(
        obj=obj,
        how=how,
        func=func,
        convert_dtype=convert_dtype,
        args=args,
        kwds=kwds,
    )
86+
87+
6688
class Apply(metaclass=abc.ABCMeta):
6789
axis: int
6890

6991
def __init__(
7092
self,
71-
obj: DataFrame,
93+
obj: FrameOrSeriesUnion,
7294
how: str,
7395
func,
7496
raw: bool,
@@ -110,12 +132,62 @@ def f(x):
110132
def index(self) -> Index:
111133
return self.obj.index
112134

113-
@abc.abstractmethod
114135
def get_result(self):
    """
    Run the operation selected by ``self.how``.

    ``"apply"`` dispatches to ``self.apply()``; any other value
    dispatches to ``self.agg()``.
    """
    handler = self.apply if self.how == "apply" else self.agg
    return handler()
140+
141+
@abc.abstractmethod
def apply(self) -> FrameOrSeriesUnion:
    """Compute the result of the "apply" operation; subclasses implement this."""
116144

145+
def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]:
    """
    Provide an implementation for the aggregators.

    Returns
    -------
    tuple of (result, how)

    Notes
    -----
    ``result`` is ``None`` when none of the branches below handled the
    function, which lets the caller react with its own fallback.
    ``how`` is ``True`` for the dict-like branch and for the unhandled
    case, and ``None`` for the string, list-like and cython-shortcut
    branches.
    """
    target = self.obj
    func = self.f
    positional = self.args
    keywords = self.kwds

    # NOTE: this pops "_axis" out of ``self.kwds`` itself (no copy is made),
    # so the key is gone from that dict once this method has run.
    axis = keywords.pop("_axis", None)
    if axis is None:
        axis = getattr(target, "axis", 0)

    if isinstance(func, str):
        outcome = target._try_aggregate_string_function(
            func, *positional, **keywords
        )
        return outcome, None

    if is_dict_like(func):
        func = cast(AggFuncTypeDict, func)
        return agg_dict_like(target, func, axis), True

    if is_list_like(func):
        # we require a list, but not a 'str'
        func = cast(List[AggFuncTypeBase], func)
        return agg_list_like(target, func, _axis=axis), None

    if callable(func):
        cython_name = target._get_cython_func(func)
        # The shortcut is only taken when no extra arguments were supplied.
        if cython_name and not positional and not keywords:
            return getattr(target, cython_name)(), None

    # caller can react
    return None, True
186+
117187

118188
class FrameApply(Apply):
189+
obj: DataFrame
190+
119191
# ---------------------------------------------------------------
120192
# Abstract Methods
121193

@@ -168,48 +240,6 @@ def get_result(self):
168240
else:
169241
return self.agg()
170242

171-
def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]:
172-
"""
173-
Provide an implementation for the aggregators.
174-
175-
Returns
176-
-------
177-
tuple of result, how.
178-
179-
Notes
180-
-----
181-
how can be a string describe the required post-processing, or
182-
None if not required.
183-
"""
184-
obj = self.obj
185-
arg = self.f
186-
args = self.args
187-
kwargs = self.kwds
188-
189-
_axis = kwargs.pop("_axis", None)
190-
if _axis is None:
191-
_axis = getattr(obj, "axis", 0)
192-
193-
if isinstance(arg, str):
194-
return obj._try_aggregate_string_function(arg, *args, **kwargs), None
195-
elif is_dict_like(arg):
196-
arg = cast(AggFuncTypeDict, arg)
197-
return agg_dict_like(obj, arg, _axis), True
198-
elif is_list_like(arg):
199-
# we require a list, but not a 'str'
200-
arg = cast(List[AggFuncTypeBase], arg)
201-
return agg_list_like(obj, arg, _axis=_axis), None
202-
else:
203-
result = None
204-
205-
if callable(arg):
206-
f = obj._get_cython_func(arg)
207-
if f and not args and not kwargs:
208-
return getattr(obj, f)(), None
209-
210-
# caller can react
211-
return result, True
212-
213243
def apply(self) -> FrameOrSeriesUnion:
214244
""" compute the results """
215245
# dispatch to agg
@@ -531,3 +561,79 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
531561
result = result.infer_objects()
532562

533563
return result
564+
565+
566+
class SeriesApply(Apply):
    """
    ``Apply`` implementation for a ``Series``.

    ``apply`` handles empty input, list/dict functions and string
    functions itself, and falls back to an element-wise mapping in
    ``apply_standard`` for everything else.
    """

    obj: Series
    axis = 0

    def __init__(
        self,
        obj: Series,
        how: str,
        func: AggFuncType,
        convert_dtype: bool,
        args,
        kwds,
    ):
        # Stored before delegating to the base initializer; read later by
        # ``apply_standard`` when mapping values.
        self.convert_dtype = convert_dtype

        super().__init__(
            obj,
            how,
            func,
            raw=False,
            result_type=None,
            args=args,
            kwds=kwds,
        )

    def apply(self) -> FrameOrSeriesUnion:
        """Compute the result of applying ``self.f`` to the Series."""
        series = self.obj
        func = self.f
        positional = self.args
        keywords = self.kwds

        if len(series) == 0:
            return self.apply_empty_result()

        # dispatch to agg
        if isinstance(func, (list, dict)):
            return series.aggregate(func, *positional, **keywords)

        # if we are a string, try to dispatch
        if isinstance(func, str):
            return series._try_aggregate_string_function(
                func, *positional, **keywords
            )

        return self.apply_standard()

    def apply_empty_result(self) -> Series:
        """Return a new object with the same dtype and index as the empty input."""
        series = self.obj
        empty = series._constructor(dtype=series.dtype, index=series.index)
        return empty.__finalize__(series, method="apply")

    def apply_standard(self) -> FrameOrSeriesUnion:
        """
        Apply ``self.f`` to the Series.

        A NumPy ufunc is called on the whole Series at once. Any other
        function is mapped over the values; if the first mapped value is
        itself a Series, the result is built with ``_constructor_expanddim``
        instead of ``_constructor``.
        """
        func = self.f
        series = self.obj

        with np.errstate(all="ignore"):
            if isinstance(func, np.ufunc):
                return func(series)

            # row-wise access
            has_ea_map = is_extension_array_dtype(series.dtype) and hasattr(
                series._values, "map"
            )
            if has_ea_map:
                # GH#23179 some EAs do not have `map`
                mapped = series._values.map(func)
            else:
                as_objects = series.astype(object)._values
                mapped = lib.map_infer(
                    as_objects, func, convert=self.convert_dtype
                )

        if not len(mapped) or not isinstance(mapped[0], ABCSeries):
            flat = series._constructor(mapped, index=series.index)
            return flat.__finalize__(series, method="apply")

        # GH 25959 use pd.array instead of tolist
        # so extension arrays can be used
        return series._constructor_expanddim(pd_array(mapped), index=series.index)

pandas/core/series.py

+6-45
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,13 @@
7171

7272
from pandas.core import algorithms, base, generic, missing, nanops, ops
7373
from pandas.core.accessor import CachedAccessor
74-
from pandas.core.aggregation import aggregate, transform
74+
from pandas.core.aggregation import transform
75+
from pandas.core.apply import series_apply
7576
from pandas.core.arrays import ExtensionArray
7677
from pandas.core.arrays.categorical import CategoricalAccessor
7778
from pandas.core.arrays.sparse import SparseAccessor
7879
import pandas.core.common as com
7980
from pandas.core.construction import (
80-
array as pd_array,
8181
create_series_with_explicit_dtype,
8282
extract_array,
8383
is_empty_data,
@@ -3944,7 +3944,8 @@ def aggregate(self, func=None, axis=0, *args, **kwargs):
39443944
if func is None:
39453945
func = dict(kwargs.items())
39463946

3947-
result, how = aggregate(self, func, *args, **kwargs)
3947+
op = series_apply(self, "agg", func, args=args, kwds=kwargs)
3948+
result, how = op.get_result()
39483949
if result is None:
39493950

39503951
# we can be called from an inner function which
@@ -4076,48 +4077,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
40764077
Helsinki 2.484907
40774078
dtype: float64
40784079
"""
4079-
if len(self) == 0:
4080-
return self._constructor(dtype=self.dtype, index=self.index).__finalize__(
4081-
self, method="apply"
4082-
)
4083-
4084-
# dispatch to agg
4085-
if isinstance(func, (list, dict)):
4086-
return self.aggregate(func, *args, **kwds)
4087-
4088-
# if we are a string, try to dispatch
4089-
if isinstance(func, str):
4090-
return self._try_aggregate_string_function(func, *args, **kwds)
4091-
4092-
# handle ufuncs and lambdas
4093-
if kwds or args and not isinstance(func, np.ufunc):
4094-
4095-
def f(x):
4096-
return func(x, *args, **kwds)
4097-
4098-
else:
4099-
f = func
4100-
4101-
with np.errstate(all="ignore"):
4102-
if isinstance(f, np.ufunc):
4103-
return f(self)
4104-
4105-
# row-wise access
4106-
if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
4107-
# GH#23179 some EAs do not have `map`
4108-
mapped = self._values.map(f)
4109-
else:
4110-
values = self.astype(object)._values
4111-
mapped = lib.map_infer(values, f, convert=convert_dtype)
4112-
4113-
if len(mapped) and isinstance(mapped[0], Series):
4114-
# GH 25959 use pd.array instead of tolist
4115-
# so extension arrays can be used
4116-
return self._constructor_expanddim(pd_array(mapped), index=self.index)
4117-
else:
4118-
return self._constructor(mapped, index=self.index).__finalize__(
4119-
self, method="apply"
4120-
)
4080+
op = series_apply(self, "apply", func, convert_dtype, args, kwds)
4081+
return op.get_result()
41214082

41224083
def _reduce(
41234084
self,

0 commit comments

Comments
 (0)