From 86c42c56e00bff787af53838c114172a205f3b7f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 29 May 2023 10:13:30 -0400 Subject: [PATCH 1/2] REF/CLN: func in core.apply --- pandas/core/apply.py | 157 ++++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 77 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index c03f1a268906e..318f87b8f6528 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -106,8 +106,7 @@ def frame_apply( class Apply(metaclass=abc.ABCMeta): axis: AxisInt - orig_f: AggFuncType - f: AggFuncType + func: AggFuncType def __init__( self, @@ -132,19 +131,7 @@ def __init__( self.result_type = result_type - f: AggFuncType - # curry if needed - if callable(func) and (kwargs or args) and not isinstance(func, np.ufunc): - - def f(x): - assert callable(func) # needed for mypy - return func(x, *args, **kwargs) - - else: - f = func - - self.orig_f = func - self.f = f + self.func = func @abc.abstractmethod def apply(self) -> DataFrame | Series: @@ -160,21 +147,21 @@ def agg(self) -> DataFrame | Series | None: this method. """ obj = self.obj - arg = self.f + func = self.func args = self.args kwargs = self.kwargs - if isinstance(arg, str): + if isinstance(func, str): return self.apply_str() - if is_dict_like(arg): + if is_dict_like(func): return self.agg_dict_like() - elif is_list_like(arg): + elif is_list_like(func): # we require a list, but not a 'str' return self.agg_list_like() - if callable(arg): - f = com.get_cython_func(arg) + if callable(func): + f = com.get_cython_func(func) if f and not args and not kwargs: return getattr(obj, f)() @@ -197,7 +184,7 @@ def transform(self) -> DataFrame | Series: If the transform function fails or does not transform. """ obj = self.obj - func = self.orig_f + func = self.func axis = self.axis args = self.args kwargs = self.kwargs @@ -309,7 +296,7 @@ def agg_list_like(self) -> DataFrame | Series: from pandas.core.reshape.concat import concat obj = self.obj - arg = cast(List[AggFuncTypeBase], self.f) + func = cast(List[AggFuncTypeBase], self.func) if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") @@ -340,7 +327,7 @@ def agg_list_like(self) -> DataFrame | Series: with context_manager: # degenerate case if selected_obj.ndim == 1: - for a in arg: + for a in func: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) new_res = colg.aggregate(a, *this_args, **self.kwargs) results.append(new_res) @@ -353,7 +340,7 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(arg, *this_args, **self.kwargs) + new_res = colg.aggregate(func, *this_args, **self.kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) @@ -388,7 +375,7 @@ def agg_dict_like(self) -> DataFrame | Series: from pandas.core.reshape.concat import concat obj = self.obj - arg = cast(AggFuncTypeDict, self.f) + func = cast(AggFuncTypeDict, self.func) if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") @@ -401,7 +388,7 @@ def agg_dict_like(self) -> DataFrame | Series: selected_obj = obj._selected_obj selection = obj._selection - arg = self.normalize_dictlike_arg("agg", selected_obj, arg) + func = self.normalize_dictlike_arg("agg", selected_obj, func) is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) context_manager: ContextManager @@ -421,14 +408,14 @@ def agg_dict_like(self) -> DataFrame | Series: if selected_obj.ndim == 1: # key only used for output colg = obj._gotitem(selection, ndim=1) - result_data = [colg.agg(how) for _, how in arg.items()] - result_index = list(arg.keys()) + result_data = [colg.agg(how) for _, how in func.items()] + result_index = list(func.keys()) elif is_non_unique_col: # key used for column selection and output # GH#51099 result_data = [] result_index = [] - for key, how in arg.items(): + for key, how in func.items(): indices = selected_obj.columns.get_indexer_for([key]) labels = selected_obj.columns.take(indices) label_to_indices = defaultdict(list) @@ -446,9 +433,9 @@ def agg_dict_like(self) -> DataFrame | Series: else: # key used for column selection and output result_data = [ - obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items() + obj._gotitem(key, ndim=1).agg(how) for key, how in func.items() ] - result_index = list(arg.keys()) + result_index = list(func.keys()) # Avoid making two isinstance calls in all and any below is_ndframe = [isinstance(r, ABCNDFrame) for r in result_data] @@ -503,7 +490,7 @@ def apply_str(self) -> DataFrame | Series: result: Series or DataFrame """ # Caller is responsible for checking isinstance(self.f, str) - f = cast(str, self.f) + func = cast(str, self.func) obj = self.obj @@ -515,14 +502,14 @@ def apply_str(self) -> DataFrame | Series: # Support for `frame.transform('method')` # Some methods (shift, etc.) require the axis argument, others # don't, so inspect and insert if necessary. - func = getattr(obj, f, None) - if callable(func): - sig = inspect.getfullargspec(func) + method = getattr(obj, func, None) + if callable(method): + sig = inspect.getfullargspec(method) arg_names = (*sig.args, *sig.kwonlyargs) if self.axis != 0 and ( - "axis" not in arg_names or f in ("corrwith", "skew") + "axis" not in arg_names or func in ("corrwith", "skew") ): - raise ValueError(f"Operation {f} does not support axis=1") + raise ValueError(f"Operation {func} does not support axis=1") if "axis" in arg_names: if isinstance(obj, (SeriesGroupBy, DataFrameGroupBy)): # Try to avoid FutureWarning for deprecated axis keyword; @@ -530,7 +517,7 @@ def apply_str(self) -> DataFrame | Series: # axis, we safely exclude the keyword. default_axis = 0 - if f in ["idxmax", "idxmin"]: + if func in ["idxmax", "idxmin"]: # DataFrameGroupBy.idxmax, idxmin axis defaults to self.axis, # whereas other axis keywords default to 0 default_axis = self.obj.axis @@ -539,7 +526,7 @@ def apply_str(self) -> DataFrame | Series: self.kwargs["axis"] = self.axis else: self.kwargs["axis"] = self.axis - return self._apply_str(obj, f, *self.args, **self.kwargs) + return self._apply_str(obj, func, *self.args, **self.kwargs) def apply_multiple(self) -> DataFrame | Series: """ @@ -548,12 +535,12 @@ def apply_multiple(self) -> DataFrame | Series: Returns ------- result: Series, DataFrame, or None - Result when self.f is a list-like or dict-like, None otherwise. + Result when self.func is a list-like or dict-like, None otherwise. """ if self.axis == 1 and isinstance(self.obj, ABCDataFrame): - return self.obj.T.apply(self.f, 0, args=self.args, **self.kwargs).T + return self.obj.T.apply(self.func, 0, args=self.args, **self.kwargs).T - func = self.f + func = self.func kwargs = self.kwargs if is_dict_like(func): @@ -610,17 +597,17 @@ def normalize_dictlike_arg( func = new_func return func - def _apply_str(self, obj, arg: str, *args, **kwargs): + def _apply_str(self, obj, func: str, *args, **kwargs): """ if arg is a string, then try to operate on it: - try to find a function (or attribute) on obj - try to find a numpy function - raise """ - assert isinstance(arg, str) + assert isinstance(func, str) - if hasattr(obj, arg): - f = getattr(obj, arg) + if hasattr(obj, func): + f = getattr(obj, func) if callable(f): return f(*args, **kwargs) @@ -629,12 +616,12 @@ def _apply_str(self, obj, arg: str, *args, **kwargs): assert len(args) == 0 assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0 return f - elif hasattr(np, arg) and hasattr(obj, "__array__"): + elif hasattr(np, func) and hasattr(obj, "__array__"): # in particular exclude Window - f = getattr(np, arg) + f = getattr(np, func) return f(obj, *args, **kwargs) else: - msg = f"'{arg}' is not a valid function for '{type(obj).__name__}' object" + msg = f"'{func}' is not a valid function for '{type(obj).__name__}' object" raise AttributeError(msg) @@ -699,7 +686,7 @@ def values(self): def apply(self) -> DataFrame | Series: """compute the results""" # dispatch to agg - if is_list_like(self.f): + if is_list_like(self.func): return self.apply_multiple() # all empty @@ -707,13 +694,13 @@ def apply(self) -> DataFrame | Series: return self.apply_empty_result() # string dispatch - if isinstance(self.f, str): + if isinstance(self.func, str): return self.apply_str() # ufunc - elif isinstance(self.f, np.ufunc): + elif isinstance(self.func, np.ufunc): with np.errstate(all="ignore"): - results = self.obj._mgr.apply("apply", func=self.f) + results = self.obj._mgr.apply("apply", func=self.func) # _constructor will retain self.index and self.columns return self.obj._constructor(data=results) @@ -750,7 +737,7 @@ def agg(self): result = result.T if result is not None else result if result is None: - result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs) + result = self.obj.apply(self.func, axis, args=self.args, **self.kwargs) return result @@ -761,7 +748,7 @@ def apply_empty_result(self): we will try to apply the function to an empty series in order to see if this is a reduction function """ - assert callable(self.f) + assert callable(self.func) # we are not asked to reduce or infer reduction # so just return a copy of the existing object @@ -776,9 +763,15 @@ def apply_empty_result(self): if not should_reduce: try: if self.axis == 0: - r = self.f(Series([], dtype=np.float64)) + r = self.func( + Series([], dtype=np.float64), *self.args, **self.kwargs + ) else: - r = self.f(Series(index=self.columns, dtype=np.float64)) + r = self.func( + Series(index=self.columns, dtype=np.float64), + *self.args, + **self.kwargs, + ) except Exception: pass else: @@ -786,7 +779,7 @@ def apply_empty_result(self): if should_reduce: if len(self.agg_axis): - r = self.f(Series([], dtype=np.float64)) + r = self.func(Series([], dtype=np.float64), *self.args, **self.kwargs) else: r = np.nan @@ -812,7 +805,7 @@ def wrapper(*args, **kwargs): return wrapper - result = np.apply_along_axis(wrap_function(self.f), self.axis, self.values) + result = np.apply_along_axis(wrap_function(self.func), self.axis, self.values) # TODO: mixed type case if result.ndim == 2: @@ -821,7 +814,7 @@ def wrapper(*args, **kwargs): return self.obj._constructor_sliced(result, index=self.agg_axis) def apply_broadcast(self, target: DataFrame) -> DataFrame: - assert callable(self.f) + assert callable(self.func) result_values = np.empty_like(target.values) @@ -829,7 +822,7 @@ def apply_broadcast(self, target: DataFrame) -> DataFrame: result_compare = target.shape[0] for i, col in enumerate(target.columns): - res = self.f(target[col]) + res = self.func(target[col], *self.args, **self.kwargs) ares = np.asarray(res).ndim # must be a scalar or 1d @@ -855,7 +848,7 @@ def apply_standard(self): return self.wrap_results(results, res_index) def apply_series_generator(self) -> tuple[ResType, Index]: - assert callable(self.f) + assert callable(self.func) series_gen = self.series_generator res_index = self.result_index @@ -865,7 +858,7 @@ def apply_series_generator(self) -> tuple[ResType, Index]: with option_context("mode.chained_assignment", None): for i, v in enumerate(series_gen): # ignore SettingWithCopy here in case the user mutates - results[i] = self.f(v) + results[i] = self.func(v, *self.args, **self.kwargs) if isinstance(results[i], ABCSeries): # If we have a view on v, we need to make a copy because # series_generator will swap out the underlying data @@ -895,9 +888,9 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series return result def apply_str(self) -> DataFrame | Series: - # Caller is responsible for checking isinstance(self.f, str) + # Caller is responsible for checking isinstance(self.func, str) # TODO: GH#39993 - Avoid special-casing by replacing with lambda - if self.f == "size": + if self.func == "size": # Special-cased because DataFrame.size returns a single scalar obj = self.obj value = obj.shape[self.axis] @@ -1079,23 +1072,23 @@ def apply(self) -> DataFrame | Series: return self.apply_empty_result() # dispatch to agg - if is_list_like(self.f): + if is_list_like(self.func): return self.apply_multiple() - if isinstance(self.f, str): + if isinstance(self.func, str): # if we are a string, try to dispatch return self.apply_str() - # self.f is Callable + # self.func is Callable return self.apply_standard() def agg(self): result = super().agg() if result is None: - f = self.f + func = self.func # string, list-like, and dict-like are entirely handled in super - assert callable(f) + assert callable(func) # try a regular apply, this evaluates lambdas # row-by-row; however if the lambda is expected a Series @@ -1106,9 +1099,9 @@ def agg(self): # then .agg and .apply would have different semantics if the # operation is actually defined on the Series, e.g. str try: - result = self.obj.apply(f) + result = self.obj.apply(func, args=self.args, **self.kwargs) except (ValueError, AttributeError, TypeError): - result = f(self.obj) + result = func(self.obj, *self.args, **self.kwargs) return result @@ -1120,12 +1113,20 @@ def apply_empty_result(self) -> Series: def apply_standard(self) -> DataFrame | Series: # caller is responsible for ensuring that f is Callable - f = cast(Callable, self.f) + func = cast(Callable, self.func) obj = self.obj - if isinstance(f, np.ufunc): + if isinstance(func, np.ufunc): with np.errstate(all="ignore"): - return f(obj) + return func(obj, *self.args, **self.kwargs) + + if self.args or self.kwargs: + # _map_values does not support args/kwargs + def curried(x): + return func(x, *self.args, **self.kwargs) + + else: + curried = func # row-wise access # apply doesn't have a `na_action` keyword and for backward compat reasons @@ -1133,7 +1134,9 @@ def apply_standard(self) -> DataFrame | Series: # TODO: remove the `na_action="ignore"` when that default has been changed in # Categorical (GH51645). action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None - mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype) + mapped = obj._map_values( + mapper=curried, na_action=action, convert=self.convert_dtype + ) if len(mapped) and isinstance(mapped[0], ABCSeries): warnings.warn( From 0ff8c5b856ed8297f9f14f97160cc1858570ea39 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 29 May 2023 10:17:14 -0400 Subject: [PATCH 2/2] Remove type-hint --- pandas/core/apply.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 318f87b8f6528..5cb7b1c8279ab 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -106,7 +106,6 @@ def frame_apply( class Apply(metaclass=abc.ABCMeta): axis: AxisInt - func: AggFuncType def __init__( self,