From 86c42c56e00bff787af53838c114172a205f3b7f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 29 May 2023 10:13:30 -0400
Subject: [PATCH 1/2] REF/CLN: func in core.apply

---
 pandas/core/apply.py | 157 ++++++++++++++++++++++---------------------
 1 file changed, 80 insertions(+), 77 deletions(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index c03f1a268906e..318f87b8f6528 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -106,8 +106,7 @@ def frame_apply(
 
 class Apply(metaclass=abc.ABCMeta):
     axis: AxisInt
-    orig_f: AggFuncType
-    f: AggFuncType
+    func: AggFuncType
 
     def __init__(
         self,
@@ -132,19 +131,7 @@ def __init__(
 
         self.result_type = result_type
 
-        f: AggFuncType
-        # curry if needed
-        if callable(func) and (kwargs or args) and not isinstance(func, np.ufunc):
-
-            def f(x):
-                assert callable(func)  # needed for mypy
-                return func(x, *args, **kwargs)
-
-        else:
-            f = func
-
-        self.orig_f = func
-        self.f = f
+        self.func = func
 
     @abc.abstractmethod
     def apply(self) -> DataFrame | Series:
@@ -160,21 +147,21 @@ def agg(self) -> DataFrame | Series | None:
         this method.
         """
         obj = self.obj
-        arg = self.f
+        func = self.func
         args = self.args
         kwargs = self.kwargs
 
-        if isinstance(arg, str):
+        if isinstance(func, str):
             return self.apply_str()
 
-        if is_dict_like(arg):
+        if is_dict_like(func):
             return self.agg_dict_like()
-        elif is_list_like(arg):
+        elif is_list_like(func):
             # we require a list, but not a 'str'
             return self.agg_list_like()
 
-        if callable(arg):
-            f = com.get_cython_func(arg)
+        if callable(func):
+            f = com.get_cython_func(func)
             if f and not args and not kwargs:
                 return getattr(obj, f)()
 
@@ -197,7 +184,7 @@ def transform(self) -> DataFrame | Series:
             If the transform function fails or does not transform.
         """
         obj = self.obj
-        func = self.orig_f
+        func = self.func
         axis = self.axis
         args = self.args
         kwargs = self.kwargs
@@ -309,7 +296,7 @@ def agg_list_like(self) -> DataFrame | Series:
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
-        arg = cast(List[AggFuncTypeBase], self.f)
+        func = cast(List[AggFuncTypeBase], self.func)
 
         if getattr(obj, "axis", 0) == 1:
             raise NotImplementedError("axis other than 0 is not supported")
@@ -340,7 +327,7 @@ def agg_list_like(self) -> DataFrame | Series:
         with context_manager:
             # degenerate case
             if selected_obj.ndim == 1:
-                for a in arg:
+                for a in func:
                     colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
                     new_res = colg.aggregate(a, *this_args, **self.kwargs)
                     results.append(new_res)
@@ -353,7 +340,7 @@ def agg_list_like(self) -> DataFrame | Series:
                 indices = []
                 for index, col in enumerate(selected_obj):
                     colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
-                    new_res = colg.aggregate(arg, *this_args, **self.kwargs)
+                    new_res = colg.aggregate(func, *this_args, **self.kwargs)
                     results.append(new_res)
                     indices.append(index)
                 keys = selected_obj.columns.take(indices)
@@ -388,7 +375,7 @@ def agg_dict_like(self) -> DataFrame | Series:
         from pandas.core.reshape.concat import concat
 
         obj = self.obj
-        arg = cast(AggFuncTypeDict, self.f)
+        func = cast(AggFuncTypeDict, self.func)
 
         if getattr(obj, "axis", 0) == 1:
             raise NotImplementedError("axis other than 0 is not supported")
@@ -401,7 +388,7 @@ def agg_dict_like(self) -> DataFrame | Series:
             selected_obj = obj._selected_obj
             selection = obj._selection
 
-        arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
+        func = self.normalize_dictlike_arg("agg", selected_obj, func)
 
         is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
         context_manager: ContextManager
@@ -421,14 +408,14 @@ def agg_dict_like(self) -> DataFrame | Series:
             if selected_obj.ndim == 1:
                 # key only used for output
                 colg = obj._gotitem(selection, ndim=1)
-                result_data = [colg.agg(how) for _, how in arg.items()]
-                result_index = list(arg.keys())
+                result_data = [colg.agg(how) for _, how in func.items()]
+                result_index = list(func.keys())
             elif is_non_unique_col:
                 # key used for column selection and output
                 # GH#51099
                 result_data = []
                 result_index = []
-                for key, how in arg.items():
+                for key, how in func.items():
                     indices = selected_obj.columns.get_indexer_for([key])
                     labels = selected_obj.columns.take(indices)
                     label_to_indices = defaultdict(list)
@@ -446,9 +433,9 @@ def agg_dict_like(self) -> DataFrame | Series:
             else:
                 # key used for column selection and output
                 result_data = [
-                    obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
+                    obj._gotitem(key, ndim=1).agg(how) for key, how in func.items()
                 ]
-                result_index = list(arg.keys())
+                result_index = list(func.keys())
 
         # Avoid making two isinstance calls in all and any below
         is_ndframe = [isinstance(r, ABCNDFrame) for r in result_data]
@@ -503,7 +490,7 @@ def apply_str(self) -> DataFrame | Series:
         result: Series or DataFrame
         """
-        # Caller is responsible for checking isinstance(self.f, str)
+        # Caller is responsible for checking isinstance(self.func, str)
-        f = cast(str, self.f)
+        func = cast(str, self.func)
 
         obj = self.obj
 
@@ -515,14 +502,14 @@ def apply_str(self) -> DataFrame | Series:
         # Support for `frame.transform('method')`
         # Some methods (shift, etc.) require the axis argument, others
         # don't, so inspect and insert if necessary.
-        func = getattr(obj, f, None)
-        if callable(func):
-            sig = inspect.getfullargspec(func)
+        method = getattr(obj, func, None)
+        if callable(method):
+            sig = inspect.getfullargspec(method)
             arg_names = (*sig.args, *sig.kwonlyargs)
             if self.axis != 0 and (
-                "axis" not in arg_names or f in ("corrwith", "skew")
+                "axis" not in arg_names or func in ("corrwith", "skew")
             ):
-                raise ValueError(f"Operation {f} does not support axis=1")
+                raise ValueError(f"Operation {func} does not support axis=1")
             if "axis" in arg_names:
                 if isinstance(obj, (SeriesGroupBy, DataFrameGroupBy)):
                     # Try to avoid FutureWarning for deprecated axis keyword;
@@ -530,7 +517,7 @@ def apply_str(self) -> DataFrame | Series:
                     #  axis, we safely exclude the keyword.
 
                     default_axis = 0
-                    if f in ["idxmax", "idxmin"]:
+                    if func in ["idxmax", "idxmin"]:
                         # DataFrameGroupBy.idxmax, idxmin axis defaults to self.axis,
                         # whereas other axis keywords default to 0
                         default_axis = self.obj.axis
@@ -539,7 +526,7 @@ def apply_str(self) -> DataFrame | Series:
                         self.kwargs["axis"] = self.axis
                 else:
                     self.kwargs["axis"] = self.axis
-        return self._apply_str(obj, f, *self.args, **self.kwargs)
+        return self._apply_str(obj, func, *self.args, **self.kwargs)
 
     def apply_multiple(self) -> DataFrame | Series:
         """
@@ -548,12 +535,12 @@ def apply_multiple(self) -> DataFrame | Series:
         Returns
         -------
         result: Series, DataFrame, or None
-            Result when self.f is a list-like or dict-like, None otherwise.
+            Result when self.func is a list-like or dict-like, None otherwise.
         """
         if self.axis == 1 and isinstance(self.obj, ABCDataFrame):
-            return self.obj.T.apply(self.f, 0, args=self.args, **self.kwargs).T
+            return self.obj.T.apply(self.func, 0, args=self.args, **self.kwargs).T
 
-        func = self.f
+        func = self.func
         kwargs = self.kwargs
 
         if is_dict_like(func):
@@ -610,17 +597,17 @@ def normalize_dictlike_arg(
             func = new_func
         return func
 
-    def _apply_str(self, obj, arg: str, *args, **kwargs):
+    def _apply_str(self, obj, func: str, *args, **kwargs):
         """
-        if arg is a string, then try to operate on it:
+        if func is a string, then try to operate on it:
         - try to find a function (or attribute) on obj
         - try to find a numpy function
         - raise
         """
-        assert isinstance(arg, str)
+        assert isinstance(func, str)
 
-        if hasattr(obj, arg):
-            f = getattr(obj, arg)
+        if hasattr(obj, func):
+            f = getattr(obj, func)
             if callable(f):
                 return f(*args, **kwargs)
 
@@ -629,12 +616,12 @@ def _apply_str(self, obj, arg: str, *args, **kwargs):
             assert len(args) == 0
             assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
             return f
-        elif hasattr(np, arg) and hasattr(obj, "__array__"):
+        elif hasattr(np, func) and hasattr(obj, "__array__"):
             # in particular exclude Window
-            f = getattr(np, arg)
+            f = getattr(np, func)
             return f(obj, *args, **kwargs)
         else:
-            msg = f"'{arg}' is not a valid function for '{type(obj).__name__}' object"
+            msg = f"'{func}' is not a valid function for '{type(obj).__name__}' object"
             raise AttributeError(msg)
 
 
@@ -699,7 +686,7 @@ def values(self):
     def apply(self) -> DataFrame | Series:
         """compute the results"""
         # dispatch to agg
-        if is_list_like(self.f):
+        if is_list_like(self.func):
             return self.apply_multiple()
 
         # all empty
@@ -707,13 +694,13 @@ def apply(self) -> DataFrame | Series:
             return self.apply_empty_result()
 
         # string dispatch
-        if isinstance(self.f, str):
+        if isinstance(self.func, str):
             return self.apply_str()
 
         # ufunc
-        elif isinstance(self.f, np.ufunc):
+        elif isinstance(self.func, np.ufunc):
             with np.errstate(all="ignore"):
-                results = self.obj._mgr.apply("apply", func=self.f)
+                results = self.obj._mgr.apply("apply", func=self.func)
             # _constructor will retain self.index and self.columns
             return self.obj._constructor(data=results)
 
@@ -750,7 +737,7 @@ def agg(self):
             result = result.T if result is not None else result
 
         if result is None:
-            result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)
+            result = self.obj.apply(self.func, axis, args=self.args, **self.kwargs)
 
         return result
 
@@ -761,7 +748,7 @@ def apply_empty_result(self):
         we will try to apply the function to an empty
         series in order to see if this is a reduction function
         """
-        assert callable(self.f)
+        assert callable(self.func)
 
         # we are not asked to reduce or infer reduction
         # so just return a copy of the existing object
@@ -776,9 +763,15 @@ def apply_empty_result(self):
         if not should_reduce:
             try:
                 if self.axis == 0:
-                    r = self.f(Series([], dtype=np.float64))
+                    r = self.func(
+                        Series([], dtype=np.float64), *self.args, **self.kwargs
+                    )
                 else:
-                    r = self.f(Series(index=self.columns, dtype=np.float64))
+                    r = self.func(
+                        Series(index=self.columns, dtype=np.float64),
+                        *self.args,
+                        **self.kwargs,
+                    )
             except Exception:
                 pass
             else:
@@ -786,7 +779,7 @@ def apply_empty_result(self):
 
         if should_reduce:
             if len(self.agg_axis):
-                r = self.f(Series([], dtype=np.float64))
+                r = self.func(Series([], dtype=np.float64), *self.args, **self.kwargs)
             else:
                 r = np.nan
 
@@ -812,7 +805,7 @@ def wrapper(*args, **kwargs):
 
             return wrapper
 
-        result = np.apply_along_axis(wrap_function(self.f), self.axis, self.values)
+        result = np.apply_along_axis(wrap_function(self.func), self.axis, self.values)
 
         # TODO: mixed type case
         if result.ndim == 2:
@@ -821,7 +814,7 @@ def wrapper(*args, **kwargs):
             return self.obj._constructor_sliced(result, index=self.agg_axis)
 
     def apply_broadcast(self, target: DataFrame) -> DataFrame:
-        assert callable(self.f)
+        assert callable(self.func)
 
         result_values = np.empty_like(target.values)
 
@@ -829,7 +822,7 @@ def apply_broadcast(self, target: DataFrame) -> DataFrame:
         result_compare = target.shape[0]
 
         for i, col in enumerate(target.columns):
-            res = self.f(target[col])
+            res = self.func(target[col], *self.args, **self.kwargs)
             ares = np.asarray(res).ndim
 
             # must be a scalar or 1d
@@ -855,7 +848,7 @@ def apply_standard(self):
         return self.wrap_results(results, res_index)
 
     def apply_series_generator(self) -> tuple[ResType, Index]:
-        assert callable(self.f)
+        assert callable(self.func)
 
         series_gen = self.series_generator
         res_index = self.result_index
@@ -865,7 +858,7 @@ def apply_series_generator(self) -> tuple[ResType, Index]:
         with option_context("mode.chained_assignment", None):
             for i, v in enumerate(series_gen):
                 # ignore SettingWithCopy here in case the user mutates
-                results[i] = self.f(v)
+                results[i] = self.func(v, *self.args, **self.kwargs)
                 if isinstance(results[i], ABCSeries):
                     # If we have a view on v, we need to make a copy because
                     #  series_generator will swap out the underlying data
@@ -895,9 +888,9 @@ def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series
         return result
 
     def apply_str(self) -> DataFrame | Series:
-        # Caller is responsible for checking isinstance(self.f, str)
+        # Caller is responsible for checking isinstance(self.func, str)
         # TODO: GH#39993 - Avoid special-casing by replacing with lambda
-        if self.f == "size":
+        if self.func == "size":
             # Special-cased because DataFrame.size returns a single scalar
             obj = self.obj
             value = obj.shape[self.axis]
@@ -1079,23 +1072,23 @@ def apply(self) -> DataFrame | Series:
             return self.apply_empty_result()
 
         # dispatch to agg
-        if is_list_like(self.f):
+        if is_list_like(self.func):
             return self.apply_multiple()
 
-        if isinstance(self.f, str):
+        if isinstance(self.func, str):
             # if we are a string, try to dispatch
             return self.apply_str()
 
-        # self.f is Callable
+        # self.func is Callable
         return self.apply_standard()
 
     def agg(self):
         result = super().agg()
         if result is None:
-            f = self.f
+            func = self.func
 
             # string, list-like, and dict-like are entirely handled in super
-            assert callable(f)
+            assert callable(func)
 
             # try a regular apply, this evaluates lambdas
             # row-by-row; however if the lambda is expected a Series
@@ -1106,9 +1099,9 @@ def agg(self):
             # then .agg and .apply would have different semantics if the
             # operation is actually defined on the Series, e.g. str
             try:
-                result = self.obj.apply(f)
+                result = self.obj.apply(func, args=self.args, **self.kwargs)
             except (ValueError, AttributeError, TypeError):
-                result = f(self.obj)
+                result = func(self.obj, *self.args, **self.kwargs)
 
         return result
 
@@ -1120,12 +1113,20 @@ def apply_empty_result(self) -> Series:
 
     def apply_standard(self) -> DataFrame | Series:
-        # caller is responsible for ensuring that f is Callable
+        # caller is responsible for ensuring that func is Callable
-        f = cast(Callable, self.f)
+        func = cast(Callable, self.func)
         obj = self.obj
 
-        if isinstance(f, np.ufunc):
+        if isinstance(func, np.ufunc):
             with np.errstate(all="ignore"):
-                return f(obj)
+                return func(obj, *self.args, **self.kwargs)
+
+        if self.args or self.kwargs:
+            # _map_values does not support args/kwargs
+            def curried(x):
+                return func(x, *self.args, **self.kwargs)
+
+        else:
+            curried = func
 
         # row-wise access
         # apply doesn't have a `na_action` keyword and for backward compat reasons
@@ -1133,7 +1134,9 @@ def apply_standard(self) -> DataFrame | Series:
         # TODO: remove the `na_action="ignore"` when that default has been changed in
         #  Categorical (GH51645).
         action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
-        mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype)
+        mapped = obj._map_values(
+            mapper=curried, na_action=action, convert=self.convert_dtype
+        )
 
         if len(mapped) and isinstance(mapped[0], ABCSeries):
             warnings.warn(

From 0ff8c5b856ed8297f9f14f97160cc1858570ea39 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Mon, 29 May 2023 10:17:14 -0400
Subject: [PATCH 2/2] Remove type-hint

---
 pandas/core/apply.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 318f87b8f6528..5cb7b1c8279ab 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -106,7 +106,6 @@ def frame_apply(
 
 class Apply(metaclass=abc.ABCMeta):
     axis: AxisInt
-    func: AggFuncType
 
     def __init__(
         self,