Skip to content

POC: Don't special case Python builtin and NumPy functions #53426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 16 additions & 24 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,7 @@ def agg(self) -> DataFrame | Series | None:
Result of aggregation, or None if agg cannot be performed by
this method.
"""
obj = self.obj
arg = self.f
args = self.args
kwargs = self.kwargs

if isinstance(arg, str):
return self.apply_str()
Expand All @@ -173,11 +170,6 @@ def agg(self) -> DataFrame | Series | None:
# we require a list, but not a 'str'
return self.agg_list_like()

if callable(arg):
f = com.get_cython_func(arg)
if f and not args and not kwargs:
return getattr(obj, f)()

# caller can react
return None

Expand Down Expand Up @@ -283,11 +275,6 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
if isinstance(func, str):
return self._apply_str(obj, func, *args, **kwargs)

if not args and not kwargs:
f = com.get_cython_func(func)
if f:
return getattr(obj, f)()

# Two possible ways to use a UDF - apply or call directly
try:
return obj.apply(func, args=args, **kwargs)
Expand Down Expand Up @@ -1097,18 +1084,23 @@ def agg(self):
# string, list-like, and dict-like are entirely handled in super
assert callable(f)

# try a regular apply, this evaluates lambdas
# row-by-row; however if the lambda is expected a Series
# expression, e.g.: lambda x: x-x.quantile(0.25)
# this will fail, so we can try a vectorized evaluation

# we cannot FIRST try the vectorized evaluation, because
# then .agg and .apply would have different semantics if the
# operation is actually defined on the Series, e.g. str
try:
result = self.obj.apply(f)
except (ValueError, AttributeError, TypeError):
has_cython_func = f in com._cython_table
if has_cython_func and not self.args and not self.kwargs:
# previous versions would vectorize NumPy functions
result = f(self.obj)
else:
# try a regular apply, this evaluates lambdas
# row-by-row; however if the lambda is expecting a Series
# expression, e.g.: lambda x: x-x.quantile(0.25)
# this will fail, so we can try a vectorized evaluation

# we cannot FIRST try the vectorized evaluation, because
# then .agg and .apply would have different semantics if the
# operation is actually defined on the Series, e.g. str
try:
result = self.obj.apply(f)
except (ValueError, AttributeError, TypeError):
result = f(self.obj)

return result

Expand Down
24 changes: 0 additions & 24 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,15 +556,6 @@ def require_length_match(data, index: Index) -> None:
)


# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
builtins.sum: np.sum,
builtins.max: np.maximum.reduce,
builtins.min: np.minimum.reduce,
}

_cython_table = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can `_cython_table` also be removed?

Copy link
Member Author

@rhshadrach rhshadrach May 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to know whether the passed function is a NumPy function (maybe there is a better way to do this?) for backwards compatibility. This is because Series.apply treats these differently.

builtins.sum: "sum",
builtins.max: "max",
Expand Down Expand Up @@ -594,21 +585,6 @@ def require_length_match(data, index: Index) -> None:
}


def get_cython_func(arg: Callable) -> str | None:
"""
if we define an internal function for this argument, return it
"""
return _cython_table.get(arg)


def is_builtin_func(arg):
"""
if we define a builtin function for this argument, return it,
otherwise return the arg
"""
return _builtin_table.get(arg, arg)


def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
"""
If a name is missing then replace it by level_n, where n is the count
Expand Down
6 changes: 0 additions & 6 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
return ret

else:
cyfunc = com.get_cython_func(func)
if cyfunc and not args and not kwargs:
return getattr(self, cyfunc)()

if self.ngroups == 0:
# e.g. test_evaluate_with_empty_groups without any groups to
# iterate over, we have no output on which to do dtype
Expand Down Expand Up @@ -297,7 +293,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
agg = aggregate

def _python_agg_general(self, func, *args, **kwargs):
func = com.is_builtin_func(func)
f = lambda x: func(x, *args, **kwargs)

obj = self._obj_with_exclusions
Expand Down Expand Up @@ -1463,7 +1458,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
agg = aggregate

def _python_agg_general(self, func, *args, **kwargs):
func = com.is_builtin_func(func)
f = lambda x: func(x, *args, **kwargs)

if self.ngroups == 0:
Expand Down
5 changes: 0 additions & 5 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1459,8 +1459,6 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
)
)
def apply(self, func, *args, **kwargs) -> NDFrameT:
func = com.is_builtin_func(func)

if isinstance(func, str):
if hasattr(self, func):
res = getattr(self, func)
Expand Down Expand Up @@ -1659,9 +1657,6 @@ def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
func, *args, engine_kwargs=engine_kwargs, **kwargs
)

# optimized transforms
func = com.get_cython_func(func) or func

if not isinstance(func, str):
return self._transform_general(func, *args, **kwargs)

Expand Down
3 changes: 0 additions & 3 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import pandas.core.algorithms as algos
from pandas.core.apply import ResamplerWindowApply
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.generic import (
NDFrame,
_shared_docs,
Expand Down Expand Up @@ -1414,7 +1413,6 @@ def _downsample(self, how, **kwargs):
how : string / cython mapped function
**kwargs : kw args passed to how function
"""
how = com.get_cython_func(how) or how
ax = self.ax

# Excludes `on` column when provided
Expand Down Expand Up @@ -1566,7 +1564,6 @@ def _downsample(self, how, **kwargs):
if self.kind == "timestamp":
return super()._downsample(how, **kwargs)

how = com.get_cython_func(how) or how
ax = self.ax

if is_subperiod(ax.freq, self.freq):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1502,13 +1502,14 @@ def foo2(x, b=2, c=0):

def test_agg_std():
df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"])
expected_value = 1.632993161855452

result = df.agg(np.std)
expected = Series({"A": 2.0, "B": 2.0}, dtype=float)
expected = Series({"A": expected_value, "B": expected_value}, dtype=float)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The different `ddof` here will be important to communicate in a possible deprecation.

tm.assert_series_equal(result, expected)

result = df.agg([np.std])
expected = DataFrame({"A": 2.0, "B": 2.0}, index=["std"])
expected = DataFrame({"A": expected_value, "B": expected_value}, index=["std"])
tm.assert_frame_equal(result, expected)


Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,15 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis):
)
def test_agg_cython_table_raises_series(series, func, expected):
# GH21224
msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"
if func == "median" or func is np.nanmedian or func is np.median:
msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
msg = "|".join(
[
"Cannot convert",
"[Cc]ould not convert",
"can't multiply sequence by non-int of type",
"not supported for the input types",
"unsupported operand type",
]
)

with pytest.raises(expected, match=msg):
# e.g. Series('a b'.split()).cumprod() will raise
Expand Down
159 changes: 0 additions & 159 deletions pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
from itertools import chain
import operator

import numpy as np
import pytest

from pandas.core.dtypes.common import is_number

from pandas import (
DataFrame,
Series,
Expand Down Expand Up @@ -86,162 +83,6 @@ def test_apply_np_transformer(float_frame, op, how):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        # Empty float Series: reductions return their identity / NaN
        # defaults (sum -> 0, prod -> 1, min/max/mean/std/var/median -> NaN).
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("sum", 0),
                ("max", np.nan),
                ("min", np.nan),
                ("all", True),
                ("any", False),
                ("mean", np.nan),
                ("prod", 1),
                ("std", np.nan),
                ("var", np.nan),
                ("median", np.nan),
            ],
        ),
        # Numeric Series with a leading NaN: the expected values show the
        # NaN is skipped by every reduction (e.g. sum([nan,1,2,3]) == 6).
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("sum", 6),
                ("max", 3),
                ("min", 1),
                ("all", True),
                ("any", True),
                ("mean", 2),
                ("prod", 6),
                ("std", 1),
                ("var", 1),
                ("median", 2),
            ],
        ),
        # Object-dtype (string) Series: only the reductions defined for
        # strings are exercised.
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("sum", "abc"),
                ("max", "c"),
                ("min", "a"),
                ("all", True),
                ("any", True),
            ],
        ),
    ),
)
def test_agg_cython_table_series(series, func, expected):
    """Series.agg with each reducing func/alias in the cython table
    produces the expected scalar (GH21224)."""
    # GH21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    result = series.agg(func)
    if is_number(expected):
        # equal_nan=True so the NaN expectations (empty-Series cases)
        # compare equal rather than failing NaN != NaN
        assert np.isclose(result, expected, equal_nan=True)
    else:
        assert result == expected


@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        # Empty float Series: cumulative ops return an empty Series.
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("cumprod", Series([], dtype=np.float64)),
                ("cumsum", Series([], dtype=np.float64)),
            ],
        ),
        # Leading NaN stays NaN; subsequent values accumulate normally.
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("cumprod", Series([np.nan, 1, 2, 6])),
                ("cumsum", Series([np.nan, 1, 3, 6])),
            ],
        ),
        # Object dtype: cumsum concatenates the strings cumulatively.
        tm.get_cython_table_params(
            Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
        ),
    ),
)
def test_agg_cython_table_transform_series(series, func, expected):
    """Series.agg with the transforming cython-table funcs (cumprod,
    cumsum) returns an elementwise Series matching the input (GH21224)."""
    # GH21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    result = series.agg(func)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        # Empty DataFrame: reductions yield an empty Series of the
        # appropriate dtype (bool for all/any, float64 otherwise).
        tm.get_cython_table_params(
            DataFrame(),
            [
                ("sum", Series(dtype="float64")),
                ("max", Series(dtype="float64")),
                ("min", Series(dtype="float64")),
                ("all", Series(dtype=bool)),
                ("any", Series(dtype=bool)),
                ("mean", Series(dtype="float64")),
                ("prod", Series(dtype="float64")),
                ("std", Series(dtype="float64")),
                ("var", Series(dtype="float64")),
                ("median", Series(dtype="float64")),
            ],
        ),
        # 2x2 frame with one NaN: the expected per-column results show the
        # NaN is skipped where possible (std/var of a single value stay NaN).
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("sum", Series([1.0, 3])),
                ("max", Series([1.0, 2])),
                ("min", Series([1.0, 1])),
                ("all", Series([True, True])),
                ("any", Series([True, True])),
                ("mean", Series([1, 1.5])),
                ("prod", Series([1.0, 2])),
                ("std", Series([np.nan, 0.707107])),
                ("var", Series([np.nan, 0.5])),
                ("median", Series([1, 1.5])),
            ],
        ),
    ),
)
def test_agg_cython_table_frame(df, func, expected, axis):
    """DataFrame.agg with each reducing cython-table func collapses the
    frame to a Series (GH 21224).

    NOTE(review): ``axis`` is not parametrized here — presumably a pytest
    fixture defined elsewhere in the test suite; verify before reuse.
    """
    # GH 21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    result = df.agg(func, axis=axis)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        # Empty frame in, empty frame out for both cumulative ops.
        tm.get_cython_table_params(
            DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())]
        ),
        # NaN stays NaN in its cell; the second row accumulates down
        # each column (cumsum column 1: 1 then 1+2 = 3).
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
                ("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
            ],
        ),
    ),
)
def test_agg_cython_table_transform_frame(df, func, expected, axis):
    """DataFrame.agg with transforming cython-table funcs (cumprod,
    cumsum) preserves the frame's shape (GH 21224).

    NOTE(review): ``axis`` is not parametrized here — presumably a pytest
    fixture defined elsewhere in the test suite; verify before reuse.
    """
    # GH 21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    if axis in ("columns", 1):
        # operating blockwise doesn't let us preserve dtypes
        expected = expected.astype("float64")

    result = df.agg(func, axis=axis)
    tm.assert_frame_equal(result, expected)


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests should probably, eventually, be testing for the results of the NumPy functions?

Copy link
Member Author

@rhshadrach rhshadrach May 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I wasn't sure if this was just testing compatibility between e.g. "sum" and np.sum. It could be rewritten to test NumPy functions directly.

With this test removed, we have 1308 tests (counting different parametrizations) that pass a NumPy function into agg/apply/transform (within pandas.core.apply). 306 of those are in tests.apply.

@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(request, string_series, op):
# GH 35964
Expand Down
Loading