pandas-dev · rhshadrach · Jun 29, 2023 · Jun 11, 2023 · Jun 11, 2023 · Jun 11, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -112,7 +112,7 @@ Other enhancements
 - :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`)
 - :meth:`SeriesGroupby.transform` and :meth:`DataFrameGroupby.transform` now support passing in a string as the function for ``engine="numba"`` (:issue:`53579`)
 - Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`)
-- Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`).
+- Added a new parameter ``by_row`` to :meth:`Series.apply` and :meth:`DataFrame.apply`. When set to ``False`` the supplied callables will always operate on the whole Series or DataFrame (:issue:`53400`, :issue:`53601`).
 - Groupby aggregations (such as :meth:`DataFrameGroupby.sum`) now can preserve the dtype of the input instead of casting to ``float64`` (:issue:`44952`)
 - Improved error message when :meth:`DataFrameGroupBy.agg` failed (:issue:`52930`)
 - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -81,6 +81,7 @@ def frame_apply(
     axis: Axis = 0,
     raw: bool = False,
     result_type: str | None = None,
+    by_row: Literal[False, "compat"] = "compat",
     args=None,
     kwargs=None,
 ) -> FrameApply:
@@ -100,6 +101,7 @@ def frame_apply(
         func,
         raw=raw,
         result_type=result_type,
+        by_row=by_row,
         args=args,
         kwargs=kwargs,
     )
@@ -115,11 +117,16 @@ def __init__(
         raw: bool,
         result_type: str | None,
         *,
+        by_row: bool | Literal["compat"] = True,
         args,
         kwargs,
     ) -> None:
         self.obj = obj
         self.raw = raw
+
+        assert isinstance(by_row, bool) or by_row == "compat"
+        self.by_row = by_row
+
         self.args = args or ()
         self.kwargs = kwargs or {}
 
@@ -304,7 +311,14 @@ def agg_or_apply_list_like(
         func = cast(List[AggFuncTypeBase], self.func)
         kwargs = self.kwargs
         if op_name == "apply":
-            kwargs = {**kwargs, "by_row": False}
+            if isinstance(self, FrameApply):
+                by_row = self.by_row
+
+            elif isinstance(self, SeriesApply):
+                by_row = "compat" if self.by_row else False
+            else:
+                by_row = False
+            kwargs = {**kwargs, "by_row": by_row}
 
         if getattr(obj, "axis", 0) == 1:
             raise NotImplementedError("axis other than 0 is not supported")
@@ -397,7 +411,15 @@ def agg_or_apply_dict_like(
 
         obj = self.obj
         func = cast(AggFuncTypeDict, self.func)
-        kwargs = {"by_row": False} if op_name == "apply" else {}
+        kwargs = {}
+        if op_name == "apply":
+            if isinstance(self, FrameApply):
+                by_row = self.by_row
+            elif isinstance(self, SeriesApply) and self.by_row:
+                by_row = "compat"
+            else:
+                by_row = False
+            kwargs.update({"by_row": by_row})
 
         if getattr(obj, "axis", 0) == 1:
             raise NotImplementedError("axis other than 0 is not supported")
@@ -678,6 +700,23 @@ def agg_axis(self) -> Index:
 class FrameApply(NDFrameApply):
     obj: DataFrame
 
+    def __init__(
+        self,
+        obj: AggObjType,
+        func: AggFuncType,
+        raw: bool,
+        result_type: str | None,
+        *,
+        by_row: Literal[False, "compat"] = False,
+        args,
+        kwargs,
+    ) -> None:
+        if by_row is not False and by_row != "compat":
+            raise NotImplementedError(f"by_row={by_row} not implemented")
+        super().__init__(
+            obj, func, raw, result_type, by_row=by_row, args=args, kwargs=kwargs
+        )
+
     # ---------------------------------------------------------------
     # Abstract Methods
 
@@ -1067,15 +1106,15 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
 class SeriesApply(NDFrameApply):
     obj: Series
     axis: AxisInt = 0
-    by_row: bool  # only relevant for apply()
+    by_row: bool | Literal["compat"]  # only relevant for apply()
 
     def __init__(
         self,
         obj: Series,
         func: AggFuncType,
         *,
         convert_dtype: bool | lib.NoDefault = lib.no_default,
-        by_row: bool = True,
+        by_row: bool | Literal["compat"] = True,
         args,
         kwargs,
     ) -> None:
@@ -1090,13 +1129,13 @@ def __init__(
                 stacklevel=find_stack_level(),
             )
         self.convert_dtype = convert_dtype
-        self.by_row = by_row
 
         super().__init__(
             obj,
             func,
             raw=False,
             result_type=None,
+            by_row=by_row,
             args=args,
             kwargs=kwargs,
         )
@@ -1115,6 +1154,9 @@ def apply(self) -> DataFrame | Series:
             # if we are a string, try to dispatch
             return self.apply_str()
 
+        if self.by_row == "compat":
+            return self.apply_compat()
+
         # self.func is Callable
         return self.apply_standard()
 
@@ -1149,6 +1191,28 @@ def apply_empty_result(self) -> Series:
             obj, method="apply"
         )
 
+    def apply_compat(self):
+        """compat apply method.
+
+        Used for each callable when giving listlikes and dictlikes of callables to
+        apply. Needed for compatibility with pandas < v2.1.
+
+        .. versionadded:: 2.1.0
+        """
+        obj = self.obj
+        func = self.func
+
+        if callable(func):
+            f = com.get_cython_func(func)
+            if f and not self.args and not self.kwargs:
+                return obj.apply(func, by_row=False)
+
+        try:
+            result = obj.apply(func, by_row=True)
+        except (ValueError, AttributeError, TypeError):
+            result = obj.apply(func, by_row=False)
+        return result
+
     def apply_standard(self) -> DataFrame | Series:
         # caller is responsible for ensuring that f is Callable
         func = cast(Callable, self.func)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -9611,6 +9611,7 @@ def apply(
         raw: bool = False,
         result_type: Literal["expand", "reduce", "broadcast"] | None = None,
         args=(),
+        by_row: Literal["compat", False] = "compat",
         **kwargs,
     ):
         """
@@ -9659,6 +9660,19 @@ def apply(
         args : tuple
             Positional arguments to pass to `func` in addition to the
             array/series.
+        by_row : False or "compat", default "compat"
+            If "compat", will if possible first translate the func into pandas
+            methods (e.g. ``Series().apply(np.sum)`` will be translated to
+            ``Series().sum()``). If that doesn't work, will try to call apply again
+            with ``by_row=True`` and, if that fails, will call apply again with
+            ``by_row=False``.
+            If False, the funcs will be passed the whole Series at once.
+            ``by_row`` only has effect when ``func`` is a listlike or dictlike of funcs
+            and the func isn't a string.
+            ``by_row=True`` has not been implemented, and will raise a
+            ``NotImplementedError``.
+
+            .. versionadded:: 2.1.0
         **kwargs
             Additional keyword arguments to pass as keywords arguments to
             `func`.
@@ -9758,6 +9772,7 @@ def apply(
             axis=axis,
             raw=raw,
             result_type=result_type,
+            by_row=by_row,
             args=args,
             kwargs=kwargs,
         )

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4509,7 +4509,7 @@ def apply(
         convert_dtype: bool | lib.NoDefault = lib.no_default,
         args: tuple[Any, ...] = (),
         *,
-        by_row: bool = True,
+        by_row: bool | Literal["compat"] = True,
         **kwargs,
     ) -> DataFrame | Series:
         """
@@ -4537,10 +4537,17 @@ def apply(
                 instead if you want ``convert_dtype=False``.
         args : tuple
             Positional arguments passed to func after the series value.
-        by_row : bool, default True
+        by_row : bool or "compat", default True
             If False, the func will be passed the whole Series at once.
             If True, will func will be passed each element of the Series, like
-            Series.map (backward compatible).
+            ``Series.map`` (backward compatible).
+            If "compat", will if possible first translate the func into pandas
+            methods (e.g. ``Series().apply(np.sum)`` will be translated to
+            ``Series().sum()``). If that doesn't work, will try to call apply again
+            with ``by_row=True`` and, if that fails, will call apply again with
+            ``by_row=False``. Added for backwards compatibility, should not be used
+            directly.
+            ``by_row`` has no effect when ``func`` is a string.
 
             .. versionadded:: 2.1.0
         **kwargs

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -667,6 +667,50 @@ def test_infer_row_shape():
     assert result == (6, 2)
 
 
+@pytest.mark.parametrize(
+    "ops, by_row, expected",
+    [
+        ({"a": lambda x: x + 1}, "compat", DataFrame({"a": [2, 3]})),
+        ({"a": lambda x: x + 1}, False, DataFrame({"a": [2, 3]})),
+        ({"a": lambda x: x.sum()}, "compat", Series({"a": 3})),
+        ({"a": lambda x: x.sum()}, False, Series({"a": 3})),
+        (
+            {"a": ["sum", np.sum, lambda x: x.sum()]},
+            "compat",
+            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
+        ),
+        (
+            {"a": ["sum", np.sum, lambda x: x.sum()]},
+            False,
+            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
+        ),
+        ({"a": lambda x: 1}, "compat", DataFrame({"a": [1, 1]})),
+        ({"a": lambda x: 1}, False, Series({"a": 1})),
+    ],
+)
+def test_dictlike_lambda(ops, by_row, expected):
+    # GH53601
+    df = DataFrame({"a": [1, 2]})
+    result = df.apply(ops, by_row=by_row)
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "ops",
+    [
+        {"a": lambda x: x + 1},
+        {"a": lambda x: x.sum()},
+        {"a": ["sum", np.sum, lambda x: x.sum()]},
+        {"a": lambda x: 1},
+    ],
+)
+def test_dictlike_lambda_raises(ops):
+    # GH53601
+    df = DataFrame({"a": [1, 2]})
+    with pytest.raises(NotImplementedError, match="by_row=True not implemented"):
+        df.apply(ops, by_row=True)
+
+
 def test_with_dictlike_columns():
     # GH 17602
     df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
@@ -716,6 +760,58 @@ def test_with_dictlike_columns_with_infer():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "ops, by_row, expected",
+    [
+        ([lambda x: x + 1], "compat", DataFrame({("a", "<lambda>"): [2, 3]})),
+        ([lambda x: x + 1], False, DataFrame({("a", "<lambda>"): [2, 3]})),
+        ([lambda x: x.sum()], "compat", DataFrame({"a": [3]}, index=["<lambda>"])),
+        ([lambda x: x.sum()], False, DataFrame({"a": [3]}, index=["<lambda>"])),
+        (
+            ["sum", np.sum, lambda x: x.sum()],
+            "compat",
+            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
+        ),
+        (
+            ["sum", np.sum, lambda x: x.sum()],
+            False,
+            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
+        ),
+        (
+            [lambda x: x + 1, lambda x: 3],
+            "compat",
+            DataFrame([[2, 3], [3, 3]], columns=[["a", "a"], ["<lambda>", "<lambda>"]]),
+        ),
+        (
+            [lambda x: 2, lambda x: 3],
+            False,
+            DataFrame({"a": [2, 3]}, ["<lambda>", "<lambda>"]),
+        ),
+    ],
+)
+def test_listlike_lambda(ops, by_row, expected):
+    # GH53601
+    df = DataFrame({"a": [1, 2]})
+    result = df.apply(ops, by_row=by_row)
+    tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "ops",
+    [
+        [lambda x: x + 1],
+        [lambda x: x.sum()],
+        ["sum", np.sum, lambda x: x.sum()],
+        [lambda x: x + 1, lambda x: 3],
+    ],
+)
+def test_listlike_lambda_raises(ops):
+    # GH53601
+    df = DataFrame({"a": [1, 2]})
+    with pytest.raises(NotImplementedError, match="by_row=True not implemented"):
+        df.apply(ops, by_row=True)
+
+
 def test_with_listlike_columns():
     # GH 17348
     df = DataFrame(

diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
@@ -73,11 +73,6 @@ def f(x):
     expected = s.map(f)
     tm.assert_series_equal(result, expected)
 
-    s = Series([1, 2, 3])
-    result = s.apply(f, by_row=by_row)
-    expected = s.map(f)
-    tm.assert_series_equal(result, expected)
-
 
 @pytest.mark.parametrize("convert_dtype", [True, False])
 def test_apply_convert_dtype_deprecated(convert_dtype):
@@ -435,7 +430,7 @@ def test_with_nested_series(datetime_series, op_name):
     tm.assert_frame_equal(result, expected)
 
 
-def test_replicate_describe(string_series, by_row):
+def test_replicate_describe(string_series):
     # this also tests a result set that is all scalars
     expected = string_series.describe()
     result = string_series.apply(
@@ -449,7 +444,6 @@ def test_replicate_describe(string_series, by_row):
             "75%": lambda x: x.quantile(0.75),
             "max": "max",
         },
-        by_row=by_row,
     )
     tm.assert_series_equal(result, expected)
 
@@ -617,7 +611,7 @@ def test_apply_listlike_transformer(string_series, ops, names, by_row):
         ([lambda x: x.sum()], Series([6], index=["<lambda>"])),
     ],
 )
-def test_apply_listlike_lambda(ops, expected, by_row=by_row):
+def test_apply_listlike_lambda(ops, expected, by_row):
     # GH53400
     ser = Series([1, 2, 3])
     result = ser.apply(ops, by_row=by_row)