Skip to content

BUG: fix Series.apply(..., by_row), v2. #53601

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 29, 2023
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ Other enhancements
- :meth:`SeriesGroupby.transform` and :meth:`DataFrameGroupby.transform` now support passing in a string as the function for ``engine="numba"`` (:issue:`53579`)
- Added :meth:`ExtensionArray.interpolate` used by :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`53659`)
- Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`)
- Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`).
- Added a new parameter ``by_row`` to :meth:`Series.apply` and :meth:`DataFrame.apply`. When set to ``False`` the supplied callables will always operate on the whole Series or DataFrame (:issue:`53400`, :issue:`53601`).
- Groupby aggregations (such as :meth:`DataFrameGroupby.sum`) now can preserve the dtype of the input instead of casting to ``float64`` (:issue:`44952`)
- Improved error message when :meth:`DataFrameGroupBy.agg` failed (:issue:`52930`)
- Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`)
Expand Down
69 changes: 64 additions & 5 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def frame_apply(
axis: Axis = 0,
raw: bool = False,
result_type: str | None = None,
by_row: Literal[False, "compat"] = "compat",
args=None,
kwargs=None,
) -> FrameApply:
Expand All @@ -100,6 +101,7 @@ def frame_apply(
func,
raw=raw,
result_type=result_type,
by_row=by_row,
args=args,
kwargs=kwargs,
)
Expand All @@ -115,11 +117,16 @@ def __init__(
raw: bool,
result_type: str | None,
*,
by_row: Literal[False, "compat", "_compat"] = "compat",
args,
kwargs,
) -> None:
self.obj = obj
self.raw = raw

assert by_row is False or by_row in ["compat", "_compat"]
self.by_row = by_row

self.args = args or ()
self.kwargs = kwargs or {}

Expand Down Expand Up @@ -304,7 +311,14 @@ def agg_or_apply_list_like(
func = cast(List[AggFuncTypeBase], self.func)
kwargs = self.kwargs
if op_name == "apply":
kwargs = {**kwargs, "by_row": False}
if isinstance(self, FrameApply):
by_row = self.by_row

elif isinstance(self, SeriesApply):
by_row = "_compat" if self.by_row else False
else:
by_row = False
kwargs = {**kwargs, "by_row": by_row}

if getattr(obj, "axis", 0) == 1:
raise NotImplementedError("axis other than 0 is not supported")
Expand Down Expand Up @@ -397,7 +411,10 @@ def agg_or_apply_dict_like(

obj = self.obj
func = cast(AggFuncTypeDict, self.func)
kwargs = {"by_row": False} if op_name == "apply" else {}
kwargs = {}
if op_name == "apply":
by_row = "_compat" if self.by_row else False
kwargs.update({"by_row": by_row})

if getattr(obj, "axis", 0) == 1:
raise NotImplementedError("axis other than 0 is not supported")
Expand Down Expand Up @@ -678,6 +695,23 @@ def agg_axis(self) -> Index:
class FrameApply(NDFrameApply):
obj: DataFrame

def __init__(
    self,
    obj: AggObjType,
    func: AggFuncType,
    raw: bool,
    result_type: str | None,
    *,
    by_row: Literal[False, "compat"] = False,
    args,
    kwargs,
) -> None:
    """Validate ``by_row`` and hand everything on to the parent initializer.

    Parameters
    ----------
    obj, func, raw, result_type, args, kwargs
        Passed through unchanged to the parent ``__init__``.
    by_row : False or "compat", default False
        Only these two values are accepted here.

    Raises
    ------
    ValueError
        If ``by_row`` is neither ``False`` nor ``"compat"``.
    """
    # ``is False`` (identity) rather than equality, so that falsy
    # look-alikes such as ``0`` are rejected as well.
    is_allowed = by_row is False or by_row == "compat"
    if not is_allowed:
        raise ValueError(f"by_row={by_row} not allowed")

    super().__init__(
        obj,
        func,
        raw,
        result_type,
        by_row=by_row,
        args=args,
        kwargs=kwargs,
    )

# ---------------------------------------------------------------
# Abstract Methods

Expand Down Expand Up @@ -1067,15 +1101,15 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
class SeriesApply(NDFrameApply):
obj: Series
axis: AxisInt = 0
by_row: bool # only relevant for apply()
by_row: Literal[False, "compat", "_compat"] # only relevant for apply()

def __init__(
self,
obj: Series,
func: AggFuncType,
*,
convert_dtype: bool | lib.NoDefault = lib.no_default,
by_row: bool = True,
by_row: Literal[False, "compat", "_compat"] = "compat",
args,
kwargs,
) -> None:
Expand All @@ -1090,13 +1124,13 @@ def __init__(
stacklevel=find_stack_level(),
)
self.convert_dtype = convert_dtype
self.by_row = by_row

super().__init__(
obj,
func,
raw=False,
result_type=None,
by_row=by_row,
args=args,
kwargs=kwargs,
)
Expand All @@ -1115,6 +1149,9 @@ def apply(self) -> DataFrame | Series:
# if we are a string, try to dispatch
return self.apply_str()

if self.by_row == "_compat":
return self.apply_compat()

# self.func is Callable
return self.apply_standard()

Expand Down Expand Up @@ -1149,6 +1186,28 @@ def apply_empty_result(self) -> Series:
obj, method="apply"
)

def apply_compat(self):
"""compat apply method for funcs in listlikes and dictlikes.

Used for each callable when giving listlikes and dictlikes of callables to
apply. Needed for compatibility with Pandas < v2.1.

.. versionadded:: 2.1.0
"""
obj = self.obj
func = self.func

if callable(func):
f = com.get_cython_func(func)
if f and not self.args and not self.kwargs:
return obj.apply(func, by_row=False)

try:
result = obj.apply(func, by_row="compat")
except (ValueError, AttributeError, TypeError):
result = obj.apply(func, by_row=False)
return result

def apply_standard(self) -> DataFrame | Series:
# caller is responsible for ensuring that f is Callable
func = cast(Callable, self.func)
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9634,6 +9634,7 @@ def apply(
raw: bool = False,
result_type: Literal["expand", "reduce", "broadcast"] | None = None,
args=(),
by_row: Literal[False, "compat"] = "compat",
**kwargs,
):
"""
Expand Down Expand Up @@ -9682,6 +9683,17 @@ def apply(
args : tuple
Positional arguments to pass to `func` in addition to the
array/series.
by_row : False or "compat", default "compat"
Only has an effect when ``func`` is a listlike or dictlike of funcs
and the func isn't a string.
If "compat", each func will first be translated into a pandas method
where possible (e.g. ``Series().apply(np.sum)`` will be translated to
``Series().sum()``). If that doesn't work, apply will be called again with
``by_row="compat"`` and, if that fails, once more with
``by_row=False`` (backward compatible).
If False, the funcs will be passed the whole Series at once.

.. versionadded:: 2.1.0
**kwargs
Additional keyword arguments to pass as keywords arguments to
`func`.
Expand Down Expand Up @@ -9781,6 +9793,7 @@ def apply(
axis=axis,
raw=raw,
result_type=result_type,
by_row=by_row,
args=args,
kwargs=kwargs,
)
Expand Down
16 changes: 11 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4538,7 +4538,7 @@ def apply(
convert_dtype: bool | lib.NoDefault = lib.no_default,
args: tuple[Any, ...] = (),
*,
by_row: bool = True,
by_row: Literal[False, "compat"] = "compat",
**kwargs,
) -> DataFrame | Series:
"""
Expand All @@ -4562,14 +4562,20 @@ def apply(
preserved for some extension array dtypes, such as Categorical.

.. deprecated:: 2.1.0
The convert_dtype has been deprecated. Do ``ser.astype(object).apply()``
``convert_dtype`` has been deprecated. Do ``ser.astype(object).apply()``
instead if you want ``convert_dtype=False``.
args : tuple
Positional arguments passed to func after the series value.
by_row : bool, default True
by_row : False or "compat", default "compat"
If ``"compat"`` and func is a callable, func will be passed each element of
the Series, like ``Series.map``. If func is a list or dict of
callables, each func will first be translated into a pandas method where
possible. If that doesn't work, apply will be called again with
``by_row="compat"`` and, if that fails, once more with ``by_row=False``
(backward compatible).
If False, the func will be passed the whole Series at once.
If True, will func will be passed each element of the Series, like
Series.map (backward compatible).

``by_row`` has no effect when ``func`` is a string.

.. versionadded:: 2.1.0
**kwargs
Expand Down
96 changes: 96 additions & 0 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,50 @@ def test_infer_row_shape():
assert result == (6, 2)


@pytest.mark.parametrize(
    ["ops", "by_row", "expected"],
    [
        pytest.param({"a": lambda x: x + 1}, "compat", DataFrame({"a": [2, 3]})),
        pytest.param({"a": lambda x: x + 1}, False, DataFrame({"a": [2, 3]})),
        pytest.param({"a": lambda x: x.sum()}, "compat", Series({"a": 3})),
        pytest.param({"a": lambda x: x.sum()}, False, Series({"a": 3})),
        pytest.param(
            {"a": ["sum", np.sum, lambda x: x.sum()]},
            "compat",
            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
        ),
        pytest.param(
            {"a": ["sum", np.sum, lambda x: x.sum()]},
            False,
            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
        ),
        pytest.param({"a": lambda x: 1}, "compat", DataFrame({"a": [1, 1]})),
        pytest.param({"a": lambda x: 1}, False, Series({"a": 1})),
    ],
)
def test_dictlike_lambda(ops, by_row, expected):
    """DataFrame.apply with a dict of funcs and an allowed ``by_row`` (GH53601)."""
    frame = DataFrame({"a": [1, 2]})
    actual = frame.apply(ops, by_row=by_row)
    tm.assert_equal(actual, expected)


@pytest.mark.parametrize(
    "ops",
    [
        pytest.param({"a": lambda x: x + 1}),
        pytest.param({"a": lambda x: x.sum()}),
        pytest.param({"a": ["sum", np.sum, lambda x: x.sum()]}),
        pytest.param({"a": lambda x: 1}),
    ],
)
def test_dictlike_lambda_raises(ops):
    """DataFrame.apply rejects ``by_row=True`` for dict-like funcs (GH53601)."""
    frame = DataFrame({"a": [1, 2]})
    expected_msg = "by_row=True not allowed"
    with pytest.raises(ValueError, match=expected_msg):
        frame.apply(ops, by_row=True)


def test_with_dictlike_columns():
# GH 17602
df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
Expand Down Expand Up @@ -716,6 +760,58 @@ def test_with_dictlike_columns_with_infer():
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    ["ops", "by_row", "expected"],
    [
        pytest.param(
            [lambda x: x + 1], "compat", DataFrame({("a", "<lambda>"): [2, 3]})
        ),
        pytest.param(
            [lambda x: x + 1], False, DataFrame({("a", "<lambda>"): [2, 3]})
        ),
        pytest.param(
            [lambda x: x.sum()], "compat", DataFrame({"a": [3]}, index=["<lambda>"])
        ),
        pytest.param(
            [lambda x: x.sum()], False, DataFrame({"a": [3]}, index=["<lambda>"])
        ),
        pytest.param(
            ["sum", np.sum, lambda x: x.sum()],
            "compat",
            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
        ),
        pytest.param(
            ["sum", np.sum, lambda x: x.sum()],
            False,
            DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", "<lambda>"]),
        ),
        pytest.param(
            [lambda x: x + 1, lambda x: 3],
            "compat",
            DataFrame(
                [[2, 3], [3, 3]], columns=[["a", "a"], ["<lambda>", "<lambda>"]]
            ),
        ),
        pytest.param(
            [lambda x: 2, lambda x: 3],
            False,
            DataFrame({"a": [2, 3]}, ["<lambda>", "<lambda>"]),
        ),
    ],
)
def test_listlike_lambda(ops, by_row, expected):
    """DataFrame.apply with a list of funcs and an allowed ``by_row`` (GH53601)."""
    frame = DataFrame({"a": [1, 2]})
    actual = frame.apply(ops, by_row=by_row)
    tm.assert_equal(actual, expected)


@pytest.mark.parametrize(
    "ops",
    [
        pytest.param([lambda x: x + 1]),
        pytest.param([lambda x: x.sum()]),
        pytest.param(["sum", np.sum, lambda x: x.sum()]),
        pytest.param([lambda x: x + 1, lambda x: 3]),
    ],
)
def test_listlike_lambda_raises(ops):
    """DataFrame.apply rejects ``by_row=True`` for list-like funcs (GH53601)."""
    frame = DataFrame({"a": [1, 2]})
    expected_msg = "by_row=True not allowed"
    with pytest.raises(ValueError, match=expected_msg):
        frame.apply(ops, by_row=True)


def test_with_listlike_columns():
# GH 17348
df = DataFrame(
Expand Down
Loading