From 5b7247a7716e2077ea3e16048b38dd6e514c562c Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 11 Jun 2023 11:39:48 +0100 Subject: [PATCH 01/11] fix Series.apply(..., by_row), v2. --- pandas/core/apply.py | 43 ++++++++++++++++++++++--- pandas/core/series.py | 10 ++++-- pandas/tests/apply/test_frame_apply.py | 40 +++++++++++++++++++++++ pandas/tests/apply/test_series_apply.py | 10 ++---- 4 files changed, 88 insertions(+), 15 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 1b2aa1d053240..34bafce1ad785 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -303,8 +303,9 @@ def agg_or_apply_list_like( obj = self.obj func = cast(List[AggFuncTypeBase], self.func) kwargs = self.kwargs - if op_name == "apply": - kwargs = {**kwargs, "by_row": False} + if op_name == "apply" and isinstance(self, SeriesApply): + by_row = "compat" if self.by_row else False + kwargs = {**kwargs, "by_row": by_row} if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") @@ -397,7 +398,13 @@ def agg_or_apply_dict_like( obj = self.obj func = cast(AggFuncTypeDict, self.func) - kwargs = {"by_row": False} if op_name == "apply" else {} + kwargs = {} + if op_name == "apply": + by_row: bool | Literal["compat"] = False + is_series_apply = isinstance(self, SeriesApply) and self.by_row + if isinstance(self, FrameApply) or is_series_apply: + by_row = "compat" + kwargs.update({"by_row": by_row}) if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") @@ -1067,7 +1074,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: class SeriesApply(NDFrameApply): obj: Series axis: AxisInt = 0 - by_row: bool # only relevant for apply() + by_row: bool | Literal["compat"] # only relevant for apply() def __init__( self, @@ -1075,7 +1082,7 @@ def __init__( func: AggFuncType, *, convert_dtype: bool | lib.NoDefault = lib.no_default, - by_row: bool = True, + by_row: bool | Literal["compat"] = True, args, kwargs, ) -> None: @@ -1090,6 +1097,7 @@ def __init__( stacklevel=find_stack_level(), ) self.convert_dtype = convert_dtype + assert isinstance(by_row, bool) or by_row == "compat" self.by_row = by_row super().__init__( @@ -1115,6 +1123,9 @@ def apply(self) -> DataFrame | Series: # if we are a string, try to dispatch return self.apply_str() + if self.by_row == "compat": + return self.apply_compat() + # self.func is Callable return self.apply_standard() @@ -1149,6 +1160,28 @@ def apply_empty_result(self) -> Series: obj, method="apply" ) + def apply_compat(self): + """compat apply method. + + Used for each callable when giving listlikes and dictlikes of callables to + apply. Needed for copatability with Pandas < v2.1. + + .. versionadded:: 2.1.0 + """ + obj = self.obj + func = self.func + + if callable(func): + f = com.get_cython_func(func) + if f and not self.args and not self.kwargs: + return obj.apply(func, by_row=False) + + try: + result = obj.apply(func, by_row=True) + except (ValueError, AttributeError, TypeError): + result = obj.apply(func, by_row=False) + return result + def apply_standard(self) -> DataFrame | Series: # caller is responsible for ensuring that f is Callable func = cast(Callable, self.func) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9c7110cc21082..13b4ca2ff9f37 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4503,7 +4503,7 @@ def apply( convert_dtype: bool | lib.NoDefault = lib.no_default, args: tuple[Any, ...] = (), *, - by_row: bool = True, + by_row: bool | Literal["compat"] = True, **kwargs, ) -> DataFrame | Series: """ @@ -4531,10 +4531,16 @@ def apply( instead if you want ``convert_dtype=False``. args : tuple Positional arguments passed to func after the series value. - by_row : bool, default True + by_row : bool or "compat", default True If False, the func will be passed the whole Series at once. If True, will func will be passed each element of the Series, like Series.map (backward compatible). + If "compat", will if possible first translate the func into pandas + methods (e.g. ``Series().apply(np.sum)`` will be translated to + ``Series().sum()``). If that doesn't work, will try call apply again with + ``by_row=True`` and if that fails, will call apply again with + ``by_row=False``. Added for backwards compatability, should not be used + directly. .. versionadded:: 2.1.0 **kwargs diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 99fc393ff82c5..edbdfe6fd5a05 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -667,6 +667,24 @@ def test_infer_row_shape(): assert result == (6, 2) +@pytest.mark.parametrize( + "ops, expected", + [ + ({"a": lambda x: x + 1}, DataFrame({"a": [2, 3]})), + ({"a": lambda x: x.sum()}, Series({"a": 3})), + ( + {"a": ["sum", np.sum, lambda x: x.sum()]}, + DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), + ), + ], +) +def test_dictlike_lambda(ops, expected): + # GHXXXXX + df = DataFrame({"a": [1, 2]}) + result = df.apply(ops) + tm.assert_equal(result, expected) + + def test_with_dictlike_columns(): # GH 17602 df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) @@ -716,6 +734,28 @@ def test_with_dictlike_columns_with_infer(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "ops, expected", + [ + ([lambda x: x + 1], DataFrame({("a", ""): [2, 3]})), + ([lambda x: x.sum()], DataFrame({"a": [3]}, index=[""])), + ( + ["sum", np.sum, lambda x: x.sum()], + DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), + ), + ( + [lambda x: x + 1, lambda x: 3], + DataFrame([[2, 3], [3, 3]], columns=[["a", "a"], ["", ""]]), + ), + ], +) +def test_listlike_lambda(ops, expected): + # GHxxxxx + df = DataFrame({"a": [1, 2]}) + result = df.apply(ops) + tm.assert_equal(result, expected) + + def test_with_listlike_columns(): # GH 17348 df = DataFrame( diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 425d2fb42a711..f21dbf2e91c1d 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -73,11 +73,6 @@ def f(x): expected = s.map(f) tm.assert_series_equal(result, expected) - s = Series([1, 2, 3]) - result = s.apply(f, by_row=by_row) - expected = s.map(f) - tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("convert_dtype", [True, False]) def test_apply_convert_dtype_deprecated(convert_dtype): @@ -422,7 +417,7 @@ def test_with_nested_series(datetime_series, op_name): tm.assert_frame_equal(result, expected) -def test_replicate_describe(string_series, by_row): +def test_replicate_describe(string_series): # this also tests a result set that is all scalars expected = string_series.describe() result = string_series.apply( @@ -436,7 +431,6 @@ def test_replicate_describe(string_series, by_row): "75%": lambda x: x.quantile(0.75), "max": "max", }, - by_row=by_row, ) tm.assert_series_equal(result, expected) @@ -604,7 +598,7 @@ def test_apply_listlike_transformer(string_series, ops, names, by_row): ([lambda x: x.sum()], Series([6], index=[""])), ], ) -def test_apply_listlike_lambda(ops, expected, by_row=by_row): +def test_apply_listlike_lambda(ops, expected, by_row): # GH53400 ser = Series([1, 2, 3]) result = ser.apply(ops, by_row=by_row) From 94dbb32c1c9ea0a25e53640a5b5483bb8c54c559 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 11 Jun 2023 13:04:51 +0100 Subject: [PATCH 02/11] add gh number --- pandas/tests/apply/test_frame_apply.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index edbdfe6fd5a05..b4fbdd59420d5 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -679,7 +679,7 @@ def test_infer_row_shape(): ], ) def test_dictlike_lambda(ops, expected): - # GHXXXXX + # GH53601 df = DataFrame({"a": [1, 2]}) result = df.apply(ops) tm.assert_equal(result, expected) @@ -750,7 +750,7 @@ def test_with_dictlike_columns_with_infer(): ], ) def test_listlike_lambda(ops, expected): - # GHxxxxx + # GH53601 df = DataFrame({"a": [1, 2]}) result = df.apply(ops) tm.assert_equal(result, expected) From c8aafbb65a5208927d8100e62dd6b54aa8a0257b Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 11 Jun 2023 13:17:01 +0100 Subject: [PATCH 03/11] fix codespell --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 13b4ca2ff9f37..9320aa20e3f62 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4539,7 +4539,7 @@ def apply( methods (e.g. ``Series().apply(np.sum)`` will be translated to ``Series().sum()``). If that doesn't work, will try call apply again with ``by_row=True`` and if that fails, will call apply again with - ``by_row=False``. Added for backwards compatability, should not be used + ``by_row=False``. Added for backwards compatibility, should not be used directly. .. versionadded:: 2.1.0 From 84f4284a548f8ea9e62a370ca376d1872d770d74 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 12 Jun 2023 11:35:15 +0100 Subject: [PATCH 04/11] add by_row para to DataFrame.apply --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/apply.py | 28 ++++++++++++---- pandas/core/frame.py | 14 ++++++++ pandas/core/series.py | 5 +-- pandas/tests/apply/test_frame_apply.py | 46 ++++++++++++++++++++------ 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index baacc8c421414..01f9a1153c85c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -104,7 +104,7 @@ Other enhancements - :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`) - :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`) - Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`) -- Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`). +- Added a new parameter ``by_row`` to :meth:`Series.apply` and :meth:`DataFrame.apply`. When set to ``False`` the supplied callables will always operate on the whole Series or DataFrame (:issue:`53400`, :issue:`53601`). - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 34bafce1ad785..acc23cd393033 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -81,6 +81,7 @@ def frame_apply( axis: Axis = 0, raw: bool = False, result_type: str | None = None, + by_row: bool | Literal["compat"] = "compat", args=None, kwargs=None, ) -> FrameApply: @@ -100,6 +101,7 @@ def frame_apply( func, raw=raw, result_type=result_type, + by_row=by_row, args=args, kwargs=kwargs, ) @@ -115,11 +117,16 @@ def __init__( raw: bool, result_type: str | None, *, + by_row: bool | Literal["compat"] = True, args, kwargs, ) -> None: self.obj = obj self.raw = raw + + assert isinstance(by_row, bool) or by_row == "compat" + self.by_row = by_row + self.args = args or () self.kwargs = kwargs or {} @@ -303,8 +310,14 @@ def agg_or_apply_list_like( obj = self.obj func = cast(List[AggFuncTypeBase], self.func) kwargs = self.kwargs - if op_name == "apply" and isinstance(self, SeriesApply): - by_row = "compat" if self.by_row else False + if op_name == "apply": + if isinstance(self, FrameApply): + by_row = self.by_row + + elif isinstance(self, SeriesApply): + by_row = "compat" if self.by_row else False + else: + by_row = False kwargs = {**kwargs, "by_row": by_row} if getattr(obj, "axis", 0) == 1: @@ -400,10 +413,12 @@ def agg_or_apply_dict_like( func = cast(AggFuncTypeDict, self.func) kwargs = {} if op_name == "apply": - by_row: bool | Literal["compat"] = False - is_series_apply = isinstance(self, SeriesApply) and self.by_row - if isinstance(self, FrameApply) or is_series_apply: + if isinstance(self, FrameApply): + by_row = self.by_row + elif isinstance(self, SeriesApply) and self.by_row: by_row = "compat" + else: + by_row = False kwargs.update({"by_row": by_row}) if getattr(obj, "axis", 0) == 1: @@ -1097,14 +1112,13 @@ def __init__( stacklevel=find_stack_level(), ) self.convert_dtype = convert_dtype - assert isinstance(by_row, bool) or by_row == "compat" - self.by_row = by_row super().__init__( obj, func, raw=False, result_type=None, + by_row=by_row, args=args, kwargs=kwargs, ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 671924c5e9607..0354a22ab0501 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9604,6 +9604,7 @@ def apply( raw: bool = False, result_type: Literal["expand", "reduce", "broadcast"] | None = None, args=(), + by_row: Literal["compat", False] = "compat", **kwargs, ): """ @@ -9652,6 +9653,18 @@ def apply( args : tuple Positional arguments to pass to `func` in addition to the array/series. + by_row : bool or "compat", default "compat" + If "compat", will if possible first translate the func into pandas + methods (e.g. ``Series().apply(np.sum)`` will be translated to + ``Series().sum()``). If that doesn't work, will try call to apply again with + ``by_row=True`` and if that fails, will call apply again with + ``by_row=False`` + If True, gives the same results as for "compat". + If False, the funcs will be passed the whole Series at once. + ``by_row`` only has effect when ``func`` is a listlike or dictlike of funcs + and the func isn't a string. + + .. versionadded:: 2.1.0 **kwargs Additional keyword arguments to pass as keywords arguments to `func`. @@ -9751,6 +9764,7 @@ def apply( axis=axis, raw=raw, result_type=result_type, + by_row=by_row, args=args, kwargs=kwargs, ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9320aa20e3f62..43d4f095406a8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4534,13 +4534,14 @@ def apply( by_row : bool or "compat", default True If False, the func will be passed the whole Series at once. If True, will func will be passed each element of the Series, like - Series.map (backward compatible). + ``Series.map`` (backward compatible). If "compat", will if possible first translate the func into pandas methods (e.g. ``Series().apply(np.sum)`` will be translated to - ``Series().sum()``). If that doesn't work, will try call apply again with + ``Series().sum()``). If that doesn't work, will try call to apply again with ``by_row=True`` and if that fails, will call apply again with ``by_row=False``. Added for backwards compatibility, should not be used directly. + ``by_row`` has no effect when ``func`` is a string. .. versionadded:: 2.1.0 **kwargs diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index b4fbdd59420d5..848701fca8748 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -668,20 +668,30 @@ def test_infer_row_shape(): @pytest.mark.parametrize( - "ops, expected", + "ops, by_row, expected", [ - ({"a": lambda x: x + 1}, DataFrame({"a": [2, 3]})), - ({"a": lambda x: x.sum()}, Series({"a": 3})), + ({"a": lambda x: x + 1}, "compat", DataFrame({"a": [2, 3]})), + ({"a": lambda x: x + 1}, False, DataFrame({"a": [2, 3]})), + ({"a": lambda x: x.sum()}, "compat", Series({"a": 3})), + ({"a": lambda x: x.sum()}, False, Series({"a": 3})), ( {"a": ["sum", np.sum, lambda x: x.sum()]}, + "compat", DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), ), + ( + {"a": ["sum", np.sum, lambda x: x.sum()]}, + False, + DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), + ), + ({"a": lambda x: 1}, "compat", DataFrame({"a": [1, 1]})), + ({"a": lambda x: 1}, False, Series({"a": 1})), ], ) -def test_dictlike_lambda(ops, expected): +def test_dictlike_lambda(ops, by_row, expected): # GH53601 df = DataFrame({"a": [1, 2]}) - result = df.apply(ops) + result = df.apply(ops, by_row=by_row) tm.assert_equal(result, expected) @@ -735,24 +745,40 @@ def test_with_dictlike_columns_with_infer(): @pytest.mark.parametrize( - "ops, expected", + "ops, by_row, expected", [ - ([lambda x: x + 1], DataFrame({("a", ""): [2, 3]})), - ([lambda x: x.sum()], DataFrame({"a": [3]}, index=[""])), + ([lambda x: x + 1], "compat", DataFrame({("a", ""): [2, 3]})), + ([lambda x: x + 1], True, DataFrame({("a", ""): [2, 3]})), + ([lambda x: x + 1], False, DataFrame({("a", ""): [2, 3]})), + ([lambda x: x.sum()], "compat", DataFrame({"a": [3]}, index=[""])), + ([lambda x: x.sum()], True, DataFrame({"a": [3]}, index=[""])), + ([lambda x: x.sum()], False, DataFrame({"a": [3]}, index=[""])), + ( + ["sum", np.sum, lambda x: x.sum()], + "compat", + DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), + ), ( ["sum", np.sum, lambda x: x.sum()], + False, DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), ), ( [lambda x: x + 1, lambda x: 3], + "compat", DataFrame([[2, 3], [3, 3]], columns=[["a", "a"], ["", ""]]), ), + ( + [lambda x: 2, lambda x: 3], + False, + DataFrame({"a": [2, 3]}, ["", ""]), + ), ], ) -def test_listlike_lambda(ops, expected): +def test_listlike_lambda(ops, by_row, expected): # GH53601 df = DataFrame({"a": [1, 2]}) - result = df.apply(ops) + result = df.apply(ops, by_row=by_row) tm.assert_equal(result, expected) From 3c6aa024548a82b3a4f74db6d5ef0e5fc4a8e2c1 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 24 Jun 2023 07:00:09 +0100 Subject: [PATCH 05/11] remove compat=True option --- pandas/core/apply.py | 17 +++++++++++++ pandas/core/frame.py | 5 ++-- pandas/tests/apply/test_frame_apply.py | 33 ++++++++++++++++++++++++-- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index acc23cd393033..fba2b5f727ae7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -700,6 +700,23 @@ def agg_axis(self) -> Index: class FrameApply(NDFrameApply): obj: DataFrame + def __init__( + self, + obj: AggObjType, + func: AggFuncType, + raw: bool, + result_type: str | None, + *, + by_row: Literal[False, "compat"] = False, + args, + kwargs, + ) -> None: + if by_row is not False or by_row != "compat": + raise NotImplementedError(f"by_row={by_row} not implemented") + super().__init__( + obj, func, raw, result_type, by_row=by_row, args=args, kwargs=kwargs + ) + # --------------------------------------------------------------- # Abstract Methods diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b725d7b41018f..565349f79aff6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9660,16 +9660,17 @@ def apply( args : tuple Positional arguments to pass to `func` in addition to the array/series. - by_row : bool or "compat", default "compat" + by_row : False or "compat", default "compat" If "compat", will if possible first translate the func into pandas methods (e.g. ``Series().apply(np.sum)`` will be translated to ``Series().sum()``). If that doesn't work, will try call to apply again with ``by_row=True`` and if that fails, will call apply again with ``by_row=False`` - If True, gives the same results as for "compat". If False, the funcs will be passed the whole Series at once. ``by_row`` only has effect when ``func`` is a listlike or dictlike of funcs and the func isn't a string. + ``by_row=True`` has not been implemented, and will raise an + ``NotImplenentedError``. .. versionadded:: 2.1.0 **kwargs diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 848701fca8748..c5ae2fea9b66a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -695,6 +695,22 @@ def test_dictlike_lambda(ops, by_row, expected): tm.assert_equal(result, expected) +@pytest.mark.parametrize( + "ops", + [ + {"a": lambda x: x + 1}, + {"a": lambda x: x.sum()}, + {"a": ["sum", np.sum, lambda x: x.sum()]}, + {"a": lambda x: 1}, + ], +) +def test_dictlike_lambda_raises(ops, by_row, expected): + # GH53601 + df = DataFrame({"a": [1, 2]}) + with pytest.raises(NotImplementedError, match="by_row=True not implemented"): + df.apply(ops, by_row=True) + + def test_with_dictlike_columns(): # GH 17602 df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) @@ -748,10 +764,8 @@ def test_with_dictlike_columns_with_infer(): "ops, by_row, expected", [ ([lambda x: x + 1], "compat", DataFrame({("a", ""): [2, 3]})), - ([lambda x: x + 1], True, DataFrame({("a", ""): [2, 3]})), ([lambda x: x + 1], False, DataFrame({("a", ""): [2, 3]})), ([lambda x: x.sum()], "compat", DataFrame({"a": [3]}, index=[""])), - ([lambda x: x.sum()], True, DataFrame({"a": [3]}, index=[""])), ([lambda x: x.sum()], False, DataFrame({"a": [3]}, index=[""])), ( ["sum", np.sum, lambda x: x.sum()], @@ -782,6 +796,21 @@ def test_listlike_lambda(ops, by_row, expected): tm.assert_equal(result, expected) +@pytest.mark.parametrize( + "ops", + [ + [lambda x: x + 1], + [lambda x: x.sum()], + ["sum", np.sum, lambda x: x.sum()][lambda x: x + 1, lambda x: 3], + ], +) +def test_listlike_lambda_raises(ops): + # GH53601 + df = DataFrame({"a": [1, 2]}) + with pytest.raises(NotImplementedError, match="by_row=True not implemented"): + df.apply(ops, by_row=True) + + def test_with_listlike_columns(): # GH 17348 df = DataFrame( From b5ef3473ec19f79637c974488e7a27ad65b73242 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 24 Jun 2023 08:47:46 +0100 Subject: [PATCH 06/11] remove compat=True option, cleanup --- pandas/core/apply.py | 4 ++-- pandas/tests/apply/test_frame_apply.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index fba2b5f727ae7..7a6dd01ef4fd9 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -81,7 +81,7 @@ def frame_apply( axis: Axis = 0, raw: bool = False, result_type: str | None = None, - by_row: bool | Literal["compat"] = "compat", + by_row: Literal[False, "compat"] = "compat", args=None, kwargs=None, ) -> FrameApply: @@ -711,7 +711,7 @@ def __init__( args, kwargs, ) -> None: - if by_row is not False or by_row != "compat": + if by_row is not False and by_row != "compat": raise NotImplementedError(f"by_row={by_row} not implemented") super().__init__( obj, func, raw, result_type, by_row=by_row, args=args, kwargs=kwargs diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index c5ae2fea9b66a..fefdf2e727c03 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -704,7 +704,7 @@ def test_dictlike_lambda(ops, by_row, expected): {"a": lambda x: 1}, ], ) -def test_dictlike_lambda_raises(ops, by_row, expected): +def test_dictlike_lambda_raises(ops): # GH53601 df = DataFrame({"a": [1, 2]}) with pytest.raises(NotImplementedError, match="by_row=True not implemented"): @@ -801,7 +801,8 @@ def test_listlike_lambda(ops, by_row, expected): [ [lambda x: x + 1], [lambda x: x.sum()], - ["sum", np.sum, lambda x: x.sum()][lambda x: x + 1, lambda x: 3], + ["sum", np.sum, lambda x: x.sum()], + [lambda x: x + 1, lambda x: 3], ], ) def test_listlike_lambda_raises(ops): From a70637bdedaa451f5aa9bd2f998ceed2fbdaf476 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 24 Jun 2023 19:05:27 +0100 Subject: [PATCH 07/11] remove compat=True option, cleanup II --- pandas/core/apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7a6dd01ef4fd9..515c62e8ee862 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -81,7 +81,7 @@ def frame_apply( axis: Axis = 0, raw: bool = False, result_type: str | None = None, - by_row: Literal[False, "compat"] = "compat", + by_row: Literal[False, "compat"] = "compat", args=None, kwargs=None, ) -> FrameApply: From 6f7f127ecdce883cb146492c18b679dac3ad6a08 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 26 Jun 2023 23:03:56 +0100 Subject: [PATCH 08/11] replace by_row=True with by_row='compat' --- pandas/core/apply.py | 21 ++++++++------------- pandas/core/frame.py | 2 +- pandas/core/series.py | 20 ++++++++++---------- pandas/tests/apply/test_series_apply.py | 24 ++++++++++++------------ 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 240456d5e893c..f3777b7de99d9 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -117,14 +117,14 @@ def __init__( raw: bool, result_type: str | None, *, - by_row: bool | Literal["compat"] = True, + by_row: Literal[False, "compat", "_compat"] = "compat", args, kwargs, ) -> None: self.obj = obj self.raw = raw - assert isinstance(by_row, bool) or by_row == "compat" + assert by_row is False or by_row in ["compat", "_compat"] self.by_row = by_row self.args = args or () @@ -315,7 +315,7 @@ def agg_or_apply_list_like( by_row = self.by_row elif isinstance(self, SeriesApply): - by_row = "compat" if self.by_row else False + by_row = "_compat" if self.by_row else False else: by_row = False kwargs = {**kwargs, "by_row": by_row} @@ -413,12 +413,7 @@ def agg_or_apply_dict_like( func = cast(AggFuncTypeDict, self.func) kwargs = {} if op_name == "apply": - if isinstance(self, FrameApply): - by_row = self.by_row - elif isinstance(self, SeriesApply) and self.by_row: - by_row = "compat" - else: - by_row = False + by_row = "_compat" if self.by_row else False kwargs.update({"by_row": by_row}) if getattr(obj, "axis", 0) == 1: @@ -1106,7 +1101,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: class SeriesApply(NDFrameApply): obj: Series axis: AxisInt = 0 - by_row: bool | Literal["compat"] # only relevant for apply() + by_row: Literal[False, "compat", "_compat"] # only relevant for apply() def __init__( self, @@ -1114,7 +1109,7 @@ def __init__( func: AggFuncType, *, convert_dtype: bool | lib.NoDefault = lib.no_default, - by_row: bool | Literal["compat"] = True, + by_row: Literal[False, "compat", "_compat"] = "compat", args, kwargs, ) -> None: @@ -1154,7 +1149,7 @@ def apply(self) -> DataFrame | Series: # if we are a string, try to dispatch return self.apply_str() - if self.by_row == "compat": + if self.by_row == "_compat": return self.apply_compat() # self.func is Callable @@ -1208,7 +1203,7 @@ def apply_compat(self): return obj.apply(func, by_row=False) try: - result = obj.apply(func, by_row=True) + result = obj.apply(func, by_row="compat") except (ValueError, AttributeError, TypeError): result = obj.apply(func, by_row=False) return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index acaa099068b44..ad948c43bdd5d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9631,7 +9631,7 @@ def apply( raw: bool = False, result_type: Literal["expand", "reduce", "broadcast"] | None = None, args=(), - by_row: Literal["compat", False] = "compat", + by_row: Literal[False, "compat"] = "compat", **kwargs, ): """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 5976f6ea74c4f..bc1404e063c47 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4534,7 +4534,7 @@ def apply( convert_dtype: bool | lib.NoDefault = lib.no_default, args: tuple[Any, ...] = (), *, - by_row: bool | Literal["compat"] = True, + by_row: Literal[False, "compat", "_compat"] = "compat", **kwargs, ) -> DataFrame | Series: """ @@ -4562,16 +4562,16 @@ def apply( instead if you want ``convert_dtype=False``. args : tuple Positional arguments passed to func after the series value. - by_row : bool or "compat", default True + by_row : False, "compat" or "_compat", default "compat" If False, the func will be passed the whole Series at once. - If True, will func will be passed each element of the Series, like - ``Series.map`` (backward compatible). - If "compat", will if possible first translate the func into pandas - methods (e.g. ``Series().apply(np.sum)`` will be translated to - ``Series().sum()``). If that doesn't work, will try call to apply again with - ``by_row=True`` and if that fails, will call apply again with - ``by_row=False``. Added for backwards compatibility, should not be used - directly. + If ``"compat"`` and func is a callable, func will be passed each element of + the Series, like ``Series.map``. If func is a list or dict of + callables, will first try to translate each func into pandas methods. If + that doesn't work, will try call to apply again with ``by_row="compat"`` + and if that fails, will call apply again with ``by_row=False`` + (backward compatible). + ``"_compat"`` is used internally and should not be used directly. + ``by_row`` has no effect when ``func`` is a string. .. versionadded:: 2.1.0 diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 15676f3472f78..9002a5f85cba6 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -14,7 +14,7 @@ from pandas.tests.apply.common import series_transform_kernels -@pytest.fixture(params=[True, False]) +@pytest.fixture(params=[False, "compat"]) def by_row(request): return request.param @@ -69,7 +69,7 @@ def test_apply_map_same_length_inference_bug(): def f(x): return (x, x + 1) - result = s.apply(f, by_row=True) + result = s.apply(f, by_row="compat") expected = s.map(f) tm.assert_series_equal(result, expected) @@ -82,7 +82,7 @@ def func(x): return x if x > 0 else np.nan with tm.assert_produces_warning(FutureWarning): - ser.apply(func, convert_dtype=convert_dtype, by_row=True) + ser.apply(func, convert_dtype=convert_dtype, by_row="compat") def test_apply_args(): @@ -156,7 +156,7 @@ def test_apply_box(): s = Series(vals) assert s.dtype == "datetime64[ns]" # boxed value must be Timestamp instance - res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row=True) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) @@ -166,7 +166,7 @@ def test_apply_box(): ] s = Series(vals) assert s.dtype == "datetime64[ns, US/Eastern]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row=True) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat") exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) @@ -174,7 +174,7 @@ def test_apply_box(): vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] s = Series(vals) assert s.dtype == "timedelta64[ns]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row=True) + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat") exp = Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) @@ -182,7 +182,7 @@ def test_apply_box(): vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] s = Series(vals) assert s.dtype == "Period[M]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row=True) + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat") exp = Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) @@ -392,7 +392,7 @@ def test_demo(): @pytest.mark.parametrize("func", [str, lambda x: str(x)]) def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): - # test that we are evaluating row-by-row first if by_row=True + # test that we are evaluating row-by-row first if by_row="compat" # else vectorized evaluation result = string_series.apply(func, by_row=by_row) @@ -461,7 +461,7 @@ def test_reduce(string_series): @pytest.mark.parametrize( "how, kwds", - [("agg", {}), ("apply", {"by_row": True}), ("apply", {"by_row": False})], + [("agg", {}), ("apply", {"by_row": "compat"}), ("apply", {"by_row": False})], ) def test_non_callable_aggregates(how, kwds): # test agg using non-callable series attributes @@ -520,7 +520,7 @@ def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): @pytest.mark.parametrize( - "by_row, expected", [(True, Series(np.ones(30), dtype="int64")), (False, 1)] + "by_row, expected", [("compat", Series(np.ones(30), dtype="int64")), (False, 1)] ) def test_apply_scalar_on_date_time_index_aware_series(by_row, expected): # GH 25959 @@ -555,7 +555,7 @@ def test_apply_to_timedelta(by_row): ) @pytest.mark.parametrize( "how, kwargs", - [["agg", {}], ["apply", {"by_row": True}], ["apply", {"by_row": False}]], + [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]], ) def test_apply_listlike_reducer(string_series, ops, names, how, kwargs): # GH 39140 @@ -576,7 +576,7 @@ def test_apply_listlike_reducer(string_series, ops, names, how, kwargs): ) @pytest.mark.parametrize( "how, kwargs", - [["agg", {}], ["apply", {"by_row": True}], ["apply", {"by_row": False}]], + [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]], ) def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row): # GH 39140 From 417e958c6fc334fcd92093809f7ca1d5d72d1758 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 28 Jun 2023 12:52:39 +0100 Subject: [PATCH 09/11] remove '_compat' from public interface --- pandas/core/apply.py | 2 +- pandas/core/series.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f3777b7de99d9..2a97208b6a252 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1187,7 +1187,7 @@ def apply_empty_result(self) -> Series: ) def apply_compat(self): - """compat apply method. + """compat apply method for funcs in listlikes and dictlikes. Used for each callable when giving listlikes and dictlikes of callables to apply. Needed for copatability with Pandas < v2.1. diff --git a/pandas/core/series.py b/pandas/core/series.py index bc1404e063c47..838e2dfc5d26d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4534,7 +4534,7 @@ def apply( convert_dtype: bool | lib.NoDefault = lib.no_default, args: tuple[Any, ...] = (), *, - by_row: Literal[False, "compat", "_compat"] = "compat", + by_row: Literal[False, "compat"] = "compat", **kwargs, ) -> DataFrame | Series: """ @@ -4562,7 +4562,7 @@ def apply( instead if you want ``convert_dtype=False``. args : tuple Positional arguments passed to func after the series value. - by_row : False, "compat" or "_compat", default "compat" + by_row : False or "compat", default "compat" If False, the func will be passed the whole Series at once. If ``"compat"`` and func is a callable, func will be passed each element of the Series, like ``Series.map``. If func is a list or dict of @@ -4570,7 +4570,6 @@ def apply( that doesn't work, will try call to apply again with ``by_row="compat"`` and if that fails, will call apply again with ``by_row=False`` (backward compatible). - ``"_compat"`` is used internally and should not be used directly. ``by_row`` has no effect when ``func`` is a string. From b40665cff60f1d2126238870cfac38daef5f0e91 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 29 Jun 2023 04:46:29 +0100 Subject: [PATCH 10/11] update according to comments --- pandas/core/apply.py | 2 +- pandas/core/frame.py | 8 +++----- pandas/core/series.py | 4 ++-- pandas/tests/apply/test_frame_apply.py | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2a97208b6a252..0d8d000748569 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -707,7 +707,7 @@ def __init__( kwargs, ) -> None: if by_row is not False and by_row != "compat": - raise NotImplementedError(f"by_row={by_row} not implemented") + raise ValueError(f"by_row={by_row} not allowed") super().__init__( obj, func, raw, result_type, by_row=by_row, args=args, kwargs=kwargs ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bdd4f51a8ca38..ae43a44d68f1c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9684,16 +9684,14 @@ def apply( Positional arguments to pass to `func` in addition to the array/series. by_row : False or "compat", default "compat" + Only has an effect when ``func`` is a listlike or dictlike of funcs + and the func isn't a string. If "compat", will if possible first translate the func into pandas methods (e.g. ``Series().apply(np.sum)`` will be translated to ``Series().sum()``). If that doesn't work, will try call to apply again with ``by_row=True`` and if that fails, will call apply again with - ``by_row=False`` + ``by_row=False`` (backward compatible). If False, the funcs will be passed the whole Series at once. - ``by_row`` only has effect when ``func`` is a listlike or dictlike of funcs - and the func isn't a string. - ``by_row=True`` has not been implemented, and will raise an - ``NotImplenentedError``. .. versionadded:: 2.1.0 **kwargs diff --git a/pandas/core/series.py b/pandas/core/series.py index 60dad784e4987..e59a4cfc3fcc1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4562,18 +4562,18 @@ def apply( preserved for some extension array dtypes, such as Categorical. .. deprecated:: 2.1.0 - The convert_dtype has been deprecated. Do ``ser.astype(object).apply()`` + ``convert_dtype`` has been deprecated. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. args : tuple Positional arguments passed to func after the series value. by_row : False or "compat", default "compat" - If False, the func will be passed the whole Series at once. If ``"compat"`` and func is a callable, func will be passed each element of the Series, like ``Series.map``. If func is a list or dict of callables, will first try to translate each func into pandas methods. If that doesn't work, will try call to apply again with ``by_row="compat"`` and if that fails, will call apply again with ``by_row=False`` (backward compatible). + If False, the func will be passed the whole Series at once. ``by_row`` has no effect when ``func`` is a string. diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index fefdf2e727c03..5681167cd54f9 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -707,7 +707,7 @@ def test_dictlike_lambda(ops, by_row, expected): def test_dictlike_lambda_raises(ops): # GH53601 df = DataFrame({"a": [1, 2]}) - with pytest.raises(NotImplementedError, match="by_row=True not implemented"): + with pytest.raises(ValueError, match="by_row=True not allowed"): df.apply(ops, by_row=True) @@ -808,7 +808,7 @@ def test_listlike_lambda(ops, by_row, expected): def test_listlike_lambda_raises(ops): # GH53601 df = DataFrame({"a": [1, 2]}) - with pytest.raises(NotImplementedError, match="by_row=True not implemented"): + with pytest.raises(ValueError, match="by_row=True not allowed"): df.apply(ops, by_row=True) From 881889cb372304692f5ad55d11793a01e4766f97 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 29 Jun 2023 04:49:24 +0100 Subject: [PATCH 11/11] linting --- pandas/core/apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 0d8d000748569..83a3b29bfd7f0 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1190,7 +1190,7 @@ def apply_compat(self): """compat apply method for funcs in listlikes and dictlikes. Used for each callable when giving listlikes and dictlikes of callables to - apply. Needed for copatability with Pandas < v2.1. + apply. Needed for compatibility with Pandas < v2.1. .. versionadded:: 2.1.0 """