From cd31cc1e445eafb64e1baa87506de6fdc683b7d7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 5 Jan 2024 16:58:29 -0500 Subject: [PATCH 1/2] DEPR: by_row="compat" in DataFrame.apply and Series.apply --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/apply.py | 9 ++++ pandas/core/frame.py | 5 +++ pandas/core/series.py | 6 +++ pandas/tests/apply/test_frame_apply.py | 58 +++++++++++++++++-------- pandas/tests/apply/test_series_apply.py | 38 +++++++++++++--- 6 files changed, 92 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 15e98cbb2a4d7..ad9b0d8d5b1d1 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -675,6 +675,7 @@ Other Deprecations - Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`) - Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`) - Deprecated not passing a tuple to :class:`.DataFrameGroupBy.get_group` or :class:`.SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`) +- Deprecated specifying ``by_row="compat"`` in :meth:`DataFrame.apply` and :meth:`Series.apply` (:issue:`53400`) - Deprecated string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`54275`) - Deprecated string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`54275`) - Deprecated string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`54275`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 784e11415ade6..fb43bf6713847 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1475,6 +1475,15 @@ def apply_compat(self): try: result = obj.apply(func, by_row="compat") + warnings.warn( + "apply operated row-by-row. This behavior is " + "deprecated and will be removed in a future version of pandas. To keep " + "the current behavior of operating row-by-row, use " + "map. To have apply operate on the entire Series, " + "pass by_row=False.", + FutureWarning, + stacklevel=find_stack_level(), + ) except (ValueError, AttributeError, TypeError): result = obj.apply(func, by_row=False) return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 73b5804d8c168..a7cf57c62554b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10182,6 +10182,11 @@ def apply( .. versionadded:: 2.1.0 + .. versionchange:: 2.2.0 + + Specifying ``by_row="compat"`` is deprecated and will be removed in + a future version of pandas. To operate row-by-row, use DataFrame.map. + engine : {'python', 'numba'}, default 'python' Choose between the python (default) engine or the numba engine in apply. diff --git a/pandas/core/series.py b/pandas/core/series.py index 487f57b7390a8..80bb1c1890111 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4803,6 +4803,12 @@ def apply( ``by_row`` has no effect when ``func`` is a string. .. versionadded:: 2.1.0 + + .. versionchange:: 2.2.0 + + Specifying ``by_row="compat"`` is deprecated and will be removed in + a future version of pandas. To operate row-by-row, use Series.map. + **kwargs Additional keyword arguments passed to func. diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 0839f005305a5..a14464cb71f6b 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -715,30 +715,33 @@ def test_infer_row_shape(): @pytest.mark.parametrize( - "ops, by_row, expected", + "ops, by_row, warn, expected", [ - ({"a": lambda x: x + 1}, "compat", DataFrame({"a": [2, 3]})), - ({"a": lambda x: x + 1}, False, DataFrame({"a": [2, 3]})), - ({"a": lambda x: x.sum()}, "compat", Series({"a": 3})), - ({"a": lambda x: x.sum()}, False, Series({"a": 3})), + ({"a": lambda x: x + 1}, "compat", FutureWarning, DataFrame({"a": [2, 3]})), + ({"a": lambda x: x + 1}, False, None, DataFrame({"a": [2, 3]})), + ({"a": lambda x: x.sum()}, "compat", None, Series({"a": 3})), + ({"a": lambda x: x.sum()}, False, None, Series({"a": 3})), ( {"a": ["sum", np.sum, lambda x: x.sum()]}, "compat", + None, DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), ), ( {"a": ["sum", np.sum, lambda x: x.sum()]}, False, + None, DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), ), - ({"a": lambda x: 1}, "compat", DataFrame({"a": [1, 1]})), - ({"a": lambda x: 1}, False, Series({"a": 1})), + ({"a": lambda x: 1}, "compat", FutureWarning, DataFrame({"a": [1, 1]})), + ({"a": lambda x: 1}, False, None, Series({"a": 1})), ], ) -def test_dictlike_lambda(ops, by_row, expected): +def test_dictlike_lambda(ops, by_row, warn, expected): # GH53601 df = DataFrame({"a": [1, 2]}) - result = df.apply(ops, by_row=by_row) + with tm.assert_produces_warning(warn, match="apply operated row-by-row"): + result = df.apply(ops, by_row=by_row) tm.assert_equal(result, expected) @@ -808,38 +811,53 @@ def test_with_dictlike_columns_with_infer(): @pytest.mark.parametrize( - "ops, by_row, expected", + "ops, by_row, warn, expected", [ - ([lambda x: x + 1], "compat", DataFrame({("a", ""): [2, 3]})), - ([lambda x: x + 1], False, DataFrame({("a", ""): [2, 3]})), - ([lambda x: x.sum()], "compat", DataFrame({"a": [3]}, index=[""])), - ([lambda x: x.sum()], False, DataFrame({"a": [3]}, index=[""])), + ( + [lambda x: x + 1], + "compat", + FutureWarning, + DataFrame({("a", ""): [2, 3]}), + ), + ([lambda x: x + 1], False, None, DataFrame({("a", ""): [2, 3]})), + ( + [lambda x: x.sum()], + "compat", + None, + DataFrame({"a": [3]}, index=[""]), + ), + ([lambda x: x.sum()], False, None, DataFrame({"a": [3]}, index=[""])), ( ["sum", np.sum, lambda x: x.sum()], "compat", + None, DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), ), ( ["sum", np.sum, lambda x: x.sum()], False, + None, DataFrame({"a": [3, 3, 3]}, index=["sum", "sum", ""]), ), ( [lambda x: x + 1, lambda x: 3], "compat", + FutureWarning, DataFrame([[2, 3], [3, 3]], columns=[["a", "a"], ["", ""]]), ), ( [lambda x: 2, lambda x: 3], False, + None, DataFrame({"a": [2, 3]}, ["", ""]), ), ], ) -def test_listlike_lambda(ops, by_row, expected): +def test_listlike_lambda(ops, by_row, warn, expected): # GH53601 df = DataFrame({"a": [1, 2]}) - result = df.apply(ops, by_row=by_row) + with tm.assert_produces_warning(warn, match="apply operated row-by-row"): + result = df.apply(ops, by_row=by_row) tm.assert_equal(result, expected) @@ -1106,7 +1124,10 @@ def test_agg_transform(axis, float_frame): tm.assert_frame_equal(result, expected) # list-like - result = float_frame.apply([np.sqrt], axis=axis) + + msg = "apply operated row-by-row" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() if axis in {0, "index"}: expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]]) @@ -1117,7 +1138,8 @@ def test_agg_transform(axis, float_frame): # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting - result = float_frame.apply([np.abs, np.sqrt], axis=axis) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = float_frame.apply([np.abs, np.sqrt], axis=axis) expected = zip_frames([f_abs, f_sqrt], axis=other_axis) if axis in {0, "index"}: expected.columns = MultiIndex.from_product( diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index df24fa08f48e1..7fe1000d5e7bd 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -287,6 +287,8 @@ def retrieve(targetRow, targetDF): def test_transform(string_series, by_row): # transforming functions + warn = FutureWarning if by_row == "compat" else None + with np.errstate(all="ignore"): f_sqrt = np.sqrt(string_series) f_abs = np.abs(string_series) @@ -297,7 +299,9 @@ def test_transform(string_series, by_row): tm.assert_series_equal(result, expected) # list-like - result = string_series.apply([np.sqrt], by_row=by_row) + msg = "apply operated row-by-row" + with tm.assert_produces_warning(warn, match=msg): + result = string_series.apply([np.sqrt], by_row=by_row) expected = f_sqrt.to_frame().copy() expected.columns = ["sqrt"] tm.assert_frame_equal(result, expected) @@ -310,7 +314,8 @@ def test_transform(string_series, by_row): # series and then concatting expected = concat([f_sqrt, f_abs], axis=1) expected.columns = ["sqrt", "absolute"] - result = string_series.apply([np.sqrt, np.abs], by_row=by_row) + with tm.assert_produces_warning(warn, match=msg): + result = string_series.apply([np.sqrt, np.abs], by_row=by_row) tm.assert_frame_equal(result, expected) # dict, provide renaming @@ -318,7 +323,8 @@ def test_transform(string_series, by_row): expected.columns = ["foo", "bar"] expected = expected.unstack().rename("series") - result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row) + with tm.assert_produces_warning(warn, match=msg): + result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row) tm.assert_series_equal(result.reindex_like(expected), expected) @@ -617,10 +623,13 @@ def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row): ) def test_apply_listlike_transformer(string_series, ops, names, by_row): # GH 39140 + warn = FutureWarning if by_row == "compat" else None with np.errstate(all="ignore"): expected = concat([op(string_series) for op in ops], axis=1) expected.columns = names - result = string_series.apply(ops, by_row=by_row) + msg = "apply operated row-by-row" + with tm.assert_produces_warning(warn, match=msg): + result = string_series.apply(ops, by_row=by_row) tm.assert_frame_equal(result, expected) @@ -634,7 +643,13 @@ def test_apply_listlike_transformer(string_series, ops, names, by_row): def test_apply_listlike_lambda(ops, expected, by_row): # GH53400 ser = Series([1, 2, 3]) - result = ser.apply(ops, by_row=by_row) + if by_row == "compat" and isinstance(expected, DataFrame): + warn = FutureWarning + else: + warn = None + msg = "apply operated row-by-row" + with tm.assert_produces_warning(warn, match=msg): + result = ser.apply(ops, by_row=by_row) tm.assert_equal(result, expected) @@ -649,10 +664,13 @@ def test_apply_listlike_lambda(ops, expected, by_row): ) def test_apply_dictlike_transformer(string_series, ops, by_row): # GH 39140 + warn = FutureWarning if by_row == "compat" else None with np.errstate(all="ignore"): expected = concat({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name - result = string_series.apply(ops, by_row=by_row) + msg = "apply operated row-by-row" + with tm.assert_produces_warning(warn, match=msg): + result = string_series.apply(ops, by_row=by_row) tm.assert_series_equal(result, expected) @@ -669,7 +687,13 @@ def test_apply_dictlike_transformer(string_series, ops, by_row): def test_apply_dictlike_lambda(ops, by_row, expected): # GH53400 ser = Series([1, 2, 3]) - result = ser.apply(ops, by_row=by_row) + if by_row == "compat" and len(expected) == 3: + warn = FutureWarning + else: + warn = None + msg = "apply operated row-by-row" + with tm.assert_produces_warning(warn, match=msg): + result = ser.apply(ops, by_row=by_row) tm.assert_equal(result, expected) From 34bc3a82b043e749df348a9b448559457efc29f9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 6 Jan 2024 09:15:03 -0500 Subject: [PATCH 2/2] fixup docstrings --- pandas/core/frame.py | 2 +- pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a7cf57c62554b..2e128f33acdd5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10182,7 +10182,7 @@ def apply( .. versionadded:: 2.1.0 - .. versionchange:: 2.2.0 + .. versionchanged:: 2.2.0 Specifying ``by_row="compat"`` is deprecated and will be removed in a future version of pandas. To operate row-by-row, use DataFrame.map. diff --git a/pandas/core/series.py b/pandas/core/series.py index 80bb1c1890111..cff792e96d354 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4804,7 +4804,7 @@ def apply( .. versionadded:: 2.1.0 - .. versionchange:: 2.2.0 + .. versionchanged:: 2.2.0 Specifying ``by_row="compat"`` is deprecated and will be removed in a future version of pandas. To operate row-by-row, use Series.map.