From c7992d5ea9bd6549e67255f1bc6404b7579a2dc0 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 4 Mar 2021 20:13:33 -0500 Subject: [PATCH] DEPR: Partial failure in Series.transform and DataFrame.transform. --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/apply.py | 14 ++++++++++- pandas/tests/apply/common.py | 10 ++++++++ pandas/tests/apply/test_frame_transform.py | 24 ++++++++---------- pandas/tests/apply/test_series_apply.py | 29 ++++++++++++++++++++++ 5 files changed, 64 insertions(+), 14 deletions(-) create mode 100644 pandas/tests/apply/common.py diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 477805b09afff..36e2b6a07eb71 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -363,6 +363,7 @@ Deprecations - Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`) - Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`) - Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`) +- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 203a0c675282d..c159abe55b38c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -15,6 +15,7 @@ Union, cast, ) +import warnings import numpy as np @@ -267,6 +268,7 @@ def transform_dict_like(self, func): func = self.normalize_dictlike_arg("transform", obj, func) results: Dict[Hashable, FrameOrSeriesUnion] = {} + failed_names = [] for name, how in func.items(): colg = obj._gotitem(name, ndim=1) try: @@ -277,10 +279,20 @@ def transform_dict_like(self, func): "No transform functions were provided", }: raise err - + else: + failed_names.append(name) # combine results if not results: raise ValueError("Transform function failed") + if failed_names: + warnings.warn( + f"{failed_names} did not transform successfully. " + f"Allowing for partial failure is deprecated, this will raise " + f"a ValueError in a future version of pandas. " + f"Drop these columns/ops to avoid this warning.", + FutureWarning, + stacklevel=4, + ) return concat(results, axis=1) def transform_str_or_callable(self, func) -> FrameOrSeriesUnion: diff --git a/pandas/tests/apply/common.py b/pandas/tests/apply/common.py new file mode 100644 index 0000000000000..91b831bcbb684 --- /dev/null +++ b/pandas/tests/apply/common.py @@ -0,0 +1,10 @@ +from pandas.core.groupby.base import transformation_kernels + +# tshift only works on time index and is deprecated +# There is no Series.cumcount or DataFrame.cumcount +series_transform_kernels = [ + x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] +] +frame_transform_kernels = [ + x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] +] diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 5dc828dea9e35..212a54b78dead 100644 --- a/pandas/tests/apply/test_frame_transform.py +++
b/pandas/tests/apply/test_frame_transform.py @@ -9,15 +9,9 @@ Series, ) import pandas._testing as tm -from pandas.core.groupby.base import transformation_kernels +from pandas.tests.apply.common import frame_transform_kernels from pandas.tests.frame.common import zip_frames -# tshift only works on time index and is deprecated -# There is no DataFrame.cumcount -frame_kernels = [ - x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] -] - def unpack_obj(obj, klass, axis): """ @@ -44,7 +38,7 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): tm.assert_equal(result, expected) -@pytest.mark.parametrize("op", frame_kernels) +@pytest.mark.parametrize("op", frame_transform_kernels) def test_transform_groupby_kernel(axis, float_frame, op): # GH 35964 @@ -158,7 +152,7 @@ def test_transform_method_name(method): wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"] -frame_kernels_raise = [x for x in frame_kernels if x not in wont_fail] +frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail] # mypy doesn't allow adding lists of different types @@ -187,21 +181,25 @@ def test_transform_bad_dtype(op, frame_or_series): @pytest.mark.parametrize("op", frame_kernels_raise) def test_transform_partial_failure(op): - # GH 35964 + # GH 35964 & GH 40211 + match = "Allowing for partial failure is deprecated" # Using object makes most transform kernels fail df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) expected = df[["B"]].transform([op]) - result = df.transform([op]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform([op]) tm.assert_equal(result, expected) expected = df[["B"]].transform({"B": op}) - result = df.transform({"B": op}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": op, "B": op}) tm.assert_equal(result, expected) expected = df[["B"]].transform({"B": [op]}) - result = df.transform({"B": [op]}) + with 
tm.assert_produces_warning(FutureWarning, match=match): + result = df.transform({"A": [op], "B": [op]}) tm.assert_equal(result, expected) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index e2f47fe4ac3a7..dcb5de29da320 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -20,6 +20,7 @@ timedelta_range, ) import pandas._testing as tm +from pandas.tests.apply.common import series_transform_kernels def test_series_map_box_timedelta(): @@ -256,6 +257,34 @@ def test_transform(string_series): tm.assert_series_equal(result.reindex_like(expected), expected) +@pytest.mark.parametrize("op", series_transform_kernels) +def test_transform_partial_failure(op, request): + # GH 35964 & GH 40211 + if op in ("ffill", "bfill", "pad", "backfill", "shift"): + request.node.add_marker( + pytest.mark.xfail(reason=f"{op} is successful on any dtype") + ) + match = "Allowing for partial failure is deprecated" + + # Using object makes most transform kernels fail + ser = Series(3 * [object]) + + expected = ser.transform(["shift"]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform([op, "shift"]) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": "shift"}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": op, "B": "shift"}) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": ["shift"]}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": [op], "B": ["shift"]}) + tm.assert_equal(result, expected) + + def test_demo(): # demonstration tests s = Series(range(6), dtype="int64", name="series")