From 6fd025a3f9f4bd48babf0145c6e089484f2b2943 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 7 Mar 2021 11:54:33 -0500 Subject: [PATCH 1/4] DEPR: Series/DataFrame.transform partial failure except TypeError --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/apply.py | 8 ++--- pandas/tests/apply/test_frame_transform.py | 29 +++++++++++++++-- pandas/tests/apply/test_series_apply.py | 38 ++++++++++++++++++---- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index fb8eecdaa275e..ef5f257b217e9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -365,7 +365,7 @@ Deprecations - Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`) - Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`) - Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`) -- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`) +- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and raises anything but ``TypeError``; ``func`` raising anything but a ``TypeError`` will raise in a future version (:issue:`40211`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/apply.py b/pandas/core/apply.py index c159abe55b38c..ad899a5176041 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -279,16 +279,16 @@ def transform_dict_like(self, func): "No transform functions were provided", }: raise err - else: + elif not isinstance(err, TypeError): failed_names.append(name) # combine results if not results: raise ValueError("Transform function failed") if len(failed_names) > 0: warnings.warn( - f"{failed_names} did not transform successfully. " - f"Allowing for partial failure is deprecated, this will raise " - f"a ValueError in a future version of pandas." + f"{failed_names} did not transform successfully and did not raise " + f"a TypeError. If any error is raised except for TypeError, " + f"this will raise in a future version of pandas. " f"Drop these columns/ops to avoid this warning.", FutureWarning, stacklevel=4, diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 212a54b78dead..5965e05aca9e1 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -180,13 +180,36 @@ def test_transform_bad_dtype(op, frame_or_series): @pytest.mark.parametrize("op", frame_kernels_raise) -def test_transform_partial_failure(op): - # GH 35964 & GH 40211 - match = "Allowing for partial failure is deprecated" +def test_transform_partial_failure_typeerror(op): + # GH 35964 # Using object makes most transform kernels fail df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) + expected = df[["B"]].transform([op]) + result = df.transform([op]) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": op}) + result = df.transform({"A": op, "B": op}) + tm.assert_equal(result, expected) + + expected = df[["B"]].transform({"B": [op]}) + result = df.transform({"A": [op], "B": [op]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully and did not raise a TypeError" + + def op(x): + if np.sum(np.sum(x)) < 10: + raise ValueError + return x + + df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) + expected = df[["B"]].transform([op]) with tm.assert_produces_warning(FutureWarning, match=match): result = df.transform([op]) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index dcb5de29da320..223763cf20e98 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -259,29 +259,53 @@ def test_transform(string_series): @pytest.mark.parametrize("op", series_transform_kernels) def test_transform_partial_failure(op, request): - # GH 35964 & GH 40211 + # GH 35964 if op in ("ffill", "bfill", "pad", "backfill", "shift"): request.node.add_marker( pytest.mark.xfail(reason=f"{op} is successful on any dtype") ) - match = "Allowing for partial failure is deprecated" # Using object makes most transform kernels fail ser = Series(3 * [object]) expected = ser.transform(["shift"]) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform([op, "shift"]) + result = ser.transform([op, "shift"]) tm.assert_equal(result, expected) expected = ser.transform({"B": "shift"}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": op, "B": "shift"}) + result = ser.transform({"A": op, "B": "shift"}) tm.assert_equal(result, expected) expected = ser.transform({"B": ["shift"]}) + result = ser.transform({"A": [op], "B": ["shift"]}) + tm.assert_equal(result, expected) + + +def test_transform_partial_failure_valueerror(): + # GH 40211 + match = ".*did not transform successfully and did not raise a TypeError" + + def noop(x): + return x + + def raising_op(_): + raise ValueError + + ser = Series(3 * [object]) + + expected = ser.transform([noop]) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform([noop, raising_op]) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": noop}) + with tm.assert_produces_warning(FutureWarning, match=match): + result = ser.transform({"A": raising_op, "B": noop}) + tm.assert_equal(result, expected) + + expected = ser.transform({"B": [noop]}) with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [op], "B": ["shift"]}) + result = ser.transform({"A": [raising_op], "B": [noop]}) tm.assert_equal(result, expected) From f33c9f3bce452dc2ebb1c995a8e62182fbe1aec5 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 13 Mar 2021 09:16:49 -0500 Subject: [PATCH 2/4] Remove incorrect test --- pandas/tests/apply/test_frame_transform.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 5965e05aca9e1..6f0daeab74a3c 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -194,10 +194,6 @@ def test_transform_partial_failure_typeerror(op): result = df.transform({"A": op, "B": op}) tm.assert_equal(result, expected) - expected = df[["B"]].transform({"B": [op]}) - result = df.transform({"A": [op], "B": [op]}) - tm.assert_equal(result, expected) - def test_transform_partial_failure_valueerror(): # GH 40211 From 2700085850cd9298f073ed83c37b02e3bb729da1 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 13 Mar 2021 10:50:38 -0500 Subject: [PATCH 3/4] Fix issues with TypeError --- pandas/core/apply.py | 12 ++++++-- pandas/tests/apply/test_frame_transform.py | 33 +++++++++++++++++----- pandas/tests/apply/test_invalid_arg.py | 2 +- pandas/tests/apply/test_series_apply.py | 11 ++++++++ 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 8c62f0256b134..e9e86c86ee224 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -227,8 +227,11 @@ def transform(self) -> FrameOrSeriesUnion: func = cast(AggFuncTypeBase, func) try: result = self.transform_str_or_callable(func) - except Exception: - raise ValueError("Transform function failed") + except Exception as err: + if isinstance(err, TypeError): + raise err + else: + raise ValueError("Transform function failed") # Functions that transform may return empty Series/DataFrame # when the dtype is not appropriate @@ -265,6 +268,7 @@ def transform_dict_like(self, func): results: Dict[Hashable, FrameOrSeriesUnion] = {} failed_names = [] + all_type_errors = True for name, how in func.items(): colg = obj._gotitem(name, ndim=1) try: @@ -276,10 +280,12 @@ def transform_dict_like(self, func): }: raise err elif not isinstance(err, TypeError): + all_type_errors = False failed_names.append(name) # combine results if not results: - raise ValueError("Transform function failed") + klass = TypeError if all_type_errors else ValueError + raise klass("Transform function failed") if len(failed_names) > 0: warnings.warn( f"{failed_names} did not transform successfully and did not raise " diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 6f0daeab74a3c..5b3878f5fa690 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -158,24 +158,27 @@ def test_transform_method_name(method): # mypy doesn't allow adding lists of different types # https://github.com/python/mypy/issues/5492 @pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) -def test_transform_bad_dtype(op, frame_or_series): +def test_transform_bad_dtype(op, frame_or_series, request): # GH 35964 + if op == "rank": + request.node.add_marker( + pytest.mark.xfail(reason="GH 40418: rank does not raise a TypeError") + ) + obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms if frame_or_series is not DataFrame: obj = obj["A"] - msg = "Transform function failed" - # tshift is deprecated warn = None if op != "tshift" else FutureWarning with tm.assert_produces_warning(warn): - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="unsupported operand|not supported"): obj.transform(op) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="Transform function failed"): obj.transform([op]) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="Transform function failed"): obj.transform({"A": op}) - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match="Transform function failed"): obj.transform({"A": [op]}) @@ -183,6 +186,9 @@ def test_transform_bad_dtype(op, frame_or_series): def test_transform_partial_failure_typeerror(op): # GH 35964 + if op == "rank": + pytest.skip("GH 40418: rank does not raise a TypeError") + # Using object makes most transform kernels fail df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) @@ -194,6 +200,14 @@ def test_transform_partial_failure_typeerror(op): result = df.transform({"A": op, "B": op}) tm.assert_equal(result, expected) + expected = df[["B"]].transform({"B": [op]}) + result = df.transform({"A": [op], "B": [op]}) + tm.assert_equal(result, expected) + + expected = df.transform({"A": ["shift"], "B": [op]}) + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + def test_transform_partial_failure_valueerror(): # GH 40211 @@ -221,6 +235,11 @@ def op(x): result = df.transform({"A": [op], "B": [op]}) tm.assert_equal(result, expected) + expected = df.transform({"A": ["shift"], "B": [op]}) + with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False): + result = df.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + @pytest.mark.parametrize("use_apply", [True, False]) def test_transform_passes_args(use_apply, frame_or_series): diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 2499c90535469..5bc860c6e8e6d 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -296,7 +296,7 @@ def test_transform_none_to_type(): # GH#34377 df = DataFrame({"a": [None]}) msg = "Transform function failed" - with pytest.raises(ValueError, match=msg): + with pytest.raises(TypeError, match=msg): df.transform({"a": int}) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 223763cf20e98..6722fc43aa75e 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -264,6 +264,8 @@ def test_transform_partial_failure(op, request): request.node.add_marker( pytest.mark.xfail(reason=f"{op} is successful on any dtype") ) + if op in ("rank", "fillna"): + pytest.skip(f"{op} doesn't raise TypeError on object") # Using object makes most transform kernels fail ser = Series(3 * [object]) @@ -280,6 +282,10 @@ def test_transform_partial_failure(op, request): result = ser.transform({"A": [op], "B": ["shift"]}) tm.assert_equal(result, expected) + expected = ser.transform({"A": ["shift"], "B": [op]}) + result = ser.transform({"A": [op, "shift"], "B": [op]}) + tm.assert_equal(result, expected) + def test_transform_partial_failure_valueerror(): # GH 40211 @@ -308,6 +314,11 @@ def raising_op(_): result = ser.transform({"A": [raising_op], "B": [noop]}) tm.assert_equal(result, expected) + expected = ser.transform({"A": [noop], "B": [noop]}) + with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False): + result = ser.transform({"A": [noop, raising_op], "B": [noop]}) + tm.assert_equal(result, expected) + def test_demo(): # demonstration tests From b1685ccc2829d6b315a445c21fe50019eeec3687 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 20 Mar 2021 09:15:40 -0400 Subject: [PATCH 4/4] Cleaner exception handling --- pandas/core/apply.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7577c7089af67..090ae266906f3 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -227,11 +227,10 @@ def transform(self) -> FrameOrSeriesUnion: func = cast(AggFuncTypeBase, func) try: result = self.transform_str_or_callable(func) + except TypeError: + raise except Exception as err: - if isinstance(err, TypeError): - raise err - else: - raise ValueError("Transform function failed") + raise ValueError("Transform function failed") from err # Functions that transform may return empty Series/DataFrame # when the dtype is not appropriate