From 17126c4bb39bbc9b0812559f8d81cc6c03f28632 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 28 Oct 2022 14:59:07 -0400 Subject: [PATCH 1/3] DEPR: Enforce deprecation of partial failure in transform for lists and dicts --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/apply.py | 27 +------ pandas/tests/apply/test_frame_transform.py | 93 ++++++++++------------ pandas/tests/apply/test_invalid_arg.py | 2 +- pandas/tests/apply/test_series_apply.py | 83 ++++++++----------- 5 files changed, 79 insertions(+), 127 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5cf5c9aaccb52..0fe77d928be65 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -259,6 +259,7 @@ Removal of prior version deprecations/changes - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`) - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) +- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :meth:`Series.transform` and :meth:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 4f9af2d0c01d6..cccef939f94d4 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -266,34 +266,9 @@ def transform_dict_like(self, func): func = self.normalize_dictlike_arg("transform", obj, func) results: dict[Hashable, DataFrame | Series] = {} - failed_names = [] - all_type_errors = True for name, how in func.items(): colg = obj._gotitem(name, ndim=1) - try: - results[name] = colg.transform(how, 0, *args, **kwargs) - except Exception as err: - if str(err) in { - "Function did not transform", - "No transform functions were provided", - }: - raise err - else: - if not isinstance(err, TypeError): - all_type_errors = False - failed_names.append(name) - # combine results - if not results: - klass = TypeError if all_type_errors else ValueError - raise klass("Transform function failed") - if len(failed_names) > 0: - warnings.warn( - f"{failed_names} did not transform successfully. If any error is " - f"raised, this will raise in a future version of pandas. 
" - f"Drop these columns/ops to avoid this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) + results[name] = colg.transform(how, 0, *args, **kwargs) return concat(results, axis=1) def transform_str_or_callable(self, func) -> DataFrame | Series: diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index f884e8a7daf67..b7d1d2d363ddb 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -133,32 +133,35 @@ def func(x): @pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1]) def test_transform_bad_dtype(op, frame_or_series, request): # GH 35964 - if op == "rank": - request.node.add_marker( - pytest.mark.xfail( - raises=ValueError, reason="GH 40418: rank does not raise a TypeError" - ) - ) - elif op == "ngroup": + if op == "ngroup": request.node.add_marker( pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") ) obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms obj = tm.get_obj(obj, frame_or_series) + if op == "rank": + error = ValueError + msg = "Transform function failed" + else: + error = TypeError + msg = ( + "(not supported between instances of 'type' and 'type'" + "|unsupported operand type)" + ) - with pytest.raises(TypeError, match="unsupported operand|not supported"): + with pytest.raises(error, match=msg): obj.transform(op) - with pytest.raises(TypeError, match="Transform function failed"): + with pytest.raises(error, match=msg): obj.transform([op]) - with pytest.raises(TypeError, match="Transform function failed"): + with pytest.raises(error, match=msg): obj.transform({"A": op}) - with pytest.raises(TypeError, match="Transform function failed"): + with pytest.raises(error, match=msg): obj.transform({"A": [op]}) @pytest.mark.parametrize("op", frame_kernels_raise) -def test_transform_partial_failure_typeerror(request, op): +def test_transform_failure_typeerror(request, op): # 
GH 35964 if op == "ngroup": @@ -168,62 +171,50 @@ def test_transform_partial_failure_typeerror(request, op): # Using object makes most transform kernels fail df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]}) + if op == "rank": + error = ValueError + msg = "Transform function failed" + else: + error = TypeError + msg = ( + "(not supported between instances of 'type' and 'type'" + "|unsupported operand type)" + ) - expected = df[["B"]].transform([op]) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform([op]) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform([op]) - expected = df[["B"]].transform({"B": op}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": op, "B": op}) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform({"A": op, "B": op}) - expected = df[["B"]].transform({"B": [op]}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform({"A": [op], "B": [op]}) - expected = df.transform({"A": ["shift"], "B": [op]}) - match = rf"\['{op}'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op, "shift"], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(error, match=msg): + df.transform({"A": [op, "shift"], "B": [op]}) -def test_transform_partial_failure_valueerror(): +def test_transform_failure_valueerror(): # GH 40211 - match = ".*did not transform successfully" - def op(x): if np.sum(np.sum(x)) < 10: raise ValueError return x df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) + msg = "Transform function failed" - 
expected = df[["B"]].transform([op]) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform([op]) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform([op]) - expected = df[["B"]].transform({"B": op}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": op, "B": op}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform({"A": op, "B": op}) - expected = df[["B"]].transform({"B": [op]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform({"A": [op], "B": [op]}) - expected = df.transform({"A": ["shift"], "B": [op]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = df.transform({"A": [op, "shift"], "B": [op]}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + df.transform({"A": [op, "shift"], "B": [op]}) @pytest.mark.parametrize("use_apply", [True, False]) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index bf7f3abc04aa5..0d04011dea34b 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -283,7 +283,7 @@ def test_agg_none_to_type(): def test_transform_none_to_type(): # GH#34377 df = DataFrame({"a": [None]}) - msg = "Transform function failed" + msg = "argument must be a string, a bytes-like object or a number" with pytest.raises(TypeError, match=msg): df.transform({"a": int}) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index b67af8c521090..d02c62a307ad3 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -280,45 +280,37 @@ def test_transform_partial_failure(op, request): # GH 35964 if op in ("ffill", "bfill", 
"pad", "backfill", "shift"): request.node.add_marker( - pytest.mark.xfail( - raises=AssertionError, reason=f"{op} is successful on any dtype" - ) + pytest.mark.xfail(reason=f"{op} is successful on any dtype") ) # Using object makes most transform kernels fail ser = Series(3 * [object]) - expected = ser.transform(["shift"]) - match = rf"\['{op}'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform([op, "shift"]) - tm.assert_equal(result, expected) - - expected = ser.transform({"B": "shift"}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": op, "B": "shift"}) - tm.assert_equal(result, expected) - - expected = ser.transform({"B": ["shift"]}) - match = r"\['A'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [op], "B": ["shift"]}) - tm.assert_equal(result, expected) - - match = r"\['B'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - expected = ser.transform({"A": ["shift"], "B": [op]}) - match = rf"\['{op}'\] did not transform successfully" - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [op, "shift"], "B": [op]}) - tm.assert_equal(result, expected) + if op in ("fillna", "ngroup", "rank"): + error = ValueError + msg = "Transform function failed" + else: + error = TypeError + msg = ( + "(not supported between instances of 'type' and 'type'" + "|unsupported operand type)" + ) + + with pytest.raises(error, match=msg): + ser.transform([op, "shift"]) + + with pytest.raises(error, match=msg): + ser.transform({"A": op, "B": "shift"}) + + with pytest.raises(error, match=msg): + ser.transform({"A": [op], "B": ["shift"]}) + + with pytest.raises(error, match=msg): + ser.transform({"A": [op, "shift"], "B": [op]}) def 
test_transform_partial_failure_valueerror(): # GH 40211 - match = ".*did not transform successfully" - def noop(x): return x @@ -326,26 +318,19 @@ def raising_op(_): raise ValueError ser = Series(3 * [object]) + msg = "Transform function failed" + + with pytest.raises(ValueError, match=msg): + ser.transform([noop, raising_op]) + + with pytest.raises(ValueError, match=msg): + ser.transform({"A": raising_op, "B": noop}) + + with pytest.raises(ValueError, match=msg): + ser.transform({"A": [raising_op], "B": [noop]}) - expected = ser.transform([noop]) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform([noop, raising_op]) - tm.assert_equal(result, expected) - - expected = ser.transform({"B": noop}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": raising_op, "B": noop}) - tm.assert_equal(result, expected) - - expected = ser.transform({"B": [noop]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [raising_op], "B": [noop]}) - tm.assert_equal(result, expected) - - expected = ser.transform({"A": [noop], "B": [noop]}) - with tm.assert_produces_warning(FutureWarning, match=match): - result = ser.transform({"A": [noop, raising_op], "B": [noop]}) - tm.assert_equal(result, expected) + with pytest.raises(ValueError, match=msg): + ser.transform({"A": [noop, raising_op], "B": [noop]}) def test_demo(): From 650be5de2d8d2f0d00a25c65bc591fe86812858b Mon Sep 17 00:00:00 2001 From: richard Date: Fri, 28 Oct 2022 20:17:35 -0400 Subject: [PATCH 2/3] Warning message fix --- pandas/tests/apply/test_invalid_arg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 0d04011dea34b..6ed962c8f68e6 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -283,7 +283,7 @@ def test_agg_none_to_type(): def 
test_transform_none_to_type(): # GH#34377 df = DataFrame({"a": [None]}) - msg = "argument must be a string, a bytes-like object or a number" + msg = "argument must be a" with pytest.raises(TypeError, match=msg): df.transform({"a": int}) From 67346b06d0db11443185ddc7f68459eb4faaf4bf Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 30 Oct 2022 08:16:47 -0400 Subject: [PATCH 3/3] Cleaner msgs --- pandas/tests/apply/test_frame_transform.py | 16 ++++++++++------ pandas/tests/apply/test_series_apply.py | 8 +++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index b7d1d2d363ddb..4749cec018fe6 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -145,9 +145,11 @@ def test_transform_bad_dtype(op, frame_or_series, request): msg = "Transform function failed" else: error = TypeError - msg = ( - "(not supported between instances of 'type' and 'type'" - "|unsupported operand type)" + msg = "|".join( + [ + "not supported between instances of 'type' and 'type'", + "unsupported operand type", + ] ) with pytest.raises(error, match=msg): @@ -176,9 +178,11 @@ def test_transform_failure_typeerror(request, op): msg = "Transform function failed" else: error = TypeError - msg = ( - "(not supported between instances of 'type' and 'type'" - "|unsupported operand type)" + msg = "|".join( + [ + "not supported between instances of 'type' and 'type'", + "unsupported operand type", + ] ) with pytest.raises(error, match=msg): diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index d02c62a307ad3..9b51ea7fef5f8 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -291,9 +291,11 @@ def test_transform_partial_failure(op, request): msg = "Transform function failed" else: error = TypeError - msg = ( - "(not supported between instances of 
'type' and 'type'" - "|unsupported operand type)" + msg = "|".join( + [ + "not supported between instances of 'type' and 'type'", + "unsupported operand type", + ] ) with pytest.raises(error, match=msg):