Skip to content

DEPR: Series/DataFrame.transform partial failure except TypeError #40288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 26, 2021
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ Deprecations
- Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`, :issue:`40425`)
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`)
- Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like and one of its functions raises an exception other than ``TypeError``; in a future version, any exception other than ``TypeError`` will be raised instead of the failing column or operation being dropped with a warning (:issue:`40211`)
- Deprecated support for ``np.ma.mrecords.MaskedRecords`` in the :class:`DataFrame` constructor, pass ``{name: data[name] for name in data.dtype.names}`` instead (:issue:`40363`)

.. ---------------------------------------------------------------------------
Expand Down
20 changes: 13 additions & 7 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,11 @@ def transform(self) -> FrameOrSeriesUnion:
func = cast(AggFuncTypeBase, func)
try:
result = self.transform_str_or_callable(func)
except Exception:
raise ValueError("Transform function failed")
except Exception as err:
if isinstance(err, TypeError):
raise err
else:
raise ValueError("Transform function failed")

# Functions that transform may return empty Series/DataFrame
# when the dtype is not appropriate
Expand Down Expand Up @@ -265,6 +268,7 @@ def transform_dict_like(self, func):

results: Dict[Hashable, FrameOrSeriesUnion] = {}
failed_names = []
all_type_errors = True
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
try:
Expand All @@ -275,16 +279,18 @@ def transform_dict_like(self, func):
"No transform functions were provided",
}:
raise err
else:
elif not isinstance(err, TypeError):
all_type_errors = False
failed_names.append(name)
# combine results
if not results:
raise ValueError("Transform function failed")
klass = TypeError if all_type_errors else ValueError
raise klass("Transform function failed")
if len(failed_names) > 0:
warnings.warn(
f"{failed_names} did not transform successfully. "
f"Allowing for partial failure is deprecated, this will raise "
f"a ValueError in a future version of pandas."
f"{failed_names} did not transform successfully and did not raise "
f"a TypeError. If any error is raised except for TypeError, "
f"this will raise in a future version of pandas. "
f"Drop these columns/ops to avoid this warning.",
FutureWarning,
stacklevel=4,
Expand Down
58 changes: 48 additions & 10 deletions pandas/tests/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,35 +158,68 @@ def test_transform_method_name(method):
# mypy doesn't allow adding lists of different types
# https://github.com/python/mypy/issues/5492
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series):
def test_transform_bad_dtype(op, frame_or_series, request):
# GH 35964
if op == "rank":
request.node.add_marker(
pytest.mark.xfail(reason="GH 40418: rank does not raise a TypeError")
)

obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms
if frame_or_series is not DataFrame:
obj = obj["A"]

msg = "Transform function failed"

# tshift is deprecated
warn = None if op != "tshift" else FutureWarning
with tm.assert_produces_warning(warn):
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="unsupported operand|not supported"):
obj.transform(op)
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="Transform function failed"):
obj.transform([op])
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="Transform function failed"):
obj.transform({"A": op})
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match="Transform function failed"):
obj.transform({"A": [op]})


@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_partial_failure(op):
# GH 35964 & GH 40211
match = "Allowing for partial failure is deprecated"
def test_transform_partial_failure_typeerror(op):
# GH 35964

if op == "rank":
pytest.skip("GH 40418: rank does not raise a TypeError")

# Using object makes most transform kernels fail
df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})

expected = df[["B"]].transform([op])
result = df.transform([op])
tm.assert_equal(result, expected)

expected = df[["B"]].transform({"B": op})
result = df.transform({"A": op, "B": op})
tm.assert_equal(result, expected)

expected = df[["B"]].transform({"B": [op]})
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)

expected = df.transform({"A": ["shift"], "B": [op]})
result = df.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)


def test_transform_partial_failure_valueerror():
# GH 40211
match = ".*did not transform successfully and did not raise a TypeError"

def op(x):
if np.sum(np.sum(x)) < 10:
raise ValueError
return x

df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})

expected = df[["B"]].transform([op])
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform([op])
Expand All @@ -202,6 +235,11 @@ def test_transform_partial_failure(op):
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)

expected = df.transform({"A": ["shift"], "B": [op]})
with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False):
result = df.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)


@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def test_transform_none_to_type():
# GH#34377
df = DataFrame({"a": [None]})
msg = "Transform function failed"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
df.transform({"a": int})


Expand Down
49 changes: 42 additions & 7 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,29 +259,64 @@ def test_transform(string_series):

@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
# GH 35964 & GH 40211
# GH 35964
if op in ("ffill", "bfill", "pad", "backfill", "shift"):
request.node.add_marker(
pytest.mark.xfail(reason=f"{op} is successful on any dtype")
)
match = "Allowing for partial failure is deprecated"
if op in ("rank", "fillna"):
pytest.skip(f"{op} doesn't raise TypeError on object")

# Using object makes most transform kernels fail
ser = Series(3 * [object])

expected = ser.transform(["shift"])
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform([op, "shift"])
result = ser.transform([op, "shift"])
tm.assert_equal(result, expected)

expected = ser.transform({"B": "shift"})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": op, "B": "shift"})
result = ser.transform({"A": op, "B": "shift"})
tm.assert_equal(result, expected)

expected = ser.transform({"B": ["shift"]})
result = ser.transform({"A": [op], "B": ["shift"]})
tm.assert_equal(result, expected)

expected = ser.transform({"A": ["shift"], "B": [op]})
result = ser.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)


def test_transform_partial_failure_valueerror():
# GH 40211
match = ".*did not transform successfully and did not raise a TypeError"

def noop(x):
return x

def raising_op(_):
raise ValueError

ser = Series(3 * [object])

expected = ser.transform([noop])
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform([noop, raising_op])
tm.assert_equal(result, expected)

expected = ser.transform({"B": noop})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [op], "B": ["shift"]})
result = ser.transform({"A": raising_op, "B": noop})
tm.assert_equal(result, expected)

expected = ser.transform({"B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [raising_op], "B": [noop]})
tm.assert_equal(result, expected)

expected = ser.transform({"A": [noop], "B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False):
result = ser.transform({"A": [noop, raising_op], "B": [noop]})
tm.assert_equal(result, expected)


Expand Down