Skip to content

Commit 7e5a700

Browse files
jbrockmendelMarcoGorelli
authored and
MarcoGorelli
committed
DEPR: enforce ufunc, reduction diff deprecations (pandas-dev#49717)
1 parent 46de017 commit 7e5a700

File tree

11 files changed

+36
-67
lines changed

11 files changed

+36
-67
lines changed

doc/source/whatsnew/v2.0.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,9 @@ Removal of prior version deprecations/changes
553553
- Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
554554
- Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it as a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__` behavior (:issue:`33469`)
555555
- Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
556+
- Changed behavior of :meth:`Series.diff` and :meth:`DataFrame.diff` with :class:`ExtensionDtype` dtypes whose arrays do not implement ``diff``; these now raise ``TypeError`` rather than casting to numpy (:issue:`31025`)
557+
- Enforced deprecation of calling numpy "ufunc"s on :class:`DataFrame` with ``method="outer"``; this now raises ``NotImplementedError`` (:issue:`36955`)
558+
- Enforced deprecation disallowing passing ``numeric_only=True`` to :class:`Series` reductions (``rank``, ``any``, ``all``, ...) with non-numeric dtype (:issue:`47500`)
556559
- Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`)
557560
- Enforced deprecation of ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`)
558561
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)

pandas/core/algorithms.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -1616,14 +1616,10 @@ def diff(arr, n: int, axis: AxisInt = 0):
16161616
raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
16171617
return op(arr, arr.shift(n))
16181618
else:
1619-
warnings.warn(
1620-
"dtype lost in 'diff()'. In the future this will raise a "
1621-
"TypeError. Convert to a suitable dtype prior to calling 'diff'.",
1622-
FutureWarning,
1623-
stacklevel=find_stack_level(),
1619+
raise TypeError(
1620+
f"{type(arr).__name__} has no 'diff' method. "
1621+
"Convert to a suitable dtype prior to calling 'diff'."
16241622
)
1625-
arr = np.asarray(arr)
1626-
dtype = arr.dtype
16271623

16281624
is_timedelta = False
16291625
if needs_i8_conversion(arr.dtype):

pandas/core/arraylike.py

-13
Original file line numberDiff line numberDiff line change
@@ -341,19 +341,6 @@ def _reconstruct(result):
341341

342342
if result.ndim != self.ndim:
343343
if method == "outer":
344-
if self.ndim == 2:
345-
# we already deprecated for Series
346-
msg = (
347-
"outer method for ufunc {} is not implemented on "
348-
"pandas objects. Returning an ndarray, but in the "
349-
"future this will raise a 'NotImplementedError'. "
350-
"Consider explicitly converting the DataFrame "
351-
"to an array with '.to_numpy()' first."
352-
)
353-
warnings.warn(
354-
msg.format(ufunc), FutureWarning, stacklevel=find_stack_level()
355-
)
356-
return result
357344
raise NotImplementedError
358345
return result
359346
if isinstance(result, BlockManager):

pandas/core/generic.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -9000,12 +9000,9 @@ def ranker(data):
90009000
if numeric_only:
90019001
if self.ndim == 1 and not is_numeric_dtype(self.dtype):
90029002
# GH#47500
9003-
warnings.warn(
9004-
f"Calling Series.rank with numeric_only={numeric_only} and dtype "
9005-
f"{self.dtype} is deprecated and will raise a TypeError in a "
9006-
"future version of pandas",
9007-
category=FutureWarning,
9008-
stacklevel=find_stack_level(),
9003+
raise TypeError(
9004+
"Series.rank does not allow numeric_only=True with "
9005+
"non-numeric dtype."
90099006
)
90109007
data = self._get_numeric_data()
90119008
else:
@@ -10946,6 +10943,7 @@ def _stat_function(
1094610943
FutureWarning,
1094710944
stacklevel=find_stack_level(),
1094810945
)
10946+
1094910947
if axis is lib.no_default:
1095010948
axis = None
1095110949

pandas/core/series.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
cast,
1919
overload,
2020
)
21-
import warnings
2221
import weakref
2322

2423
import numpy as np
@@ -69,7 +68,6 @@
6968
Substitution,
7069
doc,
7170
)
72-
from pandas.util._exceptions import find_stack_level
7371
from pandas.util._validators import (
7472
validate_ascending,
7573
validate_bool_kwarg,
@@ -4579,14 +4577,9 @@ def _reduce(
45794577
if name in ["any", "all"]:
45804578
kwd_name = "bool_only"
45814579
# GH#47500 - change to TypeError to match other methods
4582-
warnings.warn(
4583-
f"Calling Series.{name} with {kwd_name}={numeric_only} and "
4584-
f"dtype {self.dtype} will raise a TypeError in the future",
4585-
FutureWarning,
4586-
stacklevel=find_stack_level(),
4587-
)
4588-
raise NotImplementedError(
4589-
f"Series.{name} does not implement {kwd_name}."
4580+
raise TypeError(
4581+
f"Series.{name} does not allow {kwd_name}={numeric_only} "
4582+
"with non-numeric dtypes."
45904583
)
45914584
with np.errstate(all="ignore"):
45924585
return op(delegate, skipna=skipna, **kwds)

pandas/tests/arrays/categorical/test_algos.py

+9-12
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,12 @@ def test_isin_empty(empty):
6969

7070

7171
def test_diff():
72-
s = pd.Series([1, 2, 3], dtype="category")
73-
with tm.assert_produces_warning(FutureWarning):
74-
result = s.diff()
75-
expected = pd.Series([np.nan, 1, 1])
76-
tm.assert_series_equal(result, expected)
77-
78-
expected = expected.to_frame(name="A")
79-
df = s.to_frame(name="A")
80-
with tm.assert_produces_warning(FutureWarning):
81-
result = df.diff()
82-
83-
tm.assert_frame_equal(result, expected)
72+
ser = pd.Series([1, 2, 3], dtype="category")
73+
74+
msg = "Convert to a suitable dtype"
75+
with pytest.raises(TypeError, match=msg):
76+
ser.diff()
77+
78+
df = ser.to_frame(name="A")
79+
with pytest.raises(TypeError, match=msg):
80+
df.diff()

pandas/tests/frame/test_reductions.py

+1
Original file line numberDiff line numberDiff line change
@@ -1489,6 +1489,7 @@ def test_median_categorical_dtype_nuisance_column(self):
14891489
# TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead
14901490
# of expected.values
14911491

1492+
@pytest.mark.filterwarnings("ignore:.*will return a scalar.*:FutureWarning")
14921493
@pytest.mark.parametrize("method", ["min", "max"])
14931494
def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method):
14941495
# GH#28949 DataFrame.min should behave like Series.min

pandas/tests/frame/test_ufunc.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,10 @@ def test_unary_accumulate_axis():
200200
tm.assert_frame_equal(result, expected)
201201

202202

203-
def test_frame_outer_deprecated():
203+
def test_frame_outer_disallowed():
204204
df = pd.DataFrame({"A": [1, 2]})
205-
with tm.assert_produces_warning(FutureWarning):
205+
with pytest.raises(NotImplementedError, match=""):
206+
# deprecation enforced in 2.0
206207
np.subtract.outer(df, df)
207208

208209

pandas/tests/groupby/test_function.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1528,8 +1528,10 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
15281528
err_category = TypeError
15291529
err_msg = f"{groupby_func} is not supported for object dtype"
15301530
elif groupby_func == "skew":
1531-
warn_category = FutureWarning
1532-
warn_msg = "will raise a TypeError in the future"
1531+
warn_category = None
1532+
warn_msg = ""
1533+
err_category = TypeError
1534+
err_msg = "Series.skew does not allow numeric_only=True with non-numeric"
15331535
else:
15341536
warn_category = FutureWarning
15351537
warn_msg = "This will raise a TypeError"

pandas/tests/groupby/transform/test_transform.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -495,9 +495,9 @@ def test_transform_coercion():
495495

496496
expected = g.transform(np.mean)
497497

498-
msg = "will return a scalar mean"
499-
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
500-
result = g.transform(lambda x: np.mean(x))
498+
# in 2.0 np.mean on a DataFrame is equivalent to frame.mean(axis=None)
499+
# which not gives a scalar instead of Series
500+
result = g.transform(lambda x: np.mean(x))
501501
tm.assert_frame_equal(result, expected)
502502

503503
with tm.assert_produces_warning(None):

pandas/tests/series/test_api.py

+3-12
Original file line numberDiff line numberDiff line change
@@ -275,18 +275,9 @@ def test_numeric_only(self, kernel, has_numeric_only, dtype):
275275
with pytest.raises(TypeError, match=msg):
276276
method(*args, numeric_only=True)
277277
elif dtype is object:
278-
if kernel == "rank":
279-
msg = "Calling Series.rank with numeric_only=True and dtype object"
280-
with tm.assert_produces_warning(FutureWarning, match=msg):
281-
method(*args, numeric_only=True)
282-
else:
283-
warn_msg = (
284-
f"Calling Series.{kernel} with numeric_only=True and dtype object"
285-
)
286-
err_msg = f"Series.{kernel} does not implement numeric_only"
287-
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
288-
with pytest.raises(NotImplementedError, match=err_msg):
289-
method(*args, numeric_only=True)
278+
msg = f"Series.{kernel} does not allow numeric_only=True with non-numeric"
279+
with pytest.raises(TypeError, match=msg):
280+
method(*args, numeric_only=True)
290281
else:
291282
result = method(*args, numeric_only=True)
292283
expected = method(*args, numeric_only=False)

0 commit comments

Comments
 (0)