DEPR: enforce ufunc, reduction diff deprecations (pandas-dev#49717)

jbrockmendel · MarcoGorelli · commit 7e5a700d6ad7 · 2022-11-18T12:20:56.000Z
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -553,6 +553,9 @@ Removal of prior version deprecations/changes
 - Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
 - Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it is a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__`` behavior (:issue:`33469`)
 - Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
+- Changed behavior of :meth:`Series.diff` and :meth:`DataFrame.diff` with :class:`ExtensionDtype` dtypes whose arrays do not implement ``diff``; these now raise ``TypeError`` rather than casting to numpy (:issue:`31025`)
+- Enforced deprecation of calling numpy "ufunc"s on :class:`DataFrame` with ``method="outer"``; this now raises ``NotImplementedError`` (:issue:`36955`)
+- Enforced deprecation disallowing passing ``numeric_only=True`` to :class:`Series` reductions (``rank``, ``any``, ``all``, ...) with non-numeric dtype (:issue:`47500`)
 - Changed behavior of :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` so that ``group_keys`` is respected even if a transformer is detected (:issue:`34998`)
 - Enforced deprecation ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`)
 - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1616,14 +1616,10 @@ def diff(arr, n: int, axis: AxisInt = 0):
                 raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
             return op(arr, arr.shift(n))
         else:
-            warnings.warn(
-                "dtype lost in 'diff()'. In the future this will raise a "
-                "TypeError. Convert to a suitable dtype prior to calling 'diff'.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
+            raise TypeError(
+                f"{type(arr).__name__} has no 'diff' method. "
+                "Convert to a suitable dtype prior to calling 'diff'."
             )
-            arr = np.asarray(arr)
-            dtype = arr.dtype
 
     is_timedelta = False
     if needs_i8_conversion(arr.dtype):
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
@@ -341,19 +341,6 @@ def _reconstruct(result):
 
         if result.ndim != self.ndim:
             if method == "outer":
-                if self.ndim == 2:
-                    # we already deprecated for Series
-                    msg = (
-                        "outer method for ufunc {} is not implemented on "
-                        "pandas objects. Returning an ndarray, but in the "
-                        "future this will raise a 'NotImplementedError'. "
-                        "Consider explicitly converting the DataFrame "
-                        "to an array with '.to_numpy()' first."
-                    )
-                    warnings.warn(
-                        msg.format(ufunc), FutureWarning, stacklevel=find_stack_level()
-                    )
-                    return result
                 raise NotImplementedError
             return result
         if isinstance(result, BlockManager):
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9000,12 +9000,9 @@ def ranker(data):
         if numeric_only:
             if self.ndim == 1 and not is_numeric_dtype(self.dtype):
                 # GH#47500
-                warnings.warn(
-                    f"Calling Series.rank with numeric_only={numeric_only} and dtype "
-                    f"{self.dtype} is deprecated and will raise a TypeError in a "
-                    "future version of pandas",
-                    category=FutureWarning,
-                    stacklevel=find_stack_level(),
+                raise TypeError(
+                    "Series.rank does not allow numeric_only=True with "
+                    "non-numeric dtype."
                 )
             data = self._get_numeric_data()
         else:
@@ -10946,6 +10943,7 @@ def _stat_function(
                 FutureWarning,
                 stacklevel=find_stack_level(),
             )
+
         if axis is lib.no_default:
             axis = None
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -18,7 +18,6 @@
     cast,
     overload,
 )
-import warnings
 import weakref
 
 import numpy as np
@@ -69,7 +68,6 @@
     Substitution,
     doc,
 )
-from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import (
     validate_ascending,
     validate_bool_kwarg,
@@ -4579,14 +4577,9 @@ def _reduce(
                 if name in ["any", "all"]:
                     kwd_name = "bool_only"
                 # GH#47500 - change to TypeError to match other methods
-                warnings.warn(
-                    f"Calling Series.{name} with {kwd_name}={numeric_only} and "
-                    f"dtype {self.dtype} will raise a TypeError in the future",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                raise NotImplementedError(
-                    f"Series.{name} does not implement {kwd_name}."
+                raise TypeError(
+                    f"Series.{name} does not allow {kwd_name}={numeric_only} "
+                    "with non-numeric dtypes."
                 )
             with np.errstate(all="ignore"):
                 return op(delegate, skipna=skipna, **kwds)
diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py
@@ -69,15 +69,12 @@ def test_isin_empty(empty):
 
 
 def test_diff():
-    s = pd.Series([1, 2, 3], dtype="category")
-    with tm.assert_produces_warning(FutureWarning):
-        result = s.diff()
-    expected = pd.Series([np.nan, 1, 1])
-    tm.assert_series_equal(result, expected)
-
-    expected = expected.to_frame(name="A")
-    df = s.to_frame(name="A")
-    with tm.assert_produces_warning(FutureWarning):
-        result = df.diff()
-
-    tm.assert_frame_equal(result, expected)
+    ser = pd.Series([1, 2, 3], dtype="category")
+
+    msg = "Convert to a suitable dtype"
+    with pytest.raises(TypeError, match=msg):
+        ser.diff()
+
+    df = ser.to_frame(name="A")
+    with pytest.raises(TypeError, match=msg):
+        df.diff()
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -1489,6 +1489,7 @@ def test_median_categorical_dtype_nuisance_column(self):
         # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead
         #  of expected.values
 
+    @pytest.mark.filterwarnings("ignore:.*will return a scalar.*:FutureWarning")
     @pytest.mark.parametrize("method", ["min", "max"])
     def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method):
         # GH#28949 DataFrame.min should behave like Series.min
diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py
@@ -200,9 +200,10 @@ def test_unary_accumulate_axis():
     tm.assert_frame_equal(result, expected)
 
 
-def test_frame_outer_deprecated():
+def test_frame_outer_disallowed():
     df = pd.DataFrame({"A": [1, 2]})
-    with tm.assert_produces_warning(FutureWarning):
+    with pytest.raises(NotImplementedError, match=""):
+        # deprecation enforced in 2.0
         np.subtract.outer(df, df)
 
 
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -1528,8 +1528,10 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
             err_category = TypeError
             err_msg = f"{groupby_func} is not supported for object dtype"
         elif groupby_func == "skew":
-            warn_category = FutureWarning
-            warn_msg = "will raise a TypeError in the future"
+            warn_category = None
+            warn_msg = ""
+            err_category = TypeError
+            err_msg = "Series.skew does not allow numeric_only=True with non-numeric"
         else:
             warn_category = FutureWarning
             warn_msg = "This will raise a TypeError"
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -495,9 +495,9 @@ def test_transform_coercion():
 
     expected = g.transform(np.mean)
 
-    msg = "will return a scalar mean"
-    with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
-        result = g.transform(lambda x: np.mean(x))
+    # in 2.0 np.mean on a DataFrame is equivalent to frame.mean(axis=None)
+    #  which now gives a scalar instead of a Series
+    result = g.transform(lambda x: np.mean(x))
     tm.assert_frame_equal(result, expected)
 
     with tm.assert_produces_warning(None):
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -275,18 +275,9 @@ def test_numeric_only(self, kernel, has_numeric_only, dtype):
             with pytest.raises(TypeError, match=msg):
                 method(*args, numeric_only=True)
         elif dtype is object:
-            if kernel == "rank":
-                msg = "Calling Series.rank with numeric_only=True and dtype object"
-                with tm.assert_produces_warning(FutureWarning, match=msg):
-                    method(*args, numeric_only=True)
-            else:
-                warn_msg = (
-                    f"Calling Series.{kernel} with numeric_only=True and dtype object"
-                )
-                err_msg = f"Series.{kernel} does not implement numeric_only"
-                with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-                    with pytest.raises(NotImplementedError, match=err_msg):
-                        method(*args, numeric_only=True)
+            msg = f"Series.{kernel} does not allow numeric_only=True with non-numeric"
+            with pytest.raises(TypeError, match=msg):
+                method(*args, numeric_only=True)
         else:
             result = method(*args, numeric_only=True)
             expected = method(*args, numeric_only=False)