From de049360a3cdeeb09f4213a99d0d67933d506986 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 22 Oct 2022 17:45:29 -0400 Subject: [PATCH 1/4] DEPR: Enforce deprecation of mad and tshift --- doc/redirects.csv | 6 - doc/source/reference/frame.rst | 2 - doc/source/reference/groupby.rst | 4 - doc/source/reference/series.rst | 2 - doc/source/user_guide/basics.rst | 1 - pandas/_typing.py | 2 +- pandas/core/apply.py | 2 +- pandas/core/generic.py | 126 +----------------- pandas/core/groupby/base.py | 2 - pandas/core/groupby/generic.py | 24 ---- pandas/core/groupby/groupby.py | 2 - pandas/core/groupby/ops.py | 1 - pandas/tests/apply/common.py | 7 +- pandas/tests/apply/test_frame_transform.py | 19 ++- .../arrays/categorical/test_operators.py | 2 - pandas/tests/frame/conftest.py | 1 - pandas/tests/frame/methods/test_shift.py | 58 -------- pandas/tests/frame/test_reductions.py | 110 ++------------- pandas/tests/generic/test_finalize.py | 16 --- pandas/tests/groupby/__init__.py | 2 - .../tests/groupby/aggregate/test_aggregate.py | 6 +- pandas/tests/groupby/test_allowlist.py | 33 +---- pandas/tests/groupby/test_api_consistency.py | 2 - pandas/tests/groupby/test_apply.py | 6 +- pandas/tests/groupby/test_categorical.py | 17 +-- pandas/tests/groupby/test_function.py | 21 +-- pandas/tests/groupby/test_groupby.py | 77 +++-------- pandas/tests/groupby/test_groupby_subclass.py | 7 +- .../tests/groupby/transform/test_transform.py | 58 ++------ 29 files changed, 65 insertions(+), 551 deletions(-) diff --git a/doc/redirects.csv b/doc/redirects.csv index 42f91a8b9884f..8a55c48996e84 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -186,7 +186,6 @@ generated/pandas.core.groupby.DataFrameGroupBy.filter,../reference/api/pandas.co generated/pandas.core.groupby.DataFrameGroupBy.hist,../reference/api/pandas.core.groupby.DataFrameGroupBy.hist generated/pandas.core.groupby.DataFrameGroupBy.idxmax,../reference/api/pandas.core.groupby.DataFrameGroupBy.idxmax generated/pandas.core.groupby.DataFrameGroupBy.idxmin,../reference/api/pandas.core.groupby.DataFrameGroupBy.idxmin -generated/pandas.core.groupby.DataFrameGroupBy.mad,../reference/api/pandas.core.groupby.DataFrameGroupBy.mad generated/pandas.core.groupby.DataFrameGroupBy.pct_change,../reference/api/pandas.core.groupby.DataFrameGroupBy.pct_change generated/pandas.core.groupby.DataFrameGroupBy.plot,../reference/api/pandas.core.groupby.DataFrameGroupBy.plot generated/pandas.core.groupby.DataFrameGroupBy.quantile,../reference/api/pandas.core.groupby.DataFrameGroupBy.quantile @@ -196,7 +195,6 @@ generated/pandas.core.groupby.DataFrameGroupBy.shift,../reference/api/pandas.cor generated/pandas.core.groupby.DataFrameGroupBy.size,../reference/api/pandas.core.groupby.DataFrameGroupBy.size generated/pandas.core.groupby.DataFrameGroupBy.skew,../reference/api/pandas.core.groupby.DataFrameGroupBy.skew generated/pandas.core.groupby.DataFrameGroupBy.take,../reference/api/pandas.core.groupby.DataFrameGroupBy.take -generated/pandas.core.groupby.DataFrameGroupBy.tshift,../reference/api/pandas.core.groupby.DataFrameGroupBy.tshift generated/pandas.core.groupby.GroupBy.agg,../reference/api/pandas.core.groupby.GroupBy.agg generated/pandas.core.groupby.GroupBy.aggregate,../reference/api/pandas.core.groupby.GroupBy.aggregate generated/pandas.core.groupby.GroupBy.all,../reference/api/pandas.core.groupby.GroupBy.all @@ -415,7 +413,6 @@ generated/pandas.DataFrame.le,../reference/api/pandas.DataFrame.le generated/pandas.DataFrame.loc,../reference/api/pandas.DataFrame.loc generated/pandas.DataFrame.lookup,../reference/api/pandas.DataFrame.lookup generated/pandas.DataFrame.lt,../reference/api/pandas.DataFrame.lt -generated/pandas.DataFrame.mad,../reference/api/pandas.DataFrame.mad generated/pandas.DataFrame.mask,../reference/api/pandas.DataFrame.mask generated/pandas.DataFrame.max,../reference/api/pandas.DataFrame.max generated/pandas.DataFrame.mean,../reference/api/pandas.DataFrame.mean @@ -528,7 +525,6 @@ generated/pandas.DataFrame.transform,../reference/api/pandas.DataFrame.transform generated/pandas.DataFrame.transpose,../reference/api/pandas.DataFrame.transpose generated/pandas.DataFrame.truediv,../reference/api/pandas.DataFrame.truediv generated/pandas.DataFrame.truncate,../reference/api/pandas.DataFrame.truncate -generated/pandas.DataFrame.tshift,../reference/api/pandas.DataFrame.tshift generated/pandas.DataFrame.tz_convert,../reference/api/pandas.DataFrame.tz_convert generated/pandas.DataFrame.tz_localize,../reference/api/pandas.DataFrame.tz_localize generated/pandas.DataFrame.unstack,../reference/api/pandas.DataFrame.unstack @@ -1097,7 +1093,6 @@ generated/pandas.Series.last_valid_index,../reference/api/pandas.Series.last_val generated/pandas.Series.le,../reference/api/pandas.Series.le generated/pandas.Series.loc,../reference/api/pandas.Series.loc generated/pandas.Series.lt,../reference/api/pandas.Series.lt -generated/pandas.Series.mad,../reference/api/pandas.Series.mad generated/pandas.Series.map,../reference/api/pandas.Series.map generated/pandas.Series.mask,../reference/api/pandas.Series.mask generated/pandas.Series.max,../reference/api/pandas.Series.max @@ -1266,7 +1261,6 @@ generated/pandas.Series.transform,../reference/api/pandas.Series.transform generated/pandas.Series.transpose,../reference/api/pandas.Series.transpose generated/pandas.Series.truediv,../reference/api/pandas.Series.truediv generated/pandas.Series.truncate,../reference/api/pandas.Series.truncate -generated/pandas.Series.tshift,../reference/api/pandas.Series.tshift generated/pandas.Series.tz_convert,../reference/api/pandas.Series.tz_convert generated/pandas.Series.tz_localize,../reference/api/pandas.Series.tz_localize generated/pandas.Series.unique,../reference/api/pandas.Series.unique diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index e71ee80767d29..cc38f6cc42972 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -151,7 +151,6 @@ Computations / descriptive stats DataFrame.eval DataFrame.kurt DataFrame.kurtosis - DataFrame.mad DataFrame.max DataFrame.mean DataFrame.median @@ -268,7 +267,6 @@ Time Series-related DataFrame.asof DataFrame.shift DataFrame.slice_shift - DataFrame.tshift DataFrame.first_valid_index DataFrame.last_valid_index DataFrame.resample diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 7c6bf485c0599..54b2e893bfd08 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -84,7 +84,6 @@ Function application DataFrameGroupBy.idxmax DataFrameGroupBy.idxmin DataFrameGroupBy.last - DataFrameGroupBy.mad DataFrameGroupBy.max DataFrameGroupBy.mean DataFrameGroupBy.median @@ -108,7 +107,6 @@ Function application DataFrameGroupBy.var DataFrameGroupBy.tail DataFrameGroupBy.take - DataFrameGroupBy.tshift DataFrameGroupBy.value_counts ``SeriesGroupBy`` computations / descriptive stats @@ -138,7 +136,6 @@ Function application SeriesGroupBy.idxmin SeriesGroupBy.is_monotonic_increasing SeriesGroupBy.is_monotonic_decreasing - SeriesGroupBy.mad SeriesGroupBy.max SeriesGroupBy.mean SeriesGroupBy.median @@ -165,7 +162,6 @@ Function application SeriesGroupBy.var SeriesGroupBy.tail SeriesGroupBy.take - SeriesGroupBy.tshift SeriesGroupBy.value_counts Plotting and visualization diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index fcdc9ea9b95da..0beac55c8b86c 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -148,7 +148,6 @@ Computations / descriptive stats Series.diff Series.factorize Series.kurt - Series.mad Series.max Series.mean Series.median @@ -269,7 +268,6 @@ Time Series-related Series.tz_localize Series.at_time Series.between_time - Series.tshift Series.slice_shift Accessors diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index a34d4891b9d77..0883113474f54 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -556,7 +556,6 @@ optional ``level`` parameter which applies only if the object has a ``count``, Number of non-NA observations ``sum``, Sum of values ``mean``, Mean of values - ``mad``, Mean absolute deviation ``median``, Arithmetic median of values ``min``, Minimum ``max``, Maximum diff --git a/pandas/_typing.py b/pandas/_typing.py index 5c22baa4bd42e..39365d339016c 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -280,7 +280,7 @@ def closed(self) -> bool: ] # Arguments for fillna() -FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"] +FillnaOptions = Literal["backfill", "bfill", "ffill"] # internals Manager = Union[ diff --git a/pandas/core/apply.py b/pandas/core/apply.py index bbb954c1a4e80..4f9af2d0c01d6 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -557,7 +557,7 @@ def apply_str(self) -> DataFrame | Series: sig = inspect.getfullargspec(func) arg_names = (*sig.args, *sig.kwonlyargs) if self.axis != 0 and ( - "axis" not in arg_names or f in ("corrwith", "mad", "skew") + "axis" not in arg_names or f in ("corrwith", "skew") ): raise ValueError(f"Operation {f} does not support axis=1") elif "axis" in arg_names: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7a1026d32d4f3..f46b429a5fc75 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -250,7 +250,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin): _internal_names_set: set[str] = set(_internal_names) _accessors: set[str] = set() _hidden_attrs: frozenset[str] = frozenset( - ["_AXIS_NAMES", "_AXIS_NUMBERS", "get_values", "tshift"] + ["_AXIS_NAMES", "_AXIS_NUMBERS", "get_values"] ) _metadata: list[str] = [] _is_copy: weakref.ReferenceType[NDFrame] | None = None @@ -10122,8 +10122,6 @@ def shift( Index.shift : Shift values of Index. DatetimeIndex.shift : Shift values of DatetimeIndex. PeriodIndex.shift : Shift values of PeriodIndex. - tshift : Shift the time index, using the index's frequency if - available. Examples -------- @@ -10272,49 +10270,6 @@ def slice_shift(self: NDFrameT, periods: int = 1, axis: Axis = 0) -> NDFrameT: new_obj = new_obj.set_axis(shifted_axis, axis=axis, copy=False) return new_obj.__finalize__(self, method="slice_shift") - @final - def tshift(self: NDFrameT, periods: int = 1, freq=None, axis: Axis = 0) -> NDFrameT: - """ - Shift the time index, using the index's frequency if available. - - .. deprecated:: 1.1.0 - Use `shift` instead. - - Parameters - ---------- - periods : int - Number of periods to move, can be positive or negative. - freq : DateOffset, timedelta, or str, default None - Increment to use from the tseries module - or time rule expressed as a string (e.g. 'EOM'). - axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 - Corresponds to the axis that contains the Index. - For `Series` this parameter is unused and defaults to 0. - - Returns - ------- - shifted : Series/DataFrame - - Notes - ----- - If freq is not specified then tries to use the freq or inferred_freq - attributes of the index. If neither of those attributes exist, a - ValueError is thrown - """ - warnings.warn( - ( - "tshift is deprecated and will be removed in a future version. " - "Please use shift instead." - ), - FutureWarning, - stacklevel=find_stack_level(), - ) - - if freq is None: - freq = "infer" - - return self.shift(periods, freq, axis) - def truncate( self: NDFrameT, before=None, @@ -11544,70 +11499,6 @@ def prod( product = prod - def mad( - self, - axis: Axis | None = None, - skipna: bool_t = True, - level: Level | None = None, - ) -> Series | float: - """ - {desc} - - .. deprecated:: 1.5.0 - mad is deprecated. - - Parameters - ---------- - axis : {axis_descr} - Axis for the function to be applied on. - For `Series` this parameter is unused and defaults to 0. - skipna : bool, default True - Exclude NA/null values when computing the result. - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a {name1}. - - Returns - ------- - {name1} or {name2} (if level specified)\ - {see_also}\ - {examples} - """ - msg = ( - "The 'mad' method is deprecated and will be removed in a future version. " - "To compute the same result, you may do `(df - df.mean()).abs().mean()`." - ) - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) - - if not is_bool(skipna): - warnings.warn( - "Passing None for skipna is deprecated and will raise in a future" - "version. Pass True instead. Only boolean values will be allowed " - "in the future.", - FutureWarning, - stacklevel=find_stack_level(), - ) - skipna = True - if axis is None: - axis = self._stat_axis_number - if level is not None: - warnings.warn( - "Using the level keyword in DataFrame and Series aggregations is " - "deprecated and will be removed in a future version. Use groupby " - "instead. df.mad(level=1) should use df.groupby(level=1).mad()", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) - - data = self._get_numeric_data() - if axis == 0: - # error: Unsupported operand types for - ("NDFrame" and "float") - demeaned = data - data.mean(axis=0) # type: ignore[operator] - else: - demeaned = data.sub(data.mean(axis=1), axis=0) - return np.abs(demeaned).mean(axis=axis, skipna=skipna) - @classmethod def _add_numeric_operations(cls) -> None: """ @@ -11664,21 +11555,6 @@ def all( setattr(cls, "all", all) - @doc( - NDFrame.mad.__doc__, - desc="Return the mean absolute deviation of the values " - "over the requested axis.", - name1=name1, - name2=name2, - axis_descr=axis_descr, - see_also="", - examples="", - ) - def mad(self, axis: Axis | None = None, skipna: bool_t = True, level=None): - return NDFrame.mad(self, axis, skipna, level) - - setattr(cls, "mad", mad) - @doc( _num_ddof_doc, desc="Return unbiased standard error of the mean over requested " diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index a953dab2115da..42630845bf6b2 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -36,7 +36,6 @@ class OutputKey: "idxmax", "idxmin", "last", - "mad", "max", "mean", "median", @@ -86,7 +85,6 @@ def maybe_normalize_deprecated_kernels(kernel) -> Literal["bfill", "ffill"]: "pct_change", "rank", "shift", - "tshift", ] ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 47452d885543e..16732f5421df7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -880,18 +880,6 @@ def skew( ) return result - @doc(Series.mad.__doc__) - def mad( - self, axis: Axis | None = None, skipna: bool = True, level: Level | None = None - ) -> Series: - result = self._op_via_apply("mad", axis=axis, skipna=skipna, level=level) - return result - - @doc(Series.tshift.__doc__) - def tshift(self, periods: int = 1, freq=None) -> Series: - result = self._op_via_apply("tshift", periods=periods, freq=freq) - return result - @property @doc(Series.plot.__doc__) def plot(self): @@ -2275,18 +2263,6 @@ def skew( ) return result - @doc(DataFrame.mad.__doc__) - def mad( - self, axis: Axis | None = None, skipna: bool = True, level: Level | None = None - ) -> DataFrame: - result = self._op_via_apply("mad", axis=axis, skipna=skipna, level=level) - return result - - @doc(DataFrame.tshift.__doc__) - def tshift(self, periods: int = 1, freq=None, axis: Axis = 0) -> DataFrame: - result = self._op_via_apply("tshift", periods=periods, freq=freq, axis=axis) - return result - @property @doc(DataFrame.plot.__doc__) def plot(self) -> GroupByPlot: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6c975058c5b76..a0f83e13c4ece 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3877,8 +3877,6 @@ def shift(self, periods: int = 1, freq=None, axis: Axis = 0, fill_value=None): See Also -------- Index.shift : Shift values of Index. - tshift : Shift the time index, using the index’s frequency - if available. """ if freq is not None or axis != 0: f = lambda x: x.shift(periods, freq, axis, fill_value) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 04f18369f4fcc..bf3f74330e8cb 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -789,7 +789,6 @@ def apply( result_values.append(res) # getattr pattern for __name__ is needed for functools.partial objects if len(group_keys) == 0 and getattr(f, "__name__", None) in [ - "mad", "skew", "sum", "prod", diff --git a/pandas/tests/apply/common.py b/pandas/tests/apply/common.py index 91b831bcbb684..b4d153df54059 100644 --- a/pandas/tests/apply/common.py +++ b/pandas/tests/apply/common.py @@ -1,10 +1,7 @@ from pandas.core.groupby.base import transformation_kernels -# tshift only works on time index and is deprecated # There is no Series.cumcount or DataFrame.cumcount series_transform_kernels = [ - x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] -] -frame_transform_kernels = [ - x for x in sorted(transformation_kernels) if x not in ["tshift", "cumcount"] + x for x in sorted(transformation_kernels) if x != "cumcount" ] +frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"] diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index c7a99400ab8e1..f884e8a7daf67 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -147,17 +147,14 @@ def test_transform_bad_dtype(op, frame_or_series, request): obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms obj = tm.get_obj(obj, frame_or_series) - # tshift is deprecated - warn = None if op != "tshift" else FutureWarning - with tm.assert_produces_warning(warn): - with pytest.raises(TypeError, match="unsupported operand|not supported"): - obj.transform(op) - with pytest.raises(TypeError, match="Transform function failed"): - obj.transform([op]) - with pytest.raises(TypeError, match="Transform function failed"): - obj.transform({"A": op}) - with pytest.raises(TypeError, match="Transform function failed"): - obj.transform({"A": [op]}) + with pytest.raises(TypeError, match="unsupported operand|not supported"): + obj.transform(op) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform([op]) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform({"A": op}) + with pytest.raises(TypeError, match="Transform function failed"): + obj.transform({"A": [op]}) @pytest.mark.parametrize("op", frame_kernels_raise) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 9642691b5c578..70636d633b674 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -374,8 +374,6 @@ def test_numeric_like_ops(self): with pytest.raises(TypeError, match=msg): getattr(s, op)(numeric_only=False) - # mad technically works because it takes always the numeric data - def test_numeric_like_ops_series(self): # numpy ops s = Series(Categorical([1, 2, 3, 4])) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 8dbed84b85837..2cfa295d939a8 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -277,7 +277,6 @@ def frame_of_index_cols(): "sem", "var", "std", - "mad", ] ) def reduction_functions(request): diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index bfc3c8e0a25eb..f76deca9048be 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -447,64 +447,6 @@ def test_shift_axis1_multiple_blocks_with_int_fill(self): tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") - def test_tshift(self, datetime_frame, frame_or_series): - # TODO(2.0): remove this test when tshift deprecation is enforced - - # PeriodIndex - ps = tm.makePeriodFrame() - ps = tm.get_obj(ps, frame_or_series) - shifted = ps.tshift(1) - unshifted = shifted.tshift(-1) - - tm.assert_equal(unshifted, ps) - - shifted2 = ps.tshift(freq="B") - tm.assert_equal(shifted, shifted2) - - shifted3 = ps.tshift(freq=offsets.BDay()) - tm.assert_equal(shifted, shifted3) - - msg = "Given freq M does not match PeriodIndex freq B" - with pytest.raises(ValueError, match=msg): - ps.tshift(freq="M") - - # DatetimeIndex - dtobj = tm.get_obj(datetime_frame, frame_or_series) - shifted = dtobj.tshift(1) - unshifted = shifted.tshift(-1) - - tm.assert_equal(dtobj, unshifted) - - shifted2 = dtobj.tshift(freq=dtobj.index.freq) - tm.assert_equal(shifted, shifted2) - - inferred_ts = DataFrame( - datetime_frame.values, - Index(np.asarray(datetime_frame.index)), - columns=datetime_frame.columns, - ) - inferred_ts = tm.get_obj(inferred_ts, frame_or_series) - shifted = inferred_ts.tshift(1) - - expected = dtobj.tshift(1) - expected.index = expected.index._with_freq(None) - tm.assert_equal(shifted, expected) - - unshifted = shifted.tshift(-1) - tm.assert_equal(unshifted, inferred_ts) - - no_freq = dtobj.iloc[[0, 5, 7]] - msg = "Freq was not set in the index hence cannot be inferred" - with pytest.raises(ValueError, match=msg): - no_freq.tshift() - - def test_tshift_deprecated(self, datetime_frame, frame_or_series): - # GH#11631 - dtobj = tm.get_obj(datetime_frame, frame_or_series) - with tm.assert_produces_warning(FutureWarning): - dtobj.tshift() - def test_period_index_frame_shift_with_freq(self, frame_or_series): ps = tm.makePeriodFrame() ps = tm.get_obj(ps, frame_or_series) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 6654ecec78c94..744d06d6cf339 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -69,7 +69,6 @@ def assert_stat_op_calc( skipna_alternative : function, default None NaN-safe version of alternative """ - warn = FutureWarning if opname == "mad" else None f = getattr(frame, opname) if check_dates: @@ -91,9 +90,8 @@ def wrapper(x): return alternative(x.values) skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) tm.assert_series_equal( result0, frame.apply(wrapper), check_dtype=check_dtype, rtol=rtol, atol=atol ) @@ -106,9 +104,8 @@ def wrapper(x): else: skipna_wrapper = alternative - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result0 = f(axis=0) - result1 = f(axis=1) + result0 = f(axis=0) + result1 = f(axis=1) tm.assert_series_equal( result0, frame.apply(skipna_wrapper), @@ -130,18 +127,14 @@ def wrapper(x): assert lcd_dtype == result1.dtype # bad axis - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - with pytest.raises(ValueError, match="No axis named 2"): - f(axis=2) + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) # all NA case if has_skipna: all_na = frame * np.NaN - with tm.assert_produces_warning( - warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False - ): - r0 = getattr(all_na, opname)(axis=0) - r1 = getattr(all_na, opname)(axis=1) + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) if opname in ["sum", "prod"]: unit = 1 if opname == "prod" else 0 # result for empty sum/prod expected = Series(unit, index=r0.index, dtype=r0.dtype) @@ -167,7 +160,6 @@ class TestDataFrameAnalytics: "min", "max", "nunique", - "mad", "var", "std", "sem", @@ -176,13 +168,9 @@ class TestDataFrameAnalytics: ], ) def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname): - warn = FutureWarning if opname == "mad" else None - with tm.assert_produces_warning( - warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False - ): - getattr(float_string_frame, opname)(axis=axis) - if opname not in ("nunique", "mad"): - getattr(float_string_frame, opname)(axis=axis, numeric_only=True) + getattr(float_string_frame, opname)(axis=axis) + if opname != "nunique": + getattr(float_string_frame, opname)(axis=axis, numeric_only=True) @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") @pytest.mark.parametrize("axis", [0, 1]) @@ -213,9 +201,6 @@ def count(s): def nunique(s): return len(algorithms.unique1d(s.dropna())) - def mad(x): - return np.abs(x - x.mean()).mean() - def var(x): return np.var(x, ddof=1) @@ -253,7 +238,6 @@ def sem(x): "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod ) - assert_stat_op_calc("mad", mad, float_frame_with_na) assert_stat_op_calc("var", var, float_frame_with_na) assert_stat_op_calc("std", std, float_frame_with_na) assert_stat_op_calc("sem", sem, float_frame_with_na) @@ -1490,14 +1474,6 @@ def test_frame_any_with_timedelta(self): expected = Series(data=[False, True]) tm.assert_series_equal(result, expected) - def test_reductions_deprecation_skipna_none(self, frame_or_series): - # GH#44580 - obj = frame_or_series([1, 2, 3]) - with tm.assert_produces_warning( - FutureWarning, match="skipna", raise_on_extra_warnings=False - ): - obj.mad(skipna=None) - def test_reductions_deprecation_level_argument( self, frame_or_series, reduction_functions ): @@ -1515,8 +1491,6 @@ def test_reductions_skipna_none_raises( request.node.add_marker( pytest.mark.xfail(reason="Count does not accept skipna") ) - elif reduction_functions == "mad": - pytest.skip("Mad is deprecated: GH#11787") obj = frame_or_series([1, 2, 3]) msg = 'For argument "skipna" expected type bool, received type NoneType.' with pytest.raises(ValueError, match=msg): @@ -1718,68 +1692,6 @@ def test_minmax_extensionarray(method, numeric_only): tm.assert_series_equal(result, expected) -def test_mad_nullable_integer(any_signed_int_ea_dtype): - # GH#33036 - df = DataFrame(np.random.randn(100, 4).astype(np.int64)) - df2 = df.astype(any_signed_int_ea_dtype) - - with tm.assert_produces_warning( - FutureWarning, match="The 'mad' method is deprecated" - ): - result = df2.mad() - expected = df.mad() - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning( - FutureWarning, match="The 'mad' method is deprecated" - ): - result = df2.mad(axis=1) - expected = df.mad(axis=1) - tm.assert_series_equal(result, expected) - - # case with NAs present - df2.iloc[::2, 1] = pd.NA - - with tm.assert_produces_warning( - FutureWarning, match="The 'mad' method is deprecated" - ): - result = df2.mad() - expected = df.mad() - expected[1] = df.iloc[1::2, 1].mad() - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning( - FutureWarning, match="The 'mad' method is deprecated" - ): - result = df2.mad(axis=1) - expected = df.mad(axis=1) - expected[::2] = df.T.loc[[0, 2, 3], ::2].mad() - tm.assert_series_equal(result, expected) - - -@pytest.mark.xfail(reason="GH#42895 caused by lack of 2D EA") -def test_mad_nullable_integer_all_na(any_signed_int_ea_dtype): - # GH#33036 - df = DataFrame(np.random.randn(100, 4).astype(np.int64)) - df2 = df.astype(any_signed_int_ea_dtype) - - # case with all-NA row/column - msg = "will attempt to set the values inplace instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - df2.iloc[:, 1] = pd.NA # FIXME(GH#44199): this doesn't operate in-place - df2.iloc[:, 1] = pd.array([pd.NA] * len(df2), dtype=any_signed_int_ea_dtype) - - with tm.assert_produces_warning( - FutureWarning, match="The 'mad' method is deprecated" - ): - result = df2.mad() - expected = df.mad() - - expected[1] = pd.NA - expected = expected.astype("Float64") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("meth", ["max", "min", "sum", "mean", "median"]) def test_groupby_regular_arithmetic_equivalent(meth): # GH#40660 diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 3c40218ef9024..7634f783117d6 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -395,22 +395,6 @@ (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))), (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))), (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))), - pytest.param( - ( - pd.Series, - (1, pd.date_range("2000", periods=4)), - operator.methodcaller("tshift"), - ), - marks=pytest.mark.filterwarnings("ignore::FutureWarning"), - ), - pytest.param( - ( - pd.DataFrame, - ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - operator.methodcaller("tshift"), - ), - marks=pytest.mark.filterwarnings("ignore::FutureWarning"), - ), (pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)), (pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)), ( diff --git a/pandas/tests/groupby/__init__.py b/pandas/tests/groupby/__init__.py index c63aa568a15dc..446d9da437771 100644 --- a/pandas/tests/groupby/__init__.py +++ b/pandas/tests/groupby/__init__.py @@ -22,6 +22,4 @@ def get_groupby_method_args(name, obj): return (0.5,) if name == "corrwith": return (obj,) - if name == "tshift": - return (0, 0) return () diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3e1ee02aabce7..ad7368a69c0f5 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1348,13 +1348,11 @@ def test_groupby_aggregate_directory(reduction_func): # GH#32793 if reduction_func in ["corrwith", "nth"]: return None - warn = FutureWarning if reduction_func == "mad" else None obj = DataFrame([[0, 1], [0, np.nan]]) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result_reduced_series = obj.groupby(0).agg(reduction_func) - result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) + result_reduced_series = obj.groupby(0).agg(reduction_func) + result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) if reduction_func in ["size", "ngroup"]: # names are different: None / 1 diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 93df7d1f1d4a0..1dba199dc8f22 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -28,12 +28,11 @@ "median", "mean", "skew", - "mad", "std", "var", "sem", ] -AGG_FUNCTIONS_WITH_SKIPNA = ["skew", "mad"] +AGG_FUNCTIONS_WITH_SKIPNA = ["skew"] @pytest.fixture @@ -79,8 +78,6 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): # GH6944 # GH 17537 # explicitly test the allowlist methods - warn = FutureWarning if op == "mad" else None - if axis == 0: frame = raw_frame else: @@ -88,20 +85,15 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): if op in AGG_FUNCTIONS_WITH_SKIPNA: grouped = frame.groupby(level=level, axis=axis, sort=sort) - with tm.assert_produces_warning( - warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False - ): - result = getattr(grouped, op)(skipna=skipna) - with tm.assert_produces_warning(FutureWarning): - expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + result = getattr(grouped, op)(skipna=skipna) + expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) if sort: expected = expected.sort_index(axis=axis, level=level) tm.assert_frame_equal(result, expected) else: grouped = frame.groupby(level=level, axis=axis, sort=sort) - with tm.assert_produces_warning(FutureWarning): - result = getattr(grouped, op)() - expected = getattr(frame, op)(level=level, axis=axis) + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) if sort: expected = expected.sort_index(axis=axis, level=level) tm.assert_frame_equal(result, expected) @@ -203,10 +195,8 @@ def test_tab_completion(mframe): "shift", "skew", "take", - "tshift", "pct_change", "any", - "mad", "corr", "corrwith", "cov", @@ -272,19 +262,6 @@ def test_groupby_selection_with_methods(df, method): tm.assert_frame_equal(res, exp) -@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") -def test_groupby_selection_tshift_raises(df): - rng = date_range("2014", periods=len(df)) - df.index = rng - - g = df.groupby(["A"])[["C"]] - - # check that the index cache is cleared - with pytest.raises(ValueError, match="Freq was not set in the index"): - # GH#35937 - g.tshift() - - def test_groupby_selection_other_methods(df): # some methods which require DatetimeIndex rng = date_range("2014", periods=len(df)) diff --git a/pandas/tests/groupby/test_api_consistency.py b/pandas/tests/groupby/test_api_consistency.py index 1e82c2b6ac6e2..155f86c23e106 100644 --- a/pandas/tests/groupby/test_api_consistency.py +++ b/pandas/tests/groupby/test_api_consistency.py @@ -98,8 +98,6 @@ def test_series_consistency(request, groupby_func): exclude_expected = {"kwargs", "bool_only", "level", "axis"} elif groupby_func in ("count",): exclude_expected = {"level"} - elif groupby_func in ("tshift",): - exclude_expected = {"axis"} elif groupby_func in ("diff",): exclude_result = {"axis"} elif groupby_func in ("max", "min"): diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 47ea6a99ffea9..0cd89a205bb82 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1048,8 +1048,6 @@ def test_apply_with_timezones_aware(): def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): # GH #34656 # GH #34271 - warn = FutureWarning if reduction_func == "mad" else None - df = DataFrame( { "a": [99, 99, 99, 88, 88, 88], @@ -1071,8 +1069,7 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): # Check output when another method is called before .apply() grp = df.groupby(by="a") args = get_groupby_method_args(reduction_func, df) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - _ = getattr(grp, reduction_func)(*args) + _ = getattr(grp, reduction_func)(*args) result = grp.apply(sum) tm.assert_frame_equal(result, expected) @@ -1338,7 +1335,6 @@ def test_result_name_when_one_group(name): [ ("apply", lambda gb: gb.values[-1]), ("apply", lambda gb: gb["b"].iloc[0]), - ("agg", "mad"), ("agg", "skew"), ("agg", "prod"), ("agg", "sum"), diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index e99d1325a7e4f..a3821fc2216ec 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -49,7 +49,6 @@ def f(a): "idxmax": np.NaN, "idxmin": np.NaN, "last": np.NaN, - "mad": np.NaN, "max": np.NaN, "mean": np.NaN, "median": np.NaN, @@ -1365,7 +1364,6 @@ def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, r reason="TODO: implemented SeriesGroupBy.corrwith. See GH 32293" ) request.node.add_marker(mark) - warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1380,8 +1378,7 @@ def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, r series_groupby = df.groupby(["cat_1", "cat_2"], observed=observed)["value"] agg = getattr(series_groupby, reduction_func) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = agg(*args) + result = agg(*args) assert len(result) == expected_length @@ -1400,7 +1397,6 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( reason="TODO: implemented SeriesGroupBy.corrwith. See GH 32293" ) request.node.add_marker(mark) - warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1414,8 +1410,7 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( series_groupby = df.groupby(["cat_1", "cat_2"], observed=False)["value"] agg = getattr(series_groupby, reduction_func) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = agg(*args) + result = agg(*args) zero_or_nan = _results_for_groupbys_with_missing_categories[reduction_func] @@ -1438,7 +1433,6 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun # does not return the categories that are not in df when observed=True if reduction_func == "ngroup": pytest.skip("ngroup does not return the Categories on the index") - warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1452,8 +1446,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun df_grp = df.groupby(["cat_1", "cat_2"], observed=True) args = get_groupby_method_args(reduction_func, df) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - res = getattr(df_grp, reduction_func)(*args) + res = getattr(df_grp, reduction_func)(*args) for cat in unobserved_cats: assert cat not in res.index @@ -1470,7 +1463,6 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( if reduction_func == "ngroup": pytest.skip("ngroup does not return the Categories on the index") - warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1484,8 +1476,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( df_grp = df.groupby(["cat_1", "cat_2"], observed=observed) args = get_groupby_method_args(reduction_func, df) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - res = getattr(df_grp, reduction_func)(*args) + res = getattr(df_grp, reduction_func)(*args) expected = _results_for_groupbys_with_missing_categories[reduction_func] diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index cdbb121819c5e..2b583431dcd71 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -323,23 +323,6 @@ def test_idxmin(self, gb): result = gb.idxmin() tm.assert_frame_equal(result, expected) - def test_mad(self, gb, gni): - # mad - expected = DataFrame([[0], [np.nan]], columns=["B"], index=[1, 3]) - expected.index.name = "A" - with tm.assert_produces_warning( - FutureWarning, match="The 'mad' method is deprecated" - ): - result = gb.mad() - tm.assert_frame_equal(result, expected) - - expected = DataFrame([[1, 0.0], [3, np.nan]], columns=["A", "B"], index=[0, 1]) - with tm.assert_produces_warning( - FutureWarning, match="The 'mad' method is deprecated" - ): - result = gni.mad() - tm.assert_frame_equal(result, expected) - def test_describe(self, df, gb, gni): # describe expected_index = Index([1, 3], name="A") @@ -560,8 +543,6 @@ def test_idxmin_idxmax_axis1(): def test_axis1_numeric_only(request, groupby_func, numeric_only): if groupby_func in ("idxmax", "idxmin"): pytest.skip("idxmax and idx_min tested in test_idxmin_idxmax_axis1") - if groupby_func in ("mad", "tshift"): - pytest.skip("mad and tshift are deprecated") if groupby_func in ("corrwith", "skew"): msg = "GH#47723 groupby.corrwith and skew do not correctly implement axis=1" request.node.add_marker(pytest.mark.xfail(reason=msg)) @@ -1460,7 +1441,7 @@ def test_deprecate_numeric_only( @pytest.mark.parametrize("dtype", [bool, int, float, object]) def test_deprecate_numeric_only_series(dtype, groupby_func, request): # GH#46560 - if groupby_func in ("backfill", "mad", "pad", "tshift"): + if groupby_func in ("backfill", "pad"): pytest.skip("method is deprecated") elif groupby_func == "corrwith": msg = "corrwith is not implemented on SeriesGroupBy" diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 74b4d5dc19ca1..26f269d3d4384 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -688,11 +688,9 @@ def test_ops_not_as_index(reduction_func): if reduction_func in ("corrwith", "nth", "ngroup"): pytest.skip(f"GH 5755: Test not applicable for {reduction_func}") - warn = FutureWarning if reduction_func == "mad" else None df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"]) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - expected = getattr(df.groupby("a"), reduction_func)() + expected = getattr(df.groupby("a"), reduction_func)() if reduction_func == "size": expected = expected.rename("size") expected = expected.reset_index() @@ -703,20 +701,16 @@ def test_ops_not_as_index(reduction_func): g = df.groupby("a", as_index=False) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = getattr(g, reduction_func)() + result = getattr(g, reduction_func)() tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = g.agg(reduction_func) + result = g.agg(reduction_func) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = getattr(g["b"], reduction_func)() + result = getattr(g["b"], reduction_func)() tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = g["b"].agg(reduction_func) + result = g["b"].agg(reduction_func) tm.assert_frame_equal(result, expected) @@ -1877,7 +1871,7 @@ def test_pivot_table_values_key_error(): ) @pytest.mark.parametrize("method", ["attr", "agg", "apply"]) @pytest.mark.parametrize( - "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"] + "op", ["idxmax", "idxmin", "min", "max", "sum", "prod", "skew"] ) @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning") @pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning") @@ -1888,16 +1882,10 @@ def test_empty_groupby(columns, keys, values, method, op, request, using_array_m if ( isinstance(values, Categorical) and not isinstance(columns, list) - and op in ["sum", "prod", "skew", "mad"] + and op in ["sum", "prod", "skew"] ): # handled below GH#41291 - - if using_array_manager and op == "mad": - right_msg = "Cannot interpret 'CategoricalDtype.* as a data type" - msg = "Regex pattern \"'Categorical' does not implement.*" + right_msg - mark = pytest.mark.xfail(raises=AssertionError, match=msg) - request.node.add_marker(mark) - + pass elif ( isinstance(values, Categorical) and len(keys) == 1 @@ -1931,19 +1919,6 @@ def test_empty_groupby(columns, keys, values, method, op, request, using_array_m ) request.node.add_marker(mark) - elif ( - op == "mad" - and not isinstance(columns, list) - and isinstance(values, pd.DatetimeIndex) - and values.tz is not None - and using_array_manager - ): - mark = pytest.mark.xfail( - raises=TypeError, - match=r"Cannot interpret 'datetime64\[ns, US/Eastern\]' as a data type", - ) - request.node.add_marker(mark) - elif isinstance(values, BooleanArray) and op in ["sum", "prod"]: # We expect to get Int64 back for these override_dtype = "Int64" @@ -1963,14 +1938,10 @@ def test_empty_groupby(columns, keys, values, method, op, request, using_array_m gb = df.groupby(keys, group_keys=False)[columns] def get_result(): - warn = FutureWarning if op == "mad" else None - with tm.assert_produces_warning( - warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False - ): - if method == "attr": - return getattr(gb, op)() - else: - return getattr(gb, method)(op) + if method == "attr": + return getattr(gb, op)() + else: + return getattr(gb, method)(op) if columns == "C": # i.e. SeriesGroupBy @@ -1987,13 +1958,10 @@ def get_result(): get_result() return - if op in ["prod", "sum", "skew", "mad"]: + if op in ["prod", "sum", "skew"]: if isinstance(values, Categorical): # GH#41291 - if op == "mad": - # mad calls mean, which Categorical doesn't implement - msg = "does not support reduction 'mean'" - elif op == "skew": + if op == "skew": msg = f"does not support reduction '{op}'" else: msg = "category type does not support" @@ -2044,7 +2012,7 @@ def get_result(): return if ( - op in ["mad", "min", "max", "skew"] + op in ["min", "max", "skew"] and isinstance(values, Categorical) and len(keys) == 1 ): @@ -2307,13 +2275,9 @@ def test_groupby_duplicate_index(): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:.*is deprecated.*:FutureWarning") def test_group_on_empty_multiindex(transformation_func, request): # GH 47787 # With one row, those are transforms so the schema should be the same - if transformation_func == "tshift": - mark = pytest.mark.xfail(raises=NotImplementedError) - request.node.add_marker(mark) df = DataFrame( data=[[1, Timestamp("today"), 3, 4]], columns=["col_1", "col_2", "col_3", "col_4"], @@ -2323,8 +2287,6 @@ def test_group_on_empty_multiindex(transformation_func, request): df = df.set_index(["col_1", "col_2"]) if transformation_func == "fillna": args = ("ffill",) - elif transformation_func == "tshift": - args = (1, "D") else: args = () result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args) @@ -2351,24 +2313,17 @@ def test_group_on_empty_multiindex(transformation_func, request): MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]), ], ) -@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_dup_labels_output_shape(groupby_func, idx): if groupby_func in {"size", "ngroup", "cumcount"}: pytest.skip(f"Not applicable for {groupby_func}") # TODO(2.0) Remove after pad/backfill deprecation enforced groupby_func = maybe_normalize_deprecated_kernels(groupby_func) - warn = FutureWarning if groupby_func in ("mad", "tshift") else None df = DataFrame([[1, 1]], columns=idx) grp_by = df.groupby([0]) - if groupby_func == "tshift": - df.index = [Timestamp("today")] - # args.extend([1, "D"]) args = get_groupby_method_args(groupby_func, df) - - with tm.assert_produces_warning(warn, match="is deprecated"): - result = getattr(grp_by, groupby_func)(*args) + result = getattr(grp_by, groupby_func)(*args) assert result.shape == (1, 2) tm.assert_index_equal(result.columns, idx) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index fddf0c86d0ab1..b8aa2a1c9656d 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -20,7 +20,6 @@ tm.SubclassedSeries(np.arange(0, 10), name="A"), ], ) -@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_groupby_preserves_subclass(obj, groupby_func): # GH28330 -- preserve subclass through groupby operations @@ -28,7 +27,6 @@ def test_groupby_preserves_subclass(obj, groupby_func): pytest.skip(f"Not applicable for Series and {groupby_func}") # TODO(2.0) Remove after pad/backfill deprecation enforced groupby_func = maybe_normalize_deprecated_kernels(groupby_func) - warn = FutureWarning if groupby_func in ("mad", "tshift") else None grouped = obj.groupby(np.arange(0, 10)) @@ -37,9 +35,8 @@ def test_groupby_preserves_subclass(obj, groupby_func): args = get_groupby_method_args(groupby_func, obj) - with tm.assert_produces_warning(warn, match="is deprecated"): - result1 = getattr(grouped, groupby_func)(*args) - result2 = grouped.agg(groupby_func, *args) + result1 = getattr(grouped, groupby_func)(*args) + result2 = grouped.agg(groupby_func, *args) # Reduction or transformation kernels should preserve type slices = {"ngroup", "cumcount", "size"} diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 8a2bd64a3deb0..2b4eba539ec82 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -173,13 +173,10 @@ def test_transform_axis_1(request, transformation_func): msg = "ngroup fails with axis=1: #45986" request.node.add_marker(pytest.mark.xfail(reason=msg)) - warn = FutureWarning if transformation_func == "tshift" else None - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) args = get_groupby_method_args(transformation_func, df) - with tm.assert_produces_warning(warn): - result = df.groupby([0, 0, 1], axis=1).transform(transformation_func, *args) - expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T + result = df.groupby([0, 0, 1], axis=1).transform(transformation_func, *args) + expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T if transformation_func in ["diff", "shift"]: # Result contains nans, so transpose coerces to float @@ -200,22 +197,13 @@ def test_transform_axis_1_reducer(request, reduction_func): ): marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986") request.node.add_marker(marker) - if reduction_func == "mad": - warn = FutureWarning - msg = "The 'mad' method is deprecated" - elif reduction_func in ("sem", "std"): - warn = FutureWarning - msg = "The default value of numeric_only" - else: - warn = None - msg = "" + warn = FutureWarning if reduction_func in ("sem", "std") else None + msg = "The default value of numeric_only" df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) with tm.assert_produces_warning(warn, match=msg): result = df.groupby([0, 0, 1], axis=1).transform(reduction_func) - warn = FutureWarning if reduction_func == "mad" else None - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T + expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T tm.assert_equal(result, expected) @@ -402,12 +390,6 @@ def mock_op(x): counter += 1 return Series(counter, index=x.index) - elif transformation_func == "tshift": - msg = ( - "Current behavior of groupby.tshift is inconsistent with other " - "transformations. See GH34452 for more details" - ) - request.node.add_marker(pytest.mark.xfail(reason=msg)) else: test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() @@ -1152,7 +1134,6 @@ def test_transform_invalid_name_raises(): ) def test_transform_agg_by_name(request, reduction_func, obj): func = reduction_func - warn = FutureWarning if func == "mad" else None g = obj.groupby(np.repeat([0, 1], 3)) @@ -1162,8 +1143,7 @@ def test_transform_agg_by_name(request, reduction_func, obj): ) args = get_groupby_method_args(reduction_func, obj) - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = g.transform(func, *args) + result = g.transform(func, *args) # this is the *definition* of a transformation tm.assert_index_equal(result.index, obj.index) @@ -1353,7 +1333,6 @@ def test_null_group_str_reducer(request, dropna, reduction_func): if reduction_func == "corrwith": msg = "incorrectly raises" request.node.add_marker(pytest.mark.xfail(reason=msg)) - warn = FutureWarning if reduction_func == "mad" else None index = [1, 2, 3, 4] # test transform preserves non-standard index df = DataFrame({"A": [1, 1, np.nan, np.nan], "B": [1, 2, 2, 3]}, index=index) @@ -1377,10 +1356,7 @@ def test_null_group_str_reducer(request, dropna, reduction_func): expected_gb = df.groupby("A", dropna=False) buffer = [] for idx, group in expected_gb: - with tm.assert_produces_warning( - warn, match="The 'mad' method is deprecated" - ): - res = getattr(group["B"], reduction_func)() + res = getattr(group["B"], reduction_func)() buffer.append(Series(res, index=group.index)) expected = concat(buffer).to_frame("B") if dropna: @@ -1391,17 +1367,12 @@ def test_null_group_str_reducer(request, dropna, reduction_func): else: expected.iloc[[2, 3]] = np.nan - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = gb.transform(reduction_func, *args) + result = gb.transform(reduction_func, *args) tm.assert_equal(result, expected) -@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_null_group_str_transformer(request, dropna, transformation_func): # GH 17093 - if transformation_func == "tshift": - msg = "tshift requires timeseries" - request.node.add_marker(pytest.mark.xfail(reason=msg)) df = DataFrame({"A": [1, 1, np.nan], "B": [1, 2, 2]}, index=[1, 2, 3]) args = get_groupby_method_args(transformation_func, df) gb = df.groupby("A", dropna=dropna) @@ -1438,7 +1409,6 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func): if reduction_func == "corrwith": msg = "corrwith not implemented for SeriesGroupBy" request.node.add_marker(pytest.mark.xfail(reason=msg)) - warn = FutureWarning if reduction_func == "mad" else None # GH 17093 index = [1, 2, 3, 4] # test transform preserves non-standard index @@ -1463,10 +1433,7 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func): expected_gb = ser.groupby([1, 1, np.nan, np.nan], dropna=False) buffer = [] for idx, group in expected_gb: - with tm.assert_produces_warning( - warn, match="The 'mad' method is deprecated" - ): - res = getattr(group, reduction_func)() + res = getattr(group, reduction_func)() buffer.append(Series(res, index=group.index)) expected = concat(buffer) if dropna: @@ -1474,17 +1441,12 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func): expected = expected.astype(dtype) expected.iloc[[2, 3]] = np.nan - with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): - result = gb.transform(reduction_func, *args) + result = gb.transform(reduction_func, *args) tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_null_group_str_transformer_series(request, dropna, transformation_func): # GH 17093 - if transformation_func == "tshift": - msg = "tshift requires timeseries" - request.node.add_marker(pytest.mark.xfail(reason=msg)) ser = Series([1, 2, 2], index=[1, 2, 3]) args = get_groupby_method_args(transformation_func, ser) gb = ser.groupby([1, 1, np.nan], dropna=dropna) From 1dee48d4b005c5a3038fed14247e2ce52c6858b9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 22 Oct 2022 17:52:04 -0400 Subject: [PATCH 2/4] whatsnew --- doc/source/whatsnew/v2.0.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e281e250d608e..cb7a694ea1d6f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -196,7 +196,8 @@ Removal of prior version deprecations/changes - Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame`` (:issue:`30642`) - Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`) - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`) -- +- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) +- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`) .. --------------------------------------------------------------------------- .. _whatsnew_200.performance: From baf77055d8c0ec20aeee6c9be0ea2901450f95e7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 22 Oct 2022 17:54:11 -0400 Subject: [PATCH 3/4] revert --- pandas/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 39365d339016c..5c22baa4bd42e 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -280,7 +280,7 @@ def closed(self) -> bool: ] # Arguments for fillna() -FillnaOptions = Literal["backfill", "bfill", "ffill"] +FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"] # internals Manager = Union[ From c6f89c14b730e47c607dbca67673a8e14c8b0a3e Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 22 Oct 2022 19:50:36 -0400 Subject: [PATCH 4/4] Remove ASVs for mad --- asv_bench/benchmarks/groupby.py | 5 +---- asv_bench/benchmarks/stat_ops.py | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 3007d2d1e126c..97f48a3a6f69f 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -35,7 +35,6 @@ "pct_change", "min", "var", - "mad", "describe", "std", "quantile", @@ -52,7 +51,6 @@ "cummax", "pct_change", "var", - "mad", "describe", "std", }, @@ -437,7 +435,6 @@ class GroupByMethods: "first", "head", "last", - "mad", "max", "min", "median", @@ -483,7 +480,7 @@ def setup(self, dtype, method, application, ncols): if method == "describe": ngroups = 20 - elif method in ["mad", "skew"]: + elif method == "skew": ngroups = 100 else: ngroups = 1000 diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 92a78b7c2f63d..19fa7f7a06cf2 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -2,7 +2,7 @@ import pandas as pd -ops = ["mean", "sum", "median", "std", "skew", "kurt", "mad", "prod", "sem", "var"] +ops = ["mean", "sum", "median", "std", "skew", "kurt", "prod", "sem", "var"] class FrameOps: @@ -11,9 +11,6 @@ class FrameOps: param_names = ["op", "dtype", "axis"] def setup(self, op, dtype, axis): - if op == "mad" and dtype == "Int64": - # GH-33036, GH#33600 - raise NotImplementedError values = np.random.randn(100000, 4) if dtype == "Int64": values = values.astype(int)