diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 4920622a15f3f..2f9dfdf7cd66f 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -430,7 +430,7 @@ Other Deprecations - Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`) - Deprecated :attr:`Timedelta.delta` (:issue:`46476`) - Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`) -- +- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`) .. --------------------------------------------------------------------------- .. _whatsnew_150.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e65e3139ffa1..bdde89e4f24ac 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10881,6 +10881,9 @@ def mad( """ {desc} + .. deprecated:: 1.5.0 + mad is deprecated. + Parameters ---------- axis : {axis_descr} @@ -10897,6 +10900,12 @@ def mad( {see_also}\ {examples} """ + msg = ( + "The 'mad' method is deprecated and will be removed in a future version. " + "To compute the same result, you may do `(df - df.mean()).abs().mean()`." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + if not is_bool(skipna): warnings.warn( "Passing None for skipna is deprecated and will raise in a future" diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index cc0195bf1dff9..d2c47498b2fe5 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -67,6 +67,7 @@ def assert_stat_op_calc( skipna_alternative : function, default None NaN-safe version of alternative """ + warn = FutureWarning if opname == "mad" else None f = getattr(frame, opname) if check_dates: @@ -88,8 +89,9 @@ def wrapper(x): return alternative(x.values) skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative) - result0 = f(axis=0, skipna=False) - result1 = f(axis=1, skipna=False) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) tm.assert_series_equal( result0, frame.apply(wrapper), check_dtype=check_dtype, rtol=rtol, atol=atol ) @@ -102,8 +104,9 @@ def wrapper(x): else: skipna_wrapper = alternative - result0 = f(axis=0) - result1 = f(axis=1) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result0 = f(axis=0) + result1 = f(axis=1) tm.assert_series_equal( result0, frame.apply(skipna_wrapper), @@ -125,14 +128,18 @@ def wrapper(x): assert lcd_dtype == result1.dtype # bad axis - with pytest.raises(ValueError, match="No axis named 2"): - f(axis=2) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) # all NA case if has_skipna: all_na = frame * np.NaN - r0 = getattr(all_na, opname)(axis=0) - r1 = getattr(all_na, opname)(axis=1) + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) if opname in ["sum", "prod"]: unit = 1 if opname == "prod" else 0 # result for empty sum/prod expected = Series(unit, index=r0.index, dtype=r0.dtype) @@ -167,9 +174,13 @@ class TestDataFrameAnalytics: ], ) def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname): - getattr(float_string_frame, opname)(axis=axis) - if opname not in ("nunique", "mad"): - getattr(float_string_frame, opname)(axis=axis, numeric_only=True) + warn = FutureWarning if opname == "mad" else None + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): + getattr(float_string_frame, opname)(axis=axis) + if opname not in ("nunique", "mad"): + getattr(float_string_frame, opname)(axis=axis, numeric_only=True) @pytest.mark.filterwarnings("ignore:Dropping of nuisance:FutureWarning") @pytest.mark.parametrize("axis", [0, 1]) @@ -1424,7 +1435,9 @@ def test_frame_any_with_timedelta(self): def test_reductions_deprecation_skipna_none(self, frame_or_series): # GH#44580 obj = frame_or_series([1, 2, 3]) - with tm.assert_produces_warning(FutureWarning, match="skipna"): + with tm.assert_produces_warning( + FutureWarning, match="skipna", raise_on_extra_warnings=False + ): obj.mad(skipna=None) def test_reductions_deprecation_level_argument( @@ -1445,7 +1458,7 @@ def test_reductions_skipna_none_raises( pytest.mark.xfail(reason="Count does not accept skipna") ) elif reduction_functions == "mad": - pytest.skip("Mad needs a deprecation cycle: GH 11787") + pytest.skip("Mad is deprecated: GH#11787") obj = frame_or_series([1, 2, 3]) msg = 'For argument "skipna" expected type bool, received type NoneType.' with pytest.raises(ValueError, match=msg): @@ -1644,25 +1657,37 @@ def test_mad_nullable_integer(any_signed_int_ea_dtype): df = DataFrame(np.random.randn(100, 4).astype(np.int64)) df2 = df.astype(any_signed_int_ea_dtype) - result = df2.mad() - expected = df.mad() + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad() + expected = df.mad() tm.assert_series_equal(result, expected) - result = df2.mad(axis=1) - expected = df.mad(axis=1) + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad(axis=1) + expected = df.mad(axis=1) tm.assert_series_equal(result, expected) # case with NAs present df2.iloc[::2, 1] = pd.NA - result = df2.mad() - expected = df.mad() - expected[1] = df.iloc[1::2, 1].mad() + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad() + expected = df.mad() + expected[1] = df.iloc[1::2, 1].mad() tm.assert_series_equal(result, expected) - result = df2.mad(axis=1) - expected = df.mad(axis=1) - expected[::2] = df.T.loc[[0, 2, 3], ::2].mad() + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad(axis=1) + expected = df.mad(axis=1) + expected[::2] = df.T.loc[[0, 2, 3], ::2].mad() tm.assert_series_equal(result, expected) @@ -1675,8 +1700,11 @@ def test_mad_nullable_integer_all_na(any_signed_int_ea_dtype): # case with all-NA row/column df2.iloc[:, 1] = pd.NA # FIXME(GH#44199): this doesn't operate in-place df2.iloc[:, 1] = pd.array([pd.NA] * len(df2), dtype=any_signed_int_ea_dtype) - result = df2.mad() - expected = df.mad() + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = df2.mad() + expected = df.mad() expected[1] = pd.NA expected = expected.astype("Float64") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index cae3bdf1a8f86..bdb33bff5eadd 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1325,11 +1325,13 @@ def test_groupby_aggregate_directory(reduction_func): # GH#32793 if reduction_func in ["corrwith", "nth"]: return None + warn = FutureWarning if reduction_func == "mad" else None obj = DataFrame([[0, 1], [0, np.nan]]) - result_reduced_series = obj.groupby(0).agg(reduction_func) - result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result_reduced_series = obj.groupby(0).agg(reduction_func) + result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) if reduction_func in ["size", "ngroup"]: # names are different: None / 1 diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index e6c47aec79738..7c64d82608c9e 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -178,6 +178,7 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): # GH6944 # GH 17537 # explicitly test the allowlist methods + warn = FutureWarning if op == "mad" else None if axis == 0: frame = raw_frame @@ -186,7 +187,8 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): if op in AGG_FUNCTIONS_WITH_SKIPNA: grouped = frame.groupby(level=level, axis=axis, sort=sort) - result = getattr(grouped, op)(skipna=skipna) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = getattr(grouped, op)(skipna=skipna) with tm.assert_produces_warning(FutureWarning): expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) if sort: diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b2de4a8144ff9..9cb64766a1079 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1047,6 +1047,8 @@ def test_apply_with_timezones_aware(): def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): # GH #34656 # GH #34271 + warn = FutureWarning if reduction_func == "mad" else None + df = DataFrame( { "a": [99, 99, 99, 88, 88, 88], @@ -1068,7 +1070,8 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): # Check output when another method is called before .apply() grp = df.groupby(by="a") args = {"nth": [0], "corrwith": [df]}.get(reduction_func, []) - _ = getattr(grp, reduction_func)(*args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + _ = getattr(grp, reduction_func)(*args) result = grp.apply(sum) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 603f6d1e37bf4..abe1b8f13e32e 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1357,6 +1357,7 @@ def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, r reason="TODO: implemented SeriesGroupBy.corrwith. See GH 32293" ) request.node.add_marker(mark) + warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1371,7 +1372,8 @@ def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, r series_groupby = df.groupby(["cat_1", "cat_2"], observed=observed)["value"] agg = getattr(series_groupby, reduction_func) - result = agg(*args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = agg(*args) assert len(result) == expected_length @@ -1390,6 +1392,7 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( reason="TODO: implemented SeriesGroupBy.corrwith. See GH 32293" ) request.node.add_marker(mark) + warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1403,7 +1406,8 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( series_groupby = df.groupby(["cat_1", "cat_2"], observed=False)["value"] agg = getattr(series_groupby, reduction_func) - result = agg(*args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = agg(*args) zero_or_nan = _results_for_groupbys_with_missing_categories[reduction_func] @@ -1426,6 +1430,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun # does not return the categories that are not in df when observed=True if reduction_func == "ngroup": pytest.skip("ngroup does not return the Categories on the index") + warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1439,7 +1444,8 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun df_grp = df.groupby(["cat_1", "cat_2"], observed=True) args = {"nth": [0], "corrwith": [df]}.get(reduction_func, []) - res = getattr(df_grp, reduction_func)(*args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + res = getattr(df_grp, reduction_func)(*args) for cat in unobserved_cats: assert cat not in res.index @@ -1456,6 +1462,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( if reduction_func == "ngroup": pytest.skip("ngroup does not return the Categories on the index") + warn = FutureWarning if reduction_func == "mad" else None df = DataFrame( { @@ -1469,7 +1476,8 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( df_grp = df.groupby(["cat_1", "cat_2"], observed=observed) args = {"nth": [0], "corrwith": [df]}.get(reduction_func, []) - res = getattr(df_grp, reduction_func)(*args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + res = getattr(df_grp, reduction_func)(*args) expected = _results_for_groupbys_with_missing_categories[reduction_func] diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 42cce74c5c01d..102a3333035e5 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -321,11 +321,17 @@ def test_mad(self, gb, gni): # mad expected = DataFrame([[0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - result = gb.mad() + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = gb.mad() tm.assert_frame_equal(result, expected) expected = DataFrame([[1, 0.0], [3, np.nan]], columns=["A", "B"], index=[0, 1]) - result = gni.mad() + with tm.assert_produces_warning( + FutureWarning, match="The 'mad' method is deprecated" + ): + result = gni.mad() tm.assert_frame_equal(result, expected) def test_describe(self, df, gb, gni): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 97e388cd074c3..e7c120beeed12 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -717,9 +717,11 @@ def test_ops_not_as_index(reduction_func): if reduction_func in ("corrwith", "nth", "ngroup"): pytest.skip(f"GH 5755: Test not applicable for {reduction_func}") + warn = FutureWarning if reduction_func == "mad" else None df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"]) - expected = getattr(df.groupby("a"), reduction_func)() + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + expected = getattr(df.groupby("a"), reduction_func)() if reduction_func == "size": expected = expected.rename("size") expected = expected.reset_index() @@ -730,16 +732,20 @@ def test_ops_not_as_index(reduction_func): g = df.groupby("a", as_index=False) - result = getattr(g, reduction_func)() + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = getattr(g, reduction_func)() tm.assert_frame_equal(result, expected) - result = g.agg(reduction_func) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = g.agg(reduction_func) tm.assert_frame_equal(result, expected) - result = getattr(g["b"], reduction_func)() + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = getattr(g["b"], reduction_func)() tm.assert_frame_equal(result, expected) - result = g["b"].agg(reduction_func) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = g["b"].agg(reduction_func) tm.assert_frame_equal(result, expected) @@ -1918,10 +1924,14 @@ def test_empty_groupby(columns, keys, values, method, op, request, using_array_m gb = df.groupby(keys, group_keys=False)[columns] def get_result(): - if method == "attr": - return getattr(gb, op)() - else: - return getattr(gb, method)(op) + warn = FutureWarning if op == "mad" else None + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated", raise_on_extra_warnings=False + ): + if method == "attr": + return getattr(gb, op)() + else: + return getattr(gb, method)(op) if columns == "C": # i.e. SeriesGroupBy @@ -2271,6 +2281,8 @@ def test_dup_labels_output_shape(groupby_func, idx): pytest.skip(f"Not applicable for {groupby_func}") # TODO(2.0) Remove after pad/backfill deprecation enforced groupby_func = maybe_normalize_deprecated_kernels(groupby_func) + warn = FutureWarning if groupby_func in ("mad", "tshift") else None + df = DataFrame([[1, 1]], columns=idx) grp_by = df.groupby([0]) @@ -2283,7 +2295,8 @@ def test_dup_labels_output_shape(groupby_func, idx): df.index = [Timestamp("today")] args.extend([1, "D"]) - result = getattr(grp_by, groupby_func)(*args) + with tm.assert_produces_warning(warn, match="is deprecated"): + result = getattr(grp_by, groupby_func)(*args) assert result.shape == (1, 2) tm.assert_index_equal(result.columns, idx) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 64b4daf49ac09..54cde30ceac92 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -27,6 +27,8 @@ def test_groupby_preserves_subclass(obj, groupby_func): pytest.skip(f"Not applicable for Series and {groupby_func}") # TODO(2.0) Remove after pad/backfill deprecation enforced groupby_func = maybe_normalize_deprecated_kernels(groupby_func) + warn = FutureWarning if groupby_func in ("mad", "tshift") else None + grouped = obj.groupby(np.arange(0, 10)) # Groups should preserve subclass type @@ -40,8 +42,9 @@ def test_groupby_preserves_subclass(obj, groupby_func): elif groupby_func == "tshift": args.extend([0, 0]) - result1 = getattr(grouped, groupby_func)(*args) - result2 = grouped.agg(groupby_func, *args) + with tm.assert_produces_warning(warn, match="is deprecated"): + result1 = getattr(grouped, groupby_func)(*args) + result2 = grouped.agg(groupby_func, *args) # Reduction or transformation kernels should preserve type slices = {"ngroup", "cumcount", "size"} diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index f178e05d40dd0..a77c95e30ab43 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -203,13 +203,17 @@ def test_transform_axis_1_reducer(request, reduction_func): ): marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986") request.node.add_marker(marker) + warn = FutureWarning if reduction_func == "mad" else None + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - result = df.groupby([0, 0, 1], axis=1).transform(reduction_func) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = df.groupby([0, 0, 1], axis=1).transform(reduction_func) if reduction_func == "size": # size doesn't behave in the same manner; hardcode expected result expected = DataFrame(2 * [[2, 2, 1]], index=df.index, columns=df.columns) else: - expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T tm.assert_equal(result, expected) @@ -1137,6 +1141,8 @@ def test_transform_invalid_name_raises(): ) def test_transform_agg_by_name(request, reduction_func, obj): func = reduction_func + warn = FutureWarning if func == "mad" else None + g = obj.groupby(np.repeat([0, 1], 3)) if func == "corrwith" and isinstance(obj, Series): # GH#32293 @@ -1145,7 +1151,8 @@ def test_transform_agg_by_name(request, reduction_func, obj): ) args = {"nth": [0], "quantile": [0.5], "corrwith": [obj]}.get(func, []) - result = g.transform(func, *args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = g.transform(func, *args) # this is the *definition* of a transformation tm.assert_index_equal(result.index, obj.index) @@ -1339,6 +1346,8 @@ def test_null_group_str_reducer(request, dropna, reduction_func): if reduction_func == "corrwith": msg = "incorrectly raises" request.node.add_marker(pytest.mark.xfail(reason=msg)) + warn = FutureWarning if reduction_func == "mad" else None + index = [1, 2, 3, 4] # test transform preserves non-standard index df = DataFrame({"A": [1, 1, np.nan, np.nan], "B": [1, 2, 2, 3]}, index=index) gb = df.groupby("A", dropna=dropna) @@ -1366,7 +1375,10 @@ def test_null_group_str_reducer(request, dropna, reduction_func): expected_gb = df.groupby("A", dropna=False) buffer = [] for idx, group in expected_gb: - res = getattr(group["B"], reduction_func)() + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated" + ): + res = getattr(group["B"], reduction_func)() buffer.append(Series(res, index=group.index)) expected = concat(buffer).to_frame("B") if dropna: @@ -1377,7 +1389,8 @@ def test_null_group_str_reducer(request, dropna, reduction_func): else: expected.iloc[[2, 3]] = np.nan - result = gb.transform(reduction_func, *args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = gb.transform(reduction_func, *args) tm.assert_equal(result, expected) @@ -1423,6 +1436,7 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func): if reduction_func == "corrwith": msg = "corrwith not implemented for SeriesGroupBy" request.node.add_marker(pytest.mark.xfail(reason=msg)) + warn = FutureWarning if reduction_func == "mad" else None # GH 17093 index = [1, 2, 3, 4] # test transform preserves non-standard index @@ -1452,7 +1466,10 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func): expected_gb = ser.groupby([1, 1, np.nan, np.nan], dropna=False) buffer = [] for idx, group in expected_gb: - res = getattr(group, reduction_func)() + with tm.assert_produces_warning( + warn, match="The 'mad' method is deprecated" + ): + res = getattr(group, reduction_func)() buffer.append(Series(res, index=group.index)) expected = concat(buffer) if dropna: @@ -1460,7 +1477,8 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func): expected = expected.astype(dtype) expected.iloc[[2, 3]] = np.nan - result = gb.transform(reduction_func, *args) + with tm.assert_produces_warning(warn, match="The 'mad' method is deprecated"): + result = gb.transform(reduction_func, *args) tm.assert_series_equal(result, expected)