diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index bc922bace053a..97ee96d8be25d 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -573,6 +573,7 @@ Removal of prior version deprecations/changes - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`) - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) - Changed default of ``numeric_only`` to ``False`` in :meth:`.DataFrameGroupBy.sum` and :meth:`.DataFrameGroupBy.mean` (:issue:`46072`) +- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4cce6f2b71771..f5c76aade9956 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -899,7 +899,7 @@ def asfreq(self, fill_value=None): def mean( self, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, *args, **kwargs, ): @@ -911,6 +911,10 @@ def mean( numeric_only : bool, default False Include only `float`, `int` or `boolean` data. + .. versionchanged:: 2.0.0 + + numeric_only now defaults to ``False``. + Returns ------- DataFrame or Series @@ -922,7 +926,7 @@ def mean( def std( self, ddof: int = 1, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, *args, **kwargs, ): @@ -938,6 +942,10 @@ def std( .. versionadded:: 1.5.0 + .. versionchanged:: 2.0.0 + + numeric_only now defaults to ``False``. + Returns ------- DataFrame or Series @@ -949,7 +957,7 @@ def std( def var( self, ddof: int = 1, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, *args, **kwargs, ): @@ -966,6 +974,10 @@ def var( .. versionadded:: 1.5.0 + .. versionchanged:: 2.0.0 + + numeric_only now defaults to ``False``. + Returns ------- DataFrame or Series @@ -1058,25 +1070,19 @@ def _add_downsample_kernel( def f( self, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, min_count: int = 0, *args, **kwargs, ): nv.validate_resampler_func(name, args, kwargs) - if numeric_only is lib.no_default and name != "sum": - # For DataFrameGroupBy, set it to be False for methods other than `sum`. - numeric_only = False - return self._downsample( name, numeric_only=numeric_only, min_count=min_count ) elif args == ("numeric_only",): # error: All conditional function variants must have identical signatures - def f( # type: ignore[misc] - self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs - ): + def f(self, numeric_only: bool = False, *args, **kwargs): # type: ignore[misc] nv.validate_resampler_func(name, args, kwargs) return self._downsample(name, numeric_only=numeric_only) @@ -1085,7 +1091,7 @@ def f( # type: ignore[misc] def f( # type: ignore[misc] self, ddof: int = 1, - numeric_only: bool | lib.NoDefault = lib.no_default, + numeric_only: bool = False, *args, **kwargs, ): diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 22075a30bdb65..cb796e1b1ec64 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1789,10 +1789,9 @@ def test_stack_multiple_bug(self): multi = df.set_index(["DATE", "ID"]) multi.columns.name = "Params" unst = multi.unstack("ID") - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - down = unst.resample("W-THU").mean() - + with pytest.raises(TypeError, match="Could not convert"): + unst.resample("W-THU").mean() + down = unst.resample("W-THU").mean(numeric_only=True) rs = down.stack("ID") xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID") xp.columns.name = "Params" diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index eeedb3d6bb1d0..773c1e60e97af 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -101,7 +101,5 @@ def test_groupby_resample_preserves_subclass(obj): df = df.set_index("Date") # Confirm groupby.resample() preserves dataframe type - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("Buyer").resample("5D").sum() + result = df.groupby("Buyer").resample("5D").sum() assert isinstance(result, obj) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index d5b4d9ae44dab..4a707d8875db3 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -108,10 +108,8 @@ def test_groupby_with_timegrouper(self): expected.iloc[18, 0] = "Joe" expected.iloc[[0, 6, 18], 1] = np.array([24, 6, 9], dtype="int64") - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result1 = df.resample("5D").sum() - tm.assert_frame_equal(result1, expected[["Quantity"]]) + result1 = df.resample("5D").sum() + tm.assert_frame_equal(result1, expected) df_sorted = df.sort_index() result2 = df_sorted.groupby(Grouper(freq="5D")).sum() diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index ca5444fd4e62f..e256b957699b7 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -90,10 +90,8 @@ def test_groupby_resample_on_api(): } ) - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.set_index("dates").groupby("key").resample("D").mean() - result = df.groupby("key").resample("D", on="dates").mean() + expected = df.set_index("dates").groupby("key").resample("D").mean() + result = df.groupby("key").resample("D", on="dates").mean() tm.assert_frame_equal(result, expected) @@ -187,19 +185,19 @@ def test_api_compat_before_use(attr): getattr(rs, attr) -def tests_skip_nuisance(test_frame): +def tests_raises_on_nuisance(test_frame): df = test_frame df["D"] = "foo" r = df.resample("H") - result = r[["A", "B"]].sum() - expected = pd.concat([r.A.sum(), r.B.sum()], axis=1) + result = r[["A", "B"]].mean() + expected = pd.concat([r.A.mean(), r.B.mean()], axis=1) tm.assert_frame_equal(result, expected) - expected = r[["A", "B", "C"]].sum() - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = r.sum() + expected = r[["A", "B", "C"]].mean() + with pytest.raises(TypeError, match="Could not convert"): + r.mean() + result = r.mean(numeric_only=True) tm.assert_frame_equal(result, expected) @@ -681,9 +679,9 @@ def test_selection_api_validation(): tm.assert_frame_equal(exp, result) exp.index.name = "d" - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.resample("2D", level="d").sum() + with pytest.raises(TypeError, match="datetime64 type does not support sum"): + df.resample("2D", level="d").sum() + result = df.resample("2D", level="d").sum(numeric_only=True) tm.assert_frame_equal(exp, result) @@ -819,7 +817,7 @@ def test_end_and_end_day_origin( [ ("sum", True, {"num": [25]}), ("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}), - ("sum", lib.no_default, {"num": [25]}), + ("sum", lib.no_default, {"cat": ["cat_1cat_2"], "num": [25]}), ("prod", True, {"num": [100]}), ("prod", False, "can't multiply sequence"), ("prod", lib.no_default, "can't multiply sequence"), @@ -837,19 +835,19 @@ def test_end_and_end_day_origin( ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}), ("mean", True, {"num": [12.5]}), ("mean", False, "Could not convert"), - ("mean", lib.no_default, {"num": [12.5]}), + ("mean", lib.no_default, "Could not convert"), ("median", True, {"num": [12.5]}), ("median", False, "could not convert"), - ("median", lib.no_default, {"num": [12.5]}), + ("median", lib.no_default, "could not convert"), ("std", True, {"num": [10.606601717798213]}), ("std", False, "could not convert string to float"), - ("std", lib.no_default, {"num": [10.606601717798213]}), + ("std", lib.no_default, "could not convert string to float"), ("var", True, {"num": [112.5]}), ("var", False, "could not convert string to float"), - ("var", lib.no_default, {"num": [112.5]}), + ("var", lib.no_default, "could not convert string to float"), ("sem", True, {"num": [7.5]}), ("sem", False, "could not convert string to float"), - ("sem", lib.no_default, {"num": [7.5]}), + ("sem", lib.no_default, "could not convert string to float"), ], ) def test_frame_downsample_method(method, numeric_only, expected_data): @@ -865,31 +863,14 @@ def test_frame_downsample_method(method, numeric_only, expected_data): kwargs = {"numeric_only": numeric_only} func = getattr(resampled, method) - if numeric_only is lib.no_default and method not in ( - "min", - "max", - "first", - "last", - "prod", - ): - warn = FutureWarning - msg = ( - f"default value of numeric_only in DataFrameGroupBy.{method} is deprecated" - ) + if isinstance(expected_data, str): + klass = TypeError if method in ("var", "mean", "median", "prod") else ValueError + with pytest.raises(klass, match=expected_data): + _ = func(**kwargs) else: - warn = None - msg = "" - with tm.assert_produces_warning(warn, match=msg): - if isinstance(expected_data, str): - klass = ( - TypeError if method in ("var", "mean", "median", "prod") else ValueError - ) - with pytest.raises(klass, match=expected_data): - _ = func(**kwargs) - else: - result = func(**kwargs) - expected = DataFrame(expected_data, index=expected_index) - tm.assert_frame_equal(result, expected) + result = func(**kwargs) + expected = DataFrame(expected_data, index=expected_index) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 7fe1e645aa141..0432cf397067d 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -408,9 +408,7 @@ def test_resample_groupby_agg(): df["date"] = pd.to_datetime(df["date"]) resampled = df.groupby("cat").resample("Y", on="date") - msg = "The default value of numeric_only" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = resampled.sum() + expected = resampled[["num"]].sum() result = resampled.agg({"num": "sum"}) tm.assert_frame_equal(result, expected)