Skip to content

Commit 16b4e53

Browse files
authored
DEPR: Change default of numeric_only to False in Resampler methods (#49915)
* DEPR: Change default of numeric_only to False in Resampler methods
* more cleanup
1 parent 0b47d85 commit 16b4e53

File tree

7 files changed

+51
-70
lines changed

7 files changed

+51
-70
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,7 @@ Removal of prior version deprecations/changes
573573
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
574574
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
575575
- Changed default of ``numeric_only`` to ``False`` in :meth:`.DataFrameGroupBy.sum` and :meth:`.DataFrameGroupBy.mean` (:issue:`46072`)
576+
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
576577
-
577578

578579
.. ---------------------------------------------------------------------------

pandas/core/resample.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ def asfreq(self, fill_value=None):
899899

900900
def mean(
901901
self,
902-
numeric_only: bool | lib.NoDefault = lib.no_default,
902+
numeric_only: bool = False,
903903
*args,
904904
**kwargs,
905905
):
@@ -911,6 +911,10 @@ def mean(
911911
numeric_only : bool, default False
912912
Include only `float`, `int` or `boolean` data.
913913
914+
.. versionchanged:: 2.0.0
915+
916+
numeric_only now defaults to ``False``.
917+
914918
Returns
915919
-------
916920
DataFrame or Series
@@ -922,7 +926,7 @@ def mean(
922926
def std(
923927
self,
924928
ddof: int = 1,
925-
numeric_only: bool | lib.NoDefault = lib.no_default,
929+
numeric_only: bool = False,
926930
*args,
927931
**kwargs,
928932
):
@@ -938,6 +942,10 @@ def std(
938942
939943
.. versionadded:: 1.5.0
940944
945+
.. versionchanged:: 2.0.0
946+
947+
numeric_only now defaults to ``False``.
948+
941949
Returns
942950
-------
943951
DataFrame or Series
@@ -949,7 +957,7 @@ def std(
949957
def var(
950958
self,
951959
ddof: int = 1,
952-
numeric_only: bool | lib.NoDefault = lib.no_default,
960+
numeric_only: bool = False,
953961
*args,
954962
**kwargs,
955963
):
@@ -966,6 +974,10 @@ def var(
966974
967975
.. versionadded:: 1.5.0
968976
977+
.. versionchanged:: 2.0.0
978+
979+
numeric_only now defaults to ``False``.
980+
969981
Returns
970982
-------
971983
DataFrame or Series
@@ -1058,25 +1070,19 @@ def _add_downsample_kernel(
10581070

10591071
def f(
10601072
self,
1061-
numeric_only: bool | lib.NoDefault = lib.no_default,
1073+
numeric_only: bool = False,
10621074
min_count: int = 0,
10631075
*args,
10641076
**kwargs,
10651077
):
10661078
nv.validate_resampler_func(name, args, kwargs)
1067-
if numeric_only is lib.no_default and name != "sum":
1068-
# For DataFrameGroupBy, set it to be False for methods other than `sum`.
1069-
numeric_only = False
1070-
10711079
return self._downsample(
10721080
name, numeric_only=numeric_only, min_count=min_count
10731081
)
10741082

10751083
elif args == ("numeric_only",):
10761084
# error: All conditional function variants must have identical signatures
1077-
def f( # type: ignore[misc]
1078-
self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs
1079-
):
1085+
def f(self, numeric_only: bool = False, *args, **kwargs): # type: ignore[misc]
10801086
nv.validate_resampler_func(name, args, kwargs)
10811087
return self._downsample(name, numeric_only=numeric_only)
10821088

@@ -1085,7 +1091,7 @@ def f( # type: ignore[misc]
10851091
def f( # type: ignore[misc]
10861092
self,
10871093
ddof: int = 1,
1088-
numeric_only: bool | lib.NoDefault = lib.no_default,
1094+
numeric_only: bool = False,
10891095
*args,
10901096
**kwargs,
10911097
):

pandas/tests/frame/test_stack_unstack.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1789,10 +1789,9 @@ def test_stack_multiple_bug(self):
17891789
multi = df.set_index(["DATE", "ID"])
17901790
multi.columns.name = "Params"
17911791
unst = multi.unstack("ID")
1792-
msg = "The default value of numeric_only"
1793-
with tm.assert_produces_warning(FutureWarning, match=msg):
1794-
down = unst.resample("W-THU").mean()
1795-
1792+
with pytest.raises(TypeError, match="Could not convert"):
1793+
unst.resample("W-THU").mean()
1794+
down = unst.resample("W-THU").mean(numeric_only=True)
17961795
rs = down.stack("ID")
17971796
xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID")
17981797
xp.columns.name = "Params"

pandas/tests/groupby/test_groupby_subclass.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,5 @@ def test_groupby_resample_preserves_subclass(obj):
101101
df = df.set_index("Date")
102102

103103
# Confirm groupby.resample() preserves dataframe type
104-
msg = "The default value of numeric_only"
105-
with tm.assert_produces_warning(FutureWarning, match=msg):
106-
result = df.groupby("Buyer").resample("5D").sum()
104+
result = df.groupby("Buyer").resample("5D").sum()
107105
assert isinstance(result, obj)

pandas/tests/groupby/test_timegrouper.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,8 @@ def test_groupby_with_timegrouper(self):
108108
expected.iloc[18, 0] = "Joe"
109109
expected.iloc[[0, 6, 18], 1] = np.array([24, 6, 9], dtype="int64")
110110

111-
msg = "The default value of numeric_only"
112-
with tm.assert_produces_warning(FutureWarning, match=msg):
113-
result1 = df.resample("5D").sum()
114-
tm.assert_frame_equal(result1, expected[["Quantity"]])
111+
result1 = df.resample("5D").sum()
112+
tm.assert_frame_equal(result1, expected)
115113

116114
df_sorted = df.sort_index()
117115
result2 = df_sorted.groupby(Grouper(freq="5D")).sum()

pandas/tests/resample/test_resample_api.py

+25-44
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,8 @@ def test_groupby_resample_on_api():
9090
}
9191
)
9292

93-
msg = "The default value of numeric_only"
94-
with tm.assert_produces_warning(FutureWarning, match=msg):
95-
expected = df.set_index("dates").groupby("key").resample("D").mean()
96-
result = df.groupby("key").resample("D", on="dates").mean()
93+
expected = df.set_index("dates").groupby("key").resample("D").mean()
94+
result = df.groupby("key").resample("D", on="dates").mean()
9795
tm.assert_frame_equal(result, expected)
9896

9997

@@ -187,19 +185,19 @@ def test_api_compat_before_use(attr):
187185
getattr(rs, attr)
188186

189187

190-
def tests_skip_nuisance(test_frame):
188+
def tests_raises_on_nuisance(test_frame):
191189

192190
df = test_frame
193191
df["D"] = "foo"
194192
r = df.resample("H")
195-
result = r[["A", "B"]].sum()
196-
expected = pd.concat([r.A.sum(), r.B.sum()], axis=1)
193+
result = r[["A", "B"]].mean()
194+
expected = pd.concat([r.A.mean(), r.B.mean()], axis=1)
197195
tm.assert_frame_equal(result, expected)
198196

199-
expected = r[["A", "B", "C"]].sum()
200-
msg = "The default value of numeric_only"
201-
with tm.assert_produces_warning(FutureWarning, match=msg):
202-
result = r.sum()
197+
expected = r[["A", "B", "C"]].mean()
198+
with pytest.raises(TypeError, match="Could not convert"):
199+
r.mean()
200+
result = r.mean(numeric_only=True)
203201
tm.assert_frame_equal(result, expected)
204202

205203

@@ -681,9 +679,9 @@ def test_selection_api_validation():
681679
tm.assert_frame_equal(exp, result)
682680

683681
exp.index.name = "d"
684-
msg = "The default value of numeric_only"
685-
with tm.assert_produces_warning(FutureWarning, match=msg):
686-
result = df.resample("2D", level="d").sum()
682+
with pytest.raises(TypeError, match="datetime64 type does not support sum"):
683+
df.resample("2D", level="d").sum()
684+
result = df.resample("2D", level="d").sum(numeric_only=True)
687685
tm.assert_frame_equal(exp, result)
688686

689687

@@ -819,7 +817,7 @@ def test_end_and_end_day_origin(
819817
[
820818
("sum", True, {"num": [25]}),
821819
("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}),
822-
("sum", lib.no_default, {"num": [25]}),
820+
("sum", lib.no_default, {"cat": ["cat_1cat_2"], "num": [25]}),
823821
("prod", True, {"num": [100]}),
824822
("prod", False, "can't multiply sequence"),
825823
("prod", lib.no_default, "can't multiply sequence"),
@@ -837,19 +835,19 @@ def test_end_and_end_day_origin(
837835
("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
838836
("mean", True, {"num": [12.5]}),
839837
("mean", False, "Could not convert"),
840-
("mean", lib.no_default, {"num": [12.5]}),
838+
("mean", lib.no_default, "Could not convert"),
841839
("median", True, {"num": [12.5]}),
842840
("median", False, "could not convert"),
843-
("median", lib.no_default, {"num": [12.5]}),
841+
("median", lib.no_default, "could not convert"),
844842
("std", True, {"num": [10.606601717798213]}),
845843
("std", False, "could not convert string to float"),
846-
("std", lib.no_default, {"num": [10.606601717798213]}),
844+
("std", lib.no_default, "could not convert string to float"),
847845
("var", True, {"num": [112.5]}),
848846
("var", False, "could not convert string to float"),
849-
("var", lib.no_default, {"num": [112.5]}),
847+
("var", lib.no_default, "could not convert string to float"),
850848
("sem", True, {"num": [7.5]}),
851849
("sem", False, "could not convert string to float"),
852-
("sem", lib.no_default, {"num": [7.5]}),
850+
("sem", lib.no_default, "could not convert string to float"),
853851
],
854852
)
855853
def test_frame_downsample_method(method, numeric_only, expected_data):
@@ -865,31 +863,14 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
865863
kwargs = {"numeric_only": numeric_only}
866864

867865
func = getattr(resampled, method)
868-
if numeric_only is lib.no_default and method not in (
869-
"min",
870-
"max",
871-
"first",
872-
"last",
873-
"prod",
874-
):
875-
warn = FutureWarning
876-
msg = (
877-
f"default value of numeric_only in DataFrameGroupBy.{method} is deprecated"
878-
)
866+
if isinstance(expected_data, str):
867+
klass = TypeError if method in ("var", "mean", "median", "prod") else ValueError
868+
with pytest.raises(klass, match=expected_data):
869+
_ = func(**kwargs)
879870
else:
880-
warn = None
881-
msg = ""
882-
with tm.assert_produces_warning(warn, match=msg):
883-
if isinstance(expected_data, str):
884-
klass = (
885-
TypeError if method in ("var", "mean", "median", "prod") else ValueError
886-
)
887-
with pytest.raises(klass, match=expected_data):
888-
_ = func(**kwargs)
889-
else:
890-
result = func(**kwargs)
891-
expected = DataFrame(expected_data, index=expected_index)
892-
tm.assert_frame_equal(result, expected)
871+
result = func(**kwargs)
872+
expected = DataFrame(expected_data, index=expected_index)
873+
tm.assert_frame_equal(result, expected)
893874

894875

895876
@pytest.mark.parametrize(

pandas/tests/resample/test_resampler_grouper.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,7 @@ def test_resample_groupby_agg():
408408
df["date"] = pd.to_datetime(df["date"])
409409

410410
resampled = df.groupby("cat").resample("Y", on="date")
411-
msg = "The default value of numeric_only"
412-
with tm.assert_produces_warning(FutureWarning, match=msg):
413-
expected = resampled.sum()
411+
expected = resampled[["num"]].sum()
414412
result = resampled.agg({"num": "sum"})
415413

416414
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)