Skip to content

Commit 62b6d25

Browse files
authored
DEPR: numeric_only default in resampler ops (#47177)
1 parent 21e6da3 commit 62b6d25

File tree

3 files changed

+132
-51
lines changed

3 files changed

+132
-51
lines changed

doc/source/whatsnew/v1.5.0.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`:
605605
``numeric_only`` default value
606606
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
607607

608-
Across the DataFrame and DataFrameGroupBy operations such as
608+
Across the :class:`DataFrame`, :class:`.DataFrameGroupBy`, and :class:`.Resampler` operations such as
609609
``min``, ``sum``, and ``idxmax``, the default
610610
value of the ``numeric_only`` argument, if it exists at all, was inconsistent.
611611
Furthermore, operations with the default value ``None`` can lead to surprising
@@ -644,6 +644,11 @@ gained the ``numeric_only`` argument.
644644
- :meth:`.GroupBy.std`
645645
- :meth:`.GroupBy.sem`
646646
- :meth:`.DataFrameGroupBy.quantile`
647+
- :meth:`.Resampler.mean`
648+
- :meth:`.Resampler.median`
649+
- :meth:`.Resampler.sem`
650+
- :meth:`.Resampler.std`
651+
- :meth:`.Resampler.var`
647652

648653
.. _whatsnew_150.deprecations.other:
649654

pandas/core/resample.py

+90-42
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ def transform(self, arg, *args, **kwargs):
393393
"""
394394
return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs)
395395

396-
def _downsample(self, f):
396+
def _downsample(self, f, **kwargs):
397397
raise AbstractMethodError(self)
398398

399399
def _upsample(self, f, limit=None, fill_value=None):
@@ -937,25 +937,28 @@ def asfreq(self, fill_value=None):
937937
"""
938938
return self._upsample("asfreq", fill_value=fill_value)
939939

940-
def std(self, ddof=1, *args, **kwargs):
940+
def std(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
941941
"""
942942
Compute standard deviation of groups, excluding missing values.
943943
944944
Parameters
945945
----------
946946
ddof : int, default 1
947947
Degrees of freedom.
948+
numeric_only : bool, default False
949+
Include only `float`, `int` or `boolean` data.
950+
951+
.. versionadded:: 1.5.0
948952
949953
Returns
950954
-------
951955
DataFrame or Series
952956
Standard deviation of values within each group.
953957
"""
954958
nv.validate_resampler_func("std", args, kwargs)
955-
# error: Unexpected keyword argument "ddof" for "_downsample"
956-
return self._downsample("std", ddof=ddof) # type: ignore[call-arg]
959+
return self._downsample("std", ddof=ddof, numeric_only=numeric_only)
957960

958-
def var(self, ddof=1, *args, **kwargs):
961+
def var(self, ddof=1, numeric_only: bool = False, *args, **kwargs):
959962
"""
960963
Compute variance of groups, excluding missing values.
961964
@@ -964,14 +967,18 @@ def var(self, ddof=1, *args, **kwargs):
964967
ddof : int, default 1
965968
Degrees of freedom.
966969
970+
numeric_only : bool, default False
971+
Include only `float`, `int` or `boolean` data.
972+
973+
.. versionadded:: 1.5.0
974+
967975
Returns
968976
-------
969977
DataFrame or Series
970978
Variance of values within each group.
971979
"""
972980
nv.validate_resampler_func("var", args, kwargs)
973-
# error: Unexpected keyword argument "ddof" for "_downsample"
974-
return self._downsample("var", ddof=ddof) # type: ignore[call-arg]
981+
return self._downsample("var", ddof=ddof, numeric_only=numeric_only)
975982

976983
@doc(GroupBy.size)
977984
def size(self):
@@ -1027,53 +1034,94 @@ def quantile(self, q=0.5, **kwargs):
10271034
Return a DataFrame, where the columns are groupby columns,
10281035
and the values are its quantiles.
10291036
"""
1030-
# error: Unexpected keyword argument "q" for "_downsample"
1031-
# error: Too many arguments for "_downsample"
1032-
return self._downsample("quantile", q=q, **kwargs) # type: ignore[call-arg]
1037+
return self._downsample("quantile", q=q, **kwargs)
10331038

10341039

1035-
# downsample methods
1036-
for method in ["sum", "prod", "min", "max", "first", "last"]:
1040+
def _add_downsample_kernel(
1041+
name: str, args: tuple[str, ...], docs_class: type = GroupBy
1042+
) -> None:
1043+
"""
1044+
Add a kernel to Resampler.
1045+
1046+
Parameters
1047+
----------
1048+
name : str
1049+
Name of the kernel.
1050+
args : tuple
1051+
Arguments of the method.
1052+
docs_class : type
1053+
Class to get kernel docstring from.
1054+
"""
1055+
assert args in (
1056+
("numeric_only", "min_count"),
1057+
("numeric_only",),
1058+
("ddof", "numeric_only"),
1059+
(),
1060+
)
10371061

1038-
def f(
1039-
self,
1040-
_method: str = method,
1041-
numeric_only: bool | lib.NoDefault = lib.no_default,
1042-
min_count: int = 0,
1043-
*args,
1044-
**kwargs,
1045-
):
1046-
if numeric_only is lib.no_default:
1047-
if _method != "sum":
1062+
# Explicitly provide args rather than args/kwargs for API docs
1063+
if args == ("numeric_only", "min_count"):
1064+
1065+
def f(
1066+
self,
1067+
numeric_only: bool | lib.NoDefault = lib.no_default,
1068+
min_count: int = 0,
1069+
*args,
1070+
**kwargs,
1071+
):
1072+
nv.validate_resampler_func(name, args, kwargs)
1073+
if numeric_only is lib.no_default and name != "sum":
10481074
# For DataFrameGroupBy, set it to be False for methods other than `sum`.
10491075
numeric_only = False
10501076

1051-
nv.validate_resampler_func(_method, args, kwargs)
1052-
return self._downsample(_method, numeric_only=numeric_only, min_count=min_count)
1053-
1054-
f.__doc__ = getattr(GroupBy, method).__doc__
1055-
setattr(Resampler, method, f)
1056-
1077+
return self._downsample(
1078+
name, numeric_only=numeric_only, min_count=min_count
1079+
)
10571080

1058-
# downsample methods
1059-
for method in ["mean", "sem", "median", "ohlc"]:
1081+
elif args == ("numeric_only",):
1082+
# error: All conditional function variants must have identical signatures
1083+
def f( # type: ignore[misc]
1084+
self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs
1085+
):
1086+
nv.validate_resampler_func(name, args, kwargs)
1087+
return self._downsample(name, numeric_only=numeric_only)
1088+
1089+
elif args == ("ddof", "numeric_only"):
1090+
# error: All conditional function variants must have identical signatures
1091+
def f( # type: ignore[misc]
1092+
self,
1093+
ddof: int = 1,
1094+
numeric_only: bool | lib.NoDefault = lib.no_default,
1095+
*args,
1096+
**kwargs,
1097+
):
1098+
nv.validate_resampler_func(name, args, kwargs)
1099+
return self._downsample(name, ddof=ddof, numeric_only=numeric_only)
10601100

1061-
def g(self, _method=method, *args, **kwargs):
1062-
nv.validate_resampler_func(_method, args, kwargs)
1063-
return self._downsample(_method)
1101+
else:
1102+
# error: All conditional function variants must have identical signatures
1103+
def f( # type: ignore[misc]
1104+
self,
1105+
*args,
1106+
**kwargs,
1107+
):
1108+
nv.validate_resampler_func(name, args, kwargs)
1109+
return self._downsample(name)
10641110

1065-
g.__doc__ = getattr(GroupBy, method).__doc__
1066-
setattr(Resampler, method, g)
1111+
f.__doc__ = getattr(docs_class, name).__doc__
1112+
setattr(Resampler, name, f)
10671113

10681114

1069-
# series only methods
1115+
for method in ["sum", "prod", "min", "max", "first", "last"]:
1116+
_add_downsample_kernel(method, ("numeric_only", "min_count"))
1117+
for method in ["mean", "median"]:
1118+
_add_downsample_kernel(method, ("numeric_only",))
1119+
for method in ["sem"]:
1120+
_add_downsample_kernel(method, ("ddof", "numeric_only"))
1121+
for method in ["ohlc"]:
1122+
_add_downsample_kernel(method, ())
10701123
for method in ["nunique"]:
1071-
1072-
def h(self, _method=method):
1073-
return self._downsample(_method)
1074-
1075-
h.__doc__ = getattr(SeriesGroupBy, method).__doc__
1076-
setattr(Resampler, method, h)
1124+
_add_downsample_kernel(method, (), SeriesGroupBy)
10771125

10781126

10791127
class _GroupByMixin(PandasObject):

pandas/tests/resample/test_resample_api.py

+36-8
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,7 @@ def test_end_and_end_day_origin(
814814

815815

816816
@pytest.mark.parametrize(
817+
# expected_data is a string when op raises a ValueError (TypeError for "var")
817818
"method, numeric_only, expected_data",
818819
[
819820
("sum", True, {"num": [25]}),
@@ -834,6 +835,21 @@ def test_end_and_end_day_origin(
834835
("last", True, {"num": [20]}),
835836
("last", False, {"cat": ["cat_2"], "num": [20]}),
836837
("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
838+
("mean", True, {"num": [12.5]}),
839+
("mean", False, {"num": [12.5]}),
840+
("mean", lib.no_default, {"num": [12.5]}),
841+
("median", True, {"num": [12.5]}),
842+
("median", False, {"num": [12.5]}),
843+
("median", lib.no_default, {"num": [12.5]}),
844+
("std", True, {"num": [10.606601717798213]}),
845+
("std", False, "could not convert string to float"),
846+
("std", lib.no_default, {"num": [10.606601717798213]}),
847+
("var", True, {"num": [112.5]}),
848+
("var", False, "could not convert string to float"),
849+
("var", lib.no_default, {"num": [112.5]}),
850+
("sem", True, {"num": [7.5]}),
851+
("sem", False, "could not convert string to float"),
852+
("sem", lib.no_default, {"num": [7.5]}),
837853
],
838854
)
839855
def test_frame_downsample_method(method, numeric_only, expected_data):
@@ -845,20 +861,32 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
845861
resampled = df.resample("Y")
846862

847863
func = getattr(resampled, method)
848-
if method == "prod" and numeric_only is not True:
864+
if numeric_only is lib.no_default and method not in (
865+
"min",
866+
"max",
867+
"first",
868+
"last",
869+
"prod",
870+
):
849871
warn = FutureWarning
850-
msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated"
851-
elif method == "sum" and numeric_only is lib.no_default:
872+
msg = (
873+
f"default value of numeric_only in DataFrameGroupBy.{method} is deprecated"
874+
)
875+
elif method in ("prod", "mean", "median") and numeric_only is not True:
852876
warn = FutureWarning
853-
msg = "The default value of numeric_only in DataFrameGroupBy.sum is deprecated"
877+
msg = f"Dropping invalid columns in DataFrameGroupBy.{method} is deprecated"
854878
else:
855879
warn = None
856880
msg = ""
857881
with tm.assert_produces_warning(warn, match=msg):
858-
result = func(numeric_only=numeric_only)
859-
860-
expected = DataFrame(expected_data, index=expected_index)
861-
tm.assert_frame_equal(result, expected)
882+
if isinstance(expected_data, str):
883+
klass = TypeError if method == "var" else ValueError
884+
with pytest.raises(klass, match=expected_data):
885+
_ = func(numeric_only=numeric_only)
886+
else:
887+
result = func(numeric_only=numeric_only)
888+
expected = DataFrame(expected_data, index=expected_index)
889+
tm.assert_frame_equal(result, expected)
862890

863891

864892
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)