Skip to content

Commit 4cf8d55

Browse files
authored
ENH: Add numeric_only to resampler methods (#46792)
1 parent 4caa297 commit 4cf8d55

File tree

3 files changed

+99
-2
lines changed

3 files changed

+99
-2
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ Other enhancements
9696
- :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
9797
- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, and :meth:`DataFrame.cov` (:issue:`46560`)
9898
- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
99+
- Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`)
99100

100101
.. ---------------------------------------------------------------------------
101102
.. _whatsnew_150.notable_bug_fixes:

pandas/core/resample.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1027,9 +1027,21 @@ def quantile(self, q=0.5, **kwargs):
10271027
# downsample methods
10281028
for method in ["sum", "prod", "min", "max", "first", "last"]:
10291029

1030-
def f(self, _method=method, min_count=0, *args, **kwargs):
1030+
def f(
1031+
self,
# The default is evaluated when `def` runs, so each generated f keeps the
# loop's current method name rather than the final value of `method`.
# NOTE(review): `_method` is the first positional parameter after `self`, so
# a positionally passed argument binds to it rather than to `numeric_only`.
1032+
_method: str = method,
1033+
numeric_only: bool | lib.NoDefault = lib.no_default,
1034+
min_count: int = 0,
1035+
*args,
1036+
**kwargs,
1037+
):
1038+
if numeric_only is lib.no_default:
1039+
if _method != "sum":
1040+
# When numeric_only is not supplied, every method except `sum` falls back
# to False; `sum` keeps the lib.no_default sentinel, which is passed on to
# _downsample unchanged. The original note ties this default to
# DataFrameGroupBy - presumably to mirror its behaviour; TODO confirm.
1041+
numeric_only = False
1042+
10311043
nv.validate_resampler_func(_method, args, kwargs)
1032-
return self._downsample(_method, min_count=min_count)
1044+
return self._downsample(_method, numeric_only=numeric_only, min_count=min_count)
10331045

# Reuse the docstring of the GroupBy method that has the same name.
10341046
f.__doc__ = getattr(GroupBy, method).__doc__
# Attach the generated function to Resampler under the method's name.
10351047
setattr(Resampler, method, f)

pandas/tests/resample/test_resample_api.py

+84
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._libs import lib
7+
68
import pandas as pd
79
from pandas import (
810
DataFrame,
@@ -771,3 +773,85 @@ def test_end_and_end_day_origin(
771773
)
772774

773775
tm.assert_series_equal(res, expected)
776+
777+
778+
# Each case is (method, numeric_only, expected_data); expected_data lists the
# columns expected after the two-row frame below is resampled.
# With lib.no_default, `sum` is expected to drop the non-numeric `cat` column
# while min/max/first/last keep it; `prod` never has `cat` in its expected
# data (presumably because strings cannot be multiplied - confirm).
@pytest.mark.parametrize(
779+
"method, numeric_only, expected_data",
780+
[
781+
("sum", True, {"num": [25]}),
782+
("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}),
783+
("sum", lib.no_default, {"num": [25]}),
784+
("prod", True, {"num": [100]}),
785+
("prod", False, {"num": [100]}),
786+
("prod", lib.no_default, {"num": [100]}),
787+
("min", True, {"num": [5]}),
788+
("min", False, {"cat": ["cat_1"], "num": [5]}),
789+
("min", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
790+
("max", True, {"num": [20]}),
791+
("max", False, {"cat": ["cat_2"], "num": [20]}),
792+
("max", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
793+
("first", True, {"num": [5]}),
794+
("first", False, {"cat": ["cat_1"], "num": [5]}),
795+
("first", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
796+
("last", True, {"num": [20]}),
797+
("last", False, {"cat": ["cat_2"], "num": [20]}),
798+
("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
799+
],
800+
)
801+
def test_frame_downsample_method(method, numeric_only, expected_data):
802+
# GH#46442: test that `numeric_only` behaves as expected when resampling a
# DataFrame that has one string column and one numeric column.
803+
804+
index = date_range("2018-01-01", periods=2, freq="D")
805+
expected_index = date_range("2018-12-31", periods=1, freq="Y")
806+
df = DataFrame({"cat": ["cat_1", "cat_2"], "num": [5, 20]}, index=index)
807+
resampled = df.resample("Y")
808+
809+
func = getattr(resampled, method)
810+
result = func(numeric_only=numeric_only)
811+
812+
expected = DataFrame(expected_data, index=expected_index)
813+
tm.assert_frame_equal(result, expected)
814+
815+
816+
# Each case is (method, numeric_only, expected_data). An empty tuple is a
# placeholder: every such case is either numeric_only=True or method "prod",
# and both of those take a pytest.raises branch below, so expected_data is
# never used for them.
@pytest.mark.parametrize(
817+
"method, numeric_only, expected_data",
818+
[
819+
("sum", True, ()),
820+
("sum", False, ["cat_1cat_2"]),
821+
("sum", lib.no_default, ["cat_1cat_2"]),
822+
("prod", True, ()),
823+
("prod", False, ()),
824+
("prod", lib.no_default, ()),
825+
("min", True, ()),
826+
("min", False, ["cat_1"]),
827+
("min", lib.no_default, ["cat_1"]),
828+
("max", True, ()),
829+
("max", False, ["cat_2"]),
830+
("max", lib.no_default, ["cat_2"]),
831+
("first", True, ()),
832+
("first", False, ["cat_1"]),
833+
("first", lib.no_default, ["cat_1"]),
834+
("last", True, ()),
835+
("last", False, ["cat_2"]),
836+
("last", lib.no_default, ["cat_2"]),
837+
],
838+
)
839+
def test_series_downsample_method(method, numeric_only, expected_data):
840+
# GH#46442: test that `numeric_only` behaves as expected when resampling a
# Series of strings.
841+
842+
index = date_range("2018-01-01", periods=2, freq="D")
843+
expected_index = date_range("2018-12-31", periods=1, freq="Y")
# NOTE: despite its name, `df` holds a Series here, not a DataFrame.
844+
df = Series(["cat_1", "cat_2"], index=index)
845+
resampled = df.resample("Y")
846+
847+
func = getattr(resampled, method)
# The sentinel is excluded explicitly so that only numeric_only=True takes
# this branch (presumably lib.no_default is truthy - confirm).
848+
if numeric_only and numeric_only is not lib.no_default:
849+
with pytest.raises(NotImplementedError, match="not implement numeric_only"):
850+
func(numeric_only=numeric_only)
# Every remaining `prod` case is expected to fail on the string values.
851+
elif method == "prod":
852+
with pytest.raises(TypeError, match="can't multiply sequence by non-int"):
853+
func(numeric_only=numeric_only)
854+
else:
855+
result = func(numeric_only=numeric_only)
856+
expected = Series(expected_data, index=expected_index)
857+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)