diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6bf6fd65f5633..40276c6a21e77 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -605,7 +605,7 @@ In the case where ``df.columns`` is not unique, use :meth:`DataFrame.isetitem`: ``numeric_only`` default value ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Across the DataFrame and DataFrameGroupBy operations such as +Across the :class:`DataFrame`, :class:`.DataFrameGroupBy`, and :class:`.Resampler` operations such as ``min``, ``sum``, and ``idxmax``, the default value of the ``numeric_only`` argument, if it exists at all, was inconsistent. Furthermore, operations with the default value ``None`` can lead to surprising @@ -644,6 +644,11 @@ gained the ``numeric_only`` argument. - :meth:`.GroupBy.std` - :meth:`.GroupBy.sem` - :meth:`.DataFrameGroupBy.quantile` +- :meth:`.Resampler.mean` +- :meth:`.Resampler.median` +- :meth:`.Resampler.sem` +- :meth:`.Resampler.std` +- :meth:`.Resampler.var` .. _whatsnew_150.deprecations.other: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index dcd9aceaf8474..0a62861cdaba7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -393,7 +393,7 @@ def transform(self, arg, *args, **kwargs): """ return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs) - def _downsample(self, f): + def _downsample(self, f, **kwargs): raise AbstractMethodError(self) def _upsample(self, f, limit=None, fill_value=None): @@ -937,7 +937,7 @@ def asfreq(self, fill_value=None): """ return self._upsample("asfreq", fill_value=fill_value) - def std(self, ddof=1, *args, **kwargs): + def std(self, ddof=1, numeric_only: bool = False, *args, **kwargs): """ Compute standard deviation of groups, excluding missing values. @@ -945,6 +945,10 @@ def std(self, ddof=1, *args, **kwargs): ---------- ddof : int, default 1 Degrees of freedom. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 Returns ------- @@ -952,10 +956,9 @@ def std(self, ddof=1, *args, **kwargs): Standard deviation of values within each group. """ nv.validate_resampler_func("std", args, kwargs) - # error: Unexpected keyword argument "ddof" for "_downsample" - return self._downsample("std", ddof=ddof) # type: ignore[call-arg] + return self._downsample("std", ddof=ddof, numeric_only=numeric_only) - def var(self, ddof=1, *args, **kwargs): + def var(self, ddof=1, numeric_only: bool = False, *args, **kwargs): """ Compute variance of groups, excluding missing values. @@ -964,14 +967,18 @@ def var(self, ddof=1, *args, **kwargs): ddof : int, default 1 Degrees of freedom. + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + Returns ------- DataFrame or Series Variance of values within each group. """ nv.validate_resampler_func("var", args, kwargs) - # error: Unexpected keyword argument "ddof" for "_downsample" - return self._downsample("var", ddof=ddof) # type: ignore[call-arg] + return self._downsample("var", ddof=ddof, numeric_only=numeric_only) @doc(GroupBy.size) def size(self): @@ -1027,53 +1034,94 @@ def quantile(self, q=0.5, **kwargs): Return a DataFrame, where the coulmns are groupby columns, and the values are its quantiles. """ - # error: Unexpected keyword argument "q" for "_downsample" - # error: Too many arguments for "_downsample" - return self._downsample("quantile", q=q, **kwargs) # type: ignore[call-arg] + return self._downsample("quantile", q=q, **kwargs) -# downsample methods -for method in ["sum", "prod", "min", "max", "first", "last"]: +def _add_downsample_kernel( + name: str, args: tuple[str, ...], docs_class: type = GroupBy +) -> None: + """ + Add a kernel to Resampler. + + Arguments + --------- + name : str + Name of the kernel. + args : tuple + Arguments of the method. + docs_class : type + Class to get kernel docstring from. + """ + assert args in ( + ("numeric_only", "min_count"), + ("numeric_only",), + ("ddof", "numeric_only"), + (), + ) - def f( - self, - _method: str = method, - numeric_only: bool | lib.NoDefault = lib.no_default, - min_count: int = 0, - *args, - **kwargs, - ): - if numeric_only is lib.no_default: - if _method != "sum": + # Explicitly provide args rather than args/kwargs for API docs + if args == ("numeric_only", "min_count"): + + def f( + self, + numeric_only: bool | lib.NoDefault = lib.no_default, + min_count: int = 0, + *args, + **kwargs, + ): + nv.validate_resampler_func(name, args, kwargs) + if numeric_only is lib.no_default and name != "sum": # For DataFrameGroupBy, set it to be False for methods other than `sum`. numeric_only = False - nv.validate_resampler_func(_method, args, kwargs) - return self._downsample(_method, numeric_only=numeric_only, min_count=min_count) - - f.__doc__ = getattr(GroupBy, method).__doc__ - setattr(Resampler, method, f) - + return self._downsample( + name, numeric_only=numeric_only, min_count=min_count + ) -# downsample methods -for method in ["mean", "sem", "median", "ohlc"]: + elif args == ("numeric_only",): + # error: All conditional function variants must have identical signatures + def f( # type: ignore[misc] + self, numeric_only: bool | lib.NoDefault = lib.no_default, *args, **kwargs + ): + nv.validate_resampler_func(name, args, kwargs) + return self._downsample(name, numeric_only=numeric_only) + + elif args == ("ddof", "numeric_only"): + # error: All conditional function variants must have identical signatures + def f( # type: ignore[misc] + self, + ddof: int = 1, + numeric_only: bool | lib.NoDefault = lib.no_default, + *args, + **kwargs, + ): + nv.validate_resampler_func(name, args, kwargs) + return self._downsample(name, ddof=ddof, numeric_only=numeric_only) - def g(self, _method=method, *args, **kwargs): - nv.validate_resampler_func(_method, args, kwargs) - return self._downsample(_method) + else: + # error: All conditional function variants must have identical signatures + def f( # type: ignore[misc] + self, + *args, + **kwargs, + ): + nv.validate_resampler_func(name, args, kwargs) + return self._downsample(name) - g.__doc__ = getattr(GroupBy, method).__doc__ - setattr(Resampler, method, g) + f.__doc__ = getattr(docs_class, name).__doc__ + setattr(Resampler, name, f) -# series only methods +for method in ["sum", "prod", "min", "max", "first", "last"]: + _add_downsample_kernel(method, ("numeric_only", "min_count")) +for method in ["mean", "median"]: + _add_downsample_kernel(method, ("numeric_only",)) +for method in ["sem"]: + _add_downsample_kernel(method, ("ddof", "numeric_only")) +for method in ["ohlc"]: + _add_downsample_kernel(method, ()) for method in ["nunique"]: - - def h(self, _method=method): - return self._downsample(_method) - - h.__doc__ = getattr(SeriesGroupBy, method).__doc__ - setattr(Resampler, method, h) + _add_downsample_kernel(method, (), SeriesGroupBy) class _GroupByMixin(PandasObject): diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 43050c0338671..5e10b9ee5277c 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -814,6 +814,7 @@ def test_end_and_end_day_origin( @pytest.mark.parametrize( + # expected_data is a string when op raises a ValueError "method, numeric_only, expected_data", [ ("sum", True, {"num": [25]}), @@ -834,6 +835,21 @@ def test_end_and_end_day_origin( ("last", True, {"num": [20]}), ("last", False, {"cat": ["cat_2"], "num": [20]}), ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}), + ("mean", True, {"num": [12.5]}), + ("mean", False, {"num": [12.5]}), + ("mean", lib.no_default, {"num": [12.5]}), + ("median", True, {"num": [12.5]}), + ("median", False, {"num": [12.5]}), + ("median", lib.no_default, {"num": [12.5]}), + ("std", True, {"num": [10.606601717798213]}), + ("std", False, "could not convert string to float"), + ("std", lib.no_default, {"num": [10.606601717798213]}), + ("var", True, {"num": [112.5]}), + ("var", False, "could not convert string to float"), + ("var", lib.no_default, {"num": [112.5]}), + ("sem", True, {"num": [7.5]}), + ("sem", False, "could not convert string to float"), + ("sem", lib.no_default, {"num": [7.5]}), ], ) def test_frame_downsample_method(method, numeric_only, expected_data): @@ -845,20 +861,32 @@ def test_frame_downsample_method(method, numeric_only, expected_data): resampled = df.resample("Y") func = getattr(resampled, method) - if method == "prod" and numeric_only is not True: + if numeric_only is lib.no_default and method not in ( + "min", + "max", + "first", + "last", + "prod", + ): warn = FutureWarning - msg = "Dropping invalid columns in DataFrameGroupBy.prod is deprecated" - elif method == "sum" and numeric_only is lib.no_default: + msg = ( + f"default value of numeric_only in DataFrameGroupBy.{method} is deprecated" + ) + elif method in ("prod", "mean", "median") and numeric_only is not True: warn = FutureWarning - msg = "The default value of numeric_only in DataFrameGroupBy.sum is deprecated" + msg = f"Dropping invalid columns in DataFrameGroupBy.{method} is deprecated" else: warn = None msg = "" with tm.assert_produces_warning(warn, match=msg): - result = func(numeric_only=numeric_only) - - expected = DataFrame(expected_data, index=expected_index) - tm.assert_frame_equal(result, expected) + if isinstance(expected_data, str): + klass = TypeError if method == "var" else ValueError + with pytest.raises(klass, match=expected_data): + _ = func(numeric_only=numeric_only) + else: + result = func(numeric_only=numeric_only) + expected = DataFrame(expected_data, index=expected_index) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize(