diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 19b448a1871c2..d189c4f4bf248 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -35,7 +35,7 @@ Other enhancements - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) -- +- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 66a68755a2a09..d6f4e5ea25fc9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12029,20 +12029,52 @@ def kurt( product = prod @doc(make_doc("cummin", ndim=2)) - def cummin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: - return NDFrame.cummin(self, axis, skipna, *args, **kwargs) + def cummin( + self, + axis: Axis = 0, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ) -> Self: + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cummin(data, axis, skipna, *args, **kwargs) @doc(make_doc("cummax", ndim=2)) - def cummax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: - return NDFrame.cummax(self, axis, skipna, *args, **kwargs) + def cummax( + self, + axis: Axis = 0, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ) -> Self: + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cummax(data, axis, skipna, *args, **kwargs) @doc(make_doc("cumsum", ndim=2)) - def cumsum(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: - return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) + def cumsum( + self, + axis: Axis = 0, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ) -> Self: + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cumsum(data, axis, skipna, *args, **kwargs) @doc(make_doc("cumprod", 2)) - def cumprod(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self: - return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) + def cumprod( + self, + axis: Axis = 0, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ) -> Self: + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cumprod(data, axis, skipna, *args, **kwargs) def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8af9503a3691d..e1c1b21249362 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11925,7 +11925,45 @@ def last_valid_index(self) -> Hashable: DataFrame.any : Return True if one (or more) elements are True. """ -_cnum_doc = """ +_cnum_pd_doc = """ +Return cumulative {desc} over a DataFrame or Series axis. + +Returns a DataFrame or Series of the same size containing the cumulative +{desc}. + +Parameters +---------- +axis : {{0 or 'index', 1 or 'columns'}}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +numeric_only : bool, default False + Include only float, int, boolean columns. +*args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + +Returns +------- +{name1} or {name2} + Return cumulative {desc} of {name1} or {name2}. + +See Also +-------- +core.window.expanding.Expanding.{accum_func_name} : Similar functionality + but ignores ``NaN`` values. +{name2}.{accum_func_name} : Return the {desc} over + {name2} axis. +{name2}.cummax : Return cumulative maximum over {name2} axis. +{name2}.cummin : Return cumulative minimum over {name2} axis. +{name2}.cumsum : Return cumulative sum over {name2} axis. +{name2}.cumprod : Return cumulative product over {name2} axis. + +{examples}""" + +_cnum_series_doc = """ Return cumulative {desc} over a DataFrame or Series axis. Returns a DataFrame or Series of the same size containing the cumulative @@ -12716,28 +12754,44 @@ def make_doc(name: str, ndim: int) -> str: kwargs = {"min_count": ""} elif name == "cumsum": - base_doc = _cnum_doc + if ndim == 1: + base_doc = _cnum_series_doc + else: + base_doc = _cnum_pd_doc + desc = "sum" see_also = "" examples = _cumsum_examples kwargs = {"accum_func_name": "sum"} elif name == "cumprod": - base_doc = _cnum_doc + if ndim == 1: + base_doc = _cnum_series_doc + else: + base_doc = _cnum_pd_doc + desc = "product" see_also = "" examples = _cumprod_examples kwargs = {"accum_func_name": "prod"} elif name == "cummin": - base_doc = _cnum_doc + if ndim == 1: + base_doc = _cnum_series_doc + else: + base_doc = _cnum_pd_doc + desc = "minimum" see_also = "" examples = _cummin_examples kwargs = {"accum_func_name": "min"} elif name == "cummax": - base_doc = _cnum_doc + if ndim == 1: + base_doc = _cnum_series_doc + else: + base_doc = _cnum_pd_doc + desc = "maximum" see_also = "" examples = _cummax_examples diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index d7aad680d389e..ab217e1b1332a 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -7,10 +7,12 @@ """ import numpy as np +import pytest from pandas import ( DataFrame, Series, + Timestamp, ) import pandas._testing as tm @@ -81,3 +83,25 @@ def test_cumsum_preserve_dtypes(self): } ) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"]) + @pytest.mark.parametrize("axis", [0, 1]) + def test_numeric_only_flag(self, method, axis): + df = DataFrame( + { + "int": [1, 2, 3], + "bool": [True, False, False], + "string": ["a", "b", "c"], + "float": [1.0, 3.5, 4.0], + "datetime": [ + Timestamp(2018, 1, 1), + Timestamp(2019, 1, 1), + Timestamp(2020, 1, 1), + ], + } + ) + df_numeric_only = df.drop(["string", "datetime"], axis=1) + + result = getattr(df, method)(axis=axis, numeric_only=True) + expected = getattr(df_numeric_only, method)(axis) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index b5fdf058d1ab0..d2cfa530e7c65 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -183,10 +183,9 @@ def test_frame_consistency(groupby_func): elif groupby_func in ("bfill", "ffill"): exclude_expected = {"inplace", "axis", "limit_area"} elif groupby_func in ("cummax", "cummin"): - exclude_expected = {"skipna", "args"} - exclude_result = {"numeric_only"} + exclude_expected = {"axis", "skipna", "args"} elif groupby_func in ("cumprod", "cumsum"): - exclude_expected = {"skipna"} + exclude_expected = {"axis", "skipna", "numeric_only"} elif groupby_func in ("pct_change",): exclude_expected = {"kwargs"} elif groupby_func in ("rank",):