diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -113,6 +113,7 @@ Other enhancements
 - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
 - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"``
 - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
+- :meth:`DataFrame.cumsum`, :meth:`DataFrame.cumprod`, :meth:`DataFrame.cummin` and :meth:`DataFrame.cummax` now have a ``numeric_only`` parameter (:issue:`53072`)
 - :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
 - :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`)
 - :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c0f4dbb4aeb2d..67faead05ec65 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -11102,20 +11102,52 @@ def kurt(
     product = prod
 
     @doc(make_doc("cummin", ndim=2))
-    def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
-        return NDFrame.cummin(self, axis, skipna, *args, **kwargs)
+    def cummin(
+        self,
+        axis: Axis | None = None,
+        skipna: bool = True,
+        numeric_only: bool = False,
+        *args,
+        **kwargs,
+    ):
+        data = self._get_numeric_data() if numeric_only else self
+        return NDFrame.cummin(data, axis, skipna, *args, **kwargs)
 
     @doc(make_doc("cummax", ndim=2))
-    def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
-        return NDFrame.cummax(self, axis, skipna, *args, **kwargs)
+    def cummax(
+        self,
+        axis: Axis | None = None,
+        skipna: bool = True,
+        numeric_only: bool = False,
+        *args,
+        **kwargs,
+    ):
+        data = self._get_numeric_data() if numeric_only else self
+        return NDFrame.cummax(data, axis, skipna, *args, **kwargs)
 
     @doc(make_doc("cumsum", ndim=2))
-    def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
-        return NDFrame.cumsum(self, axis, skipna, *args, **kwargs)
+    def cumsum(
+        self,
+        axis: Axis | None = None,
+        skipna: bool = True,
+        numeric_only: bool = False,
+        *args,
+        **kwargs,
+    ):
+        data = self._get_numeric_data() if numeric_only else self
+        return NDFrame.cumsum(data, axis, skipna, *args, **kwargs)
 
     @doc(make_doc("cumprod", 2))
-    def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
-        return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)
+    def cumprod(
+        self,
+        axis: Axis | None = None,
+        skipna: bool = True,
+        numeric_only: bool = False,
+        *args,
+        **kwargs,
+    ):
+        data = self._get_numeric_data() if numeric_only else self
+        return NDFrame.cumprod(data, axis, skipna, *args, **kwargs)
 
     def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
         """
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ec6788a3dc8c5..7a530876cb792 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -12402,6 +12402,8 @@ def last_valid_index(self) -> Hashable | None:
 skipna : bool, default True
     Exclude NA/null values. If an entire row/column is NA, the result
     will be NA.
+numeric_only : bool, default False
+    Include only float, int, boolean columns.
 *args, **kwargs
     Additional keywords have no effect but might be accepted for
     compatibility with NumPy.
diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py
index 5bd9c42612315..fe38e250e3014 100644
--- a/pandas/tests/frame/test_cumulative.py
+++ b/pandas/tests/frame/test_cumulative.py
@@ -12,6 +12,7 @@
 from pandas import (
     DataFrame,
     Series,
+    Timestamp,
 )
 import pandas._testing as tm
 
@@ -79,3 +80,25 @@ def test_cumsum_preserve_dtypes(self):
             }
         )
         tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"])
+    def test_numeric_only_flag(self, method):
+        df = DataFrame(
+            {
+                "int": [1, 2, 3],
+                "bool": [True, False, False],
+                "string": ["a", "b", "c"],
+                "float": [1.0, 3.5, 4.0],
+                "datetime": [
+                    Timestamp(2018, 1, 1),
+                    Timestamp(2019, 1, 1),
+                    Timestamp(2020, 1, 1),
+                ],
+            }
+        )
+        df_numeric_only = df.drop(["string", "datetime"], axis=1)
+
+        for axis in [0, 1]:
+            result = getattr(df, method)(axis=axis, numeric_only=True)
+            expected = getattr(df_numeric_only, method)(axis)
+            tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py
index 1122403be877f..df2ed10e5280f 100644
--- a/pandas/tests/groupby/test_api.py
+++ b/pandas/tests/groupby/test_api.py
@@ -182,9 +182,8 @@ def test_frame_consistency(groupby_func):
         exclude_expected = {"downcast", "inplace", "axis"}
     elif groupby_func in ("cummax", "cummin"):
         exclude_expected = {"skipna", "args"}
-        exclude_result = {"numeric_only"}
     elif groupby_func in ("cumprod", "cumsum"):
-        exclude_expected = {"skipna"}
+        exclude_expected = {"skipna", "numeric_only"}
     elif groupby_func in ("pct_change",):
         exclude_expected = {"kwargs"}
         exclude_result = {"axis"}
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -560,7 +560,7 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only):
         kwargs["numeric_only"] = numeric_only
 
     # Functions without numeric_only and axis args
-    no_args = ("cumprod", "cumsum", "diff", "fillna", "pct_change", "rank", "shift")
+    no_args = ("diff", "fillna", "pct_change", "rank", "shift")
     # Functions with axis args
     has_axis = (
         "cumprod",
@@ -579,9 +579,4 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only):
     if numeric_only is not None and groupby_func in no_args:
         msg = "got an unexpected keyword argument 'numeric_only'"
-        if groupby_func in ["cumprod", "cumsum"]:
-            with pytest.raises(TypeError, match=msg):
-                with tm.assert_produces_warning(FutureWarning, match=warn_msg):
-                    method(*args, **kwargs)
-        else:
-            with pytest.raises(TypeError, match=msg):
-                method(*args, **kwargs)
+        with pytest.raises(TypeError, match=msg):
+            method(*args, **kwargs)