Skip to content

ENH: add numeric_only to DataFrame.cum* methods #53624

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ Other enhancements
- Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
- Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"``
- :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
- :meth:`DataFrame.cumsum`, :meth:`DataFrame.cumprod`, :meth:`DataFrame.cummin` and :meth:`DataFrame.cummax` now have a ``numeric_only`` parameter (:issue:`53072`)
- :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
- :meth:`DataFrameGroupBy.agg` and :meth:`DataFrameGroupBy.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`)
- :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)
Expand Down
48 changes: 40 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11102,20 +11102,52 @@ def kurt(
product = prod

@doc(make_doc("cummin", ndim=2))
def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
return NDFrame.cummin(self, axis, skipna, *args, **kwargs)
# NOTE(review): numeric_only is declared before *args, so a third positional
# argument now binds to numeric_only instead of landing in *args -- confirm
# this is intended.
def cummin(
self,
axis: Axis | None = None,
skipna: bool = True,
numeric_only: bool = False,
*args,
**kwargs,
):
# With numeric_only=True, operate on the result of _get_numeric_data();
# otherwise operate on the frame itself.
data = self._get_numeric_data() if numeric_only else self
# numeric_only is consumed here and is not forwarded to NDFrame.cummin.
return NDFrame.cummin(data, axis, skipna, *args, **kwargs)

@doc(make_doc("cummax", ndim=2))
def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
return NDFrame.cummax(self, axis, skipna, *args, **kwargs)
# NOTE(review): numeric_only is declared before *args, so a third positional
# argument now binds to numeric_only instead of landing in *args -- confirm
# this is intended.
def cummax(
self,
axis: Axis | None = None,
skipna: bool = True,
numeric_only: bool = False,
*args,
**kwargs,
):
# With numeric_only=True, operate on the result of _get_numeric_data();
# otherwise operate on the frame itself.
data = self._get_numeric_data() if numeric_only else self
# numeric_only is consumed here and is not forwarded to NDFrame.cummax.
return NDFrame.cummax(data, axis, skipna, *args, **kwargs)

@doc(make_doc("cumsum", ndim=2))
def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
return NDFrame.cumsum(self, axis, skipna, *args, **kwargs)
# NOTE(review): numeric_only is declared before *args, so a third positional
# argument now binds to numeric_only instead of landing in *args -- confirm
# this is intended.
def cumsum(
self,
axis: Axis | None = None,
skipna: bool = True,
numeric_only: bool = False,
*args,
**kwargs,
):
# With numeric_only=True, operate on the result of _get_numeric_data();
# otherwise operate on the frame itself.
data = self._get_numeric_data() if numeric_only else self
# numeric_only is consumed here and is not forwarded to NDFrame.cumsum.
return NDFrame.cumsum(data, axis, skipna, *args, **kwargs)

@doc(make_doc("cumprod", 2))
def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)
# NOTE(review): numeric_only is declared before *args, so a third positional
# argument now binds to numeric_only instead of landing in *args -- confirm
# this is intended.
def cumprod(
self,
axis: Axis | None = None,
skipna: bool = True,
numeric_only: bool = False,
*args,
**kwargs,
):
# With numeric_only=True, operate on the result of _get_numeric_data();
# otherwise operate on the frame itself.
data = self._get_numeric_data() if numeric_only else self
# numeric_only is consumed here and is not forwarded to NDFrame.cumprod.
return NDFrame.cumprod(data, axis, skipna, *args, **kwargs)

def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
"""
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12402,6 +12402,8 @@ def last_valid_index(self) -> Hashable | None:
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
numeric_only : bool, default False
Include only float, int, boolean columns.
*args, **kwargs
Additional keywords have no effect but might be accepted for
compatibility with NumPy.
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/frame/test_cumulative.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas import (
DataFrame,
Series,
Timestamp,
)
import pandas._testing as tm

Expand Down Expand Up @@ -79,3 +80,25 @@ def test_cumsum_preserve_dtypes(self):
}
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"])
def test_numeric_only_flag(self, method):
    # numeric_only=True must give the same frame as calling the method on a
    # copy from which the "string" and "datetime" columns were dropped first.
    timestamps = [Timestamp(year, 1, 1) for year in (2018, 2019, 2020)]
    columns = {
        "int": [1, 2, 3],
        "bool": [True, False, False],
        "string": ["a", "b", "c"],
        "float": [1.0, 3.5, 4.0],
        "datetime": timestamps,
    }
    df = DataFrame(columns)
    df_numeric_only = df.drop(["string", "datetime"], axis=1)

    # Check both the row-wise and the column-wise accumulation.
    for axis in (0, 1):
        cumulate_all = getattr(df, method)
        cumulate_numeric = getattr(df_numeric_only, method)
        result = cumulate_all(axis=axis, numeric_only=True)
        expected = cumulate_numeric(axis)
        tm.assert_frame_equal(result, expected)
3 changes: 1 addition & 2 deletions pandas/tests/groupby/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,8 @@ def test_frame_consistency(groupby_func):
exclude_expected = {"downcast", "inplace", "axis"}
elif groupby_func in ("cummax", "cummin"):
exclude_expected = {"skipna", "args"}
exclude_result = {"numeric_only"}
elif groupby_func in ("cumprod", "cumsum"):
exclude_expected = {"skipna"}
exclude_expected = {"skipna", "numeric_only"}
elif groupby_func in ("pct_change",):
exclude_expected = {"kwargs"}
exclude_result = {"axis"}
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only):
kwargs["numeric_only"] = numeric_only

# Functions without numeric_only and axis args
no_args = ("cumprod", "cumsum", "diff", "fillna", "pct_change", "rank", "shift")
no_args = ("diff", "fillna", "pct_change", "rank", "shift")
# Functions with axis args
has_axis = (
"cumprod",
Expand All @@ -579,9 +579,8 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only):
if numeric_only is not None and groupby_func in no_args:
msg = "got an unexpected keyword argument 'numeric_only'"
if groupby_func in ["cumprod", "cumsum"]:
with pytest.raises(TypeError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
method(*args, **kwargs)
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
method(*args, **kwargs)
else:
with pytest.raises(TypeError, match=msg):
method(*args, **kwargs)
Expand Down