From 3e508e6a5b4966974c193da571295db8c4127a70 Mon Sep 17 00:00:00 2001 From: matt Date: Mon, 12 Jun 2023 08:13:36 +0100 Subject: [PATCH 1/9] add numeric_only to dataframe cum methods --- pandas/core/frame.py | 20 ++++++++++++-------- pandas/core/generic.py | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 671924c5e9607..151031ab85afb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11077,20 +11077,24 @@ def kurt( product = prod @doc(make_doc("cummin", ndim=2)) - def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): - return NDFrame.cummin(self, axis, skipna, *args, **kwargs) + def cummin(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cummin(data, axis, skipna, *args, **kwargs) @doc(make_doc("cummax", ndim=2)) - def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): - return NDFrame.cummax(self, axis, skipna, *args, **kwargs) + def cummax(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cummax(data, axis, skipna, *args, **kwargs) @doc(make_doc("cumsum", ndim=2)) - def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): - return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) + def cumsum(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cumsum(data, axis, skipna, *args, **kwargs) @doc(make_doc("cumprod", 2)) - def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs): - return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) + def cumprod(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + data = self._get_numeric_data() if numeric_only else self + return NDFrame.cumprod(data, axis, skipna, *args, **kwargs) def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f73ef36f76086..659acd8990d35 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -12271,6 +12271,8 @@ def last_valid_index(self) -> Hashable | None: skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. +numeric_only : bool, default False + Include only float, int, boolean columns. *args, **kwargs Additional keywords have no effect but might be accepted for compatibility with NumPy. From 589da94917c9d8fc173fa37f6c99bc91a32ea7e8 Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 12 Jun 2023 20:26:44 +0100 Subject: [PATCH 2/9] test cum numeric options --- pandas/tests/frame/test_cumulative.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index 5bd9c42612315..b6016e40f6a59 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -12,6 +12,7 @@ from pandas import ( DataFrame, Series, + Timestamp ) import pandas._testing as tm @@ -79,3 +80,21 @@ def test_cumsum_preserve_dtypes(self): } ) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", ["cumsum", "cumprod", "cummin", "cummax"]) + def test_numeric_only_flag(self, method): + df = DataFrame( + { + "int": [1, 2, 3], + "bool": [True, False, False], + "string": ["a", "b", "c"], + "float": [1.0, 3.5, 4.0], + "datetime": [Timestamp(2018, 1, 1), Timestamp(2019, 1, 1), Timestamp(2020, 1, 1)]}) + df_numeric_only = df.drop(["string", "datetime"], axis=1) + + for axis in [0, 1]: + result = getattr(df, method)(axis=axis, numeric_only=True) + expected = getattr(df_numeric_only, method)(axis) + tm.assert_frame_equal(result, expected) + + From ec4cfecec54004ca92c7afd10d3918600d4b66bc Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 12 Jun 2023 20:33:27 +0100 Subject: [PATCH 3/9] black and isort --- pandas/core/frame.py | 36 ++++++++++++++++++++++++--- pandas/tests/frame/test_cumulative.py | 22 +++++++++------- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 151031ab85afb..4efe6ae5f8b20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11077,22 +11077,50 @@ def kurt( product = prod @doc(make_doc("cummin", ndim=2)) - def cummin(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + def cummin( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ): data = self._get_numeric_data() if numeric_only else self return NDFrame.cummin(data, axis, skipna, *args, **kwargs) @doc(make_doc("cummax", ndim=2)) - def cummax(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + def cummax( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ): data = self._get_numeric_data() if numeric_only else self return NDFrame.cummax(data, axis, skipna, *args, **kwargs) @doc(make_doc("cumsum", ndim=2)) - def cumsum(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + def cumsum( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ): data = self._get_numeric_data() if numeric_only else self return NDFrame.cumsum(data, axis, skipna, *args, **kwargs) @doc(make_doc("cumprod", 2)) - def cumprod(self, axis: Axis | None = None, skipna: bool = True, numeric_only: bool = False, *args, **kwargs): + def cumprod( + self, + axis: Axis | None = None, + skipna: bool = True, + numeric_only: bool = False, + *args, + **kwargs, + ): data = self._get_numeric_data() if numeric_only else self return NDFrame.cumprod(data, axis, skipna, *args, **kwargs) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index b6016e40f6a59..fe38e250e3014 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -12,7 +12,7 @@ from pandas import ( DataFrame, Series, - Timestamp + Timestamp, ) import pandas._testing as tm @@ -85,16 +85,20 @@ def test_cumsum_preserve_dtypes(self): def test_numeric_only_flag(self, method): df = DataFrame( { - "int": [1, 2, 3], - "bool": [True, False, False], - "string": ["a", "b", "c"], - "float": [1.0, 3.5, 4.0], - "datetime": [Timestamp(2018, 1, 1), Timestamp(2019, 1, 1), Timestamp(2020, 1, 1)]}) + "int": [1, 2, 3], + "bool": [True, False, False], + "string": ["a", "b", "c"], + "float": [1.0, 3.5, 4.0], + "datetime": [ + Timestamp(2018, 1, 1), + Timestamp(2019, 1, 1), + Timestamp(2020, 1, 1), + ], + } + ) df_numeric_only = df.drop(["string", "datetime"], axis=1) for axis in [0, 1]: result = getattr(df, method)(axis=axis, numeric_only=True) expected = getattr(df_numeric_only, method)(axis) - tm.assert_frame_equal(result, expected) - - + tm.assert_frame_equal(result, expected) From 10bcbb300d8b7982b1e5054599248d8d833ce79d Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 12 Jun 2023 20:38:43 +0100 Subject: [PATCH 4/9] update whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index baacc8c421414..6035270375c87 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -108,7 +108,7 @@ Other enhancements - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) - +- :meth DataFrame.cum* methods now have a ``numeric_only`` parameter. .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From f6e47fe9423c3f2c4e23389b413cca2ba2aaa7ae Mon Sep 17 00:00:00 2001 From: Matt Date: Mon, 12 Jun 2023 20:41:43 +0100 Subject: [PATCH 5/9] update whatsnew --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6035270375c87..09563fd9f4bb2 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -108,7 +108,7 @@ Other enhancements - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) -- :meth DataFrame.cum* methods now have a ``numeric_only`` parameter. +- :meth:`DataFrame.cum*` methods now have a ``numeric_only`` parameter. .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 0ce140b3bd9e6a7d68459da5e1972e8f7f18546b Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 14 Jun 2023 07:34:28 +0100 Subject: [PATCH 6/9] update groupby tests --- pandas/tests/groupby/test_api.py | 3 +-- pandas/tests/groupby/test_function.py | 7 +++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index 1122403be877f..df2ed10e5280f 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -182,9 +182,8 @@ def test_frame_consistency(groupby_func): exclude_expected = {"downcast", "inplace", "axis"} elif groupby_func in ("cummax", "cummin"): exclude_expected = {"skipna", "args"} - exclude_result = {"numeric_only"} elif groupby_func in ("cumprod", "cumsum"): - exclude_expected = {"skipna"} + exclude_expected = {"skipna", "numeric_only"} elif groupby_func in ("pct_change",): exclude_expected = {"kwargs"} exclude_result = {"axis"} diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 98fce9d668e44..9ed9188946d51 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -549,7 +549,7 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only): kwargs["numeric_only"] = numeric_only # Functions without numeric_only and axis args - no_args = ("cumprod", "cumsum", "diff", "fillna", "pct_change", "rank", "shift") + no_args = ("diff", "fillna", "pct_change", "rank", "shift") # Functions with axis args has_axis = ( "cumprod", @@ -568,9 +568,8 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only): if numeric_only is not None and groupby_func in no_args: msg = "got an unexpected keyword argument 'numeric_only'" if groupby_func in ["cumprod", "cumsum"]: - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - method(*args, **kwargs) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + method(*args, **kwargs) else: with pytest.raises(TypeError, match=msg): method(*args, **kwargs) From fb6e13291c42a6c4beb2bf72e758ac5f34512396 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 14 Jun 2023 09:06:59 +0100 Subject: [PATCH 7/9] isort --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 32c5a07ddfb20..3b1744295bc47 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -99,6 +99,7 @@ Other enhancements - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`) - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"`` - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`) +- :meth:`DataFrame.cum*` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.stack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) - :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) - :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`) @@ -109,7 +110,6 @@ Other enhancements - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) -- :meth:`DataFrame.cum*` methods now have a ``numeric_only`` parameter. .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 5e930b23b6aeb3ee1b5afcee359f0f400fc33646 Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 21 Jun 2023 21:04:46 +0100 Subject: [PATCH 8/9] add blank line in docs --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3b1744295bc47..8c0b7b0bd8409 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -110,6 +110,7 @@ Other enhancements - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) + .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: From 5ac09a57bc28e80e9a616ef1bb4a34427c01165e Mon Sep 17 00:00:00 2001 From: Matt Date: Sat, 1 Jul 2023 12:43:27 +0100 Subject: [PATCH 9/9] update whats new --- doc/source/whatsnew/v2.1.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 34d3239e9472f..34758727ea829 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -106,7 +106,6 @@ Other enhancements - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"`` - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`) - :meth:`DataFrame.cum*` methods now have a ``numeric_only`` parameter (:issue:`53072`) -- :meth:`DataFrame.stack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) - :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) - :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`) - :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)