diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1ba327a4ea50c..3ddab0717396e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -635,7 +635,7 @@ Performance Improvements - Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). -- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`) +- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`) - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`) - Improved performance of `rank()` for categorical data (:issue:`15498`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 578c334781d15..43c57a88b4d19 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1442,7 +1442,7 @@ def cummin(self, axis=0, **kwargs): if axis != 0: return self.apply(lambda x: np.minimum.accumulate(x, axis)) - return self._cython_transform('cummin', **kwargs) + return self._cython_transform('cummin', numeric_only=False) @Substitution(name='groupby') @Appender(_doc_template) @@ -1451,7 +1451,7 @@ def cummax(self, axis=0, **kwargs): if axis != 0: return self.apply(lambda x: np.maximum.accumulate(x, axis)) - return self._cython_transform('cummax', **kwargs) + return self._cython_transform('cummax', numeric_only=False) @Substitution(name='groupby') @Appender(_doc_template) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 74e8c6c45946f..e846963732883 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1954,7 +1954,8 @@ def test_arg_passthru(self): for attr in ['cummin', 'cummax']: f = getattr(df.groupby('group'), attr) result = f() - tm.assert_index_equal(result.columns, expected_columns_numeric) + # GH 15561: numeric_only=False set by default like min/max + tm.assert_index_equal(result.columns, expected_columns) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) @@ -4295,6 +4296,13 @@ def test_cummin_cummax(self): result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame() tm.assert_frame_equal(expected, result) + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001']))) + expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b') + for method in ['cummax', 'cummin']: + result = getattr(df.groupby('a')['b'], method)() + tm.assert_series_equal(expected, result) + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values)