From 7edceb611a5c951f99592998d37cbe438ddfe3f5 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 29 May 2020 17:39:08 +0800 Subject: [PATCH 1/7] improved implementation path to allow pad, backfill and cumcount in groupby.transform --- pandas/core/generic.py | 4 ++++ pandas/core/groupby/generic.py | 4 ++++ .../tests/groupby/transform/test_transform.py | 18 +++++++++++++----- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0260f30b9e7e2..ee7dfd4c25a89 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6193,6 +6193,8 @@ def ffill( method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast ) + pad = ffill + def bfill( self: FrameOrSeries, axis=None, @@ -6212,6 +6214,8 @@ def bfill( method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast ) + backfill = bfill + @doc(klass=_shared_doc_kwargs["klass"]) def replace( self, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea4b6f4e65341..e47cfddeeaad7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -483,6 +483,8 @@ def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): elif func in base.cythonized_kernels: # cythonized transform or canned "agg+broadcast" return getattr(self, func)(*args, **kwargs) + elif func in base.transformation_kernels: + return getattr(self, func)(*args, **kwargs) # If func is a reduction, we need to broadcast the # result to the whole group. 
Compute func result @@ -1464,6 +1466,8 @@ def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): elif func in base.cythonized_kernels: # cythonized transformation or canned "reduction+broadcast" return getattr(self, func)(*args, **kwargs) + elif func in base.transformation_kernels: + return getattr(self, func)(*args, **kwargs) # GH 30918 # Use _transform_fast only when we know func is an aggregation diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index b3347b3c64e6c..f8d251dc302cd 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -323,15 +323,20 @@ def test_transform_transformation_func(transformation_func): { "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"], "B": [1, 2, np.nan, 3, 3, np.nan, 4], - } + }, + index=pd.date_range("2020-01-01", "2020-01-07"), ) - if transformation_func in ["pad", "backfill", "tshift", "cumcount"]: - # These transformation functions are not yet covered in this test - pytest.xfail("See GH 31269") + if transformation_func == "cumcount": + test_op = lambda x: x.transform("cumcount") + mock_op = lambda x: Series(range(len(x)), x.index) elif transformation_func == "fillna": test_op = lambda x: x.transform("fillna", value=0) mock_op = lambda x: x.fillna(value=0) + elif transformation_func == "tshift": + pytest.xfail( + "Current behavior of groupby.tshift is inconsitent with other transformations" + ) else: test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() @@ -340,7 +345,10 @@ def test_transform_transformation_func(transformation_func): groups = [df[["B"]].iloc[:4], df[["B"]].iloc[4:6], df[["B"]].iloc[6:]] expected = concat([mock_op(g) for g in groups]) - tm.assert_frame_equal(result, expected) + if transformation_func == "cumcount": + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, 
expected) def test_transform_select_columns(df): From e335ad876958e29def6a4bac17b1bb57bceaf57b Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 29 May 2020 18:16:12 +0800 Subject: [PATCH 2/7] added more details in xfail message --- pandas/tests/groupby/transform/test_transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index f8d251dc302cd..affefd4dff0c4 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -334,9 +334,11 @@ def test_transform_transformation_func(transformation_func): test_op = lambda x: x.transform("fillna", value=0) mock_op = lambda x: x.fillna(value=0) elif transformation_func == "tshift": - pytest.xfail( - "Current behavior of groupby.tshift is inconsitent with other transformations" + msg = ( + "Current behavior of groupby.tshift is inconsitent with other transformations." + "See GH34452" ) + pytest.xfail(msg) else: test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() From 854958035c4b0f93b55461d866bb94f078aa5da6 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 29 May 2020 18:19:42 +0800 Subject: [PATCH 3/7] reformatted xfail message --- pandas/tests/groupby/transform/test_transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index affefd4dff0c4..e7bc3801a08a7 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -335,8 +335,8 @@ def test_transform_transformation_func(transformation_func): mock_op = lambda x: x.fillna(value=0) elif transformation_func == "tshift": msg = ( - "Current behavior of groupby.tshift is inconsitent with other transformations." 
- "See GH34452" + "Current behavior of groupby.tshift is inconsistent with other " + "transformations. See GH34452 for more details" ) pytest.xfail(msg) else: From d30d132dfd0b721207524a7bd93ad80d8581cdae Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Fri, 29 May 2020 18:29:06 +0800 Subject: [PATCH 4/7] added whatsnew entry (GH31269) --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 88bf0e005a221..59e7c466dab96 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -288,6 +288,7 @@ Other enhancements - :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`). - Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`) - Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`). +- :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`). .. 
--------------------------------------------------------------------------- From ba85875cf56103571dec4d3fc190650c1719bac0 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Mon, 1 Jun 2020 21:12:05 +0800 Subject: [PATCH 5/7] added entries in api references, and filled some other missing ones --- doc/source/reference/frame.rst | 11 ++++++++--- doc/source/reference/groupby.rst | 5 +++++ doc/source/reference/series.rst | 10 ++++++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 12b9f67ddb846..d222db3d76d37 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -47,8 +47,6 @@ Conversion DataFrame.convert_dtypes DataFrame.infer_objects DataFrame.copy - DataFrame.isna - DataFrame.notna DataFrame.bool Indexing, iteration @@ -211,10 +209,17 @@ Missing data handling .. autosummary:: :toctree: api/ + DataFrame.backfill + DataFrame.bfill DataFrame.dropna + DataFrame.ffill DataFrame.fillna - DataFrame.replace DataFrame.interpolate + DataFrame.isna + DataFrame.isnull + DataFrame.notna + DataFrame.notnull + DataFrame.replace Reshaping, sorting, transposing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index ca444dac9d77d..5f6bef2579d27 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -50,6 +50,7 @@ Computations / descriptive stats GroupBy.all GroupBy.any GroupBy.bfill + GroupBy.backfill GroupBy.count GroupBy.cumcount GroupBy.cummax @@ -67,6 +68,7 @@ Computations / descriptive stats GroupBy.ngroup GroupBy.nth GroupBy.ohlc + GroupBy.pad GroupBy.prod GroupBy.rank GroupBy.pct_change @@ -88,10 +90,12 @@ application to columns of a specific data type. 
DataFrameGroupBy.all DataFrameGroupBy.any + DataFrameGroupBy.backfill DataFrameGroupBy.bfill DataFrameGroupBy.corr DataFrameGroupBy.count DataFrameGroupBy.cov + DataFrameGroupBy.cumcount DataFrameGroupBy.cummax DataFrameGroupBy.cummin DataFrameGroupBy.cumprod @@ -106,6 +110,7 @@ application to columns of a specific data type. DataFrameGroupBy.idxmin DataFrameGroupBy.mad DataFrameGroupBy.nunique + DataFrameGroupBy.pad DataFrameGroupBy.pct_change DataFrameGroupBy.plot DataFrameGroupBy.quantile diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 797ade9594c7d..6aff16d0ae1b7 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -214,11 +214,17 @@ Missing data handling .. autosummary:: :toctree: api/ - Series.isna - Series.notna + Series.backfill + Series.bfill Series.dropna + Series.ffill Series.fillna Series.interpolate + Series.isna + Series.isnull + Series.notna + Series.notnull + Series.pad Reshaping, sorting ------------------ From 35b392c73bdfec092d3c92aaef8aca1024e2afaf Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 3 Jun 2020 10:15:32 +0800 Subject: [PATCH 6/7] minor addition in frame and series reference api doc --- doc/source/reference/frame.rst | 1 + doc/source/reference/series.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index d222db3d76d37..774daab122a09 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -220,6 +220,7 @@ Missing data handling DataFrame.notna DataFrame.notnull DataFrame.replace + DataFrame.pad Reshaping, sorting, transposing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 6aff16d0ae1b7..3b595ba5ab206 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -225,6 +225,7 @@ Missing data handling Series.notna Series.notnull Series.pad + Series.replace Reshaping, sorting 
------------------ From fb6edf9fd3cd338e88538a9451c05cb2500e2dcf Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 3 Jun 2020 10:16:42 +0800 Subject: [PATCH 7/7] reordering in frame and series reference api doc --- doc/source/reference/frame.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 774daab122a09..e3dfb552651a0 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -219,8 +219,8 @@ Missing data handling DataFrame.isnull DataFrame.notna DataFrame.notnull - DataFrame.replace DataFrame.pad + DataFrame.replace Reshaping, sorting, transposing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~