diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 12b9f67ddb846..e3dfb552651a0 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -47,8 +47,6 @@ Conversion DataFrame.convert_dtypes DataFrame.infer_objects DataFrame.copy - DataFrame.isna - DataFrame.notna DataFrame.bool Indexing, iteration @@ -211,10 +209,18 @@ Missing data handling .. autosummary:: :toctree: api/ + DataFrame.backfill + DataFrame.bfill DataFrame.dropna + DataFrame.ffill DataFrame.fillna - DataFrame.replace DataFrame.interpolate + DataFrame.isna + DataFrame.isnull + DataFrame.notna + DataFrame.notnull + DataFrame.pad + DataFrame.replace Reshaping, sorting, transposing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index ca444dac9d77d..5f6bef2579d27 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -50,6 +50,7 @@ Computations / descriptive stats GroupBy.all GroupBy.any GroupBy.bfill + GroupBy.backfill GroupBy.count GroupBy.cumcount GroupBy.cummax @@ -67,6 +68,7 @@ Computations / descriptive stats GroupBy.ngroup GroupBy.nth GroupBy.ohlc + GroupBy.pad GroupBy.prod GroupBy.rank GroupBy.pct_change @@ -88,10 +90,12 @@ application to columns of a specific data type. DataFrameGroupBy.all DataFrameGroupBy.any + DataFrameGroupBy.backfill DataFrameGroupBy.bfill DataFrameGroupBy.corr DataFrameGroupBy.count DataFrameGroupBy.cov + DataFrameGroupBy.cumcount DataFrameGroupBy.cummax DataFrameGroupBy.cummin DataFrameGroupBy.cumprod @@ -106,6 +110,7 @@ application to columns of a specific data type. 
DataFrameGroupBy.idxmin DataFrameGroupBy.mad DataFrameGroupBy.nunique + DataFrameGroupBy.pad DataFrameGroupBy.pct_change DataFrameGroupBy.plot DataFrameGroupBy.quantile diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 797ade9594c7d..3b595ba5ab206 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -214,11 +214,18 @@ Missing data handling .. autosummary:: :toctree: api/ - Series.isna - Series.notna + Series.backfill + Series.bfill Series.dropna + Series.ffill Series.fillna Series.interpolate + Series.isna + Series.isnull + Series.notna + Series.notnull + Series.pad + Series.replace Reshaping, sorting ------------------ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 88bf0e005a221..59e7c466dab96 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -288,6 +288,7 @@ Other enhancements - :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`). - Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`) - Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`). +- :meth:`core.groupby.DataFrameGroupBy.transform` and :meth:`core.groupby.SeriesGroupBy.transform` now allow ``func`` to be ``pad``, ``backfill`` or ``cumcount`` (:issue:`31269`). ..
--------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0260f30b9e7e2..ee7dfd4c25a89 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6193,6 +6193,8 @@ def ffill( method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast ) + pad = ffill + def bfill( self: FrameOrSeries, axis=None, @@ -6212,6 +6214,8 @@ def bfill( method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast ) + backfill = bfill + @doc(klass=_shared_doc_kwargs["klass"]) def replace( self, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea4b6f4e65341..e47cfddeeaad7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -483,6 +483,8 @@ def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): elif func in base.cythonized_kernels: # cythonized transform or canned "agg+broadcast" return getattr(self, func)(*args, **kwargs) + elif func in base.transformation_kernels: + return getattr(self, func)(*args, **kwargs) # If func is a reduction, we need to broadcast the # result to the whole group. 
Compute func result @@ -1464,6 +1466,8 @@ def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): elif func in base.cythonized_kernels: # cythonized transformation or canned "reduction+broadcast" return getattr(self, func)(*args, **kwargs) + elif func in base.transformation_kernels: + return getattr(self, func)(*args, **kwargs) # GH 30918 # Use _transform_fast only when we know func is an aggregation diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index b3347b3c64e6c..e7bc3801a08a7 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -323,15 +323,22 @@ def test_transform_transformation_func(transformation_func): { "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"], "B": [1, 2, np.nan, 3, 3, np.nan, 4], - } + }, + index=pd.date_range("2020-01-01", "2020-01-07"), ) - if transformation_func in ["pad", "backfill", "tshift", "cumcount"]: - # These transformation functions are not yet covered in this test - pytest.xfail("See GH 31269") + if transformation_func == "cumcount": + test_op = lambda x: x.transform("cumcount") + mock_op = lambda x: Series(range(len(x)), x.index) elif transformation_func == "fillna": test_op = lambda x: x.transform("fillna", value=0) mock_op = lambda x: x.fillna(value=0) + elif transformation_func == "tshift": + msg = ( + "Current behavior of groupby.tshift is inconsistent with other " + "transformations. 
See GH34452 for more details" + ) + pytest.xfail(msg) else: test_op = lambda x: x.transform(transformation_func) mock_op = lambda x: getattr(x, transformation_func)() @@ -340,7 +347,10 @@ def test_transform_transformation_func(transformation_func): groups = [df[["B"]].iloc[:4], df[["B"]].iloc[4:6], df[["B"]].iloc[6:]] expected = concat([mock_op(g) for g in groups]) - tm.assert_frame_equal(result, expected) + if transformation_func == "cumcount": + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) def test_transform_select_columns(df):