Skip to content

Commit ea402a3

Browse files
authored
[ENH] Allow pad, backfill and cumcount in groupby.transform (#34453)
1 parent 019d891 commit ea402a3

File tree

7 files changed

+47
-10
lines changed

7 files changed

+47
-10
lines changed

doc/source/reference/frame.rst

+9-3
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ Conversion
4747
DataFrame.convert_dtypes
4848
DataFrame.infer_objects
4949
DataFrame.copy
50-
DataFrame.isna
51-
DataFrame.notna
5250
DataFrame.bool
5351

5452
Indexing, iteration
@@ -211,10 +209,18 @@ Missing data handling
211209
.. autosummary::
212210
:toctree: api/
213211

212+
DataFrame.backfill
213+
DataFrame.bfill
214214
DataFrame.dropna
215+
DataFrame.ffill
215216
DataFrame.fillna
216-
DataFrame.replace
217217
DataFrame.interpolate
218+
DataFrame.isna
219+
DataFrame.isnull
220+
DataFrame.notna
221+
DataFrame.notnull
222+
DataFrame.pad
223+
DataFrame.replace
218224

219225
Reshaping, sorting, transposing
220226
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/reference/groupby.rst

+5
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Computations / descriptive stats
5050
GroupBy.all
5151
GroupBy.any
5252
GroupBy.bfill
53+
GroupBy.backfill
5354
GroupBy.count
5455
GroupBy.cumcount
5556
GroupBy.cummax
@@ -67,6 +68,7 @@ Computations / descriptive stats
6768
GroupBy.ngroup
6869
GroupBy.nth
6970
GroupBy.ohlc
71+
GroupBy.pad
7072
GroupBy.prod
7173
GroupBy.rank
7274
GroupBy.pct_change
@@ -88,10 +90,12 @@ application to columns of a specific data type.
8890

8991
DataFrameGroupBy.all
9092
DataFrameGroupBy.any
93+
DataFrameGroupBy.backfill
9194
DataFrameGroupBy.bfill
9295
DataFrameGroupBy.corr
9396
DataFrameGroupBy.count
9497
DataFrameGroupBy.cov
98+
DataFrameGroupBy.cumcount
9599
DataFrameGroupBy.cummax
96100
DataFrameGroupBy.cummin
97101
DataFrameGroupBy.cumprod
@@ -106,6 +110,7 @@ application to columns of a specific data type.
106110
DataFrameGroupBy.idxmin
107111
DataFrameGroupBy.mad
108112
DataFrameGroupBy.nunique
113+
DataFrameGroupBy.pad
109114
DataFrameGroupBy.pct_change
110115
DataFrameGroupBy.plot
111116
DataFrameGroupBy.quantile

doc/source/reference/series.rst

+9-2
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,18 @@ Missing data handling
214214
.. autosummary::
215215
:toctree: api/
216216

217-
Series.isna
218-
Series.notna
217+
Series.backfill
218+
Series.bfill
219219
Series.dropna
220+
Series.ffill
220221
Series.fillna
221222
Series.interpolate
223+
Series.isna
224+
Series.isnull
225+
Series.notna
226+
Series.notnull
227+
Series.pad
228+
Series.replace
222229

223230
Reshaping, sorting
224231
------------------

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ Other enhancements
288288
- :meth:`HDFStore.put` now accepts a ``track_times`` parameter, which is passed to the ``create_table`` method of ``PyTables`` (:issue:`32682`).
289289
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable (:issue:`11704`)
290290
- Make ``option_context`` a :class:`contextlib.ContextDecorator`, which allows it to be used as a decorator over an entire function (:issue:`34253`).
291+
- :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
291292

292293
.. ---------------------------------------------------------------------------
293294

pandas/core/generic.py

+4
Original file line numberDiff line numberDiff line change
@@ -6193,6 +6193,8 @@ def ffill(
61936193
method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
61946194
)
61956195

6196+
pad = ffill
6197+
61966198
def bfill(
61976199
self: FrameOrSeries,
61986200
axis=None,
@@ -6212,6 +6214,8 @@ def bfill(
62126214
method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
62136215
)
62146216

6217+
backfill = bfill
6218+
62156219
@doc(klass=_shared_doc_kwargs["klass"])
62166220
def replace(
62176221
self,

pandas/core/groupby/generic.py

+4
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,8 @@ def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs):
480480
elif func in base.cythonized_kernels:
481481
# cythonized transform or canned "agg+broadcast"
482482
return getattr(self, func)(*args, **kwargs)
483+
elif func in base.transformation_kernels:
484+
return getattr(self, func)(*args, **kwargs)
483485

484486
# If func is a reduction, we need to broadcast the
485487
# result to the whole group. Compute func result
@@ -1461,6 +1463,8 @@ def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs):
14611463
elif func in base.cythonized_kernels:
14621464
# cythonized transformation or canned "reduction+broadcast"
14631465
return getattr(self, func)(*args, **kwargs)
1466+
elif func in base.transformation_kernels:
1467+
return getattr(self, func)(*args, **kwargs)
14641468

14651469
# GH 30918
14661470
# Use _transform_fast only when we know func is an aggregation

pandas/tests/groupby/transform/test_transform.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -323,15 +323,22 @@ def test_transform_transformation_func(transformation_func):
323323
{
324324
"A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"],
325325
"B": [1, 2, np.nan, 3, 3, np.nan, 4],
326-
}
326+
},
327+
index=pd.date_range("2020-01-01", "2020-01-07"),
327328
)
328329

329-
if transformation_func in ["pad", "backfill", "tshift", "cumcount"]:
330-
# These transformation functions are not yet covered in this test
331-
pytest.xfail("See GH 31269")
330+
if transformation_func == "cumcount":
331+
test_op = lambda x: x.transform("cumcount")
332+
mock_op = lambda x: Series(range(len(x)), x.index)
332333
elif transformation_func == "fillna":
333334
test_op = lambda x: x.transform("fillna", value=0)
334335
mock_op = lambda x: x.fillna(value=0)
336+
elif transformation_func == "tshift":
337+
msg = (
338+
"Current behavior of groupby.tshift is inconsistent with other "
339+
"transformations. See GH34452 for more details"
340+
)
341+
pytest.xfail(msg)
335342
else:
336343
test_op = lambda x: x.transform(transformation_func)
337344
mock_op = lambda x: getattr(x, transformation_func)()
@@ -340,7 +347,10 @@ def test_transform_transformation_func(transformation_func):
340347
groups = [df[["B"]].iloc[:4], df[["B"]].iloc[4:6], df[["B"]].iloc[6:]]
341348
expected = concat([mock_op(g) for g in groups])
342349

343-
tm.assert_frame_equal(result, expected)
350+
if transformation_func == "cumcount":
351+
tm.assert_series_equal(result, expected)
352+
else:
353+
tm.assert_frame_equal(result, expected)
344354

345355

346356
def test_transform_select_columns(df):

0 commit comments

Comments
 (0)