Skip to content

Commit f787764

Browse files
authored
ENH: Support pipe() method in Rolling and Expanding (pandas-dev#60697)
* ENH: Support pipe() method in Rolling and Expanding * Fix mypy errors * Fix docstring errors * Add pipe method to doc reference
1 parent 55a6d0a commit f787764

File tree

6 files changed

+236
-2
lines changed

6 files changed

+236
-2
lines changed

doc/source/reference/window.rst

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Rolling window functions
3535
Rolling.skew
3636
Rolling.kurt
3737
Rolling.apply
38+
Rolling.pipe
3839
Rolling.aggregate
3940
Rolling.quantile
4041
Rolling.sem
@@ -76,6 +77,7 @@ Expanding window functions
7677
Expanding.skew
7778
Expanding.kurt
7879
Expanding.apply
80+
Expanding.pipe
7981
Expanding.aggregate
8082
Expanding.quantile
8183
Expanding.sem

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Other enhancements
4444
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
4545
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
4646
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
47+
- :class:`Rolling` and :class:`Expanding` now support the ``pipe`` method (:issue:`57076`)
4748
- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
4849
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
4950
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)

pandas/core/window/doc.py

+57
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,63 @@ def create_section_header(header: str) -> str:
8585
"""
8686
).replace("\n", "", 1)
8787

88+
# Shared docstring template for the ``pipe`` method of the window classes.
# ``%(klass)s`` and ``%(examples)s`` are printf-style placeholders that each
# ``pipe`` method fills in (they pass ``klass`` and ``examples`` to
# ``Substitution``), so a literal percent sign added to this text must be
# written as ``%%``.
template_pipe = """
Apply a ``func`` with arguments to this %(klass)s object and return its result.

Use `.pipe` when you want to improve readability by chaining together
functions that expect Series, DataFrames, GroupBy, Rolling, Expanding or Resampler
objects.
Instead of writing

>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
>>> g = lambda x, arg1: x * 5 / arg1
>>> f = lambda x: x ** 4
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
...                   index=pd.date_range('2012-08-02', periods=4))
>>> h(g(f(df.rolling('2D')), arg1=1), arg2=2, arg3=3)  # doctest: +SKIP

You can write

>>> (df.rolling('2D')
...    .pipe(f)
...    .pipe(g, arg1=1)
...    .pipe(h, arg2=2, arg3=3))  # doctest: +SKIP

which is much more readable.

Parameters
----------
func : callable or tuple of (callable, str)
    Function to apply to this %(klass)s object or, alternatively,
    a `(callable, data_keyword)` tuple where `data_keyword` is a
    string indicating the keyword of `callable` that expects the
    %(klass)s object.
*args : iterable, optional
    Positional arguments passed into `func`.
**kwargs : dict, optional
    A dictionary of keyword arguments passed into `func`.

Returns
-------
object
    Whatever `func` returns.

See Also
--------
Series.pipe : Apply a function with arguments to a series.
DataFrame.pipe : Apply a function with arguments to a dataframe.
apply : Apply function to each window instead of to the
    full %(klass)s object.

Notes
-----
See more `here
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_

Examples
--------
%(examples)s
"""
144+
88145
numba_notes = (
89146
"See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for "
90147
"extended documentation and performance considerations for the Numba engine.\n\n"

pandas/core/window/expanding.py

+60-1
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,15 @@
55
TYPE_CHECKING,
66
Any,
77
Literal,
8+
final,
9+
overload,
810
)
911

10-
from pandas.util._decorators import doc
12+
from pandas.util._decorators import (
13+
Appender,
14+
Substitution,
15+
doc,
16+
)
1117

1218
from pandas.core.indexers.objects import (
1319
BaseIndexer,
@@ -20,6 +26,7 @@
2026
kwargs_numeric_only,
2127
numba_notes,
2228
template_header,
29+
template_pipe,
2330
template_returns,
2431
template_see_also,
2532
window_agg_numba_parameters,
@@ -34,7 +41,11 @@
3441
from collections.abc import Callable
3542

3643
from pandas._typing import (
44+
Concatenate,
45+
P,
3746
QuantileInterpolation,
47+
Self,
48+
T,
3849
WindowingRankType,
3950
)
4051

@@ -241,6 +252,54 @@ def apply(
241252
kwargs=kwargs,
242253
)
243254

255+
# Typing overload: ``func`` is a plain callable that takes this object first.
@overload
def pipe(
    self,
    func: Callable[Concatenate[Self, P], T],
    *args: P.args,
    **kwargs: P.kwargs,
) -> T: ...

# Typing overload: ``func`` is a ``(callable, data_keyword)`` tuple.
@overload
def pipe(
    self,
    func: tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T: ...

# No inline docstring: the decorators build it from the shared
# ``template_pipe`` text, with ``klass`` and ``examples`` supplied here.
# The expected doctest output below is kept column-aligned because doctest
# compares output text exactly.
@final
@Substitution(
    klass="Expanding",
    examples="""
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
...                   index=pd.date_range('2012-08-02', periods=4))
>>> df
            A
2012-08-02  1
2012-08-03  2
2012-08-04  3
2012-08-05  4

To get the difference between each expanding window's maximum and minimum
value in one pass, you can do

>>> df.expanding().pipe(lambda x: x.max() - x.min())
              A
2012-08-02  0.0
2012-08-03  1.0
2012-08-04  2.0
2012-08-05  3.0""",
)
@Appender(template_pipe)
def pipe(
    self,
    func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T:
    # Delegate to the parent class implementation; this override exists to
    # attach the Expanding-specific documentation.
    return super().pipe(func, *args, **kwargs)
302+
244303
@doc(
245304
template_header,
246305
create_section_header("Parameters"),

pandas/core/window/rolling.py

+84-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
TYPE_CHECKING,
1515
Any,
1616
Literal,
17+
final,
18+
overload,
1719
)
1820

1921
import numpy as np
@@ -26,7 +28,11 @@
2628
import pandas._libs.window.aggregations as window_aggregations
2729
from pandas.compat._optional import import_optional_dependency
2830
from pandas.errors import DataError
29-
from pandas.util._decorators import doc
31+
from pandas.util._decorators import (
32+
Appender,
33+
Substitution,
34+
doc,
35+
)
3036

3137
from pandas.core.dtypes.common import (
3238
ensure_float64,
@@ -81,6 +87,7 @@
8187
kwargs_scipy,
8288
numba_notes,
8389
template_header,
90+
template_pipe,
8491
template_returns,
8592
template_see_also,
8693
window_agg_numba_parameters,
@@ -102,8 +109,12 @@
102109

103110
from pandas._typing import (
104111
ArrayLike,
112+
Concatenate,
105113
NDFrameT,
106114
QuantileInterpolation,
115+
P,
116+
Self,
117+
T,
107118
WindowingRankType,
108119
npt,
109120
)
@@ -1529,6 +1540,30 @@ def apply_func(values, begin, end, min_periods, raw=raw):
15291540

15301541
return apply_func
15311542

1543+
# Typing overload: ``func`` is a plain callable that takes this object first.
@overload
def pipe(
    self,
    func: Callable[Concatenate[Self, P], T],
    *args: P.args,
    **kwargs: P.kwargs,
) -> T: ...

# Typing overload: ``func`` is a ``(callable, data_keyword)`` tuple.
@overload
def pipe(
    self,
    func: tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T: ...

def pipe(
    self,
    func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T:
    """
    Hand this window object to ``func`` and return the outcome.

    The call is forwarded unchanged to ``com.pipe`` together with ``self``,
    ``*args`` and ``**kwargs``.
    """
    piped = com.pipe(self, func, *args, **kwargs)
    return piped
1566+
15321567
def sum(
15331568
self,
15341569
numeric_only: bool = False,
@@ -2044,6 +2079,54 @@ def apply(
20442079
kwargs=kwargs,
20452080
)
20462081

2082+
# Typing overload: ``func`` is a plain callable that takes this object first.
@overload
def pipe(
    self,
    func: Callable[Concatenate[Self, P], T],
    *args: P.args,
    **kwargs: P.kwargs,
) -> T: ...

# Typing overload: ``func`` is a ``(callable, data_keyword)`` tuple.
@overload
def pipe(
    self,
    func: tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T: ...

# No inline docstring: the decorators build it from the shared
# ``template_pipe`` text, with ``klass`` and ``examples`` supplied here.
# The expected doctest output below is kept column-aligned because doctest
# compares output text exactly.
@final
@Substitution(
    klass="Rolling",
    examples="""
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
...                   index=pd.date_range('2012-08-02', periods=4))
>>> df
            A
2012-08-02  1
2012-08-03  2
2012-08-04  3
2012-08-05  4

To get the difference between each rolling 2-day window's maximum and minimum
value in one pass, you can do

>>> df.rolling('2D').pipe(lambda x: x.max() - x.min())
              A
2012-08-02  0.0
2012-08-03  1.0
2012-08-04  1.0
2012-08-05  1.0""",
)
@Appender(template_pipe)
def pipe(
    self,
    func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
    *args: Any,
    **kwargs: Any,
) -> T:
    # Delegate to the parent class implementation; this override exists to
    # attach the Rolling-specific documentation.
    return super().pipe(func, *args, **kwargs)
2129+
20472130
@doc(
20482131
template_header,
20492132
create_section_header("Parameters"),

pandas/tests/window/test_api.py

+32
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,38 @@ def test_agg_nested_dicts():
177177
r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
178178

179179

180+
@pytest.mark.parametrize(
    "func,window_size",
    [
        ("rolling", 2),
        ("expanding", None),
    ],
)
def test_pipe(func, window_size):
    # Issue #57076
    # Draw both columns from one generator so "B" and "C" hold different
    # values; two generators created with the same seed would produce
    # identical columns and hide any column mix-up in the result.
    rng = np.random.default_rng(2)
    df = DataFrame(
        {
            "B": rng.standard_normal(10),
            "C": rng.standard_normal(10),
        }
    )
    r = getattr(df, func)(window_size)

    # Plain callable without extra arguments.
    expected = r.max() - r.mean()
    result = r.pipe(lambda x: x.max() - x.mean())
    tm.assert_frame_equal(result, expected)

    # Plain callable with an extra keyword argument.
    expected = r.max() - 2 * r.min()
    result = r.pipe(lambda x, k: x.max() - k * x.min(), k=2)
    tm.assert_frame_equal(result, expected)

    # Documented ``(callable, data_keyword)`` form: the window object is
    # passed under the named keyword instead of as the first positional.
    result = r.pipe(
        (lambda k, window: window.max() - k * window.min(), "window"),
        k=2,
    )
    tm.assert_frame_equal(result, expected)
210+
211+
180212
def test_count_nonnumeric_types(step):
181213
# GH12541
182214
cols = [

0 commit comments

Comments
 (0)