ENH: Support pipe() method in Rolling and Expanding (pandas-dev#60697)

snitish · web-flow · commit f787764c43fd · 2025-01-13T14:20:50.000-08:00
* ENH: Support pipe() method in Rolling and Expanding

* Fix mypy errors

* Fix docstring errors

* Add pipe method to doc reference
diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst
@@ -35,6 +35,7 @@ Rolling window functions
    Rolling.skew
    Rolling.kurt
    Rolling.apply
+   Rolling.pipe
    Rolling.aggregate
    Rolling.quantile
    Rolling.sem
@@ -76,6 +77,7 @@ Expanding window functions
    Expanding.skew
    Expanding.kurt
    Expanding.apply
+   Expanding.pipe
    Expanding.aggregate
    Expanding.quantile
    Expanding.sem
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -44,6 +44,7 @@ Other enhancements
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
+- :class:`Rolling` and :class:`Expanding` now support the ``pipe`` method (:issue:`57076`)
 - :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
 - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`)
 - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py
@@ -85,6 +85,63 @@ def create_section_header(header: str) -> str:
     """
 ).replace("\n", "", 1)
 
+template_pipe = """
+Apply a ``func`` with arguments to this %(klass)s object and return its result.
+
+Use `.pipe` when you want to improve readability by chaining together
+functions that expect Series, DataFrames, GroupBy, Rolling, Expanding or Resampler
+objects.
+Instead of writing
+
+>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
+>>> g = lambda x, arg1: x * 5 / arg1
+>>> f = lambda x: x ** 4
+>>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, index=pd.date_range('2012-08-02', periods=4))
+>>> h(g(f(df.rolling('2D')), arg1=1), arg2=2, arg3=3)  # doctest: +SKIP
+
+You can write
+
+>>> (df.rolling('2D')
+...    .pipe(f)
+...    .pipe(g, arg1=1)
+...    .pipe(h, arg2=2, arg3=3))  # doctest: +SKIP
+
+which is much more readable.
+
+Parameters
+----------
+func : callable or tuple of (callable, str)
+    Function to apply to this %(klass)s object or, alternatively,
+    a `(callable, data_keyword)` tuple where `data_keyword` is a
+    string indicating the keyword of `callable` that expects the
+    %(klass)s object.
+*args : iterable, optional
+    Positional arguments passed into `func`.
+**kwargs : dict, optional
+    A dictionary of keyword arguments passed into `func`.
+
+Returns
+-------
+object
+    The value returned by `func`; its type is whatever `func` returns.
+
+See Also
+--------
+Series.pipe : Apply a function with arguments to a series.
+DataFrame.pipe : Apply a function with arguments to a dataframe.
+apply : Apply function to each window instead of to the
+    full %(klass)s object.
+
+Notes
+-----
+See more `here
+<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_
+
+Examples
+--------
+%(examples)s
+"""
+
 numba_notes = (
     "See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for "
     "extended documentation and performance considerations for the Numba engine.\n\n"
diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py
@@ -5,9 +5,15 @@
     TYPE_CHECKING,
     Any,
     Literal,
+    final,
+    overload,
 )
 
-from pandas.util._decorators import doc
+from pandas.util._decorators import (
+    Appender,
+    Substitution,
+    doc,
+)
 
 from pandas.core.indexers.objects import (
     BaseIndexer,
@@ -20,6 +26,7 @@
     kwargs_numeric_only,
     numba_notes,
     template_header,
+    template_pipe,
     template_returns,
     template_see_also,
     window_agg_numba_parameters,
@@ -34,7 +41,11 @@
     from collections.abc import Callable
 
     from pandas._typing import (
+        Concatenate,
+        P,
         QuantileInterpolation,
+        Self,
+        T,
         WindowingRankType,
     )
 
@@ -241,6 +252,54 @@ def apply(
             kwargs=kwargs,
         )
 
+    @overload
+    def pipe(
+        self,
+        func: Callable[Concatenate[Self, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> T: ...
+
+    @overload
+    def pipe(
+        self,
+        func: tuple[Callable[..., T], str],
+        *args: Any,
+        **kwargs: Any,
+    ) -> T: ...
+
+    @final
+    @Substitution(
+        klass="Expanding",
+        examples="""
+    >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
+    ...                   index=pd.date_range('2012-08-02', periods=4))
+    >>> df
+                A
+    2012-08-02  1
+    2012-08-03  2
+    2012-08-04  3
+    2012-08-05  4
+
+    To get the difference between each expanding window's maximum and minimum
+    value in one pass, you can do
+
+    >>> df.expanding().pipe(lambda x: x.max() - x.min())
+                  A
+    2012-08-02  0.0
+    2012-08-03  1.0
+    2012-08-04  2.0
+    2012-08-05  3.0""",
+    )
+    @Appender(template_pipe)
+    def pipe(
+        self,
+        func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
+        *args: Any,
+        **kwargs: Any,
+    ) -> T:
+        return super().pipe(func, *args, **kwargs)
+
     @doc(
         template_header,
         create_section_header("Parameters"),
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -14,6 +14,8 @@
     TYPE_CHECKING,
     Any,
     Literal,
+    final,
+    overload,
 )
 
 import numpy as np
@@ -26,7 +28,11 @@
 import pandas._libs.window.aggregations as window_aggregations
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import DataError
-from pandas.util._decorators import doc
+from pandas.util._decorators import (
+    Appender,
+    Substitution,
+    doc,
+)
 
 from pandas.core.dtypes.common import (
     ensure_float64,
@@ -81,6 +87,7 @@
     kwargs_scipy,
     numba_notes,
     template_header,
+    template_pipe,
     template_returns,
     template_see_also,
     window_agg_numba_parameters,
@@ -102,8 +109,12 @@
 
     from pandas._typing import (
         ArrayLike,
+        Concatenate,
         NDFrameT,
         QuantileInterpolation,
+        P,
+        Self,
+        T,
         WindowingRankType,
         npt,
     )
@@ -1529,6 +1540,30 @@ def apply_func(values, begin, end, min_periods, raw=raw):
 
         return apply_func
 
+    @overload
+    def pipe(
+        self,
+        func: Callable[Concatenate[Self, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> T: ...
+
+    @overload
+    def pipe(
+        self,
+        func: tuple[Callable[..., T], str],
+        *args: Any,
+        **kwargs: Any,
+    ) -> T: ...
+
+    def pipe(
+        self,
+        func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
+        *args: Any,
+        **kwargs: Any,
+    ) -> T:
+        return com.pipe(self, func, *args, **kwargs)
+
     def sum(
         self,
         numeric_only: bool = False,
@@ -2044,6 +2079,54 @@ def apply(
             kwargs=kwargs,
         )
 
+    @overload
+    def pipe(
+        self,
+        func: Callable[Concatenate[Self, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> T: ...
+
+    @overload
+    def pipe(
+        self,
+        func: tuple[Callable[..., T], str],
+        *args: Any,
+        **kwargs: Any,
+    ) -> T: ...
+
+    @final
+    @Substitution(
+        klass="Rolling",
+        examples="""
+    >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
+    ...                   index=pd.date_range('2012-08-02', periods=4))
+    >>> df
+                A
+    2012-08-02  1
+    2012-08-03  2
+    2012-08-04  3
+    2012-08-05  4
+
+    To get the difference between each rolling 2-day window's maximum and minimum
+    value in one pass, you can do
+
+    >>> df.rolling('2D').pipe(lambda x: x.max() - x.min())
+                  A
+    2012-08-02  0.0
+    2012-08-03  1.0
+    2012-08-04  1.0
+    2012-08-05  1.0""",
+    )
+    @Appender(template_pipe)
+    def pipe(
+        self,
+        func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
+        *args: Any,
+        **kwargs: Any,
+    ) -> T:
+        return super().pipe(func, *args, **kwargs)
+
     @doc(
         template_header,
         create_section_header("Parameters"),
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
@@ -177,6 +177,38 @@ def test_agg_nested_dicts():
         r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
 
 
+@pytest.mark.parametrize(
+    "func,window_size",
+    [
+        (
+            "rolling",
+            2,
+        ),
+        (
+            "expanding",
+            None,
+        ),
+    ],
+)
+def test_pipe(func, window_size):
+    # Issue #57076
+    df = DataFrame(
+        {
+            "B": np.random.default_rng(2).standard_normal(10),
+            "C": np.random.default_rng(2).standard_normal(10),
+        }
+    )
+    r = getattr(df, func)(window_size)
+
+    expected = r.max() - r.mean()
+    result = r.pipe(lambda x: x.max() - x.mean())
+    tm.assert_frame_equal(result, expected)
+
+    expected = r.max() - 2 * r.min()
+    result = r.pipe(lambda x, k: x.max() - k * x.min(), k=2)
+    tm.assert_frame_equal(result, expected)
+
+
 def test_count_nonnumeric_types(step):
     # GH12541
     cols = [