Skip to content

Commit dc09a8a

Browse files
authored
ENH: Add method argument to rolling constructor to allow table-wise rolling (#38417)
1 parent 382e806 commit dc09a8a

File tree

11 files changed

+285
-20
lines changed

11 files changed

+285
-20
lines changed

asv_bench/benchmarks/rolling.py

+18
Original file line numberDiff line numberDiff line change
@@ -252,4 +252,22 @@ def time_groupby_mean(self, engine):
252252
self.gb_ewm.mean(engine=engine)
253253

254254

255+
def table_method_func(x):
256+
return np.sum(x, axis=0) + 1
257+
258+
259+
class TableMethod:
260+
261+
params = ["single", "table"]
262+
param_names = ["method"]
263+
264+
def setup(self, method):
265+
self.df = pd.DataFrame(np.random.randn(10, 1000))
266+
267+
def time_apply(self, method):
268+
self.df.rolling(2, method=method).apply(
269+
table_method_func, raw=True, engine="numba"
270+
)
271+
272+
255273
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/user_guide/window.rst

+31-8
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@ pandas supports 4 types of windowing operations:
3737
#. Expanding window: Accumulating window over the values.
3838
#. Exponentially Weighted window: Accumulating and exponentially weighted window over the values.
3939

40-
============================= ================= =========================== =========================== ========================
41-
Concept Method Returned Object Supports time-based windows Supports chained groupby
42-
============================= ================= =========================== =========================== ========================
43-
Rolling window ``rolling`` ``Rolling`` Yes Yes
44-
Weighted window ``rolling`` ``Window`` No No
45-
Expanding window ``expanding`` ``Expanding`` No Yes
46-
Exponentially Weighted window ``ewm`` ``ExponentialMovingWindow`` No Yes (as of version 1.2)
47-
============================= ================= =========================== =========================== ========================
40+
============================= ================= =========================== =========================== ======================== ===================================
41+
Concept Method Returned Object Supports time-based windows Supports chained groupby Supports table method
42+
============================= ================= =========================== =========================== ======================== ===================================
43+
Rolling window ``rolling`` ``Rolling`` Yes Yes Yes (as of version 1.3)
44+
Weighted window ``rolling`` ``Window`` No No No
45+
Expanding window ``expanding`` ``Expanding`` No Yes Yes (as of version 1.3)
46+
Exponentially Weighted window ``ewm`` ``ExponentialMovingWindow`` No Yes (as of version 1.2) No
47+
============================= ================= =========================== =========================== ======================== ===================================
4848

4949
As noted above, some operations support specifying a window based on a time offset:
5050

@@ -76,6 +76,29 @@ which will first group the data by the specified keys and then perform a windowi
7676
to compute the rolling sums to preserve accuracy as much as possible.
7777

7878

79+
.. versionadded:: 1.3
80+
81+
Some windowing operations also support the ``method='table'`` option in the constructor which
82+
performs the windowing operaion over an entire :class:`DataFrame` instead of a single column or row at a time.
83+
This can provide a useful performance benefit for a :class:`DataFrame` with many columns or rows
84+
(with the corresponding ``axis`` argument) or the ability to utilize other columns during the windowing
85+
operation. The ``method='table'`` option can only be used if ``engine='numba'`` is specified
86+
in the corresponding method call.
87+
88+
For example, a `weighted mean <https://en.wikipedia.org/wiki/Weighted_arithmetic_mean>`__ calculation can
89+
be calculated with :meth:`~Rolling.apply` by specifying a separate column of weights.
90+
91+
.. ipython:: python
92+
93+
def weighted_mean(x):
94+
arr = np.ones((1, x.shape[1]))
95+
arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum()
96+
return arr
97+
98+
df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
99+
df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba") # noqa:E501
100+
101+
79102
All windowing operations support a ``min_periods`` argument that dictates the minimum amount of
80103
non-``np.nan`` values a window must have; otherwise, the resulting value is ``np.nan``.
81104
``min_peridos`` defaults to 1 for time-based windows and ``window`` for fixed windows

doc/source/whatsnew/v1.3.0.rst

+5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ For example:
3333
storage_options=headers
3434
)
3535
36+
.. _whatsnew_130.window_method_table:
37+
38+
:class:`Rolling` and :class:`Expanding` now support a ``method`` argument with a
39+
``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`.
40+
See ref:`window.overview` for performance and functional benefits. (:issue:`15095`)
3641

3742
.. _whatsnew_130.enhancements.other:
3843

pandas/core/generic.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -10996,6 +10996,7 @@ def rolling(
1099610996
on: Optional[str] = None,
1099710997
axis: Axis = 0,
1099810998
closed: Optional[str] = None,
10999+
method: str = "single",
1099911000
):
1100011001
axis = self._get_axis_number(axis)
1100111002

@@ -11009,6 +11010,7 @@ def rolling(
1100911010
on=on,
1101011011
axis=axis,
1101111012
closed=closed,
11013+
method=method,
1101211014
)
1101311015

1101411016
return Rolling(
@@ -11020,12 +11022,17 @@ def rolling(
1102011022
on=on,
1102111023
axis=axis,
1102211024
closed=closed,
11025+
method=method,
1102311026
)
1102411027

1102511028
@final
1102611029
@doc(Expanding)
1102711030
def expanding(
11028-
self, min_periods: int = 1, center: Optional[bool_t] = None, axis: Axis = 0
11031+
self,
11032+
min_periods: int = 1,
11033+
center: Optional[bool_t] = None,
11034+
axis: Axis = 0,
11035+
method: str = "single",
1102911036
) -> Expanding:
1103011037
axis = self._get_axis_number(axis)
1103111038
if center is not None:
@@ -11037,7 +11044,9 @@ def expanding(
1103711044
else:
1103811045
center = False
1103911046

11040-
return Expanding(self, min_periods=min_periods, center=center, axis=axis)
11047+
return Expanding(
11048+
self, min_periods=min_periods, center=center, axis=axis, method=method
11049+
)
1104111050

1104211051
@final
1104311052
@doc(ExponentialMovingWindow)

pandas/core/window/ewm.py

+1
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ def __init__(
242242
self.on = None
243243
self.center = False
244244
self.closed = None
245+
self.method = "single"
245246
if times is not None:
246247
if isinstance(times, str):
247248
times = self._selected_obj[times]

pandas/core/window/expanding.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ class Expanding(RollingAndExpandingMixin):
2424
center : bool, default False
2525
Set the labels at the center of the window.
2626
axis : int or str, default 0
27+
method : str {'single', 'table'}, default 'single'
28+
Execute the rolling operation per single column or row (``'single'``)
29+
or over the entire object (``'table'``).
30+
31+
This argument is only implemented when specifying ``engine='numba'``
32+
in the method call.
33+
34+
.. versionadded:: 1.3.0
2735
2836
Returns
2937
-------
@@ -59,10 +67,14 @@ class Expanding(RollingAndExpandingMixin):
5967
4 7.0
6068
"""
6169

62-
_attributes = ["min_periods", "center", "axis"]
70+
_attributes = ["min_periods", "center", "axis", "method"]
6371

64-
def __init__(self, obj, min_periods=1, center=None, axis=0, **kwargs):
65-
super().__init__(obj=obj, min_periods=min_periods, center=center, axis=axis)
72+
def __init__(
73+
self, obj, min_periods=1, center=None, axis=0, method="single", **kwargs
74+
):
75+
super().__init__(
76+
obj=obj, min_periods=min_periods, center=center, axis=axis, method=method
77+
)
6678

6779
def _get_window_indexer(self) -> BaseIndexer:
6880
"""

pandas/core/window/numba_.py

+68-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def generate_numba_apply_func(
1717
kwargs: Dict[str, Any],
1818
func: Callable[..., Scalar],
1919
engine_kwargs: Optional[Dict[str, bool]],
20+
name: str,
2021
):
2122
"""
2223
Generate a numba jitted apply function specified by values from engine_kwargs.
@@ -37,14 +38,16 @@ def generate_numba_apply_func(
3738
function to be applied to each window and will be JITed
3839
engine_kwargs : dict
3940
dictionary of arguments to be passed into numba.jit
41+
name: str
42+
name of the caller (Rolling/Expanding)
4043
4144
Returns
4245
-------
4346
Numba function
4447
"""
4548
nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)
4649

47-
cache_key = (func, "rolling_apply")
50+
cache_key = (func, f"{name}_apply_single")
4851
if cache_key in NUMBA_FUNC_CACHE:
4952
return NUMBA_FUNC_CACHE[cache_key]
5053

@@ -153,3 +156,67 @@ def groupby_ewma(
153156
return result
154157

155158
return groupby_ewma
159+
160+
161+
def generate_numba_table_func(
162+
args: Tuple,
163+
kwargs: Dict[str, Any],
164+
func: Callable[..., np.ndarray],
165+
engine_kwargs: Optional[Dict[str, bool]],
166+
name: str,
167+
):
168+
"""
169+
Generate a numba jitted function to apply window calculations table-wise.
170+
171+
Func will be passed a M window size x N number of columns array, and
172+
must return a 1 x N number of columns array. Func is intended to operate
173+
row-wise, but the result will be transposed for axis=1.
174+
175+
1. jit the user's function
176+
2. Return a rolling apply function with the jitted function inline
177+
178+
Parameters
179+
----------
180+
args : tuple
181+
*args to be passed into the function
182+
kwargs : dict
183+
**kwargs to be passed into the function
184+
func : function
185+
function to be applied to each window and will be JITed
186+
engine_kwargs : dict
187+
dictionary of arguments to be passed into numba.jit
188+
name : str
189+
caller (Rolling/Expanding) and original method name for numba cache key
190+
191+
Returns
192+
-------
193+
Numba function
194+
"""
195+
nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)
196+
197+
cache_key = (func, f"{name}_table")
198+
if cache_key in NUMBA_FUNC_CACHE:
199+
return NUMBA_FUNC_CACHE[cache_key]
200+
201+
numba_func = jit_user_function(func, nopython, nogil, parallel)
202+
numba = import_optional_dependency("numba")
203+
204+
@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
205+
def roll_table(
206+
values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int
207+
):
208+
result = np.empty(values.shape)
209+
min_periods_mask = np.empty(values.shape)
210+
for i in numba.prange(len(result)):
211+
start = begin[i]
212+
stop = end[i]
213+
window = values[start:stop]
214+
count_nan = np.sum(np.isnan(window), axis=0)
215+
sub_result = numba_func(window, *args)
216+
nan_mask = len(window) - count_nan >= minimum_periods
217+
min_periods_mask[i, :] = nan_mask
218+
result[i, :] = sub_result
219+
result = np.where(min_periods_mask, result, np.nan)
220+
return result
221+
222+
return roll_table

0 commit comments

Comments
 (0)