From 26b29dc7a70711b78280b25cfe4062765f36ad6c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 4 Dec 2020 13:04:12 -0800 Subject: [PATCH 01/34] Add method keyword and validation --- pandas/core/generic.py | 3 +++ pandas/core/window/rolling.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4a9e020a0fe46..7555eac0736a4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11216,6 +11216,7 @@ def rolling( on: Optional[str] = None, axis: Axis = 0, closed: Optional[str] = None, + method: str = "column", ): axis = self._get_axis_number(axis) @@ -11229,6 +11230,7 @@ def rolling( on=on, axis=axis, closed=closed, + method=method, ) return Rolling( @@ -11240,6 +11242,7 @@ def rolling( on=on, axis=axis, closed=closed, + method=method, ) @final diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e6185f8ae0679..fb24200ce22cb 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -84,6 +84,7 @@ class BaseWindow(ShallowMixin, SelectionMixin): "axis", "on", "closed", + "method", ] exclusions: Set[str] = set() @@ -97,6 +98,7 @@ def __init__( axis: Axis = 0, on: Optional[Union[str, Index]] = None, closed: Optional[str] = None, + method: str = "column", **kwargs, ): @@ -110,6 +112,7 @@ def __init__( self.win_type = win_type self.win_freq = None self.axis = obj._get_axis_number(axis) if axis is not None else None + self.method = method self.validate() @property @@ -159,6 +162,8 @@ def validate(self) -> None: f"{type(self.window).__name__} does not implement " f"the correct signature for get_window_bounds" ) + if self.method not in ["table", "column"]: + raise ValueError("method must be 'table' or 'column") def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: """ @@ -1033,6 +1038,9 @@ def validate(self): else: raise ValueError(f"Invalid window {self.window}") + if self.method != "column": + raise NotImplementedError("'column' is the 
only supported method type.") + def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: """ Center the result in the window for weighted rolling aggregations. From 7739b57534e98d218c91790829e58bb4b0d59bd9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 4 Dec 2020 16:47:52 -0800 Subject: [PATCH 02/34] Add sub numba table func --- pandas/core/window/numba_.py | 57 +++++++++++++++++++++++++++++++++++ pandas/core/window/rolling.py | 13 +++++++- 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 274586e1745b5..324ed4d87ed77 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -161,3 +161,60 @@ def groupby_ewma( return result return groupby_ewma + + +def generate_numba_table_func( + args: Tuple, + kwargs: Dict[str, Any], + func: Callable[..., Scalar], + engine_kwargs: Optional[Dict[str, bool]], +): + """ + Generate a numba jitted function to apply window calculations table-wise + + 1. jit the user's function + 2. 
Return a rolling apply function with the jitted function inline + + Parameters + ---------- + func : function + function to be applied to each window and will be JITed + + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + + Returns + ------- + Numba function + """ + nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) + + cache_key = (func, "rolling_apply") + if cache_key in NUMBA_FUNC_CACHE: + return NUMBA_FUNC_CACHE[cache_key] + + numba_func = jit_user_function(func, nopython, nogil, parallel) + numba = import_optional_dependency("numba") + if parallel: + loop_range = numba.prange + else: + loop_range = range + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def roll_table( + values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int + ): + # TODO: consider axis argument, len should be replaced with axis aware result + result = np.empty(values.shape) + for i in loop_range(len(result)): + start = begin[i] + stop = end[i] + window = values[start:stop, :] + count_nan = np.sum(np.isnan(window)) + sub_result = numba_func(window, *args) + nan_mask = len(window) - count_nan >= minimum_periods + sub_result[~nan_mask] = np.nan + result[i, :] = sub_result + return result + + return roll_table diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index fb24200ce22cb..163e02cb822e1 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -412,6 +412,17 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: self._insert_on_column(out, obj) return out + def _apply_tablewise( + self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + ) -> FrameOrSeriesUnion: + if self._selected_obj.ndim == 1: + raise ValueError("method='table' not applicable for Series objects.") + obj = self._create_data(self._selected_obj) + values = self._prep_values(obj) + # Need to ensure we wrap this correctly, will return ndarray + result = homogeneous_func(values) + 
return result + def _apply( self, func: Callable[..., Any], @@ -460,7 +471,7 @@ def calc(x): return func(x, start, end, min_periods) with np.errstate(all="ignore"): - if values.ndim > 1: + if values.ndim > 1 and self.method == "column": result = np.apply_along_axis(calc, self.axis, values) else: result = calc(values) From 8d66fc447d343853eb5609c9cdf1cfdd167bc4d7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 9 Dec 2020 17:52:01 -0800 Subject: [PATCH 03/34] Fix output result and result min_periods handling --- pandas/core/window/numba_.py | 9 ++++++--- pandas/core/window/rolling.py | 35 ++++++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 324ed4d87ed77..f1593589c9b55 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -189,6 +189,7 @@ def generate_numba_table_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) + # TODO: change this key cache_key = (func, "rolling_apply") if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] @@ -206,15 +207,17 @@ def roll_table( ): # TODO: consider axis argument, len should be replaced with axis aware result result = np.empty(values.shape) + min_periods_mask = np.empty(values.shape) for i in loop_range(len(result)): start = begin[i] stop = end[i] - window = values[start:stop, :] - count_nan = np.sum(np.isnan(window)) + window = values[start:stop] + count_nan = np.sum(np.isnan(window), axis=0) sub_result = numba_func(window, *args) nan_mask = len(window) - count_nan >= minimum_periods - sub_result[~nan_mask] = np.nan + min_periods_mask[i, :] = nan_mask result[i, :] = sub_result + result = np.where(min_periods_mask, result, np.nan) return result return roll_table diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 163e02cb822e1..3dff4f926ea33 100644 --- a/pandas/core/window/rolling.py +++ 
b/pandas/core/window/rolling.py @@ -66,7 +66,10 @@ GroupbyIndexer, VariableWindowIndexer, ) -from pandas.core.window.numba_ import generate_numba_apply_func +from pandas.core.window.numba_ import ( + generate_numba_apply_func, + generate_numba_table_func, +) if TYPE_CHECKING: from pandas import DataFrame, Series @@ -418,10 +421,17 @@ def _apply_tablewise( if self._selected_obj.ndim == 1: raise ValueError("method='table' not applicable for Series objects.") obj = self._create_data(self._selected_obj) - values = self._prep_values(obj) - # Need to ensure we wrap this correctly, will return ndarray + values = self._prep_values(obj.to_numpy()) result = homogeneous_func(values) - return result + out = obj._constructor(result, index=obj.index, columns=obj.columns) + + if out.shape[1] == 0 and obj.shape[1] > 0: + raise DataError("No numeric types to aggregate") + elif out.shape[1] == 0: + return obj.astype("float64") + + self._insert_on_column(out, obj) + return out def _apply( self, @@ -482,7 +492,10 @@ def calc(x): return result - return self._apply_blockwise(homogeneous_func, name) + if self.method == "column": + return self._apply_blockwise(homogeneous_func, name) + else: + return self._apply_tablewise(homogeneous_func, name) def aggregate(self, func, *args, **kwargs): result, how = aggregate(self, func, *args, **kwargs) @@ -1326,8 +1339,16 @@ def apply( if maybe_use_numba(engine): if raw is False: raise ValueError("raw must be `True` when using the numba engine") - apply_func = generate_numba_apply_func(args, kwargs, func, engine_kwargs) - numba_cache_key = (func, "rolling_apply") + if self.method == "column": + apply_func = generate_numba_apply_func( + args, kwargs, func, engine_kwargs + ) + numba_cache_key = (func, "rolling_apply_column") + else: + apply_func = generate_numba_table_func( + args, kwargs, func, engine_kwargs + ) + numba_cache_key = (func, "rolling_apply_table") elif engine in ("cython", None): if engine_kwargs is not None: raise ValueError("cython 
engine does not accept engine_kwargs") From b67e7257a97768c6846debaadcb34a462633b573 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 10 Dec 2020 15:52:52 -0800 Subject: [PATCH 04/34] Fix cache key --- pandas/core/window/numba_.py | 13 +++++++++---- pandas/core/window/rolling.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index f1593589c9b55..277f50d74771b 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -44,7 +44,7 @@ def generate_numba_apply_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) - cache_key = (func, "rolling_apply") + cache_key = (func, "rolling_apply_column") if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] @@ -168,6 +168,7 @@ def generate_numba_table_func( kwargs: Dict[str, Any], func: Callable[..., Scalar], engine_kwargs: Optional[Dict[str, bool]], + name: str, ): """ Generate a numba jitted function to apply window calculations table-wise @@ -177,11 +178,16 @@ def generate_numba_table_func( Parameters ---------- + args : tuple + *args to be passed into the function + kwargs : dict + **kwargs to be passed into the function func : function function to be applied to each window and will be JITed - engine_kwargs : dict dictionary of arguments to be passed into numba.jit + name : str + original method name for numba cache key Returns ------- @@ -189,8 +195,7 @@ def generate_numba_table_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) - # TODO: change this key - cache_key = (func, "rolling_apply") + cache_key = (func, f"rolling_{name}_table") if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3dff4f926ea33..a660e294ec865 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1346,7 +1346,7 @@ def apply( numba_cache_key = (func, 
"rolling_apply_column") else: apply_func = generate_numba_table_func( - args, kwargs, func, engine_kwargs + args, kwargs, func, engine_kwargs, "apply" ) numba_cache_key = (func, "rolling_apply_table") elif engine in ("cython", None): From ea89fb65eb5d3cf6e455f12c14aadbdf831e3f61 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 12:01:40 -0800 Subject: [PATCH 05/34] Just use transpose for axis = 1 --- pandas/core/window/numba_.py | 1 - pandas/core/window/rolling.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 277f50d74771b..41816bfa090d6 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -210,7 +210,6 @@ def generate_numba_table_func( def roll_table( values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int ): - # TODO: consider axis argument, len should be replaced with axis aware result result = np.empty(values.shape) min_periods_mask = np.empty(values.shape) for i in loop_range(len(result)): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a660e294ec865..351676b150e6e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -422,7 +422,9 @@ def _apply_tablewise( raise ValueError("method='table' not applicable for Series objects.") obj = self._create_data(self._selected_obj) values = self._prep_values(obj.to_numpy()) + values = values.T if self.axis == 1 else values result = homogeneous_func(values) + result = result.T if self.axis == 1 else result out = obj._constructor(result, index=obj.index, columns=obj.columns) if out.shape[1] == 0 and obj.shape[1] > 0: From 87570ef10b343ff24e44383e9f196e77159242e2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 12:13:59 -0800 Subject: [PATCH 06/34] Add test comparing column and table methods --- pandas/tests/window/test_numba.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git 
a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index e890108b22c3e..9751582764229 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -121,3 +121,20 @@ def test_invalid_kwargs_nopython(): Series(range(1)).rolling(1).apply( lambda x: x, kwargs={"a": 1}, engine="numba", raw=True ) + + +@td.skip_if_no("numba", "0.46.0") +def test_table_method(axis, center, closed, nogil, parallel, nopython): + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + def f(x): + return np.sum(x, axis=0) + 1 + + df = DataFrame(np.eye(3)) + result = df.rolling( + 2, method="table", axis=axis, center=center, closed=closed + ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") + expected = df.rolling( + 2, method="column", axis=axis, center=center, closed=closed + ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") + tm.assert_frame_equal(result, expected) From 1477004b9684e890bef6451be851fa5b4deb5f5e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 12:17:43 -0800 Subject: [PATCH 07/34] Test invalid series table rolling --- pandas/tests/window/test_numba.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 9751582764229..afc52ec2ee316 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -123,6 +123,17 @@ def test_invalid_kwargs_nopython(): ) +@td.skip_if_no("numba", "0.46.0") +def test_table_series_valueerror(): + def f(x): + return np.sum(x, axis=0) + 1 + + with pytest.raises( + ValueError, match="method='table' not applicable for Series objects." 
+ ): + Series(range(1)).rolling(1, method="table").apply(f, engine="numba", raw=True) + + @td.skip_if_no("numba", "0.46.0") def test_table_method(axis, center, closed, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} From 9bd2d0dd866312f65a05f6bd557e9ae4f0e72ebf Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 12:42:57 -0800 Subject: [PATCH 08/34] Adjust some tests --- pandas/core/window/ewm.py | 1 + pandas/tests/window/test_numba.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index f8237a436f436..1b8c6853615a2 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -241,6 +241,7 @@ def __init__( self.on = None self.center = False self.closed = None + self.method = "column" if times is not None: if isinstance(times, str): times = self._selected_obj[times] diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index afc52ec2ee316..0aafed2790d57 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -63,7 +63,7 @@ def func_2(x): tm.assert_series_equal(result, expected) # func_1 should be in the cache now - assert (func_1, "rolling_apply") in NUMBA_FUNC_CACHE + assert (func_1, "rolling_apply_column") in NUMBA_FUNC_CACHE result = roll.apply( func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True From 9be922b3a1a2feba8dc93a6ad9966469018a80b3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 12:47:45 -0800 Subject: [PATCH 09/34] Add test for invalid method keyword --- pandas/tests/window/test_rolling.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 10b23cadfe279..cb5ba8e381fa3 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1123,3 +1123,8 @@ def 
test_rolling_skew_kurt_large_value_range(method, values): result = getattr(s.rolling(4), method)() expected = Series([np.nan] * 3 + values) tm.assert_series_equal(result, expected) + + +def test_invalid_method(): + with pytest.raises(ValueError, match="method must be 'table' or 'column"): + Series(range(1)).rolling(1, method="foo") From a6473a5dbd32f88c75c502448a024a44f8fc88fc Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 12:50:39 -0800 Subject: [PATCH 10/34] Notimplemented test with win_type --- pandas/tests/window/test_win_type.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 091b5914a7c3e..e4000f0036a06 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -117,3 +117,10 @@ def b(x): expected.columns = ["a", "b"] result = r.aggregate([a, b]) tm.assert_frame_equal(result, expected) + + +def test_win_type_with_method_invalid(): + with pytest.raises( + NotImplementedError, match="'column' is the only supported method type." 
+ ): + Series(range(1)).rolling(1, win_type="triang", method="table") From 4fa7abd3de4b6f2f60a34ed90c6d8165f82b2940 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 12:51:57 -0800 Subject: [PATCH 11/34] wrap table method tests in a class --- pandas/tests/window/test_numba.py | 45 ++++++++++++++++--------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 0aafed2790d57..5cd93d928e3bf 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -124,28 +124,29 @@ def test_invalid_kwargs_nopython(): @td.skip_if_no("numba", "0.46.0") -def test_table_series_valueerror(): - def f(x): - return np.sum(x, axis=0) + 1 +class TestTableMethod: + def test_table_series_valueerror(self): + def f(x): + return np.sum(x, axis=0) + 1 + + with pytest.raises( + ValueError, match="method='table' not applicable for Series objects." + ): + Series(range(1)).rolling(1, method="table").apply( + f, engine="numba", raw=True + ) - with pytest.raises( - ValueError, match="method='table' not applicable for Series objects." 
- ): - Series(range(1)).rolling(1, method="table").apply(f, engine="numba", raw=True) + def test_table_method(self, axis, center, closed, nogil, parallel, nopython): + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + def f(x): + return np.sum(x, axis=0) + 1 -@td.skip_if_no("numba", "0.46.0") -def test_table_method(axis, center, closed, nogil, parallel, nopython): - engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - - def f(x): - return np.sum(x, axis=0) + 1 - - df = DataFrame(np.eye(3)) - result = df.rolling( - 2, method="table", axis=axis, center=center, closed=closed - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") - expected = df.rolling( - 2, method="column", axis=axis, center=center, closed=closed - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") - tm.assert_frame_equal(result, expected) + df = DataFrame(np.eye(3)) + result = df.rolling( + 2, method="table", axis=axis, center=center, closed=closed + ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") + expected = df.rolling( + 2, method="column", axis=axis, center=center, closed=closed + ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") + tm.assert_frame_equal(result, expected) From 2d7a58ec363aab65cfb0cd659eeb156ff7766bce Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 13:05:21 -0800 Subject: [PATCH 12/34] Add method keyword for expanding --- pandas/core/window/expanding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 94875ba86db65..f81075941b277 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -61,8 +61,8 @@ class Expanding(RollingAndExpandingMixin): _attributes = ["min_periods", "center", "axis"] - def __init__(self, obj, min_periods=1, center=None, axis=0, **kwargs): - super().__init__(obj=obj, min_periods=min_periods, 
center=center, axis=axis) + def __init__(self, obj, min_periods=1, center=None, axis=0, method='column', **kwargs): + super().__init__(obj=obj, min_periods=min_periods, center=center, axis=axis, method=method) @property def _constructor(self): From 76ead37f17773e281cc7ba83d5aca21c2d45db4f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 15:23:03 -0800 Subject: [PATCH 13/34] Change caching key to handing callers from Rolling or Expanding --- pandas/core/generic.py | 10 ++++++++-- pandas/core/window/expanding.py | 10 +++++++--- pandas/core/window/numba_.py | 9 ++++++--- pandas/core/window/rolling.py | 9 +++++---- pandas/tests/window/test_numba.py | 8 ++++++-- 5 files changed, 32 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1f638ce390273..9b5c68ae3abdd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11242,7 +11242,11 @@ def rolling( @final @doc(Expanding) def expanding( - self, min_periods: int = 1, center: Optional[bool_t] = None, axis: Axis = 0 + self, + min_periods: int = 1, + center: Optional[bool_t] = None, + axis: Axis = 0, + method: str = "method", ) -> Expanding: axis = self._get_axis_number(axis) if center is not None: @@ -11254,7 +11258,9 @@ def expanding( else: center = False - return Expanding(self, min_periods=min_periods, center=center, axis=axis) + return Expanding( + self, min_periods=min_periods, center=center, axis=axis, method=method + ) @final @doc(ExponentialMovingWindow) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index f81075941b277..95ec6c64a4bf9 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -59,10 +59,14 @@ class Expanding(RollingAndExpandingMixin): 4 7.0 """ - _attributes = ["min_periods", "center", "axis"] + _attributes = ["min_periods", "center", "axis", "method"] - def __init__(self, obj, min_periods=1, center=None, axis=0, method='column', **kwargs): - 
super().__init__(obj=obj, min_periods=min_periods, center=center, axis=axis, method=method) + def __init__( + self, obj, min_periods=1, center=None, axis=0, method="column", **kwargs + ): + super().__init__( + obj=obj, min_periods=min_periods, center=center, axis=axis, method=method + ) @property def _constructor(self): diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 41816bfa090d6..24854afb88b84 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -17,6 +17,7 @@ def generate_numba_apply_func( kwargs: Dict[str, Any], func: Callable[..., Scalar], engine_kwargs: Optional[Dict[str, bool]], + name: str, ): """ Generate a numba jitted apply function specified by values from engine_kwargs. @@ -37,6 +38,8 @@ def generate_numba_apply_func( function to be applied to each window and will be JITed engine_kwargs : dict dictionary of arguments to be passed into numba.jit + name: str + name of the caller (Rolling/Expanding) Returns ------- @@ -44,7 +47,7 @@ def generate_numba_apply_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) - cache_key = (func, "rolling_apply_column") + cache_key = (func, f"{name}_apply_column") if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] @@ -187,7 +190,7 @@ def generate_numba_table_func( engine_kwargs : dict dictionary of arguments to be passed into numba.jit name : str - original method name for numba cache key + caller (Rolling/Expanding) and original method name for numba cache key Returns ------- @@ -195,7 +198,7 @@ def generate_numba_table_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) - cache_key = (func, f"rolling_{name}_table") + cache_key = (func, f"{name}_table") if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 351676b150e6e..203fd2f9caeff 100644 --- a/pandas/core/window/rolling.py +++ 
b/pandas/core/window/rolling.py @@ -1341,16 +1341,17 @@ def apply( if maybe_use_numba(engine): if raw is False: raise ValueError("raw must be `True` when using the numba engine") + caller_name = type(self).__name__ if self.method == "column": apply_func = generate_numba_apply_func( - args, kwargs, func, engine_kwargs + args, kwargs, func, engine_kwargs, caller_name ) - numba_cache_key = (func, "rolling_apply_column") + numba_cache_key = (func, f"{caller_name}_apply_column") else: apply_func = generate_numba_table_func( - args, kwargs, func, engine_kwargs, "apply" + args, kwargs, func, engine_kwargs, f"{caller_name}_apply" ) - numba_cache_key = (func, "rolling_apply_table") + numba_cache_key = (func, f"{caller_name}_apply_table") elif engine in ("cython", None): if engine_kwargs is not None: raise ValueError("cython engine does not accept engine_kwargs") diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 5cd93d928e3bf..3aede5e5503f5 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -63,7 +63,7 @@ def func_2(x): tm.assert_series_equal(result, expected) # func_1 should be in the cache now - assert (func_1, "rolling_apply_column") in NUMBA_FUNC_CACHE + assert (func_1, "Rolling_apply_column") in NUMBA_FUNC_CACHE result = roll.apply( func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True @@ -124,6 +124,8 @@ def test_invalid_kwargs_nopython(): @td.skip_if_no("numba", "0.46.0") +@pytest.mark.filterwarnings("ignore:\\nThe keyword argument") +# Filter warnings when parallel=True and the function can't be parallelized by Numba class TestTableMethod: def test_table_series_valueerror(self): def f(x): @@ -136,7 +138,9 @@ def f(x): f, engine="numba", raw=True ) - def test_table_method(self, axis, center, closed, nogil, parallel, nopython): + def test_table_method_rolling( + self, axis, center, closed, nogil, parallel, nopython + ): engine_kwargs = {"nogil": nogil, "parallel": parallel, 
"nopython": nopython} def f(x): From 12e7c57a5c3e2d565e78a0e78cf1ff5569eec516 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 15:43:16 -0800 Subject: [PATCH 14/34] Test expanding table --- pandas/tests/window/test_numba.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 3aede5e5503f5..a96a047f6deef 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -154,3 +154,18 @@ def f(x): 2, method="column", axis=axis, center=center, closed=closed ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") tm.assert_frame_equal(result, expected) + + def test_table_method_expanding(self, axis, nogil, parallel, nopython): + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + def f(x): + return np.sum(x, axis=0) + 1 + + df = DataFrame(np.eye(3)) + result = df.expanding(method="table", axis=axis).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + expected = df.expanding(method="column", axis=axis).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + tm.assert_frame_equal(result, expected) From 93c546e76092c678acd393ae28e7d54eba184335 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 18:02:17 -0800 Subject: [PATCH 15/34] Change column to single --- pandas/core/generic.py | 4 ++-- pandas/core/window/ewm.py | 2 +- pandas/core/window/expanding.py | 10 +++++++++- pandas/core/window/numba_.py | 2 +- pandas/core/window/rolling.py | 26 +++++++++++++++++--------- pandas/tests/window/test_numba.py | 6 +++--- pandas/tests/window/test_rolling.py | 2 +- pandas/tests/window/test_win_type.py | 2 +- 8 files changed, 35 insertions(+), 19 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9b5c68ae3abdd..c4ac985d72f8b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11210,7 +11210,7 @@ def rolling( on: 
Optional[str] = None, axis: Axis = 0, closed: Optional[str] = None, - method: str = "column", + method: str = "single", ): axis = self._get_axis_number(axis) @@ -11246,7 +11246,7 @@ def expanding( min_periods: int = 1, center: Optional[bool_t] = None, axis: Axis = 0, - method: str = "method", + method: str = "single", ) -> Expanding: axis = self._get_axis_number(axis) if center is not None: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 1b8c6853615a2..d0a49b76c1419 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -241,7 +241,7 @@ def __init__( self.on = None self.center = False self.closed = None - self.method = "column" + self.method = "single" if times is not None: if isinstance(times, str): times = self._selected_obj[times] diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 95ec6c64a4bf9..74fb4b3c59175 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -24,6 +24,14 @@ class Expanding(RollingAndExpandingMixin): center : bool, default False Set the labels at the center of the window. axis : int or str, default 0 + method : str, default 'single' + Execute the expanding operation per single column or row (`'single'`) or over the entire + object (`'table'`). + + This argument is only implemented when specifying ``engine='numba'`` in the method + call. + + .. 
versionadded:: 1.3.0 Returns ------- @@ -62,7 +70,7 @@ class Expanding(RollingAndExpandingMixin): _attributes = ["min_periods", "center", "axis", "method"] def __init__( - self, obj, min_periods=1, center=None, axis=0, method="column", **kwargs + self, obj, min_periods=1, center=None, axis=0, method="single", **kwargs ): super().__init__( obj=obj, min_periods=min_periods, center=center, axis=axis, method=method diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 24854afb88b84..2a7202aba35ac 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -47,7 +47,7 @@ def generate_numba_apply_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs) - cache_key = (func, f"{name}_apply_column") + cache_key = (func, f"{name}_apply_single") if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 203fd2f9caeff..ea063e00556c5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -101,7 +101,7 @@ def __init__( axis: Axis = 0, on: Optional[Union[str, Index]] = None, closed: Optional[str] = None, - method: str = "column", + method: str = "single", **kwargs, ): @@ -165,8 +165,8 @@ def validate(self) -> None: f"{type(self.window).__name__} does not implement " f"the correct signature for get_window_bounds" ) - if self.method not in ["table", "column"]: - raise ValueError("method must be 'table' or 'column") + if self.method not in ["table", "single"]: + raise ValueError("method must be 'table' or 'single") def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: """ @@ -483,7 +483,7 @@ def calc(x): return func(x, start, end, min_periods) with np.errstate(all="ignore"): - if values.ndim > 1 and self.method == "column": + if values.ndim > 1 and self.method == "single": result = np.apply_along_axis(calc, self.axis, values) else: result = calc(values) @@ -494,7 +494,7 @@ def calc(x): return 
result - if self.method == "column": + if self.method == "single": return self._apply_blockwise(homogeneous_func, name) else: return self._apply_tablewise(homogeneous_func, name) @@ -918,6 +918,14 @@ class Window(BaseWindow): .. versionchanged:: 1.2.0 The closed parameter with fixed windows is now supported. + method : str, default 'single' + Execute the rolling operation per single column or row (`'single'`) or over the entire + object (`'table'`). + + This argument is only implemented when specifying ``engine='numba'`` in the method + call. + + .. versionadded:: 1.3.0 Returns ------- @@ -1064,8 +1072,8 @@ def validate(self): else: raise ValueError(f"Invalid window {self.window}") - if self.method != "column": - raise NotImplementedError("'column' is the only supported method type.") + if self.method != "single": + raise NotImplementedError("'single' is the only supported method type.") def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: """ @@ -1342,11 +1350,11 @@ def apply( if raw is False: raise ValueError("raw must be `True` when using the numba engine") caller_name = type(self).__name__ - if self.method == "column": + if self.method == "single": apply_func = generate_numba_apply_func( args, kwargs, func, engine_kwargs, caller_name ) - numba_cache_key = (func, f"{caller_name}_apply_column") + numba_cache_key = (func, f"{caller_name}_apply_single") else: apply_func = generate_numba_table_func( args, kwargs, func, engine_kwargs, f"{caller_name}_apply" diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index a96a047f6deef..6ac98574e122d 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -63,7 +63,7 @@ def func_2(x): tm.assert_series_equal(result, expected) # func_1 should be in the cache now - assert (func_1, "Rolling_apply_column") in NUMBA_FUNC_CACHE + assert (func_1, "Rolling_apply_single") in NUMBA_FUNC_CACHE result = roll.apply( func_2, engine="numba", 
engine_kwargs=engine_kwargs, raw=True @@ -151,7 +151,7 @@ def f(x): 2, method="table", axis=axis, center=center, closed=closed ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") expected = df.rolling( - 2, method="column", axis=axis, center=center, closed=closed + 2, method="single", axis=axis, center=center, closed=closed ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") tm.assert_frame_equal(result, expected) @@ -165,7 +165,7 @@ def f(x): result = df.expanding(method="table", axis=axis).apply( f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) - expected = df.expanding(method="column", axis=axis).apply( + expected = df.expanding(method="single", axis=axis).apply( f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index cb5ba8e381fa3..880220f062efc 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1126,5 +1126,5 @@ def test_rolling_skew_kurt_large_value_range(method, values): def test_invalid_method(): - with pytest.raises(ValueError, match="method must be 'table' or 'column"): + with pytest.raises(ValueError, match="method must be 'table' or 'single"): Series(range(1)).rolling(1, method="foo") diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index e4000f0036a06..5cb7644a3c4d8 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -121,6 +121,6 @@ def b(x): def test_win_type_with_method_invalid(): with pytest.raises( - NotImplementedError, match="'column' is the only supported method type." + NotImplementedError, match="'single' is the only supported method type." 
): Series(range(1)).rolling(1, win_type="triang", method="table") From 1eaad79728189c377e960b2ff12b6643eb3c1340 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 11 Dec 2020 20:33:38 -0800 Subject: [PATCH 16/34] flake8 --- pandas/core/window/expanding.py | 8 ++++---- pandas/core/window/rolling.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 74fb4b3c59175..9c521dd1f5fcf 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -25,11 +25,11 @@ class Expanding(RollingAndExpandingMixin): Set the labels at the center of the window. axis : int or str, default 0 method : str, default 'single' - Execute the expanding operation per single column or row (`'single'`) or over the entire - object (`'table'`). + Execute the expanding operation per single column or row (`'single'`) + or over the entire object (`'table'`). - This argument is only implemented when specifying ``engine='numba'`` in the method - call. + This argument is only implemented when specifying ``engine='numba'`` + in the method call. .. versionadded:: 1.3.0 diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ea063e00556c5..e30dbd33fcd59 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -919,11 +919,11 @@ class Window(BaseWindow): The closed parameter with fixed windows is now supported. method : str, default 'single' - Execute the rolling operation per single column or row (`'single'`) or over the entire - object (`'table'`). + Execute the rolling operation per single column or row (`'single'`) + or over the entire object (`'table'`). - This argument is only implemented when specifying ``engine='numba'`` in the method - call. + This argument is only implemented when specifying ``engine='numba'`` + in the method call. .. 
versionadded:: 1.3.0 From 5d040d0e68062a67d81614181df91afd38a30e4c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 12 Dec 2020 01:37:40 -0800 Subject: [PATCH 17/34] Add weighted mean example as a test --- pandas/tests/window/test_numba.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 6ac98574e122d..fed28804d290d 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -155,6 +155,27 @@ def f(x): ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") tm.assert_frame_equal(result, expected) + def test_table_method_rolling_weighted_mean(self): + + def weighted_mean(x): + arr = np.ones((1, x.shape[1])) + arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum() + return arr + + df = DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) + result = df.rolling(2, method="table", min_periods=0).apply( + weighted_mean, raw=True, engine="numba" + ) + expected = DataFrame( + [ + [1.0, 2.0, 1.0], + [1.8, 2.0, 1.0], + [3.333333, 2.333333, 1.0], + [1.555556, 7, 1.0], + ] + ) + tm.assert_frame_equal(result, expected) + def test_table_method_expanding(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} From 57fc5256448e07c86ddc21590322d8f08436df09 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 12 Dec 2020 01:39:38 -0800 Subject: [PATCH 18/34] black --- pandas/tests/window/test_numba.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index fed28804d290d..035a83b33e140 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -156,7 +156,6 @@ def f(x): tm.assert_frame_equal(result, expected) def test_table_method_rolling_weighted_mean(self): - def weighted_mean(x): arr = np.ones((1, x.shape[1])) arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / 
x[:, 2].sum() From 84619a1beaa8b31a81480fa1d816c5b52678e914 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 12 Dec 2020 02:06:58 -0800 Subject: [PATCH 19/34] Add whatsnew and window.rst example --- doc/source/user_guide/window.rst | 37 +++++++++++++++++++++++++------- doc/source/whatsnew/v1.3.0.rst | 5 +++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 05f8be091fa25..2c410cdf8eb39 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -37,14 +37,14 @@ pandas supports 4 types of windowing operations: #. Expanding window: Accumulating window over the values. #. Exponentially Weighted window: Accumulating and exponentially weighted window over the values. -============================= ================= =========================== =========================== ======================== -Concept Method Returned Object Supports time-based windows Supports chained groupby -============================= ================= =========================== =========================== ======================== -Rolling window ``rolling`` ``Rolling`` Yes Yes -Weighted window ``rolling`` ``Window`` No No -Expanding window ``expanding`` ``Expanding`` No Yes -Exponentially Weighted window ``ewm`` ``ExponentialMovingWindow`` No Yes (as of version 1.2) -============================= ================= =========================== =========================== ======================== +============================= ================= =========================== =========================== ======================== =================================== +Concept Method Returned Object Supports time-based windows Supports chained groupby Supports table method (version 1.3) +============================= ================= =========================== =========================== ======================== =================================== +Rolling window ``rolling`` 
``Rolling`` Yes Yes Yes +Weighted window ``rolling`` ``Window`` No No No +Expanding window ``expanding`` ``Expanding`` No Yes Yes +Exponentially Weighted window ``ewm`` ``ExponentialMovingWindow`` No Yes (as of version 1.2) No +============================= ================= =========================== =========================== ======================== =================================== As noted above, some operations support specifying a window based on a time offset: @@ -76,6 +76,27 @@ which will first group the data by the specified keys and then perform a windowi to compute the rolling sums to preserve accuracy as much as possible. +.. versionadded:: 1.3 + +Some windowing operations also support the ``method='table'`` option in the constructor which +performs the windowing operaion over an entire :class:`DataFrame` instead of a single column or row at a time. +This can provide a useful performance benefit for :class:`DataFrame`s with a many columns or rows +(with the corresponding ``axis`` argument) or the ability to utilize other columns during the windowing +operation. The ``method='table'`` option can only be used if ``engine='numba'`` is specified +in the corresponding method call. + + +.. ipython:: python + + def weighted_mean(x): + arr = np.ones((1, x.shape[1])) + arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum() + return arr + + df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) + df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba") + + All windowing operations support a ``min_periods`` argument that dictates the minimum amount of non-``np.nan`` values a window must have; otherwise, the resulting value is ``np.nan``. 
``min_peridos`` defaults to 1 for time-based windows and ``window`` for fixed windows diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ab9f303bec6aa..c3b0165d06ee7 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -13,6 +13,11 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ +.. _whatsnew_130.window_method_table: + +:class:`Rolling` and :class:`Expanding` now support a ``method`` argument with a +``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. +See :ref:`window.overview` for performance and functional benefits. (:issue:`15095`) .. _whatsnew_130.enhancements.other: From 9f172bd1ae42d340a898f2db468224336df4890d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 17:53:43 -0800 Subject: [PATCH 20/34] Add as of version to table --- doc/source/user_guide/window.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 2c410cdf8eb39..c09fe1e3a9c6e 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -40,9 +40,9 @@ pandas supports 4 types of windowing operations: ============================= ================= =========================== =========================== ======================== =================================== Concept Method Returned Object Supports time-based windows Supports chained groupby Supports table method (version 1.3) ============================= ================= =========================== =========================== ======================== =================================== -Rolling window ``rolling`` ``Rolling`` Yes Yes Yes +Rolling window ``rolling`` ``Rolling`` Yes Yes Yes (as of version 1.3) Weighted window ``rolling`` ``Window`` No No No -Expanding window ``expanding`` ``Expanding`` No Yes Yes +Expanding window ``expanding`` ``Expanding`` No Yes Yes (as of version 1.3)
Exponentially Weighted window ``ewm`` ``ExponentialMovingWindow`` No Yes (as of version 1.2) No ============================= ================= =========================== =========================== ======================== =================================== From cfe909cc2d04e99721d508468a047aa1f9b9fe12 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 18:05:18 -0800 Subject: [PATCH 21/34] Fix typo and add wikipedia link in window.rst --- doc/source/user_guide/window.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index c09fe1e3a9c6e..6dbef4f528fd9 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -80,11 +80,13 @@ which will first group the data by the specified keys and then perform a windowi Some windowing operations also support the ``method='table'`` option in the constructor which performs the windowing operaion over an entire :class:`DataFrame` instead of a single column or row at a time. -This can provide a useful performance benefit for :class:`DataFrame`s with a many columns or rows +This can provide a useful performance benefit for :class:`DataFrame`s with many columns or rows (with the corresponding ``axis`` argument) or the ability to utilize other columns during the windowing operation. The ``method='table'`` option can only be used if ``engine='numba'`` is specified in the corresponding method call. +For example, a `weighted mean <https://en.wikipedia.org/wiki/Weighted_arithmetic_mean>`__ calculation can +be calculated with :meth:`~Rolling.apply` by specifying a separate column of weights. ..
ipython:: python From 884c7fff76b5580d58ef4e688e162a56d76d7cac Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 18:17:03 -0800 Subject: [PATCH 22/34] Use correct type and document func output and input --- pandas/core/window/numba_.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 2a7202aba35ac..9e5453171a45e 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -169,12 +169,16 @@ def groupby_ewma( def generate_numba_table_func( args: Tuple, kwargs: Dict[str, Any], - func: Callable[..., Scalar], + func: Callable[..., np.ndarray], engine_kwargs: Optional[Dict[str, bool]], name: str, ): """ - Generate a numba jitted function to apply window calculations table-wise + Generate a numba jitted function to apply window calculations table-wise. + + Func will be passed a M window size x N number of columns array, and + must return a 1 x N number of columns array. Func is intended to operate row-wise, + but the result will be transposed for axis=1. 1. jit the user's function 2. Return a rolling apply function with the jitted function inline From 08f23e0347a3f51ae41db2977dc2f92ebd87ba11 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 18:17:45 -0800 Subject: [PATCH 23/34] Newline --- pandas/core/window/numba_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 9e5453171a45e..b41da3728b4fb 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -177,8 +177,8 @@ def generate_numba_table_func( Generate a numba jitted function to apply window calculations table-wise. Func will be passed a M window size x N number of columns array, and - must return a 1 x N number of columns array. Func is intended to operate row-wise, - but the result will be transposed for axis=1. + must return a 1 x N number of columns array. 
Func is intended to operate + row-wise, but the result will be transposed for axis=1. 1. jit the user's function 2. Return a rolling apply function with the jitted function inline From 27e9bbb9907b8bdb0daaed02832d55949cbdd765 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 18:25:01 -0800 Subject: [PATCH 24/34] Plural -> singular --- doc/source/user_guide/window.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 6dbef4f528fd9..6af775268e518 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -80,7 +80,7 @@ which will first group the data by the specified keys and then perform a windowi Some windowing operations also support the ``method='table'`` option in the constructor which performs the windowing operaion over an entire :class:`DataFrame` instead of a single column or row at a time. -This can provide a useful performance benefit for :class:`DataFrame`s with many columns or rows +This can provide a useful performance benefit for a :class:`DataFrame` with many columns or rows (with the corresponding ``axis`` argument) or the ability to utilize other columns during the windowing operation. The ``method='table'`` option can only be used if ``engine='numba'`` is specified in the corresponding method call. 
From b4fa92c903a3f963cd31c3a0e6f2399f28257ba7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 18:30:15 -0800 Subject: [PATCH 25/34] Add noqa in rst --- doc/source/user_guide/window.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 6af775268e518..e6b6e06426552 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -96,7 +96,7 @@ be calculated with :meth:`~Rolling.apply` by specifying a separate column of wei return arr df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) - df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba") + df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba") # noqa:E501 All windowing operations support a ``min_periods`` argument that dictates the minimum amount of From 354c698131256455cc1ec1de0b5d8a5dbb4721ec Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 18:31:47 -0800 Subject: [PATCH 26/34] Skip if scipy isnt installed --- pandas/tests/window/test_win_type.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 5cb7644a3c4d8..5734051f5f432 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -119,6 +119,7 @@ def b(x): tm.assert_frame_equal(result, expected) +@td.skip_if_no_scipy def test_win_type_with_method_invalid(): with pytest.raises( NotImplementedError, match="'single' is the only supported method type." 
From 6fdfef4cf91a81c1ebae6a3a2c1921a1ed5759d3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 23:12:55 -0800 Subject: [PATCH 27/34] Add asv benchmark --- asv_bench/benchmarks/rolling.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 5a36cff7908f0..894e6efccbbf6 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -252,4 +252,21 @@ def time_groupby_mean(self, engine): self.gb_ewm.mean(engine=engine) +def table_method_func(x): + return np.sum(x, axis=0) + 1 + + +class TableMethod: + + params = ["single", "table"] + param_names = ["method"] + + def setup(self, method): + df = pd.DataFrame(np.random.randn(10, 1000)) + self.roll = df.rolling(2, method=method) + + def time_apply(self, method): + self.roll.apply(table_method_func, raw=True, engine="numba") + + from .pandas_vb_common import setup # noqa: F401 isort:skip From 72e50d3905ef4f37aa07ca6628fc2e707b6415fe Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 13 Dec 2020 23:35:06 -0800 Subject: [PATCH 28/34] Add min periods for min version build --- pandas/tests/window/test_numba.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 035a83b33e140..a946bcef23a15 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -148,10 +148,10 @@ def f(x): df = DataFrame(np.eye(3)) result = df.rolling( - 2, method="table", axis=axis, center=center, closed=closed + 2, method="table", axis=axis, center=center, closed=closed, min_periods=0 ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") expected = df.rolling( - 2, method="single", axis=axis, center=center, closed=closed + 2, method="single", axis=axis, center=center, closed=closed, min_periods=0 ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") 
tm.assert_frame_equal(result, expected) From 55f44af1c417e20c3023c26b8d2e73eac3130733 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 17 Dec 2020 09:58:08 -0800 Subject: [PATCH 29/34] Remove redundant 1.3 doc --- doc/source/user_guide/window.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index e6b6e06426552..08641bc5b17ae 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -38,7 +38,7 @@ pandas supports 4 types of windowing operations: #. Exponentially Weighted window: Accumulating and exponentially weighted window over the values. ============================= ================= =========================== =========================== ======================== =================================== -Concept Method Returned Object Supports time-based windows Supports chained groupby Supports table method (version 1.3) +Concept Method Returned Object Supports time-based windows Supports chained groupby Supports table method ============================= ================= =========================== =========================== ======================== =================================== Rolling window ``rolling`` ``Rolling`` Yes Yes Yes (as of version 1.3) Weighted window ``rolling`` ``Window`` No No No From 24a80dbe10f26f4cf541dd32a6671b285863b0cb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 17 Dec 2020 14:31:00 -0800 Subject: [PATCH 30/34] Remove closed and center parameterizations --- pandas/tests/window/test_numba.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index a946bcef23a15..4d22495e6c69a 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -138,21 +138,19 @@ def f(x): f, engine="numba", raw=True ) - def test_table_method_rolling( - self, axis, center, closed, nogil, parallel, 
nopython - ): + def test_table_method_rolling(self, axis, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): return np.sum(x, axis=0) + 1 df = DataFrame(np.eye(3)) - result = df.rolling( - 2, method="table", axis=axis, center=center, closed=closed, min_periods=0 - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") - expected = df.rolling( - 2, method="single", axis=axis, center=center, closed=closed, min_periods=0 - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") + result = df.rolling(2, method="table", axis=axis, min_periods=0).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + expected = df.rolling(2, method="single", axis=axis, min_periods=0).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) tm.assert_frame_equal(result, expected) def test_table_method_rolling_weighted_mean(self): From ba86276b3d6d9e30086adc4689a3d4cca3e9ee09 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Dec 2020 12:23:23 -0800 Subject: [PATCH 31/34] Clarify doc --- pandas/core/window/expanding.py | 6 +++--- pandas/core/window/rolling.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 9c521dd1f5fcf..da538be117e5a 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -24,9 +24,9 @@ class Expanding(RollingAndExpandingMixin): center : bool, default False Set the labels at the center of the window. axis : int or str, default 0 - method : str, default 'single' - Execute the expanding operation per single column or row (`'single'`) - or over the entire object (`'table'`). + method : str {'single', 'table'}, default 'single' + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). This argument is only implemented when specifying ``engine='numba'`` in the method call. 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e30dbd33fcd59..849e78bfea360 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -918,9 +918,9 @@ class Window(BaseWindow): .. versionchanged:: 1.2.0 The closed parameter with fixed windows is now supported. - method : str, default 'single' - Execute the rolling operation per single column or row (`'single'`) - or over the entire object (`'table'`). + method : str {'single', 'table'}, default 'single' + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). This argument is only implemented when specifying ``engine='numba'`` in the method call. From 55156cd1aed0af9a50bffc11d0c8d9b308b5815b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 22 Dec 2020 12:26:42 -0800 Subject: [PATCH 32/34] Fix benchmark --- asv_bench/benchmarks/rolling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 894e6efccbbf6..ab9c46fd2bf0b 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -262,11 +262,12 @@ class TableMethod: param_names = ["method"] def setup(self, method): - df = pd.DataFrame(np.random.randn(10, 1000)) - self.roll = df.rolling(2, method=method) + self.df = pd.DataFrame(np.random.randn(10, 1000)) def time_apply(self, method): - self.roll.apply(table_method_func, raw=True, engine="numba") + self.df.rolling(2, method=method).apply( + table_method_func, raw=True, engine="numba" + ) from .pandas_vb_common import setup # noqa: F401 isort:skip From f4ba0c6a279d27f61a4a8dcc894d5efa18933c0e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 23 Dec 2020 09:51:14 -0800 Subject: [PATCH 33/34] just use prange --- pandas/core/window/numba_.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 
b41da3728b4fb..7f968e19bceac 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -208,10 +208,6 @@ def generate_numba_table_func( numba_func = jit_user_function(func, nopython, nogil, parallel) numba = import_optional_dependency("numba") - if parallel: - loop_range = numba.prange - else: - loop_range = range @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) def roll_table( @@ -219,7 +215,7 @@ def roll_table( ): result = np.empty(values.shape) min_periods_mask = np.empty(values.shape) - for i in loop_range(len(result)): + for i in numba.prange(len(result)): start = begin[i] stop = end[i] window = values[start:stop] From 9ad8064864110bbf3b00b30915817648058118de Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 23 Dec 2020 13:16:29 -0800 Subject: [PATCH 34/34] Forgot decorator as part of merge conflict fixup --- pandas/tests/window/test_win_type.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 679f3f147f112..4b1028e165c80 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -128,6 +128,7 @@ def test_win_type_with_method_invalid(): Series(range(1)).rolling(1, win_type="triang", method="table") +@td.skip_if_no_scipy @pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")]) def test_consistent_win_type_freq(arg): # GH 15969