From 69f084fac9dd7761ddb869308f4a1eb4b4e82dd6 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 4 Sep 2020 18:39:45 -0400 Subject: [PATCH 01/22] updated fixed indexer to work with rolling df and groupby --- pandas/core/window/indexers.py | 20 ++++++++++---------- pandas/core/window/rolling.py | 11 +---------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index a21521f4ce8bb..a23fc4fba0b8a 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -78,17 +78,17 @@ def get_window_bounds( closed: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: - start_s = np.zeros(self.window_size, dtype="int64") - start_e = ( - np.arange(self.window_size, num_values, dtype="int64") - - self.window_size - + 1 - ) - start = np.concatenate([start_s, start_e])[:num_values] + if center: + offset = self.window_size // 2 + else: + offset = 0 + + end = np.arange(1 + offset, num_values + 1 + offset) + start = end - self.window_size + + end = np.clip(end, 1, num_values) + start = np.clip(start, 0, num_values - 1) - end_s = np.arange(self.window_size, dtype="int64") + 1 - end_e = start_e + self.window_size - end = np.concatenate([end_s, end_e])[:num_values] return start, end diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 39fcfcbe2bff6..331be6ff69eb0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -494,9 +494,7 @@ def _get_cython_func_type(self, func: str) -> Callable: Variable algorithms do not use window while fixed do. """ - if self.is_freq_type or isinstance(self.window, BaseIndexer): - return self._get_roll_func(f"{func}_variable") - return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) + return self._get_roll_func(f"{func}_variable") def _get_window_indexer(self, window: int) -> BaseIndexer: """ @@ -611,13 +609,9 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() - offset = calculate_center_offset(window) if center else 0 - additional_nans = np.array([np.nan] * offset) - if not is_weighted: def calc(x): - x = np.concatenate((x, additional_nans)) if not isinstance(self.window, BaseIndexer): min_periods = calculate_min_periods( window, self.min_periods, len(x), require_min_periods, floor @@ -654,9 +648,6 @@ def calc(x): if use_numba_cache: NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - if center: - result = self._center_window(result, window) - return result return self._apply_blockwise(homogeneous_func) From 71830c85133e296d1ef1c0370dfa7e30811408ce Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 4 Sep 2020 18:49:46 -0400 Subject: [PATCH 02/22] updated is_weighted case --- pandas/core/window/rolling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 331be6ff69eb0..e92a78b8a31e4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -635,7 +635,6 @@ def calc(x): else: def calc(x): - x = np.concatenate((x, additional_nans)) return func(x, window, self.min_periods) with np.errstate(all="ignore"): From a449d9b0462b3b045e6eb73c8452ed889bbee466 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Fri, 4 Sep 2020 18:51:12 -0400 Subject: [PATCH 03/22] added comment --- pandas/core/window/indexers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index a23fc4fba0b8a..3861c8b6e5979 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -86,6 +86,8 @@ def get_window_bounds( end = np.arange(1 + offset, num_values + 1 + offset) start = end - self.window_size + # end is exclusive, whereas start is inclusive + # thus the bounds for end should be 1 greater than the bounds for start end = np.clip(end, 1, num_values) start = np.clip(start, 0, num_values - 1) From 476fe8308728c27ae19875e5b66ad7271a69cf96 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 5 Sep 2020 12:59:06 -0400 Subject: [PATCH 04/22] corrected offset for even window sizes --- pandas/core/window/indexers.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 3861c8b6e5979..16831421426fa 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -79,17 +79,15 @@ def get_window_bounds( ) -> Tuple[np.ndarray, np.ndarray]: if center: - offset = self.window_size // 2 + offset = (self.window_size - 1) // 2 else: offset = 0 end = np.arange(1 + offset, num_values + 1 + offset) start = end - self.window_size - - # end is exclusive, whereas start is inclusive - # thus the bounds for end should be 1 greater than the bounds for start - end = np.clip(end, 1, num_values) - start = np.clip(start, 0, num_values - 1) + + end = np.clip(end, 0, num_values) + start = np.clip(start, 0, num_values) return start, end From 9dfd9f3e1b2e57f84705cf9572054048b19ce033 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 5 Sep 2020 12:59:27 -0400 Subject: [PATCH 05/22] reverted changes for weighted windows --- pandas/core/window/rolling.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e92a78b8a31e4..d9f6089cd7935 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -633,8 +633,11 @@ def calc(x): return func(x, start, end, min_periods) else: + offset = calculate_center_offset(window) if center else 0 + additional_nans = np.array([np.nan] * offset) def calc(x): + x = np.concatenate((x, additional_nans)) return func(x, window, self.min_periods) with np.errstate(all="ignore"): @@ -646,6 +649,9 @@ def calc(x): if use_numba_cache: NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func + + if center and is_weighted: + result = self._center_window(result, window) return result @@ -1429,7 +1435,8 @@ def apply( # Cython apply functions handle center, so don't need to use # _apply's center handling window = self._get_window() - offset = calculate_center_offset(window) if self.center else 0 + + offset = calculate_center_offset(window) if self.center and self.win_type else 0 apply_func = self._generate_cython_apply_func( args, kwargs, raw, offset, func ) From f0256005490cf2535a67ef582fda90e0c53eb6da Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 11:39:04 -0400 Subject: [PATCH 06/22] reverted back to fixed func type; added func_type variable --- pandas/core/window/common.py | 1 + pandas/core/window/indexers.py | 2 +- pandas/core/window/rolling.py | 132 +++++++++++++++++++++------------ 3 files changed, 87 insertions(+), 48 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 51a067427e867..b5f5df3200069 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -70,6 +70,7 @@ def _apply( floor: int = 1, is_weighted: bool = False, name: Optional[str] = None, + func_type: Optional[str] = None, use_numba_cache: bool = False, **kwargs, ): diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 16831421426fa..121bc0ae9f1ff 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -83,7 +83,7 @@ def get_window_bounds( else: offset = 0 - end = np.arange(1 + offset, num_values + 1 + offset) + end = np.arange(1 + offset, num_values + 1 + offset).astype('int64') start = end - self.window_size end = np.clip(end, 0, num_values) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d9f6089cd7935..f7fdaa7fa64a0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -70,24 +70,6 @@ from pandas.core.internals import Block # noqa:F401 -def calculate_center_offset(window) -> int: - """ - Calculate an offset necessary to have the window label to be centered. - - Parameters - ---------- - window: ndarray or int - window weights or window - - Returns - ------- - int - """ - if not is_integer(window): - window = len(window) - return int((window - 1) / 2.0) - - def calculate_min_periods( window: int, min_periods: Optional[int], @@ -453,19 +435,40 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"): else: # insert at the end result[name] = extra_col + + def calculate_center_offset(self, window, center) -> int: + """ + Calculate an offset necessary to have the window label to be centered. + + Parameters + ---------- + window: ndarray or int + window weights or window - def _center_window(self, result, window) -> np.ndarray: + Returns + ------- + int + """ + if not center: + return 0 + + if not is_integer(window): + window = len(window) + return int((window - 1) / 2.0) + + def _center_window(self, result, window, center) -> np.ndarray: """ Center the result in the window. """ if self.axis > result.ndim - 1: raise ValueError("Requested axis is larger then no. of argument dimensions") - offset = calculate_center_offset(window) + offset = self.calculate_center_offset(window, center) if offset > 0: lead_indexer = [slice(None)] * result.ndim lead_indexer[self.axis] = slice(offset, None) result = np.copy(result[tuple(lead_indexer)]) + return result def _get_roll_func(self, func_name: str) -> Callable: @@ -494,7 +497,9 @@ def _get_cython_func_type(self, func: str) -> Callable: Variable algorithms do not use window while fixed do. """ - return self._get_roll_func(f"{func}_variable") + if self.is_freq_type or isinstance(self.window, BaseIndexer): + return self._get_roll_func(f"{func}_variable"), "variable" + return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()), "fixed" def _get_window_indexer(self, window: int) -> BaseIndexer: """ @@ -572,6 +577,7 @@ def _apply( floor: int = 1, is_weighted: bool = False, name: Optional[str] = None, + func_type: Optional[str] = None, use_numba_cache: bool = False, **kwargs, ): @@ -608,10 +614,14 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() + + offset = self.calculate_center_offset(window, center=center) if func_type in ["fixed", "weighted"] else 0 + additional_nans = np.array([np.nan] * offset) if not is_weighted: def calc(x): + x = np.concatenate((x, additional_nans)) if not isinstance(self.window, BaseIndexer): min_periods = calculate_min_periods( window, self.min_periods, len(x), require_min_periods, floor @@ -633,8 +643,6 @@ def calc(x): return func(x, start, end, min_periods) else: - offset = calculate_center_offset(window) if center else 0 - additional_nans = np.array([np.nan] * offset) def calc(x): x = np.concatenate((x, additional_nans)) @@ -650,8 +658,8 @@ def calc(x): if use_numba_cache: NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - if center and is_weighted: - result = self._center_window(result, window) + if func_type in ["fixed", "weighted"]: + result = self._center_window(result, window, center) return result @@ -1237,7 +1245,7 @@ def sum(self, *args, **kwargs): window_func = self._get_roll_func("roll_weighted_sum") window_func = get_weighted_roll_func(window_func) return self._apply( - window_func, center=self.center, is_weighted=True, name="sum", **kwargs + window_func, center=self.center, is_weighted=True, name="sum", func_type="weighted", **kwargs ) @Substitution(name="window") @@ -1247,7 +1255,7 @@ def mean(self, *args, **kwargs): window_func = self._get_roll_func("roll_weighted_mean") window_func = get_weighted_roll_func(window_func) return self._apply( - window_func, center=self.center, is_weighted=True, name="mean", **kwargs + window_func, center=self.center, is_weighted=True, name="mean", func_type="weighted", **kwargs ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @@ -1258,7 +1266,7 @@ def var(self, ddof=1, *args, **kwargs): window_func = get_weighted_roll_func(window_func) kwargs.pop("name", None) return self._apply( - window_func, center=self.center, is_weighted=True, name="var", **kwargs + window_func, center=self.center, is_weighted=True, name="var", func_type="weighted", **kwargs ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @@ -1436,7 +1444,7 @@ def apply( # _apply's center handling window = self._get_window() - offset = calculate_center_offset(window) if self.center and self.win_type else 0 + offset = self.calculate_center_offset(window, center=self.center) apply_func = self._generate_cython_apply_func( args, kwargs, raw, offset, func ) @@ -1460,8 +1468,10 @@ def apply( def _generate_cython_apply_func(self, args, kwargs, raw, offset, func): from pandas import Series + cython_func, _ = self._get_cython_func_type("roll_generic") + window_func = partial( - self._get_cython_func_type("roll_generic"), + cython_func, args=args, kwargs=kwargs, raw=raw, @@ -1478,10 +1488,10 @@ def apply_func(values, begin, end, min_periods, raw=raw): def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) - window_func = self._get_cython_func_type("roll_sum") + window_func, func_type = self._get_cython_func_type("roll_sum") kwargs.pop("floor", None) return self._apply( - window_func, center=self.center, floor=0, name="sum", **kwargs + window_func, center=self.center, floor=0, name="sum", func_type=func_type, **kwargs ) _shared_docs["max"] = dedent( @@ -1497,8 +1507,8 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) - window_func = self._get_cython_func_type("roll_max") - return self._apply(window_func, center=self.center, name="max", **kwargs) + window_func, func_type = self._get_cython_func_type("roll_max") + return self._apply(window_func, center=self.center, name="max", func_type=func_type, **kwargs) _shared_docs["min"] = dedent( """ @@ -1539,13 +1549,13 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) - window_func = self._get_cython_func_type("roll_min") - return self._apply(window_func, center=self.center, name="min", **kwargs) + window_func, func_type = self._get_cython_func_type("roll_min") + return self._apply(window_func, center=self.center, name="min", func_type=func_type, **kwargs) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) - window_func = self._get_cython_func_type("roll_mean") - return self._apply(window_func, center=self.center, name="mean", **kwargs) + window_func, func_type = self._get_cython_func_type("roll_mean") + return self._apply(window_func, center=self.center, name="mean", func_type=func_type, **kwargs) _shared_docs["median"] = dedent( """ @@ -1588,12 +1598,12 @@ def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") # GH 32865. Move max window size calculation to # the median function implementation - return self._apply(window_func, center=self.center, name="median", **kwargs) + return self._apply(window_func, center=False, name="median", **kwargs) def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) kwargs.pop("require_min_periods", None) - window_func = self._get_cython_func_type("roll_var") + window_func, func_type = self._get_cython_func_type("roll_var") def zsqrt_func(values, begin, end, min_periods): return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) @@ -1604,6 +1614,7 @@ def zsqrt_func(values, begin, end, min_periods): center=self.center, require_min_periods=1, name="std", + func_type=func_type, ddof=ddof, **kwargs, ) @@ -1611,13 +1622,15 @@ def zsqrt_func(values, begin, end, min_periods): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) kwargs.pop("require_min_periods", None) - window_func = partial(self._get_cython_func_type("roll_var"), ddof=ddof) + cython_func, func_type = self._get_cython_func_type("roll_var") + window_func = partial(cython_func, ddof=ddof) # ddof passed again for compat with groupby.rolling return self._apply( window_func, center=self.center, require_min_periods=1, name="var", + func_type=func_type, ddof=ddof, **kwargs, ) @@ -1634,13 +1647,14 @@ def var(self, ddof=1, *args, **kwargs): """ def skew(self, **kwargs): - window_func = self._get_cython_func_type("roll_skew") + window_func, func_type = self._get_cython_func_type("roll_skew") kwargs.pop("require_min_periods", None) return self._apply( window_func, center=self.center, require_min_periods=3, name="skew", + func_type=func_type, **kwargs, ) @@ -1677,13 +1691,14 @@ def skew(self, **kwargs): ) def kurt(self, **kwargs): - window_func = self._get_cython_func_type("roll_kurt") + window_func, func_type = self._get_cython_func_type("roll_kurt") kwargs.pop("require_min_periods", None) return self._apply( window_func, center=self.center, require_min_periods=4, name="kurt", + func_type=func_type, **kwargs, ) @@ -1745,10 +1760,11 @@ def kurt(self, **kwargs): def quantile(self, quantile, interpolation="linear", **kwargs): if quantile == 1.0: - window_func = self._get_cython_func_type("roll_max") + window_func, func_type = self._get_cython_func_type("roll_max") elif quantile == 0.0: - window_func = self._get_cython_func_type("roll_min") + window_func, func_type = self._get_cython_func_type("roll_min") else: + func_type = None window_func = partial( self._get_roll_func("roll_quantile"), win=self._get_window(), @@ -1759,7 +1775,7 @@ def quantile(self, quantile, interpolation="linear", **kwargs): # Pass through for groupby.rolling kwargs["quantile"] = quantile kwargs["interpolation"] = interpolation - return self._apply(window_func, center=self.center, name="quantile", **kwargs) + return self._apply(window_func, center=self.center, name="quantile", func_type=func_type, **kwargs) _shared_docs[ "cov" @@ -2239,6 +2255,7 @@ def _apply( floor: int = 1, is_weighted: bool = False, name: Optional[str] = None, + func_type: Optional[str] = None, use_numba_cache: bool = False, **kwargs, ): @@ -2250,6 +2267,7 @@ def _apply( floor, is_weighted, name, + func_type, use_numba_cache, **kwargs, ) @@ -2291,6 +2309,26 @@ def _create_blocks(self, obj: FrameOrSeriesUnion): ).astype(np.int64) obj = obj.take(groupby_order) return super()._create_blocks(obj) + + def calculate_center_offset(self, window, center) -> int: + """ + Calculate an offset necessary to have the window label to be centered. + + Parameters + ---------- + window: ndarray or int + window weights or window + + Returns + ------- + int + """ + if not center or not self.win_type: + return 0 + + if not is_integer(window): + window = len(window) + return int((window - 1) / 2.0) def _get_cython_func_type(self, func: str) -> Callable: """ @@ -2300,7 +2338,7 @@ def _get_cython_func_type(self, func: str) -> Callable: the data in group order may not be monotonic with the data which "fixed" algorithms assume """ - return self._get_roll_func(f"{func}_variable") + return self._get_roll_func(f"{func}_variable"), "variable" def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer: """ From 5d902fd2001f3bf0fe821abf8d34424550eebf58 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 11:42:02 -0400 Subject: [PATCH 07/22] reformatted --- pandas/core/window/indexers.py | 4 +- pandas/core/window/rolling.py | 74 +++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 121bc0ae9f1ff..d8aeed0578836 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -82,8 +82,8 @@ def get_window_bounds( offset = (self.window_size - 1) // 2 else: offset = 0 - - end = np.arange(1 + offset, num_values + 1 + offset).astype('int64') + + end = np.arange(1 + offset, num_values + 1 + offset).astype("int64") start = end - self.window_size end = np.clip(end, 0, num_values) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f7fdaa7fa64a0..d3e6ddff39627 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -435,7 +435,7 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"): else: # insert at the end result[name] = extra_col - + def calculate_center_offset(self, window, center) -> int: """ Calculate an offset necessary to have the window label to be centered. @@ -499,7 +499,10 @@ def _get_cython_func_type(self, func: str) -> Callable: """ if self.is_freq_type or isinstance(self.window, BaseIndexer): return self._get_roll_func(f"{func}_variable"), "variable" - return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()), "fixed" + return ( + partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()), + "fixed", + ) def _get_window_indexer(self, window: int) -> BaseIndexer: """ @@ -614,8 +617,12 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() - - offset = self.calculate_center_offset(window, center=center) if func_type in ["fixed", "weighted"] else 0 + + offset = ( + self.calculate_center_offset(window, center=center) + if func_type in ["fixed", "weighted"] + else 0 + ) additional_nans = np.array([np.nan] * offset) if not is_weighted: @@ -657,7 +664,7 @@ def calc(x): if use_numba_cache: NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - + if func_type in ["fixed", "weighted"]: result = self._center_window(result, window, center) @@ -1245,7 +1252,12 @@ def sum(self, *args, **kwargs): window_func = self._get_roll_func("roll_weighted_sum") window_func = get_weighted_roll_func(window_func) return self._apply( - window_func, center=self.center, is_weighted=True, name="sum", func_type="weighted", **kwargs + window_func, + center=self.center, + is_weighted=True, + name="sum", + func_type="weighted", + **kwargs, ) @Substitution(name="window") @@ -1255,7 +1267,12 @@ def mean(self, *args, **kwargs): window_func = self._get_roll_func("roll_weighted_mean") window_func = get_weighted_roll_func(window_func) return self._apply( - window_func, center=self.center, is_weighted=True, name="mean", func_type="weighted", **kwargs + window_func, + center=self.center, + is_weighted=True, + name="mean", + func_type="weighted", + **kwargs, ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @@ -1266,7 +1283,12 @@ def var(self, ddof=1, *args, **kwargs): window_func = get_weighted_roll_func(window_func) kwargs.pop("name", None) return self._apply( - window_func, center=self.center, is_weighted=True, name="var", func_type="weighted", **kwargs + window_func, + center=self.center, + is_weighted=True, + name="var", + func_type="weighted", + **kwargs, ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @@ -1471,12 +1493,7 @@ def _generate_cython_apply_func(self, args, kwargs, raw, offset, func): cython_func, _ = self._get_cython_func_type("roll_generic") window_func = partial( - cython_func, - args=args, - kwargs=kwargs, - raw=raw, - offset=offset, - func=func, + cython_func, args=args, kwargs=kwargs, raw=raw, offset=offset, func=func, ) def apply_func(values, begin, end, min_periods, raw=raw): @@ -1491,7 +1508,12 @@ def sum(self, *args, **kwargs): window_func, func_type = self._get_cython_func_type("roll_sum") kwargs.pop("floor", None) return self._apply( - window_func, center=self.center, floor=0, name="sum", func_type=func_type, **kwargs + window_func, + center=self.center, + floor=0, + name="sum", + func_type=func_type, + **kwargs, ) _shared_docs["max"] = dedent( @@ -1508,7 +1530,9 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) window_func, func_type = self._get_cython_func_type("roll_max") - return self._apply(window_func, center=self.center, name="max", func_type=func_type, **kwargs) + return self._apply( + window_func, center=self.center, name="max", func_type=func_type, **kwargs + ) _shared_docs["min"] = dedent( """ @@ -1550,12 +1574,16 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) window_func, func_type = self._get_cython_func_type("roll_min") - return self._apply(window_func, center=self.center, name="min", func_type=func_type, **kwargs) + return self._apply( + window_func, center=self.center, name="min", func_type=func_type, **kwargs + ) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) window_func, func_type = self._get_cython_func_type("roll_mean") - return self._apply(window_func, center=self.center, name="mean", func_type=func_type, **kwargs) + return self._apply( + window_func, center=self.center, name="mean", func_type=func_type, **kwargs + ) _shared_docs["median"] = dedent( """ @@ -1775,7 +1803,13 @@ def quantile(self, quantile, interpolation="linear", **kwargs): # Pass through for groupby.rolling kwargs["quantile"] = quantile kwargs["interpolation"] = interpolation - return self._apply(window_func, center=self.center, name="quantile", func_type=func_type, **kwargs) + return self._apply( + window_func, + center=self.center, + name="quantile", + func_type=func_type, + **kwargs, + ) _shared_docs[ "cov" @@ -2309,7 +2343,7 @@ def _create_blocks(self, obj: FrameOrSeriesUnion): ).astype(np.int64) obj = obj.take(groupby_order) return super()._create_blocks(obj) - + def calculate_center_offset(self, window, center) -> int: """ Calculate an offset necessary to have the window label to be centered. From cdecf3430a21ab41b471d15d7b59475f9ea547fc Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 12:32:20 -0400 Subject: [PATCH 08/22] corrected return typing --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index cb65197ba06df..06ffc463d1d3e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -448,7 +448,7 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func - def _get_cython_func_type(self, func: str) -> Callable: + def _get_cython_func_type(self, func: str) -> Tuple[Callable[..., Any], str]: """ Return a variable or fixed cython function type. @@ -2315,7 +2315,7 @@ def calculate_center_offset(self, window, center) -> int: window = len(window) return int((window - 1) / 2.0) - def _get_cython_func_type(self, func: str) -> Callable: + def _get_cython_func_type(self, func: str) -> Tuple[Callable[..., Any], str]: """ Return the cython function type. From 4e8f84494f72f4343c18552ca2b1af2de86ecffe Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 12:32:42 -0400 Subject: [PATCH 09/22] added consistency tests --- .../test_moments_consistency_rolling.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index 158b994cf03ae..e962ff49b1f24 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -136,6 +136,46 @@ def test_rolling_apply_consistency( tm.assert_equal(rolling_f_result, rolling_apply_f_result) +@pytest.mark.slow +@pytest.mark.parametrize( + "window,min_periods,center", list(_rolling_consistency_cases()) +) +def test_rolling_groupby( + base_functions, window, min_periods, center +): + base_df = DataFrame({'group': 'A', 'data': randn(20)}) + + b_df = base_df.copy() + b_df['group'] = 'B' + + grp_df = pd.concat([base_df, b_df]).groupby('group') + + for (f, require_min_periods, name) in base_functions: + if ( + require_min_periods + and (min_periods is not None) + and (min_periods < require_min_periods) + ): + continue + + base_rolling_f = getattr( + base_df[['data']].rolling(window=window, center=center, min_periods=min_periods), name + ) + + grp_rolling_f = getattr( + grp_df[['data']].rolling(window=window, center=center, min_periods=min_periods), name + ) + + base_result = base_rolling_f().reset_index(drop=True) + grp_result = grp_rolling_f().reset_index() + + a_result = grp_result[grp_result['group'] == 'A'][['data']].reset_index(drop=True) + b_result = grp_result[grp_result['group'] == 'B'][['data']].reset_index(drop=True) + + tm.assert_frame_equal(base_result, a_result) + tm.assert_frame_equal(base_result, b_result) + + @pytest.mark.parametrize("window", range(7)) def test_rolling_corr_with_zero_variance(window): # GH 18430 From 6e66a491cf40f72f79e72f5913083c7623dfaef7 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 12:36:07 -0400 Subject: [PATCH 10/22] corrected typing change --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 06ffc463d1d3e..b36ecc489cf66 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -448,7 +448,7 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func - def _get_cython_func_type(self, func: str) -> Tuple[Callable[..., Any], str]: + def _get_cython_func_type(self, func: str) -> Tuple[Callable, str]: """ Return a variable or fixed cython function type. @@ -2315,7 +2315,7 @@ def calculate_center_offset(self, window, center) -> int: window = len(window) return int((window - 1) / 2.0) - def _get_cython_func_type(self, func: str) -> Tuple[Callable[..., Any], str]: + def _get_cython_func_type(self, func: str) -> Tuple[Callable, str]: """ Return the cython function type. From e7fb3848641bbcc09c5bac288fb68963a4889c4b Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 12:37:40 -0400 Subject: [PATCH 11/22] reformatted test to pass blac --- .../test_moments_consistency_rolling.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index e962ff49b1f24..327e155660a79 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -140,15 +140,13 @@ def test_rolling_apply_consistency( @pytest.mark.parametrize( "window,min_periods,center", list(_rolling_consistency_cases()) ) -def test_rolling_groupby( - base_functions, window, min_periods, center -): - base_df = DataFrame({'group': 'A', 'data': randn(20)}) +def test_rolling_groupby(base_functions, window, min_periods, center): + base_df = DataFrame({"group": "A", "data": randn(20)}) b_df = base_df.copy() - b_df['group'] = 'B' + b_df["group"] = "B" - grp_df = pd.concat([base_df, b_df]).groupby('group') + grp_df = pd.concat([base_df, b_df]).groupby("group") for (f, require_min_periods, name) in base_functions: if ( @@ -159,18 +157,28 @@ def test_rolling_groupby( continue base_rolling_f = getattr( - base_df[['data']].rolling(window=window, center=center, min_periods=min_periods), name + base_df[["data"]].rolling( + window=window, center=center, min_periods=min_periods + ), + name, ) grp_rolling_f = getattr( - grp_df[['data']].rolling(window=window, center=center, min_periods=min_periods), name + grp_df[["data"]].rolling( + window=window, center=center, min_periods=min_periods + ), + name, ) base_result = base_rolling_f().reset_index(drop=True) grp_result = grp_rolling_f().reset_index() - a_result = grp_result[grp_result['group'] == 'A'][['data']].reset_index(drop=True) - b_result = grp_result[grp_result['group'] == 'B'][['data']].reset_index(drop=True) + a_result = grp_result[grp_result["group"] == "A"][["data"]].reset_index( + drop=True + ) + b_result = grp_result[grp_result["group"] == "B"][["data"]].reset_index( + drop=True + ) tm.assert_frame_equal(base_result, a_result) tm.assert_frame_equal(base_result, b_result) From 3649ca225b1d24b580d9d36eb203e5c800ad165e Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 15:30:17 -0400 Subject: [PATCH 12/22] added typing and docstring --- pandas/core/window/rolling.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b36ecc489cf66..c57fbb9471c88 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -393,14 +393,16 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"): # insert at the end result[name] = extra_col - def calculate_center_offset(self, window, center) -> int: + def calculate_center_offset(self, window, center: bool) -> int: """ Calculate an offset necessary to have the window label to be centered. Parameters ---------- - window: ndarray or int + window : ndarray or int window weights or window + center : bool + Set the labels at the center of the window. Returns ------- @@ -561,11 +563,11 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() - offset = ( - self.calculate_center_offset(window, center=center) - if func_type in ["fixed", "weighted"] - else 0 - ) + if func_type in ["fixed", "weighted"]: + offset = self.calculate_center_offset(window, center) + else: + offset = 0 + additional_nans = np.array([np.nan] * offset) if not is_weighted: @@ -1417,7 +1419,7 @@ def apply( # _apply's center handling window = self._get_window() - offset = self.calculate_center_offset(window, center=self.center) + offset = self.calculate_center_offset(window, self.center) apply_func = self._generate_cython_apply_func( args, kwargs, raw, offset, func ) @@ -2295,14 +2297,16 @@ def _create_blocks(self, obj: FrameOrSeriesUnion): obj = obj.take(groupby_order) return super()._create_blocks(obj) - def calculate_center_offset(self, window, center) -> int: + def calculate_center_offset(self, window, center: bool) -> int: """ Calculate an offset necessary to have the window label to be centered. Parameters ---------- - window: ndarray or int + window : ndarray or int window weights or window + center : bool + Set the labels at the center of the window. Returns ------- From 00cc1dcbe1300779e04b3a1f5ba5ffa70bde0872 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 6 Sep 2020 17:45:19 -0400 Subject: [PATCH 13/22] fixing center param in median's _apply --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c57fbb9471c88..c3b1ec49e1ca5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1579,7 +1579,7 @@ def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") # GH 32865. Move max window size calculation to # the median function implementation - return self._apply(window_func, center=False, name="median", **kwargs) + return self._apply(window_func, center=self.center, name="median", **kwargs) def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) From 3de7fcc1a8fb0b20b77f61df1428300ccb2699ba Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Mon, 7 Sep 2020 16:44:58 -0400 Subject: [PATCH 14/22] added center_min_periods test to test_grouper --- pandas/tests/window/test_grouper.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 170bf100b3891..3b1f4de71a4cf 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -274,6 +274,32 @@ def test_groupby_rolling_center_center(self): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("min_periods", [5, 4, 3]) + def test_groupby_rolling_center_min_periods(self, min_periods): + df = pd.DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)}) + + window_size = 5 + result = ( + df.groupby("group") + .rolling(window_size, center=True, min_periods=min_periods) + .mean() + ) + result = result.reset_index()[["group", "data"]] + + grp_A_mean = [1.0, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.5, 8.0] + grp_B_mean = [x + 10.0 for x in grp_A_mean] + + num_nans = max(0, min_periods - 3) # For window_size of 5 + nans = [np.nan] * num_nans + grp_A_expected = nans + grp_A_mean[num_nans : 10 - num_nans] + nans + grp_B_expected = nans + grp_B_mean[num_nans : 10 - num_nans] + nans + + expected = pd.DataFrame( + {"group": ["A"] * 10 + ["B"] * 10, "data": grp_A_expected + grp_B_expected} + ) + + tm.assert_frame_equal(result, expected) + def test_groupby_subselect_rolling(self): # GH 35486 df = DataFrame( From f7793210f34581624ca7042b66143b661b789fc5 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 9 Sep 2020 18:27:43 -0400 Subject: [PATCH 15/22] replaced func_type with skip_offset --- pandas/core/window/common.py | 2 +- pandas/core/window/rolling.py | 110 ++++++++++++++-------------------- 2 files changed, 45 insertions(+), 67 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index c7c911ee511f6..247e834e67a64 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -70,8 +70,8 @@ def _apply( floor: int = 1, is_weighted: bool = False, name: Optional[str] = None, - func_type: Optional[str] = None, use_numba_cache: bool = False, + skip_offset: bool = False, **kwargs, ): """ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c3b1ec49e1ca5..37a67dd63f913 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -165,6 +165,8 @@ def __init__( self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() + self.use_fixed_func = self.is_freq_type or isinstance(self.window, BaseIndexer) + @property def _constructor(self): return Window @@ -450,18 +452,15 @@ def _get_roll_func(self, func_name: str) -> Callable: ) return window_func - def _get_cython_func_type(self, func: str) -> Tuple[Callable, str]: + def _get_cython_func_type(self, func: str) -> Callable: """ Return a variable or fixed cython function type. Variable algorithms do not use window while fixed do. """ if self.is_freq_type or isinstance(self.window, BaseIndexer): - return self._get_roll_func(f"{func}_variable"), "variable" - return ( - partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()), - "fixed", - ) + return self._get_roll_func(f"{func}_variable") + return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) def _get_window_indexer(self, window: int) -> BaseIndexer: """ @@ -525,8 +524,8 @@ def _apply( floor: int = 1, is_weighted: bool = False, name: Optional[str] = None, - func_type: Optional[str] = None, use_numba_cache: bool = False, + skip_offset: bool = False, **kwargs, ): """ @@ -546,6 +545,8 @@ def _apply( use_numba_cache : bool whether to cache a numba compiled function. Only available for numba enabled methods (so far only apply) + skip_offset : bool + whether to skip offsetting x **kwargs additional arguments for rolling function and window function @@ -563,10 +564,10 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() - if func_type in ["fixed", "weighted"]: - offset = self.calculate_center_offset(window, center) - else: + if self.is_freq_type or isinstance(self.window, BaseIndexer) or skip_offset: offset = 0 + else: + offset = self.calculate_center_offset(window, center) additional_nans = np.array([np.nan] * offset) @@ -610,7 +611,9 @@ def calc(x): if use_numba_cache: NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - if func_type in ["fixed", "weighted"]: + if not ( + self.is_freq_type or isinstance(self.window, BaseIndexer) or skip_offset + ): result = self._center_window(result, window, center) return result @@ -1197,12 +1200,7 @@ def sum(self, *args, **kwargs): window_func = self._get_roll_func("roll_weighted_sum") window_func = get_weighted_roll_func(window_func) return self._apply( - window_func, - center=self.center, - is_weighted=True, - name="sum", - func_type="weighted", - **kwargs, + window_func, center=self.center, is_weighted=True, name="sum", **kwargs, ) @Substitution(name="window") @@ -1212,12 +1210,7 @@ def mean(self, *args, **kwargs): window_func = self._get_roll_func("roll_weighted_mean") window_func = get_weighted_roll_func(window_func) return self._apply( - window_func, - center=self.center, - is_weighted=True, - name="mean", - func_type="weighted", - **kwargs, + window_func, center=self.center, is_weighted=True, name="mean", **kwargs, ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @@ -1228,12 +1221,7 @@ def var(self, ddof=1, *args, **kwargs): window_func = get_weighted_roll_func(window_func) kwargs.pop("name", None) return self._apply( - window_func, - center=self.center, - is_weighted=True, - name="var", - func_type="weighted", - **kwargs, + window_func, center=self.center, is_weighted=True, name="var", **kwargs, ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @@ -1437,13 +1425,14 @@ def apply( raw=raw, original_func=func, args=args, + skip_offset=True, kwargs=kwargs, ) def _generate_cython_apply_func(self, args, kwargs, raw, offset, func): from pandas import Series - cython_func, _ = self._get_cython_func_type("roll_generic") + cython_func = self._get_cython_func_type("roll_generic") window_func = partial( cython_func, args=args, kwargs=kwargs, raw=raw, offset=offset, func=func, @@ -1458,15 +1447,10 @@ def apply_func(values, begin, end, min_periods, raw=raw): def sum(self, *args, **kwargs): nv.validate_window_func("sum", args, kwargs) - window_func, func_type = self._get_cython_func_type("roll_sum") + window_func = self._get_cython_func_type("roll_sum") kwargs.pop("floor", None) return self._apply( - window_func, - center=self.center, - floor=0, - name="sum", - func_type=func_type, - **kwargs, + window_func, center=self.center, floor=0, name="sum", **kwargs, ) _shared_docs["max"] = dedent( @@ -1482,10 +1466,8 @@ def sum(self, *args, **kwargs): def max(self, *args, **kwargs): nv.validate_window_func("max", args, kwargs) - window_func, func_type = self._get_cython_func_type("roll_max") - return self._apply( - window_func, center=self.center, name="max", func_type=func_type, **kwargs - ) + window_func = self._get_cython_func_type("roll_max") + return self._apply(window_func, center=self.center, name="max", **kwargs) _shared_docs["min"] = dedent( """ @@ -1526,17 +1508,13 @@ def max(self, *args, **kwargs): def min(self, *args, **kwargs): nv.validate_window_func("min", args, kwargs) - window_func, func_type = self._get_cython_func_type("roll_min") - return self._apply( - window_func, center=self.center, name="min", func_type=func_type, **kwargs - ) + window_func = self._get_cython_func_type("roll_min") + return self._apply(window_func, center=self.center, name="min", **kwargs) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) - window_func, func_type = self._get_cython_func_type("roll_mean") - return self._apply( - window_func, center=self.center, name="mean", func_type=func_type, **kwargs - ) + window_func = self._get_cython_func_type("roll_mean") + return self._apply(window_func, center=self.center, name="mean", **kwargs) _shared_docs["median"] = dedent( """ @@ -1579,12 +1557,14 @@ def median(self, **kwargs): window_func = self._get_roll_func("roll_median_c") # GH 32865. Move max window size calculation to # the median function implementation - return self._apply(window_func, center=self.center, name="median", **kwargs) + return self._apply( + window_func, center=self.center, name="median", skip_offset=True, **kwargs + ) def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) kwargs.pop("require_min_periods", None) - window_func, func_type = self._get_cython_func_type("roll_var") + window_func = self._get_cython_func_type("roll_var") def zsqrt_func(values, begin, end, min_periods): return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) @@ -1595,7 +1575,6 @@ def zsqrt_func(values, begin, end, min_periods): center=self.center, require_min_periods=1, name="std", - func_type=func_type, ddof=ddof, **kwargs, ) @@ -1603,7 +1582,7 @@ def zsqrt_func(values, begin, end, min_periods): def var(self, ddof=1, *args, **kwargs): nv.validate_window_func("var", args, kwargs) kwargs.pop("require_min_periods", None) - cython_func, func_type = self._get_cython_func_type("roll_var") + cython_func = self._get_cython_func_type("roll_var") window_func = partial(cython_func, ddof=ddof) # ddof passed again for compat with groupby.rolling return self._apply( @@ -1611,7 +1590,6 @@ def var(self, ddof=1, *args, **kwargs): center=self.center, require_min_periods=1, name="var", - func_type=func_type, ddof=ddof, **kwargs, ) @@ -1628,14 +1606,13 @@ def var(self, ddof=1, *args, **kwargs): """ def skew(self, **kwargs): - window_func, func_type = self._get_cython_func_type("roll_skew") + window_func = self._get_cython_func_type("roll_skew") kwargs.pop("require_min_periods", None) return self._apply( window_func, center=self.center, require_min_periods=3, name="skew", - func_type=func_type, **kwargs, ) @@ -1672,14 +1649,13 @@ def skew(self, **kwargs): ) def kurt(self, **kwargs): - window_func, func_type = self._get_cython_func_type("roll_kurt") + window_func = self._get_cython_func_type("roll_kurt") kwargs.pop("require_min_periods", None) return self._apply( window_func, center=self.center, require_min_periods=4, name="kurt", - func_type=func_type, **kwargs, ) @@ -1741,17 +1717,19 @@ def kurt(self, **kwargs): def quantile(self, quantile, interpolation="linear", **kwargs): if quantile == 1.0: - window_func, func_type = self._get_cython_func_type("roll_max") + window_func = self._get_cython_func_type("roll_max") + skip_offset = False elif quantile == 0.0: - window_func, func_type = self._get_cython_func_type("roll_min") + window_func = self._get_cython_func_type("roll_min") + skip_offset = False else: - func_type = None window_func = partial( self._get_roll_func("roll_quantile"), win=self._get_window(), quantile=quantile, interpolation=interpolation, ) + skip_offset = True # Pass through for groupby.rolling kwargs["quantile"] = quantile @@ -1760,7 +1738,7 @@ def quantile(self, quantile, interpolation="linear", **kwargs): window_func, center=self.center, name="quantile", - func_type=func_type, + skip_offset=skip_offset, **kwargs, ) @@ -2242,8 +2220,8 @@ def _apply( floor: int = 1, is_weighted: bool = False, name: Optional[str] = None, - func_type: Optional[str] = None, use_numba_cache: bool = False, + skip_offset: bool = True, **kwargs, ): result = Rolling._apply( @@ -2254,8 +2232,8 @@ def _apply( floor, is_weighted, name, - func_type, use_numba_cache, + skip_offset, **kwargs, ) # Cannot use _wrap_outputs because we calculate the result all at once @@ -2319,7 +2297,7 @@ def calculate_center_offset(self, window, center: bool) -> int: window = len(window) return int((window - 1) / 2.0) - def _get_cython_func_type(self, func: str) -> Tuple[Callable, str]: + def _get_cython_func_type(self, func: str) -> Callable: """ Return the cython function type. @@ -2327,7 +2305,7 @@ def _get_cython_func_type(self, func: str) -> Tuple[Callable, str]: the data in group order may not be monotonic with the data which "fixed" algorithms assume """ - return self._get_roll_func(f"{func}_variable"), "variable" + return self._get_roll_func(f"{func}_variable") def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer: """ From a817f87410507006924193dc2d5d6374bba985a9 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 9 Sep 2020 18:31:12 -0400 Subject: [PATCH 16/22] removed unneeded class attribute --- pandas/core/window/rolling.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 37a67dd63f913..a90be40be3657 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -165,8 +165,6 @@ def __init__( self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() - self.use_fixed_func = self.is_freq_type or isinstance(self.window, BaseIndexer) - @property def _constructor(self): return Window From d72812d42f8b3949b748c37ce6d122fa002553ab Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 9 Sep 2020 18:50:27 -0400 Subject: [PATCH 17/22] moved logic into calculate_center_offset --- pandas/core/window/rolling.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a90be40be3657..9cc104e392e57 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -411,6 +411,9 @@ def calculate_center_offset(self, window, center: bool) -> int: if not center: return 0 + if self.is_freq_type or isinstance(self.window, BaseIndexer): + return 0 + if not is_integer(window): window = len(window) return int((window - 1) / 2.0) @@ -562,7 +565,7 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() - if self.is_freq_type or isinstance(self.window, BaseIndexer) or skip_offset: + if skip_offset: offset = 0 else: offset = self.calculate_center_offset(window, center) @@ -609,9 +612,7 @@ def calc(x): if use_numba_cache: NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - if not ( - self.is_freq_type or isinstance(self.window, BaseIndexer) or skip_offset - ): + if not skip_offset: result = self._center_window(result, window, center) return result @@ -2290,6 +2291,9 @@ def calculate_center_offset(self, window, center: bool) -> int: """ if not center or not self.win_type: return 0 + + if self.is_freq_type or isinstance(self.window, BaseIndexer): + return 0 if not is_integer(window): window = len(window) From 96c69592bf85f0b9b6a4e346ccdb362db41e9358 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Wed, 9 Sep 2020 19:17:59 -0400 Subject: [PATCH 18/22] removed whitespace --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9cc104e392e57..43a3d67676b88 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2291,7 +2291,7 @@ def calculate_center_offset(self, window, center: bool) -> int: """ if not center or not self.win_type: return 0 - + if self.is_freq_type or isinstance(self.window, BaseIndexer): return 0 From daacae74ebc6f767b6a5c576fa7b26d7c7c957e1 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 12 Sep 2020 11:46:02 -0400 Subject: [PATCH 19/22] added whatsnew entry --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ff9e803b4990a..97d0a6d6eca68 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -314,7 +314,7 @@ Groupby/resample/rolling - Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`) - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`) - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) -- +- Bug in :meth:`DataFrame.groupby.rolling` output incorrect when using a partial window (:issue:`36040`) Reshaping ^^^^^^^^^ From 950018cd351d1f7424e084781c4efd56561a4338 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 13 Sep 2020 09:54:10 -0400 Subject: [PATCH 20/22] formatting fixes --- pandas/core/window/rolling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9312066d0fba4..0d1bbdd5b9026 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -392,7 +392,7 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"): # insert at the end result[name] = extra_col - def calculate_center_offset(self, window, center: bool) -> int: + def calculate_center_offset(self, window: Union[np.ndarray, int], center: bool) -> int: """ Calculate an offset necessary to have the window label to be centered. @@ -1208,7 +1208,7 @@ def mean(self, *args, **kwargs): window_func = self._get_roll_func("roll_weighted_mean") window_func = get_weighted_roll_func(window_func) return self._apply( - window_func, center=self.center, is_weighted=True, name="mean", **kwargs, + window_func, center=self.center, is_weighted=True, name="mean", **kwargs ) @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") @@ -2275,7 +2275,7 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: obj = obj.take(groupby_order) return super()._create_data(obj) - def calculate_center_offset(self, window, center: bool) -> int: + def calculate_center_offset(self, window: Union[np.ndarray, int], center: bool) -> int: """ Calculate an offset necessary to have the window label to be centered. From 0798c702d85666b04018da25b391a9c90450db58 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 13 Sep 2020 10:00:52 -0400 Subject: [PATCH 21/22] removed pytest.slow --- pandas/tests/window/moments/test_moments_consistency_rolling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index 7525f9efea5f4..42c3d02089c6f 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -136,7 +136,6 @@ def test_rolling_apply_consistency( tm.assert_equal(rolling_f_result, rolling_apply_f_result) -@pytest.mark.slow @pytest.mark.parametrize( "window,min_periods,center", list(_rolling_consistency_cases()) ) From f413ec80004f72929d355fd4ae5adf93a007472f Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sun, 13 Sep 2020 10:44:51 -0400 Subject: [PATCH 22/22] removed typing of window --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 0d1bbdd5b9026..84e1c97f7c471 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -392,7 +392,7 @@ def _insert_on_column(self, result: "DataFrame", obj: "DataFrame"): # insert at the end result[name] = extra_col - def calculate_center_offset(self, window: Union[np.ndarray, int], center: bool) -> int: + def calculate_center_offset(self, window, center: bool) -> int: """ Calculate an offset necessary to have the window label to be centered. @@ -2275,7 +2275,7 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: obj = obj.take(groupby_order) return super()._create_data(obj) - def calculate_center_offset(self, window: Union[np.ndarray, int], center: bool) -> int: + def calculate_center_offset(self, window, center: bool) -> int: """ Calculate an offset necessary to have the window label to be centered.