diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 845f7773c263c..0e87b319add6b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -175,8 +175,7 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew``, ``cov``, ``corr`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) - Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations. - diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 082c2f533f3de..12b73646e14bf 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -324,25 +324,3 @@ def func(arg, window, min_periods=None): return cfunc(arg, window, min_periods) return func - - -def validate_baseindexer_support(func_name: Optional[str]) -> None: - # GH 32865: These functions work correctly with a BaseIndexer subclass - BASEINDEXER_WHITELIST = { - "count", - "min", - "max", - "mean", - "sum", - "median", - "std", - "var", - "skew", - "kurt", - "quantile", - } - if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: - raise NotImplementedError( - f"{func_name} is not supported with using a BaseIndexer " - f"subclasses. You can use .apply() with {func_name}." - ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 24130c044d186..6c775953e18db 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -48,7 +48,6 @@ calculate_center_offset, calculate_min_periods, get_weighted_roll_func, - validate_baseindexer_support, zsqrt, ) from pandas.core.window.indexers import ( @@ -393,12 +392,11 @@ def _get_cython_func_type(self, func: str) -> Callable: return self._get_roll_func(f"{func}_variable") return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) - def _get_window_indexer(self, window: int, func_name: Optional[str]) -> BaseIndexer: + def _get_window_indexer(self, window: int) -> BaseIndexer: """ Return an indexer class that will compute the window start and end bounds """ if isinstance(self.window, BaseIndexer): - validate_baseindexer_support(func_name) return self.window if self.is_freq_type: return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) @@ -444,7 +442,7 @@ def _apply( blocks, obj = self._create_blocks() block_list = list(blocks) - window_indexer = self._get_window_indexer(window, name) + window_indexer = self._get_window_indexer(window) results = [] exclude: List[Scalar] = [] @@ -1632,20 +1630,23 @@ def quantile(self, quantile, interpolation="linear", **kwargs): """ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): - if isinstance(self.window, BaseIndexer): - validate_baseindexer_support("cov") - if other is None: other = self._selected_obj # only default unset pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - # GH 16058: offset window - if self.is_freq_type: - window = self.win_freq + # GH 32865. We leverage rolling.mean, so we pass + # to the rolling constructors the data used when constructing self: + # window width, frequency data, or a BaseIndexer subclass + if isinstance(self.window, BaseIndexer): + window = self.window else: - window = self._get_window(other) + # GH 16058: offset window + if self.is_freq_type: + window = self.win_freq + else: + window = self._get_window(other) def _get_cov(X, Y): # GH #12373 : rolling functions error on float32 data @@ -1778,15 +1779,19 @@ def _get_cov(X, Y): ) def corr(self, other=None, pairwise=None, **kwargs): - if isinstance(self.window, BaseIndexer): - validate_baseindexer_support("corr") - if other is None: other = self._selected_obj # only default unset pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - window = self._get_window(other) if not self.is_freq_type else self.win_freq + + # GH 32865. We leverage rolling.cov and rolling.std here, so we pass + # to the rolling constructors the data used when constructing self: + # window width, frequency data, or a BaseIndexer subclass + if isinstance(self.window, BaseIndexer): + window = self.window + else: + window = self._get_window(other) if not self.is_freq_type else self.win_freq def _get_corr(a, b): a = a.rolling( diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 15e6a904dd11a..df58028dee862 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -82,19 +82,6 @@ def get_window_bounds(self, num_values, min_periods, center, closed): df.rolling(indexer, win_type="boxcar") -@pytest.mark.parametrize("func", ["cov", "corr"]) -def test_notimplemented_functions(func): - # GH 32865 - class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): - return np.array([0, 1]), np.array([1, 2]) - - df = DataFrame({"values": range(2)}) - indexer = CustomIndexer() - with pytest.raises(NotImplementedError, match=f"{func} is not supported"): - getattr(df.rolling(indexer), func)() - - @pytest.mark.parametrize("constructor", [Series, DataFrame]) @pytest.mark.parametrize( "func,np_func,expected,np_kwargs", @@ -210,3 +197,40 @@ def test_rolling_forward_skewness(constructor): ] ) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "func,expected", + [ + ("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan],), + ( + "corr", + [ + 1.0, + 1.0, + 1.0, + 0.8704775290207161, + 0.018229084250926637, + -0.861357304646493, + 1.0, + 1.0, + np.nan, + np.nan, + ], + ), + ], +) +def test_rolling_forward_cov_corr(func, expected): + values1 = np.arange(10).reshape(-1, 1) + values2 = values1 * 2 + values1[5, 0] = 100 + values = np.concatenate([values1, values2], axis=1) + + indexer = FixedForwardWindowIndexer(window_size=3) + rolling = DataFrame(values).rolling(window=indexer, min_periods=3) + # We are interested in checking only pairwise covariance / correlation + result = getattr(rolling, func)().loc[(slice(None), 1), 0] + result = result.reset_index(drop=True) + expected = Series(expected) + expected.name = result.name + tm.assert_equal(result, expected)