From 474f5c182105aae94b60fd1e2ec52cf9e0f1ec27 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 26 Mar 2020 20:36:30 -0700 Subject: [PATCH 1/8] ERR: Raise NotImplementedError with BaseIndexer and certain rolling operations: --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/window/rolling.py | 31 +++++++++++++++++++++--- pandas/tests/window/test_base_indexer.py | 15 ++++++++++++ 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 7920820b32620..5f8c873a86e55 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -85,7 +85,7 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- +- Using a :func:`pandas.api.indexers.BaseIndexer` with `min`, `max`, `std`, `var`, `count`, `skew`, `cov", `corr` will now raise a ``NotImplementedError` (:issue:`32865`) Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index aeab51149ec4e..221aa47da31ff 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -56,6 +56,10 @@ from pandas.core.window.numba_ import generate_numba_apply_func +# GH 32865: These functions work correctly with a BaseIndexer subclass +BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"} + + class _Window(PandasObject, ShallowMixin, SelectionMixin): _attributes: List[str] = [ "window", @@ -391,11 +395,18 @@ def _get_cython_func_type(self, func: str) -> Callable: return self._get_roll_func(f"{func}_variable") return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) - def _get_window_indexer(self, window: int) -> BaseIndexer: + def _get_window_indexer( + self, window: int, func_name: Union[Callable, str] + ) -> BaseIndexer: """ Return an indexer class that will compute the window start and end bounds """ if isinstance(self.window, BaseIndexer): + if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: + raise NotImplementedError( + f"{func_name} is not supported with using a BaseIndexer subclasses. " + f"You can use .apply() with {func_name}." + ) return self.window if self.is_freq_type: return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) @@ -441,7 +452,7 @@ def _apply( blocks, obj = self._create_blocks() block_list = list(blocks) - window_indexer = self._get_window_indexer(window) + window_indexer = self._get_window_indexer(window, name) results = [] exclude: List[Scalar] = [] @@ -1173,7 +1184,11 @@ class _Rolling_and_Expanding(_Rolling): ) def count(self): - + if isinstance(self.window, BaseIndexer): + raise NotImplementedError( + f"count is not supported with using a BaseIndexer subclasses. " + f"You can use .apply() with count." + ) blocks, obj = self._create_blocks() results = [] for b in blocks: @@ -1627,6 +1642,11 @@ def quantile(self, quantile, interpolation="linear", **kwargs): """ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + if isinstance(self.window, BaseIndexer): + raise NotImplementedError( + f"cov is not supported with using a BaseIndexer subclasses. " + f"You can use .apply() with cov." + ) if other is None: other = self._selected_obj # only default unset @@ -1770,6 +1790,11 @@ def _get_cov(X, Y): ) def corr(self, other=None, pairwise=None, **kwargs): + if isinstance(self.window, BaseIndexer): + raise NotImplementedError( + f"corr is not supported with using a BaseIndexer subclasses. " + f"You can use .apply() with corr." + ) if other is None: other = self._selected_obj # only default unset diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 606520c6d68ca..e9190dfde4fc4 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -80,3 +80,18 @@ def get_window_bounds(self, num_values, min_periods, center, closed): indexer = CustomIndexer() with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"): df.rolling(indexer, win_type="boxcar") + + +@pytest.mark.parametrize( + "func", ["min", "max", "std", "var", "count", "skew", "cov", "corr"] +) +def test_notimplemented_functions(func): + # GH 32865 + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([0, 1]), np.array([1, 2]) + + df = DataFrame({"values": range(2)}) + indexer = CustomIndexer() + with pytest.raises(NotImplementedError, match=f"{func} is not supported"): + getattr(df.rolling(indexer), func)() From c92d86d2e21bf33cc731ecb8e839d956ca406d80 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 26 Mar 2020 20:39:58 -0700 Subject: [PATCH 2/8] Lint --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 221aa47da31ff..34facc5876b1f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -404,8 +404,8 @@ def _get_window_indexer( if isinstance(self.window, BaseIndexer): if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: raise NotImplementedError( - f"{func_name} is not supported with using a BaseIndexer subclasses. " - f"You can use .apply() with {func_name}." + f"{func_name} is not supported with using a BaseIndexer " + f"subclasses. You can use .apply() with {func_name}." ) return self.window if self.is_freq_type: From 3dbb65fe34903614cfb245a381e55a0df27bc7eb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 26 Mar 2020 21:06:32 -0700 Subject: [PATCH 3/8] Fix typing --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 34facc5876b1f..69e9b22e2bfdf 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -396,7 +396,7 @@ def _get_cython_func_type(self, func: str) -> Callable: return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) def _get_window_indexer( - self, window: int, func_name: Union[Callable, str] + self, window: int, func_name: Optional[str] ) -> BaseIndexer: """ Return an indexer class that will compute the window start and end bounds From c4dd78d3e0394507eb7643b8b21e609c7555e394 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 26 Mar 2020 22:03:24 -0700 Subject: [PATCH 4/8] Add back tick mark --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5f8c873a86e55..c788481b4ed54 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -86,6 +86,7 @@ Other API changes - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) - Using a :func:`pandas.api.indexers.BaseIndexer` with `min`, `max`, `std`, `var`, `count`, `skew`, `cov", `corr` will now raise a ``NotImplementedError` (:issue:`32865`) +- Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 393096306976346f69e6a567ebcb75f4a56d31f0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 26 Mar 2020 22:03:58 -0700 Subject: [PATCH 5/8] black --- pandas/core/window/rolling.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 69e9b22e2bfdf..ece16c183fa3b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -395,9 +395,7 @@ def _get_cython_func_type(self, func: str) -> Callable: return self._get_roll_func(f"{func}_variable") return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) - def _get_window_indexer( - self, window: int, func_name: Optional[str] - ) -> BaseIndexer: + def _get_window_indexer(self, window: int, func_name: Optional[str]) -> BaseIndexer: """ Return an indexer class that will compute the window start and end bounds """ From bd3eb2994d66f4fdd8d5d6b3c811abd213a3a8da Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 26 Mar 2020 23:46:51 -0700 Subject: [PATCH 6/8] isort and fix whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/window/rolling.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c788481b4ed54..7459dc0029897 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -85,7 +85,7 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with `min`, `max`, `std`, `var`, `count`, `skew`, `cov", `corr` will now raise a ``NotImplementedError` (:issue:`32865`) +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``min``, ``max``, ``std``, ``var``, ``count``, ``skew``, ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) - Backwards incompatible API changes diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ece16c183fa3b..c33d83e75ccf2 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -55,7 +55,6 @@ ) from pandas.core.window.numba_ import generate_numba_apply_func - # GH 32865: These functions work correctly with a BaseIndexer subclass BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"} From af71e07b4b564ac7c75c9cfcfe489dd2fc6bf997 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 27 Mar 2020 11:13:05 -0700 Subject: [PATCH 7/8] Create common function for check --- pandas/core/window/rolling.py | 45 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c33d83e75ccf2..559a055419340 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -55,9 +55,6 @@ ) from pandas.core.window.numba_ import generate_numba_apply_func -# GH 32865: These functions work correctly with a BaseIndexer subclass -BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"} - class _Window(PandasObject, ShallowMixin, SelectionMixin): _attributes: List[str] = [ @@ -148,6 +145,20 @@ def _validate_get_window_bounds_signature(window: BaseIndexer) -> None: f"get_window_bounds" ) + @staticmethod + def _validate_baseindexer_support(window, func_name: Optional[str]) -> None: + # GH 32865: These functions work correctly with a BaseIndexer subclass + BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"} + if ( + isinstance(window, BaseIndexer) + and isinstance(func_name, str) + and func_name not in BASEINDEXER_WHITELIST + ): + raise NotImplementedError( + f"{func_name} is not supported with using a BaseIndexer " + f"subclasses. You can use .apply() with {func_name}." + ) + def _create_blocks(self): """ Split data into blocks & return conformed data. @@ -398,12 +409,9 @@ def _get_window_indexer(self, window: int, func_name: Optional[str]) -> BaseInde """ Return an indexer class that will compute the window start and end bounds """ + self._validate_baseindexer_support(self.window, func_name) + if isinstance(self.window, BaseIndexer): - if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: - raise NotImplementedError( - f"{func_name} is not supported with using a BaseIndexer " - f"subclasses. You can use .apply() with {func_name}." - ) return self.window if self.is_freq_type: return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) @@ -1181,11 +1189,8 @@ class _Rolling_and_Expanding(_Rolling): ) def count(self): - if isinstance(self.window, BaseIndexer): - raise NotImplementedError( - f"count is not supported with using a BaseIndexer subclasses. " - f"You can use .apply() with count." - ) + self._validate_baseindexer_support(self.window, "count") + blocks, obj = self._create_blocks() results = [] for b in blocks: @@ -1639,11 +1644,8 @@ def quantile(self, quantile, interpolation="linear", **kwargs): """ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): - if isinstance(self.window, BaseIndexer): - raise NotImplementedError( - f"cov is not supported with using a BaseIndexer subclasses. " - f"You can use .apply() with cov." - ) + self._validate_baseindexer_support(self.window, "cov") + if other is None: other = self._selected_obj # only default unset @@ -1787,11 +1789,8 @@ def _get_cov(X, Y): ) def corr(self, other=None, pairwise=None, **kwargs): - if isinstance(self.window, BaseIndexer): - raise NotImplementedError( - f"corr is not supported with using a BaseIndexer subclasses. " - f"You can use .apply() with corr." - ) + self._validate_baseindexer_support(self.window, "corr") + if other is None: other = self._selected_obj # only default unset From 71a5db1818500720664e78ec0b93b941d3432c32 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 28 Mar 2020 13:52:07 -0700 Subject: [PATCH 8/8] make validation function a common function --- pandas/core/window/common.py | 10 ++++++++++ pandas/core/window/rolling.py | 27 ++++++++------------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index fcde494f7f751..8abc47886d261 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -323,3 +323,13 @@ def func(arg, window, min_periods=None): return cfunc(arg, window, min_periods) return func + + +def validate_baseindexer_support(func_name: Optional[str]) -> None: + # GH 32865: These functions work correctly with a BaseIndexer subclass + BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"} + if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: + raise NotImplementedError( + f"{func_name} is not supported with using a BaseIndexer " + f"subclasses. You can use .apply() with {func_name}." + ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 559a055419340..dc8cf839d0bcb 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -46,6 +46,7 @@ calculate_center_offset, calculate_min_periods, get_weighted_roll_func, + validate_baseindexer_support, zsqrt, ) from pandas.core.window.indexers import ( @@ -145,20 +146,6 @@ def _validate_get_window_bounds_signature(window: BaseIndexer) -> None: f"get_window_bounds" ) - @staticmethod - def _validate_baseindexer_support(window, func_name: Optional[str]) -> None: - # GH 32865: These functions work correctly with a BaseIndexer subclass - BASEINDEXER_WHITELIST = {"mean", "sum", "median", "kurt", "quantile"} - if ( - isinstance(window, BaseIndexer) - and isinstance(func_name, str) - and func_name not in BASEINDEXER_WHITELIST - ): - raise NotImplementedError( - f"{func_name} is not supported with using a BaseIndexer " - f"subclasses. You can use .apply() with {func_name}." - ) - def _create_blocks(self): """ Split data into blocks & return conformed data. @@ -409,9 +396,8 @@ def _get_window_indexer(self, window: int, func_name: Optional[str]) -> BaseInde """ Return an indexer class that will compute the window start and end bounds """ - self._validate_baseindexer_support(self.window, func_name) - if isinstance(self.window, BaseIndexer): + validate_baseindexer_support(func_name) return self.window if self.is_freq_type: return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) @@ -1189,7 +1175,8 @@ class _Rolling_and_Expanding(_Rolling): ) def count(self): - self._validate_baseindexer_support(self.window, "count") + if isinstance(self.window, BaseIndexer): + validate_baseindexer_support("count") blocks, obj = self._create_blocks() results = [] @@ -1644,7 +1631,8 @@ def quantile(self, quantile, interpolation="linear", **kwargs): """ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): - self._validate_baseindexer_support(self.window, "cov") + if isinstance(self.window, BaseIndexer): + validate_baseindexer_support("cov") if other is None: other = self._selected_obj @@ -1789,7 +1777,8 @@ def _get_cov(X, Y): ) def corr(self, other=None, pairwise=None, **kwargs): - self._validate_baseindexer_support(self.window, "corr") + if isinstance(self.window, BaseIndexer): + validate_baseindexer_support("corr") if other is None: other = self._selected_obj