From 246e90e50047276b5bdca38c6e473130b6c3e9ec Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 14 Jul 2019 19:53:38 -0700 Subject: [PATCH 1/4] TYPING: pandas/core/window.py --- pandas/core/window.py | 121 +++++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 0c1f6a1a6dace..e6e18966b01f6 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -5,7 +5,7 @@ from collections import defaultdict from datetime import timedelta from textwrap import dedent -from typing import Set +from typing import List, Optional, Set, Union import warnings import numpy as np @@ -63,19 +63,19 @@ class _Window(PandasObject, SelectionMixin): "axis", "on", "closed", - ] + ] # type: List[str] exclusions = set() # type: Set[str] def __init__( self, obj, window=None, - min_periods=None, - center=False, - win_type=None, - axis=0, - on=None, - closed=None, + min_periods: Optional[int] = None, + center: Optional[bool] = False, + win_type: Optional[str] = None, + axis: Union[str, int] = 0, + on: Optional[str] = None, + closed: Optional[str] = None, **kwargs ): @@ -97,7 +97,7 @@ def _constructor(self): return Window @property - def is_datetimelike(self): + def is_datetimelike(self) -> Optional[bool]: return None @property @@ -105,7 +105,7 @@ def _on(self): return None @property - def is_freq_type(self): + def is_freq_type(self) -> bool: return self.win_type == "freq" def validate(self): @@ -121,22 +121,12 @@ def validate(self): ]: raise ValueError("closed must be 'right', 'left', 'both' or " "'neither'") - def _convert_freq(self): - """ - Resample according to the how, return a new object. - """ - obj = self._selected_obj - index = None - return obj, index - def _create_blocks(self): """ Split data into blocks & return conformed data. """ - obj, index = self._convert_freq() - if index is not None: - index = self._on + obj = self._selected_obj # filter out the on from the object if self.on is not None: @@ -144,7 +134,7 @@ def _create_blocks(self): obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) blocks = obj._to_dict_of_blocks(copy=False).values() - return blocks, obj, index + return blocks, obj def _gotitem(self, key, ndim, subset=None): """ @@ -186,10 +176,10 @@ def _get_window(self, other=None): return self.window @property - def _window_type(self): + def _window_type(self) -> str: return self.__class__.__name__ - def __repr__(self): + def __repr__(self) -> str: """ Provide a nice str repr of our rolling object. """ @@ -207,23 +197,21 @@ def __iter__(self): url = "https://github.com/pandas-dev/pandas/issues/11704" raise NotImplementedError("See issue #11704 {url}".format(url=url)) - def _get_index(self, index=None): + def _get_index(self) -> Optional[np.ndarray]: """ - Return index as ndarrays. + Return index as an ndarray. Returns ------- - tuple of (index, index_as_ndarray) + None or ndarray """ if self.is_freq_type: - if index is None: - index = self._on - return index, index.asi8 - return index, index - - def _prep_values(self, values=None, kill_inf=True): + return self._on.asi8 + return None + def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: + """Convert input to numpy arrays for Cython routines""" if values is None: values = getattr(self._selected_obj, "values", self._selected_obj) @@ -247,13 +235,14 @@ def _prep_values(self, values=None, kill_inf=True): "cannot handle this type -> {0}" "".format(values.dtype) ) - if kill_inf: - values = values.copy() - values[np.isinf(values)] = np.NaN + # Always convert inf to nan + values[np.isinf(values)] = np.NaN return values - def _wrap_result(self, result, block=None, obj=None): + def _wrap_result( + self, result, block=None, obj=None + ) -> Union[ABCSeries, ABCDataFrame]: """ Wrap a single result. """ @@ -281,7 +270,9 @@ def _wrap_result(self, result, block=None, obj=None): return type(obj)(result, index=index, columns=block.columns) return result - def _wrap_results(self, results, blocks, obj, exclude=None): + def _wrap_results( + self, results, blocks, obj, exclude=None + ) -> Union[ABCSeries, ABCDataFrame]: """ Wrap the results. @@ -335,7 +326,7 @@ def _wrap_results(self, results, blocks, obj, exclude=None): return obj.astype("float64") return concat(final, axis=1).reindex(columns=columns, copy=False) - def _center_window(self, result, window): + def _center_window(self, result, window) -> np.ndarray: """ Center the result in the window. """ @@ -724,7 +715,7 @@ def _apply_window(self, mean=True, **kwargs): window = self._prep_window(**kwargs) center = self.center - blocks, obj, index = self._create_blocks() + blocks, obj = self._create_blocks() block_list = list(blocks) results = [] @@ -912,9 +903,9 @@ def _apply( if check_minp is None: check_minp = _use_window - blocks, obj, index = self._create_blocks() + blocks, obj = self._create_blocks() block_list = list(blocks) - index, indexi = self._get_index(index=index) + index_as_array = self._get_index() results = [] exclude = [] @@ -947,7 +938,7 @@ def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, window) # ensure we are only rolling on floats arg = ensure_float64(arg) - return cfunc(arg, window, minp, indexi, closed, **kwargs) + return cfunc(arg, window, minp, index_as_array, closed, **kwargs) # calculation function if center: @@ -1027,9 +1018,9 @@ class _Rolling_and_Expanding(_Rolling): def count(self): - blocks, obj, index = self._create_blocks() + blocks, obj = self._create_blocks() # Validate the index - self._get_index(index=index) + self._get_index() window = self._get_window() window = min(window, len(obj)) if not self.center else window @@ -1088,11 +1079,10 @@ def count(self): def apply(self, func, raw=None, args=(), kwargs={}): from pandas import Series - # TODO: _level is unused? - _level = kwargs.pop("_level", None) # noqa + kwargs.pop("_level", None) window = self._get_window() offset = _offset(window, self.center) - index, indexi = self._get_index() + index_as_array = self._get_index() # TODO: default is for backward compat # change to False in the future @@ -1113,7 +1103,16 @@ def f(arg, window, min_periods, closed): if not raw: arg = Series(arg, index=self.obj.index) return libwindow.roll_generic( - arg, window, minp, indexi, closed, offset, func, raw, args, kwargs + arg, + window, + minp, + index_as_array, + closed, + offset, + func, + raw, + args, + kwargs, ) return self._apply(f, func, args=args, kwargs=kwargs, center=False, raw=raw) @@ -1285,12 +1284,12 @@ def median(self, **kwargs): def std(self, ddof=1, *args, **kwargs): nv.validate_window_func("std", args, kwargs) window = self._get_window() - index, indexi = self._get_index() + index_as_array = self._get_index() def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) return _zsqrt( - libwindow.roll_var(arg, window, minp, indexi, self.closed, ddof) + libwindow.roll_var(arg, window, minp, index_as_array, self.closed, ddof) ) return self._apply( @@ -1474,17 +1473,27 @@ def kurt(self, **kwargs): def quantile(self, quantile, interpolation="linear", **kwargs): window = self._get_window() - index, indexi = self._get_index() + index_as_array = self._get_index() def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) if quantile == 1.0: - return libwindow.roll_max(arg, window, minp, indexi, self.closed) + return libwindow.roll_max( + arg, window, minp, index_as_array, self.closed + ) elif quantile == 0.0: - return libwindow.roll_min(arg, window, minp, indexi, self.closed) + return libwindow.roll_min( + arg, window, minp, index_as_array, self.closed + ) else: return libwindow.roll_quantile( - arg, window, minp, indexi, self.closed, quantile, interpolation + arg, + window, + minp, + index_as_array, + self.closed, + quantile, + interpolation, ) return self._apply(f, "quantile", quantile=quantile, **kwargs) @@ -2450,7 +2459,7 @@ def _apply(self, func, **kwargs): ------- y : same type as input argument """ - blocks, obj, index = self._create_blocks() + blocks, obj = self._create_blocks() block_list = list(blocks) results = [] From 85e7f2ea574c021351656eec91085292fa2424d0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 15 Jul 2019 19:38:17 -0700 Subject: [PATCH 2/4] Address comments --- pandas/_typing.py | 1 + pandas/core/window.py | 12 ++++-------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 46b1b4685ec9f..a1224a609579e 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -31,3 +31,4 @@ FrameOrSeries = TypeVar("FrameOrSeries", ABCSeries, ABCDataFrame) Scalar = Union[str, int, float] +Axis = Union[str, int] diff --git a/pandas/core/window.py b/pandas/core/window.py index e6e18966b01f6..7472714cc48ee 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -11,6 +11,7 @@ import numpy as np import pandas._libs.window as libwindow +from pandas._typing import Axis, FrameOrSeries from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -73,14 +74,13 @@ def __init__( min_periods: Optional[int] = None, center: Optional[bool] = False, win_type: Optional[str] = None, - axis: Union[str, int] = 0, + axis: Axis = 0, on: Optional[str] = None, closed: Optional[str] = None, **kwargs ): self.__dict__.update(kwargs) - self.blocks = [] self.obj = obj self.on = on self.closed = closed @@ -240,9 +240,7 @@ def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: return values - def _wrap_result( - self, result, block=None, obj=None - ) -> Union[ABCSeries, ABCDataFrame]: + def _wrap_result(self, result, block=None, obj=None) -> FrameOrSeries: """ Wrap a single result. """ @@ -270,9 +268,7 @@ def _wrap_result( return type(obj)(result, index=index, columns=block.columns) return result - def _wrap_results( - self, results, blocks, obj, exclude=None - ) -> Union[ABCSeries, ABCDataFrame]: + def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries: """ Wrap the results. From 10bd26c424c1142659e172b031219ff32f727ee4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 15 Jul 2019 19:50:48 -0700 Subject: [PATCH 3/4] Remove unused import --- pandas/core/window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 7472714cc48ee..37e9cde02c5e6 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -5,7 +5,7 @@ from collections import defaultdict from datetime import timedelta from textwrap import dedent -from typing import List, Optional, Set, Union +from typing import List, Optional, Set import warnings import numpy as np From 0f1faea51ae9386cacdabf9faffcb327cfa47824 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 15 Jul 2019 20:19:18 -0700 Subject: [PATCH 4/4] Sort imports --- pandas/core/window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 37e9cde02c5e6..86574208a3fc0 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -11,7 +11,6 @@ import numpy as np import pandas._libs.window as libwindow -from pandas._typing import Axis, FrameOrSeries from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -36,6 +35,7 @@ ABCTimedeltaIndex, ) +from pandas._typing import Axis, FrameOrSeries from pandas.core.base import DataError, PandasObject, SelectionMixin import pandas.core.common as com from pandas.core.generic import _shared_docs