From 424ad4240766aa94579ab07a45c3d96b1f1e9a12 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 25 Sep 2020 17:12:45 -0700 Subject: [PATCH 01/10] DEPR: min_periods=None behavior for Rolling.count --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/_libs/window/aggregations.pyx | 56 ---------- pandas/core/window/rolling.py | 65 +++++------ .../test_moments_consistency_rolling.py | 2 +- .../window/moments/test_moments_rolling.py | 1 + pandas/tests/window/test_base_indexer.py | 1 + pandas/tests/window/test_dtypes.py | 103 +++++++++++------- pandas/tests/window/test_grouper.py | 5 +- pandas/tests/window/test_rolling.py | 4 +- pandas/tests/window/test_timeseries_window.py | 26 +++-- 10 files changed, 115 insertions(+), 150 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2a8b6fe3ade6a..cf90788b99010 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -214,7 +214,7 @@ Deprecations - :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`) - The :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`) - Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`36179`) - +- :meth:`Rolling.count` with `min_periods=None` will default to the size of the window in a future version (:issue:`31302`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 5f60b884c6ada..c6fd569247b90 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -89,62 +89,6 @@ cdef bint is_monotonic_start_end_bounds( # Physical description: 366 p. # Series: Prentice-Hall Series in Automatic Computation -# ---------------------------------------------------------------------- -# Rolling count -# this is only an impl for index not None, IOW, freq aware - - -def roll_count( - ndarray[float64_t] values, - ndarray[int64_t] start, - ndarray[int64_t] end, - int64_t minp, -): - cdef: - float64_t val, count_x = 0.0 - int64_t s, e, nobs, N = len(values) - Py_ssize_t i, j - ndarray[float64_t] output - - output = np.empty(N, dtype=float) - - with nogil: - - for i in range(0, N): - s = start[i] - e = end[i] - - if i == 0: - - # setup - count_x = 0.0 - for j in range(s, e): - val = values[j] - if notnan(val): - count_x += 1.0 - - else: - - # calculate deletes - for j in range(start[i - 1], s): - val = values[j] - if notnan(val): - count_x -= 1.0 - - # calculate adds - for j in range(end[i - 1], e): - val = values[j] - if notnan(val): - count_x += 1.0 - - if count_x >= minp: - output[i] = count_x - else: - output[i] = NaN - - return output - - # ---------------------------------------------------------------------- # Rolling sum diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 335fc3db5cd86..9fe4a2127d302 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -17,6 +17,7 @@ Type, Union, ) +import warnings import numpy as np @@ -471,14 +472,17 @@ def _get_window_indexer(self, window: int) -> BaseIndexer: return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) return FixedWindowIndexer(window_size=window) - def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series": + def _apply_series( + self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None + ) -> "Series": """ Series version of _apply_blockwise """ obj = self._create_data(self._selected_obj) try: - values = self._prep_values(obj.values) + input = obj.values if name != "count" else notna(obj.values).astype(int) + values = self._prep_values(input) except (TypeError, NotImplementedError) as err: raise DataError("No numeric types to aggregate") from err @@ -486,16 +490,20 @@ def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series": return obj._constructor(result, index=obj.index, name=obj.name) def _apply_blockwise( - self, homogeneous_func: Callable[..., ArrayLike] + self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None ) -> FrameOrSeriesUnion: """ Apply the given function to the DataFrame broken down into homogeneous sub-frames. """ if self._selected_obj.ndim == 1: - return self._apply_series(homogeneous_func) + return self._apply_series(homogeneous_func, name) obj = self._create_data(self._selected_obj) + if name == "count": + # GH 12541: Special case for count where we support date-like types + obj = notna(obj).astype(int) + obj._mgr = obj._mgr.consolidate() mgr = obj._mgr def hfunc(bvalues: ArrayLike) -> ArrayLike: @@ -608,7 +616,7 @@ def calc(x): return result - return self._apply_blockwise(homogeneous_func) + return self._apply_blockwise(homogeneous_func, name) def aggregate(self, func, *args, **kwargs): result, how = self._aggregate(func, *args, **kwargs) @@ -1269,33 +1277,8 @@ class RollingAndExpandingMixin(RollingMixin): ) def count(self): - # GH 32865. Using count with custom BaseIndexer subclass - # implementations shouldn't end up here - assert not isinstance(self.window, BaseIndexer) - - obj = self._create_data(self._selected_obj) - - def hfunc(values: np.ndarray) -> np.ndarray: - result = notna(values) - result = result.astype(int) - frame = type(obj)(result.T) - result = self._constructor( - frame, - window=self._get_window(), - min_periods=self.min_periods or 0, - center=self.center, - axis=self.axis, - closed=self.closed, - ).sum() - return result.values.T - - new_mgr = obj._mgr.apply(hfunc) - out = obj._constructor(new_mgr) - if obj.ndim == 1: - out.name = obj.name - else: - self._insert_on_column(out, obj) - return out + window_func = self._get_cython_func_type("roll_sum") + return self._apply(window_func, center=self.center, name="count") _shared_docs["apply"] = dedent( r""" @@ -2050,14 +2033,16 @@ def aggregate(self, func, *args, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["count"]) def count(self): - - # different impl for freq counting - # GH 32865. Use a custom count function implementation - # when using a BaseIndexer subclass as a window - if self.is_freq_type or isinstance(self.window, BaseIndexer): - window_func = self._get_roll_func("roll_count") - return self._apply(window_func, center=self.center, name="count") - + if self.min_periods is None: + warnings.warn( + ( + "min_periods=None will default to the size of window " + "consistent with other methods in a future version. " + "Specify min_periods=0 instead." + ), + DeprecationWarning, + ) + self.min_periods = 0 return super().count() @Substitution(name="rolling") diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index dfcbdde466d44..99c2c4dd0045b 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -452,7 +452,7 @@ def test_moment_functions_zero_length(): df2_expected = df2 functions = [ - lambda x: x.rolling(window=10).count(), + lambda x: x.rolling(window=10, min_periods=0).count(), lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), lambda x: x.rolling(window=10, min_periods=5).max(), diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index da256e80dff7e..1f74689683f92 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -223,6 +223,7 @@ def test_rolling_sum(raw, series, frame): ) +@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning") def test_rolling_count(raw, series, frame): counter = lambda x: np.isfinite(x).astype(float).sum() _check_moment_func( diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index ab73e075eed04..555383e0b5ab6 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -138,6 +138,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): ), ], ) +@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning") def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs): # GH 32865 values = np.arange(10.0) diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 245b48b351684..fc7a51834780f 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -21,82 +21,111 @@ def get_dtype(dtype, coerce_int=None): @pytest.mark.parametrize( - "method, data, expected_data, coerce_int", + "method, data, expected_data, coerce_int, min_periods", [ - ("count", np.arange(5), [1, 2, 2, 2, 2], True), - ("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True), - ("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False), - ("max", np.arange(5), [np.nan, 1, 2, 3, 4], True), - ("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True), - ("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False), - ("min", np.arange(5), [np.nan, 0, 1, 2, 3], True), - ("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True), - ("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False), - ("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True), - ("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True), - ("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False), - ("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True), - ("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True), - ("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False), - ("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True), - ("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True), + ("count", np.arange(5), [1, 2, 2, 2, 2], True, 0), + ("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True, 0), + ("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False, 0), + ("max", np.arange(5), [np.nan, 1, 2, 3, 4], True, None), + ("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True, None), + ("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False, None), + ("min", np.arange(5), [np.nan, 0, 1, 2, 3], True, None), + ("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True, None), + ("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False, None), + ("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True, None), + ("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True, None), + ("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False, None), + ("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None), + ("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None), + ("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False, None), + ("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True, None), + ("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True, None), ( "std", [0, 1, 2, np.nan, 4], [np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2, False, + None, + ), + ("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True, None), + ("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True, None), + ("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False, None), + ("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None), + ("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None), + ( + "median", + [0, 1, 2, np.nan, 4], + [np.nan, 0.5, 1.5, np.nan, np.nan], + False, + None, ), - ("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True), - ("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True), - ("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False), - ("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True), - ("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True), - ("median", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False), ], ) -def test_series_dtypes(method, data, expected_data, coerce_int, dtypes): +def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods): s = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int)) if dtypes in ("m8[ns]", "M8[ns]") and method != "count": msg = "No numeric types to aggregate" with pytest.raises(DataError, match=msg): - getattr(s.rolling(2), method)() + getattr(s.rolling(2, min_periods=min_periods), method)() else: - result = getattr(s.rolling(2), method)() + result = getattr(s.rolling(2, min_periods=min_periods), method)() expected = Series(expected_data, dtype="float64") tm.assert_almost_equal(result, expected) @pytest.mark.parametrize( - "method, expected_data", + "method, expected_data, min_periods", [ - ("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}), - ("max", {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}), - ("min", {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}), + ("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, 0), + ( + "max", + {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}, + None, + ), + ( + "min", + {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}, + None, + ), ( "sum", {0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])}, + None, + ), + ( + "mean", + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + None, ), - ("mean", {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}), ( "std", { 0: Series([np.nan] + [np.sqrt(2)] * 4), 1: Series([np.nan] + [np.sqrt(2)] * 4), }, + None, + ), + ( + "var", + {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, + None, + ), + ( + "median", + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + None, ), - ("var", {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}), - ("median", {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}), ], ) -def test_dataframe_dtypes(method, expected_data, dtypes): +def test_dataframe_dtypes(method, expected_data, dtypes, min_periods): if dtypes == "category": pytest.skip("Category dataframe testing not implemented.") df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes)) if dtypes in ("m8[ns]", "M8[ns]") and method != "count": msg = "No numeric types to aggregate" with pytest.raises(DataError, match=msg): - getattr(df.rolling(2), method)() + getattr(df.rolling(2, min_periods=min_periods), method)() else: - result = getattr(df.rolling(2), method)() + result = getattr(df.rolling(2, min_periods=min_periods), method)() expected = DataFrame(expected_data, dtype="float64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 786cf68d28871..5f0203cc7f117 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -45,9 +45,9 @@ def test_getitem_multiple(self): # GH 13174 g = self.frame.groupby("A") - r = g.rolling(2) + r = g.rolling(2, min_periods=0) g_mutated = get_groupby(self.frame, by="A", mutated=True) - expected = g_mutated.B.apply(lambda x: x.rolling(2).count()) + expected = g_mutated.B.apply(lambda x: x.rolling(2, min_periods=0).count()) result = r.B.count() tm.assert_series_equal(result, expected) @@ -55,6 +55,7 @@ def test_getitem_multiple(self): result = r.B.count() tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning") def test_rolling(self): g = self.frame.groupby("A") r = g.rolling(window=4) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4dfa0287bbb03..c1df80acbfede 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -455,7 +455,9 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): values = [1, 2, 3, np.nan, 4, 5, 6] expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] - result = constructor(values).rolling(3).count() + # GH 31302 + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 8aa4d7103e48a..bef94e7efbfa1 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -585,14 +585,9 @@ def test_freqs_ops(self, freq, op, result_data): tm.assert_series_equal(result, expected) - def test_all(self): - - # simple comparison of integer vs time-based windowing - df = self.regular * 2 - er = df.rolling(window=1) - r = df.rolling(window="1s") - - for f in [ + @pytest.mark.parametrize( + "f", + [ "sum", "mean", "count", @@ -603,11 +598,18 @@ def test_all(self): "skew", "min", "max", - ]: + ], + ) + def test_all(self, f): - result = getattr(r, f)() - expected = getattr(er, f)() - tm.assert_frame_equal(result, expected) + # simple comparison of integer vs time-based windowing + df = self.regular * 2 + er = df.rolling(window=1) + r = df.rolling(window="1s") + + result = getattr(r, f)() + expected = getattr(er, f)() + tm.assert_frame_equal(result, expected) result = r.quantile(0.5) expected = er.quantile(0.5) From f7c82f51fe3208be244dbfc027db640287862d4d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 25 Sep 2020 17:18:38 -0700 Subject: [PATCH 02/10] Add additional comment --- pandas/core/window/rolling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9fe4a2127d302..eedf9802963a8 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -481,6 +481,7 @@ def _apply_series( obj = self._create_data(self._selected_obj) try: + # GH 12541: Special case for count where we support date-like types input = obj.values if name != "count" else notna(obj.values).astype(int) values = self._prep_values(input) except (TypeError, NotImplementedError) as err: From 4f2c0fc83ab236b9211c88fc5a5e3007cd798876 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 25 Sep 2020 20:37:27 -0700 Subject: [PATCH 03/10] Add double backticks --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cf90788b99010..bd4d7e784bc3c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -214,7 +214,7 @@ Deprecations - :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`) - The :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`) - Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`36179`) -- :meth:`Rolling.count` with `min_periods=None` will default to the size of the window in a future version (:issue:`31302`) +- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`) .. --------------------------------------------------------------------------- From a2105143060acbcc50e318065b8d1d7823e0fbbd Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 25 Sep 2020 20:40:58 -0700 Subject: [PATCH 04/10] Filter warning for count --- pandas/tests/window/test_timeseries_window.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index bef94e7efbfa1..94bfa8d205900 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -600,6 +600,7 @@ def test_freqs_ops(self, freq, op, result_data): "max", ], ) + @pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning") def test_all(self, f): # simple comparison of integer vs time-based windowing From bdd0ef75899982e5f00c94811413b36452199f0a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 25 Sep 2020 22:17:33 -0700 Subject: [PATCH 05/10] end bulleted list with a blank line in whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 72d7585e58888..6ed235ad0bb78 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -215,6 +215,7 @@ Deprecations - The :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`) - Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`36179`) - :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`) + .. --------------------------------------------------------------------------- From bcb3be80b0b5b927050de0e534aa90c38f2729fb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 29 Sep 2020 21:06:34 -0700 Subject: [PATCH 06/10] black --- pandas/tests/window/test_grouper.py | 16 +++++++++++++++- pandas/tests/window/test_timeseries_window.py | 7 ++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index e468627a8ea0f..dd05127405f8e 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -56,7 +56,21 @@ def test_getitem_multiple(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - "f", ["sum", "mean", "min", "max", pytest.param("count", marks=pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")), "kurt", "skew"] + "f", + [ + "sum", + "mean", + "min", + "max", + pytest.param( + "count", + marks=pytest.mark.filterwarnings( + "ignore:min_periods:DeprecationWarning" + ), + ), + "kurt", + "skew", + ], ) def test_rolling(self, f): g = self.frame.groupby("A") diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 894620929ebf1..3fc16b4eef192 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -593,7 +593,12 @@ def test_freqs_ops(self, freq, op, result_data): [ "sum", "mean", - pytest.param("count", marks=pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")), + pytest.param( + "count", + marks=pytest.mark.filterwarnings( + "ignore:min_periods:DeprecationWarning" + ), + ), "median", "std", "var", From 93601c2b1397b690185a4c1e102c3b25c3a2d3d8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 30 Sep 2020 16:31:16 -0700 Subject: [PATCH 07/10] Change to futurewarning --- pandas/core/window/rolling.py | 2 +- pandas/tests/window/moments/test_moments_rolling_functions.py | 4 ++-- pandas/tests/window/test_base_indexer.py | 2 +- pandas/tests/window/test_grouper.py | 4 +--- pandas/tests/window/test_timeseries_window.py | 4 +--- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3f24d0b7375d0..f207ea4cd67d4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2041,7 +2041,7 @@ def count(self): "consistent with other methods in a future version. " "Specify min_periods=0 instead." ), - DeprecationWarning, + FutureWarning, ) self.min_periods = 0 return super().count() diff --git a/pandas/tests/window/moments/test_moments_rolling_functions.py b/pandas/tests/window/moments/test_moments_rolling_functions.py index 1c926cef06fc1..abe75c7289ed4 100644 --- a/pandas/tests/window/moments/test_moments_rolling_functions.py +++ b/pandas/tests/window/moments/test_moments_rolling_functions.py @@ -16,7 +16,7 @@ lambda x: np.isfinite(x).astype(float).sum(), "count", {}, - marks=pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning"), + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), ), [np.median, "median", {}], [np.min, "min", {}], @@ -42,7 +42,7 @@ def test_series(series, compare_func, roll_func, kwargs): lambda x: np.isfinite(x).astype(float).sum(), "count", {}, - marks=pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning"), + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), ), [np.median, "median", {}], [np.min, "min", {}], diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 7ae74af2f6054..fa4a314517594 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -138,7 +138,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): ), ], ) -@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning") def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs): # GH 32865 values = np.arange(10.0) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index dd05127405f8e..7cfac7c6a752a 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -64,9 +64,7 @@ def test_getitem_multiple(self): "max", pytest.param( "count", - marks=pytest.mark.filterwarnings( - "ignore:min_periods:DeprecationWarning" - ), + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), ), "kurt", "skew", diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 3fc16b4eef192..d9fcb538c97c1 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -595,9 +595,7 @@ def test_freqs_ops(self, freq, op, result_data): "mean", pytest.param( "count", - marks=pytest.mark.filterwarnings( - "ignore:min_periods:DeprecationWarning" - ), + marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"), ), "median", "std", From 94abc9959f364855d99fa1da15a96ece391abd19 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 30 Sep 2020 17:22:46 -0700 Subject: [PATCH 08/10] Change test to reflect warning type change --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 451b5eadeca0f..ffa39cfa4d8be 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -461,7 +461,7 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] # GH 31302 - with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) From ff751c151cc559309887f4e8a2211ef76144ba87 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 1 Oct 2020 17:03:43 -0700 Subject: [PATCH 09/10] Add unit tests for 35579 --- pandas/tests/window/test_base_indexer.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index fa4a314517594..7f2d58effe1ae 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -254,3 +254,12 @@ def test_non_fixed_variable_window_indexer(closed, expected_data): result = df.rolling(indexer, closed=closed).sum() expected = DataFrame(expected_data, index=index) tm.assert_frame_equal(result, expected) + + +def test_fixed_forward_indexer_count(): + # GH: 35579 + df = DataFrame({"b": [None, None, None, 7]}) + indexer = FixedForwardWindowIndexer(window_size=2) + result = df.rolling(window=indexer, min_periods=0).count() + expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) From 03ce0554e69cbe08dd3139da697574fffabb7ea1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 1 Oct 2020 18:13:06 -0700 Subject: [PATCH 10/10] add additional whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6a9b687326bcf..3bfb507d2e140 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -405,6 +405,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` does not always maintain column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`) - Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`) - Bug in :meth:`Rolling.sum()` returned wrong values when dtypes where mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`) +- Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`) Reshaping ^^^^^^^^^