diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ec6ad38bbc7cf..b06ed684cd525 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1132,6 +1132,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` where the ``min_periods`` argument was ignored (:issue:`26996`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f7efa69778c44..580c7cc0554d0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1182,17 +1182,13 @@ class _Rolling_and_Expanding(_Rolling): def count(self): blocks, obj = self._create_blocks() - - window = self._get_window() - window = min(window, len(obj)) if not self.center else window - results = [] for b in blocks: result = b.notna().astype(int) result = self._constructor( result, - window=window, - min_periods=0, + window=self._get_window(), + min_periods=self.min_periods or 0, center=self.center, axis=self.axis, closed=self.closed, @@ -1657,7 +1653,11 @@ def _get_cov(X, Y): mean = lambda x: x.rolling( window, self.min_periods, center=self.center ).mean(**kwargs) - count = (X + Y).rolling(window=window, center=self.center).count(**kwargs) + count = ( + (X + Y) + .rolling(window=window, min_periods=0, center=self.center) + .count(**kwargs) + ) bias_adj = count / (count - ddof) return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj diff --git a/pandas/tests/window/moments/test_moments_expanding.py b/pandas/tests/window/moments/test_moments_expanding.py index 322082187f531..9dfaecee9caeb 100644 --- 
a/pandas/tests/window/moments/test_moments_expanding.py +++ b/pandas/tests/window/moments/test_moments_expanding.py @@ -40,9 +40,9 @@ def test_expanding_corr(self): tm.assert_almost_equal(rolling_result, result) def test_expanding_count(self): - result = self.series.expanding().count() + result = self.series.expanding(min_periods=0).count() tm.assert_almost_equal( - result, self.series.rolling(window=len(self.series)).count() + result, self.series.rolling(window=len(self.series), min_periods=0).count() ) def test_expanding_quantile(self): @@ -369,7 +369,7 @@ def test_expanding_consistency(self, min_periods): ) self._test_moments_consistency( min_periods=min_periods, - count=lambda x: x.expanding().count(), + count=lambda x: x.expanding(min_periods=min_periods).count(), mean=lambda x: x.expanding(min_periods=min_periods).mean(), corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index 9acb4ffcb40b8..83e4ee25558b5 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -777,8 +777,8 @@ def get_result(obj, window, min_periods=None, center=False): series_result = get_result(series, window=win, min_periods=minp) frame_result = get_result(frame, window=win, min_periods=minp) else: - series_result = get_result(series, window=win) - frame_result = get_result(frame, window=win) + series_result = get_result(series, window=win, min_periods=0) + frame_result = get_result(frame, window=win, min_periods=0) last_date = series_result.index[-1] prev_date = last_date - 24 * offsets.BDay() @@ -835,8 +835,8 @@ def get_result(obj, window, min_periods=None, center=False): nan_mask = ~nan_mask tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) else: - result = get_result(self.series, len(self.series) + 1) - 
expected = get_result(self.series, len(self.series)) + result = get_result(self.series, len(self.series) + 1, min_periods=0) + expected = get_result(self.series, len(self.series), min_periods=0) nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) @@ -851,10 +851,11 @@ def get_result(obj, window, min_periods=None, center=False): pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 )[9:].reset_index(drop=True) else: - result = get_result(obj, 20, center=True) - expected = get_result(pd.concat([obj, Series([np.NaN] * 9)]), 20)[ - 9: - ].reset_index(drop=True) + result = get_result(obj, 20, min_periods=0, center=True) + print(result) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 + )[9:].reset_index(drop=True) tm.assert_series_equal(result, expected) @@ -893,21 +894,27 @@ def get_result(obj, window, min_periods=None, center=False): else: series_xp = ( get_result( - self.series.reindex(list(self.series.index) + s), window=25 + self.series.reindex(list(self.series.index) + s), + window=25, + min_periods=0, ) .shift(-12) .reindex(self.series.index) ) frame_xp = ( get_result( - self.frame.reindex(list(self.frame.index) + s), window=25 + self.frame.reindex(list(self.frame.index) + s), + window=25, + min_periods=0, ) .shift(-12) .reindex(self.frame.index) ) - series_rs = get_result(self.series, window=25, center=True) - frame_rs = get_result(self.frame, window=25, center=True) + series_rs = get_result( + self.series, window=25, min_periods=0, center=True + ) + frame_rs = get_result(self.frame, window=25, min_periods=0, center=True) if fill_value is not None: series_xp = series_xp.fillna(fill_value) @@ -964,7 +971,11 @@ def test_rolling_consistency(self, window, min_periods, center): self._test_moments_consistency_is_constant( min_periods=min_periods, - count=lambda x: (x.rolling(window=window, center=center).count()), + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + 
).count() + ), mean=lambda x: ( x.rolling( window=window, min_periods=min_periods, center=center @@ -989,19 +1000,26 @@ def test_rolling_consistency(self, window, min_periods, center): ).var(ddof=0) ), var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center) + x.rolling(window=window, min_periods=min_periods, center=center) .count() .divide( - (x.rolling(window=window, center=center).count() - 1.0).replace( - 0.0, np.nan - ) + ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + - 1.0 + ).replace(0.0, np.nan) ) ), ) self._test_moments_consistency( min_periods=min_periods, - count=lambda x: (x.rolling(window=window, center=center).count()), + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ), mean=lambda x: ( x.rolling( window=window, min_periods=min_periods, center=center @@ -1071,7 +1089,7 @@ def test_rolling_consistency(self, window, min_periods, center): if name == "count": rolling_f_result = rolling_f() rolling_apply_f_result = x.rolling( - window=window, min_periods=0, center=center + window=window, min_periods=min_periods, center=center ).apply(func=f, raw=True) else: if name in ["cov", "corr"]: diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 5e70e13209de5..680237db0535b 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -237,10 +237,10 @@ def test_count_nonnumeric_types(self): columns=cols, ) - result = df.rolling(window=2).count() + result = df.rolling(window=2, min_periods=0).count() tm.assert_frame_equal(result, expected) - result = df.rolling(1).count() + result = df.rolling(1, min_periods=0).count() expected = df.notna().astype(float) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index fc4bd50f25c73..6b6367fd80b26 100644 --- a/pandas/tests/window/test_expanding.py +++ 
b/pandas/tests/window/test_expanding.py @@ -113,3 +113,22 @@ def test_expanding_axis(self, axis_frame): result = df.expanding(3, axis=axis_frame).sum() tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_with_min_periods(constructor): + # GH 26996 + result = constructor(range(5)).expanding(min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 4.0, 5.0]) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_default_min_periods_with_null_values(constructor): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0] + + result = constructor(values).expanding().count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index ff435f8386a85..ab2c7fcb7a0dc 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -344,7 +344,7 @@ def test_rolling_axis_count(self, axis_frame): else: expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - result = df.rolling(2, axis=axis_frame).count() + result = df.rolling(2, axis=axis_frame, min_periods=0).count() tm.assert_frame_equal(result, expected) def test_readonly_array(self): @@ -446,3 +446,22 @@ def test_min_periods1(): result = df["a"].rolling(3, center=True, min_periods=1).max() expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_count_with_min_periods(constructor): + # GH 26996 + result = constructor(range(5)).rolling(3, min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def 
test_rolling_count_default_min_periods_with_null_values(constructor): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] + + result = constructor(values).rolling(3).count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected)