Skip to content

Commit 0685ce0

Browse files
fujiaxiang authored and jreback committed
Backport PR: Series rolling count ignores min_periods (#31320)
1 parent aaaa6fa commit 0685ce0

File tree

7 files changed

+89
-32
lines changed

7 files changed

+89
-32
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,7 @@ Groupby/resample/rolling
11651165
- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
11661166
- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
11671167
- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
1168+
- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` where the ``min_periods`` argument was ignored (:issue:`26996`)
11681169

11691170
Reshaping
11701171
^^^^^^^^^

pandas/core/window/rolling.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1182,17 +1182,13 @@ class _Rolling_and_Expanding(_Rolling):
11821182
def count(self):
11831183

11841184
blocks, obj = self._create_blocks()
1185-
1186-
window = self._get_window()
1187-
window = min(window, len(obj)) if not self.center else window
1188-
11891185
results = []
11901186
for b in blocks:
11911187
result = b.notna().astype(int)
11921188
result = self._constructor(
11931189
result,
1194-
window=window,
1195-
min_periods=0,
1190+
window=self._get_window(),
1191+
min_periods=self.min_periods or 0,
11961192
center=self.center,
11971193
axis=self.axis,
11981194
closed=self.closed,
@@ -1657,7 +1653,11 @@ def _get_cov(X, Y):
16571653
mean = lambda x: x.rolling(
16581654
window, self.min_periods, center=self.center
16591655
).mean(**kwargs)
1660-
count = (X + Y).rolling(window=window, center=self.center).count(**kwargs)
1656+
count = (
1657+
(X + Y)
1658+
.rolling(window=window, min_periods=0, center=self.center)
1659+
.count(**kwargs)
1660+
)
16611661
bias_adj = count / (count - ddof)
16621662
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
16631663

pandas/tests/window/moments/test_moments_expanding.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ def test_expanding_corr(self):
3838
tm.assert_almost_equal(rolling_result, result)
3939

4040
def test_expanding_count(self):
41-
result = self.series.expanding().count()
41+
result = self.series.expanding(min_periods=0).count()
4242
tm.assert_almost_equal(
43-
result, self.series.rolling(window=len(self.series)).count()
43+
result, self.series.rolling(window=len(self.series), min_periods=0).count()
4444
)
4545

4646
def test_expanding_quantile(self):
@@ -358,7 +358,7 @@ def test_expanding_consistency(self, min_periods):
358358
)
359359
self._test_moments_consistency(
360360
min_periods=min_periods,
361-
count=lambda x: x.expanding().count(),
361+
count=lambda x: x.expanding(min_periods=min_periods).count(),
362362
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
363363
corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y),
364364
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),

pandas/tests/window/moments/test_moments_rolling.py

+37-19
Original file line numberDiff line numberDiff line change
@@ -777,8 +777,8 @@ def get_result(obj, window, min_periods=None, center=False):
777777
series_result = get_result(series, window=win, min_periods=minp)
778778
frame_result = get_result(frame, window=win, min_periods=minp)
779779
else:
780-
series_result = get_result(series, window=win)
781-
frame_result = get_result(frame, window=win)
780+
series_result = get_result(series, window=win, min_periods=0)
781+
frame_result = get_result(frame, window=win, min_periods=0)
782782

783783
last_date = series_result.index[-1]
784784
prev_date = last_date - 24 * offsets.BDay()
@@ -835,8 +835,8 @@ def get_result(obj, window, min_periods=None, center=False):
835835
nan_mask = ~nan_mask
836836
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
837837
else:
838-
result = get_result(self.series, len(self.series) + 1)
839-
expected = get_result(self.series, len(self.series))
838+
result = get_result(self.series, len(self.series) + 1, min_periods=0)
839+
expected = get_result(self.series, len(self.series), min_periods=0)
840840
nan_mask = isna(result)
841841
tm.assert_series_equal(nan_mask, isna(expected))
842842

@@ -851,10 +851,11 @@ def get_result(obj, window, min_periods=None, center=False):
851851
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15
852852
)[9:].reset_index(drop=True)
853853
else:
854-
result = get_result(obj, 20, center=True)
855-
expected = get_result(pd.concat([obj, Series([np.NaN] * 9)]), 20)[
856-
9:
857-
].reset_index(drop=True)
854+
result = get_result(obj, 20, min_periods=0, center=True)
855+
print(result)
856+
expected = get_result(
857+
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0
858+
)[9:].reset_index(drop=True)
858859

859860
tm.assert_series_equal(result, expected)
860861

@@ -893,21 +894,27 @@ def get_result(obj, window, min_periods=None, center=False):
893894
else:
894895
series_xp = (
895896
get_result(
896-
self.series.reindex(list(self.series.index) + s), window=25
897+
self.series.reindex(list(self.series.index) + s),
898+
window=25,
899+
min_periods=0,
897900
)
898901
.shift(-12)
899902
.reindex(self.series.index)
900903
)
901904
frame_xp = (
902905
get_result(
903-
self.frame.reindex(list(self.frame.index) + s), window=25
906+
self.frame.reindex(list(self.frame.index) + s),
907+
window=25,
908+
min_periods=0,
904909
)
905910
.shift(-12)
906911
.reindex(self.frame.index)
907912
)
908913

909-
series_rs = get_result(self.series, window=25, center=True)
910-
frame_rs = get_result(self.frame, window=25, center=True)
914+
series_rs = get_result(
915+
self.series, window=25, min_periods=0, center=True
916+
)
917+
frame_rs = get_result(self.frame, window=25, min_periods=0, center=True)
911918

912919
if fill_value is not None:
913920
series_xp = series_xp.fillna(fill_value)
@@ -964,7 +971,11 @@ def test_rolling_consistency(self, window, min_periods, center):
964971

965972
self._test_moments_consistency_is_constant(
966973
min_periods=min_periods,
967-
count=lambda x: (x.rolling(window=window, center=center).count()),
974+
count=lambda x: (
975+
x.rolling(
976+
window=window, min_periods=min_periods, center=center
977+
).count()
978+
),
968979
mean=lambda x: (
969980
x.rolling(
970981
window=window, min_periods=min_periods, center=center
@@ -989,19 +1000,26 @@ def test_rolling_consistency(self, window, min_periods, center):
9891000
).var(ddof=0)
9901001
),
9911002
var_debiasing_factors=lambda x: (
992-
x.rolling(window=window, center=center)
1003+
x.rolling(window=window, min_periods=min_periods, center=center)
9931004
.count()
9941005
.divide(
995-
(x.rolling(window=window, center=center).count() - 1.0).replace(
996-
0.0, np.nan
997-
)
1006+
(
1007+
x.rolling(
1008+
window=window, min_periods=min_periods, center=center
1009+
).count()
1010+
- 1.0
1011+
).replace(0.0, np.nan)
9981012
)
9991013
),
10001014
)
10011015

10021016
self._test_moments_consistency(
10031017
min_periods=min_periods,
1004-
count=lambda x: (x.rolling(window=window, center=center).count()),
1018+
count=lambda x: (
1019+
x.rolling(
1020+
window=window, min_periods=min_periods, center=center
1021+
).count()
1022+
),
10051023
mean=lambda x: (
10061024
x.rolling(
10071025
window=window, min_periods=min_periods, center=center
@@ -1071,7 +1089,7 @@ def test_rolling_consistency(self, window, min_periods, center):
10711089
if name == "count":
10721090
rolling_f_result = rolling_f()
10731091
rolling_apply_f_result = x.rolling(
1074-
window=window, min_periods=0, center=center
1092+
window=window, min_periods=min_periods, center=center
10751093
).apply(func=f, raw=True)
10761094
else:
10771095
if name in ["cov", "corr"]:

pandas/tests/window/test_api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,10 @@ def test_count_nonnumeric_types(self):
237237
columns=cols,
238238
)
239239

240-
result = df.rolling(window=2).count()
240+
result = df.rolling(window=2, min_periods=0).count()
241241
tm.assert_frame_equal(result, expected)
242242

243-
result = df.rolling(1).count()
243+
result = df.rolling(1, min_periods=0).count()
244244
expected = df.notna().astype(float)
245245
tm.assert_frame_equal(result, expected)
246246

pandas/tests/window/test_expanding.py

+19
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,22 @@ def test_expanding_axis(self, axis_frame):
113113

114114
result = df.expanding(3, axis=axis_frame).sum()
115115
tm.assert_frame_equal(result, expected)
116+
117+
118+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
119+
def test_expanding_count_with_min_periods(constructor):
120+
# GH 26996
121+
result = constructor(range(5)).expanding(min_periods=3).count()
122+
expected = constructor([np.nan, np.nan, 3.0, 4.0, 5.0])
123+
tm.assert_equal(result, expected)
124+
125+
126+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
127+
def test_expanding_count_default_min_periods_with_null_values(constructor):
128+
# GH 26996
129+
values = [1, 2, 3, np.nan, 4, 5, 6]
130+
expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0]
131+
132+
result = constructor(values).expanding().count()
133+
expected = constructor(expected_counts)
134+
tm.assert_equal(result, expected)

pandas/tests/window/test_rolling.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ def test_rolling_axis_count(self, axis_frame):
324324
else:
325325
expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]})
326326

327-
result = df.rolling(2, axis=axis_frame).count()
327+
result = df.rolling(2, axis=axis_frame, min_periods=0).count()
328328
tm.assert_frame_equal(result, expected)
329329

330330
def test_readonly_array(self):
@@ -426,3 +426,22 @@ def test_min_periods1():
426426
result = df["a"].rolling(3, center=True, min_periods=1).max()
427427
expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a")
428428
tm.assert_series_equal(result, expected)
429+
430+
431+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
432+
def test_rolling_count_with_min_periods(constructor):
433+
# GH 26996
434+
result = constructor(range(5)).rolling(3, min_periods=3).count()
435+
expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0])
436+
tm.assert_equal(result, expected)
437+
438+
439+
@pytest.mark.parametrize("constructor", [Series, DataFrame])
440+
def test_rolling_count_default_min_periods_with_null_values(constructor):
441+
# GH 26996
442+
values = [1, 2, 3, np.nan, 4, 5, 6]
443+
expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0]
444+
445+
result = constructor(values).rolling(3).count()
446+
expected = constructor(expected_counts)
447+
tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)