Skip to content

Backport PR: Series rolling count ignores min_periods #31320

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jan 26, 2020
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` where the ``min_periods`` argument was ignored (:issue:`26996`)

Reshaping
^^^^^^^^^
Expand Down
14 changes: 7 additions & 7 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,17 +1182,13 @@ class _Rolling_and_Expanding(_Rolling):
def count(self):

blocks, obj = self._create_blocks()

window = self._get_window()
window = min(window, len(obj)) if not self.center else window

results = []
for b in blocks:
result = b.notna().astype(int)
result = self._constructor(
result,
window=window,
min_periods=0,
window=self._get_window(),
min_periods=self.min_periods or 0,
center=self.center,
axis=self.axis,
closed=self.closed,
Expand Down Expand Up @@ -1657,7 +1653,11 @@ def _get_cov(X, Y):
mean = lambda x: x.rolling(
window, self.min_periods, center=self.center
).mean(**kwargs)
count = (X + Y).rolling(window=window, center=self.center).count(**kwargs)
count = (
(X + Y)
.rolling(window=window, min_periods=0, center=self.center)
.count(**kwargs)
)
bias_adj = count / (count - ddof)
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/window/moments/test_moments_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def test_expanding_corr(self):
tm.assert_almost_equal(rolling_result, result)

def test_expanding_count(self):
result = self.series.expanding().count()
result = self.series.expanding(min_periods=0).count()
tm.assert_almost_equal(
result, self.series.rolling(window=len(self.series)).count()
result, self.series.rolling(window=len(self.series), min_periods=0).count()
)

def test_expanding_quantile(self):
Expand Down Expand Up @@ -358,7 +358,7 @@ def test_expanding_consistency(self, min_periods):
)
self._test_moments_consistency(
min_periods=min_periods,
count=lambda x: x.expanding().count(),
count=lambda x: x.expanding(min_periods=min_periods).count(),
mean=lambda x: x.expanding(min_periods=min_periods).mean(),
corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y),
var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(),
Expand Down
56 changes: 37 additions & 19 deletions pandas/tests/window/moments/test_moments_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,8 +777,8 @@ def get_result(obj, window, min_periods=None, center=False):
series_result = get_result(series, window=win, min_periods=minp)
frame_result = get_result(frame, window=win, min_periods=minp)
else:
series_result = get_result(series, window=win)
frame_result = get_result(frame, window=win)
series_result = get_result(series, window=win, min_periods=0)
frame_result = get_result(frame, window=win, min_periods=0)

last_date = series_result.index[-1]
prev_date = last_date - 24 * offsets.BDay()
Expand Down Expand Up @@ -835,8 +835,8 @@ def get_result(obj, window, min_periods=None, center=False):
nan_mask = ~nan_mask
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
else:
result = get_result(self.series, len(self.series) + 1)
expected = get_result(self.series, len(self.series))
result = get_result(self.series, len(self.series) + 1, min_periods=0)
expected = get_result(self.series, len(self.series), min_periods=0)
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))

Expand All @@ -851,10 +851,11 @@ def get_result(obj, window, min_periods=None, center=False):
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15
)[9:].reset_index(drop=True)
else:
result = get_result(obj, 20, center=True)
expected = get_result(pd.concat([obj, Series([np.NaN] * 9)]), 20)[
9:
].reset_index(drop=True)
result = get_result(obj, 20, min_periods=0, center=True)
print(result)
expected = get_result(
pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0
)[9:].reset_index(drop=True)

tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -893,21 +894,27 @@ def get_result(obj, window, min_periods=None, center=False):
else:
series_xp = (
get_result(
self.series.reindex(list(self.series.index) + s), window=25
self.series.reindex(list(self.series.index) + s),
window=25,
min_periods=0,
)
.shift(-12)
.reindex(self.series.index)
)
frame_xp = (
get_result(
self.frame.reindex(list(self.frame.index) + s), window=25
self.frame.reindex(list(self.frame.index) + s),
window=25,
min_periods=0,
)
.shift(-12)
.reindex(self.frame.index)
)

series_rs = get_result(self.series, window=25, center=True)
frame_rs = get_result(self.frame, window=25, center=True)
series_rs = get_result(
self.series, window=25, min_periods=0, center=True
)
frame_rs = get_result(self.frame, window=25, min_periods=0, center=True)

if fill_value is not None:
series_xp = series_xp.fillna(fill_value)
Expand Down Expand Up @@ -964,7 +971,11 @@ def test_rolling_consistency(self, window, min_periods, center):

self._test_moments_consistency_is_constant(
min_periods=min_periods,
count=lambda x: (x.rolling(window=window, center=center).count()),
count=lambda x: (
x.rolling(
window=window, min_periods=min_periods, center=center
).count()
),
mean=lambda x: (
x.rolling(
window=window, min_periods=min_periods, center=center
Expand All @@ -989,19 +1000,26 @@ def test_rolling_consistency(self, window, min_periods, center):
).var(ddof=0)
),
var_debiasing_factors=lambda x: (
x.rolling(window=window, center=center)
x.rolling(window=window, min_periods=min_periods, center=center)
.count()
.divide(
(x.rolling(window=window, center=center).count() - 1.0).replace(
0.0, np.nan
)
(
x.rolling(
window=window, min_periods=min_periods, center=center
).count()
- 1.0
).replace(0.0, np.nan)
)
),
)

self._test_moments_consistency(
min_periods=min_periods,
count=lambda x: (x.rolling(window=window, center=center).count()),
count=lambda x: (
x.rolling(
window=window, min_periods=min_periods, center=center
).count()
),
mean=lambda x: (
x.rolling(
window=window, min_periods=min_periods, center=center
Expand Down Expand Up @@ -1071,7 +1089,7 @@ def test_rolling_consistency(self, window, min_periods, center):
if name == "count":
rolling_f_result = rolling_f()
rolling_apply_f_result = x.rolling(
window=window, min_periods=0, center=center
window=window, min_periods=min_periods, center=center
).apply(func=f, raw=True)
else:
if name in ["cov", "corr"]:
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/window/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,10 @@ def test_count_nonnumeric_types(self):
columns=cols,
)

result = df.rolling(window=2).count()
result = df.rolling(window=2, min_periods=0).count()
tm.assert_frame_equal(result, expected)

result = df.rolling(1).count()
result = df.rolling(1, min_periods=0).count()
expected = df.notna().astype(float)
tm.assert_frame_equal(result, expected)

Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,22 @@ def test_expanding_axis(self, axis_frame):

result = df.expanding(3, axis=axis_frame).sum()
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("constructor", [Series, DataFrame])
def test_expanding_count_with_min_periods(constructor):
    # GH 26996
    # With min_periods=3 the first two positions hold fewer than three
    # observations, so they are expected to be NaN; afterwards the
    # expanding count simply grows by one per row.
    obj = constructor(range(5))
    expected = constructor([np.nan, np.nan, 3.0, 4.0, 5.0])

    result = obj.expanding(min_periods=3).count()

    tm.assert_equal(result, expected)


@pytest.mark.parametrize("constructor", [Series, DataFrame])
def test_expanding_count_default_min_periods_with_null_values(constructor):
    # GH 26996
    # No min_periods is passed: every position is expected to carry a
    # count, and the NaN at index 3 must not increase the running total.
    obj = constructor([1, 2, 3, np.nan, 4, 5, 6])
    expected = constructor([1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0])

    result = obj.expanding().count()

    tm.assert_equal(result, expected)
21 changes: 20 additions & 1 deletion pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def test_rolling_axis_count(self, axis_frame):
else:
expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]})

result = df.rolling(2, axis=axis_frame).count()
result = df.rolling(2, axis=axis_frame, min_periods=0).count()
tm.assert_frame_equal(result, expected)

def test_readonly_array(self):
Expand Down Expand Up @@ -426,3 +426,22 @@ def test_min_periods1():
result = df["a"].rolling(3, center=True, min_periods=1).max()
expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a")
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("constructor", [Series, DataFrame])
def test_rolling_count_with_min_periods(constructor):
    # GH 26996
    # A window of 3 with min_periods=3 is expected to yield NaN until the
    # window is full, then a constant count of 3 for this all-valid input.
    obj = constructor(range(5))
    expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0])

    result = obj.rolling(3, min_periods=3).count()

    tm.assert_equal(result, expected)


@pytest.mark.parametrize("constructor", [Series, DataFrame])
def test_rolling_count_default_min_periods_with_null_values(constructor):
    # GH 26996
    # No min_periods is passed: partial leading windows are expected to be
    # counted, and any window of 3 that covers the NaN at index 3 reports
    # only its two non-null entries.
    obj = constructor([1, 2, 3, np.nan, 4, 5, 6])
    expected = constructor([1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0])

    result = obj.rolling(3).count()

    tm.assert_equal(result, expected)