Skip to content

Commit f44f2b1

Browse files
justinessert authored and Kevin D Smith committed
API: reimplement FixedWindowIndexer.get_window_bounds (pandas-dev#37035)
1 parent faeb757 commit f44f2b1

File tree

3 files changed, with 36 additions (+36) and 22 deletions (-22)

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ Groupby/resample/rolling
446446
- Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`)
447447
- Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`)
448448
- Bug in :meth:`RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`)
449+
- Bug in :meth:`DataFrame.groupby.rolling` returning wrong values with partial centered window (:issue:`36040`)
449450

450451
Reshaping
451452
^^^^^^^^^

pandas/core/window/indexers.py

+8-22
Original file line numberDiff line numberDiff line change
@@ -78,30 +78,16 @@ def get_window_bounds(
7878
closed: Optional[str] = None,
7979
) -> Tuple[np.ndarray, np.ndarray]:
8080

81-
start_s = np.zeros(self.window_size, dtype="int64")
82-
start_e = (
83-
np.arange(self.window_size, num_values, dtype="int64")
84-
- self.window_size
85-
+ 1
86-
)
87-
start = np.concatenate([start_s, start_e])[:num_values]
88-
89-
end_s = np.arange(self.window_size, dtype="int64") + 1
90-
end_e = start_e + self.window_size
91-
end = np.concatenate([end_s, end_e])[:num_values]
92-
93-
if center and self.window_size > 2:
94-
offset = min((self.window_size - 1) // 2, num_values - 1)
95-
start_s_buffer = np.roll(start, -offset)[: num_values - offset]
96-
end_s_buffer = np.roll(end, -offset)[: num_values - offset]
81+
if center:
82+
offset = (self.window_size - 1) // 2
83+
else:
84+
offset = 0
9785

98-
start_e_buffer = np.arange(
99-
start[-1] + 1, start[-1] + 1 + offset, dtype="int64"
100-
)
101-
end_e_buffer = np.array([end[-1]] * offset, dtype="int64")
86+
end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
87+
start = end - self.window_size
10288

103-
start = np.concatenate([start_s_buffer, start_e_buffer])
104-
end = np.concatenate([end_s_buffer, end_e_buffer])
89+
end = np.clip(end, 0, num_values)
90+
start = np.clip(start, 0, num_values)
10591

10692
return start, end
10793

pandas/tests/window/test_grouper.py

+27
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,33 @@ def test_groupby_rolling_center_center(self):
297297
)
298298
tm.assert_frame_equal(result, expected)
299299

300+
@pytest.mark.parametrize("min_periods", [5, 4, 3])
301+
def test_groupby_rolling_center_min_periods(self, min_periods):
302+
# GH 36040
303+
df = pd.DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)})
304+
305+
window_size = 5
306+
result = (
307+
df.groupby("group")
308+
.rolling(window_size, center=True, min_periods=min_periods)
309+
.mean()
310+
)
311+
result = result.reset_index()[["group", "data"]]
312+
313+
grp_A_mean = [1.0, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.5, 8.0]
314+
grp_B_mean = [x + 10.0 for x in grp_A_mean]
315+
316+
num_nans = max(0, min_periods - 3) # For window_size of 5
317+
nans = [np.nan] * num_nans
318+
grp_A_expected = nans + grp_A_mean[num_nans : 10 - num_nans] + nans
319+
grp_B_expected = nans + grp_B_mean[num_nans : 10 - num_nans] + nans
320+
321+
expected = pd.DataFrame(
322+
{"group": ["A"] * 10 + ["B"] * 10, "data": grp_A_expected + grp_B_expected}
323+
)
324+
325+
tm.assert_frame_equal(result, expected)
326+
300327
def test_groupby_subselect_rolling(self):
301328
# GH 35486
302329
df = DataFrame(

0 commit comments

Comments
 (0)