Skip to content

Commit 1922ec4

Browse files
authored
BUG: Ensure rolling groupby doesn't segfault with center=True (#35562)
1 parent 9843926 commit 1922ec4

File tree

3 files changed

+72
-0
lines changed

3 files changed

+72
-0
lines changed

doc/source/whatsnew/v1.1.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`).
1919
- Fixed regression in :class:`pandas.core.groupby.RollingGroupby` where column selection was ignored (:issue:`35486`)
2020
- Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`)
21+
- Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`)
2122

2223
.. ---------------------------------------------------------------------------
2324

pandas/core/window/indexers.py

+6
Original file line numberDiff line numberDiff line change
@@ -319,4 +319,10 @@ def get_window_bounds(
319319
end_arrays.append(window_indicies.take(end))
320320
start = np.concatenate(start_arrays)
321321
end = np.concatenate(end_arrays)
322+
# GH 35552: Need to adjust start and end based on the nans appended to values
323+
# when center=True
324+
if num_values > len(start):
325+
offset = num_values - len(start)
326+
start = np.concatenate([start, np.array([end[-1]] * offset)])
327+
end = np.concatenate([end, np.array([end[-1]] * offset)])
322328
return start, end

pandas/tests/window/test_grouper.py

+65
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,71 @@ def foo(x):
215215
)
216216
tm.assert_series_equal(result, expected)
217217

218+
def test_groupby_rolling_center_center(self):
219+
# GH 35552
# Regression test for groupby(...).rolling(center=True, window=3).mean().
# Four scenarios are exercised: a Series grouped by its own values with
# 5 and then 4 rows, and a DataFrame grouped by column "a" whose two
# groups have 5 + 6 and then 5 + 5 rows.
220+
# Scenario 1: Series of 1..5 grouped by itself, so every group holds a
# single row. The expected result is all-NaN (presumably because
# min_periods defaults to the window size of 3 -- confirm).
series = Series(range(1, 6))
221+
result = series.groupby(series).rolling(center=True, window=3).mean()
222+
expected = Series(
223+
[np.nan] * 5,
224+
index=pd.MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3), (5, 4))),
225+
)
226+
tm.assert_series_equal(result, expected)
227+
228+
# Scenario 2: same as scenario 1 but with 4 rows (values 1..4); the
# expected result is again all-NaN.
series = Series(range(1, 5))
229+
result = series.groupby(series).rolling(center=True, window=3).mean()
230+
expected = Series(
231+
[np.nan] * 4,
232+
index=pd.MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3))),
233+
)
234+
tm.assert_series_equal(result, expected)
235+
236+
# Scenario 3: group "a" has 5 rows and group "b" has 6 rows (11 total).
# Within each group the first and last rows are expected to be NaN and
# the interior rows carry the centered 3-row mean of column "b".
df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)})
237+
result = df.groupby("a").rolling(center=True, window=3).mean()
238+
expected = pd.DataFrame(
239+
[np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan],
240+
index=pd.MultiIndex.from_tuples(
241+
(
242+
("a", 0),
243+
("a", 1),
244+
("a", 2),
245+
("a", 3),
246+
("a", 4),
247+
("b", 5),
248+
("b", 6),
249+
("b", 7),
250+
("b", 8),
251+
("b", 9),
252+
("b", 10),
253+
),
254+
names=["a", None],
255+
),
256+
columns=["b"],
257+
)
258+
tm.assert_frame_equal(result, expected)
259+
260+
# Scenario 4: both groups have 5 rows (10 total); same expectation
# pattern as scenario 3 -- NaN at each group's edges, centered mean
# in between.
df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)})
261+
result = df.groupby("a").rolling(center=True, window=3).mean()
262+
expected = pd.DataFrame(
263+
[np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan],
264+
index=pd.MultiIndex.from_tuples(
265+
(
266+
("a", 0),
267+
("a", 1),
268+
("a", 2),
269+
("a", 3),
270+
("a", 4),
271+
("b", 5),
272+
("b", 6),
273+
("b", 7),
274+
("b", 8),
275+
("b", 9),
276+
),
277+
names=["a", None],
278+
),
279+
columns=["b"],
280+
)
281+
tm.assert_frame_equal(result, expected)
282+
218283
def test_groupby_subselect_rolling(self):
219284
# GH 35486
220285
df = DataFrame(

0 commit comments

Comments
 (0)