Skip to content

Commit 9f87934

Browse files
benHeidCGe0516
authored and committed
BUG: Mitigate division by zero in roll_var (pandas-dev#42459)
1 parent c04ba31 commit 9f87934

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ Groupby/resample/rolling
261261
^^^^^^^^^^^^^^^^^^^^^^^^
262262
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
263263
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
264-
-
264+
- Bug in :meth:`DataFrame.groupby.rolling.var` where the rolling variance was calculated only for the first group (:issue:`42442`)
265265

266266
Reshaping
267267
^^^^^^^^^

pandas/_libs/window/aggregations.pyx

+4-1
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,10 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x,
310310
t = y - mean_x[0]
311311
compensation[0] = t + mean_x[0] - y
312312
delta = t
313-
mean_x[0] = mean_x[0] + delta / nobs[0]
313+
if nobs[0]:
314+
mean_x[0] = mean_x[0] + delta / nobs[0]
315+
else:
316+
mean_x[0] = 0
314317
ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0])
315318

316319

pandas/tests/window/test_groupby.py

+25
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,31 @@ def test_groupby_rolling_object_doesnt_affect_groupby_apply(self):
695695
assert not g.mutated
696696
assert not g.grouper.mutated
697697

698+
@pytest.mark.parametrize(
699+
("window", "min_periods", "closed", "expected"),
700+
[
701+
(2, 0, "left", [None, 0.0, 1.0, 1.0, None, 0.0, 1.0, 1.0]),
702+
(2, 2, "left", [None, None, 1.0, 1.0, None, None, 1.0, 1.0]),
703+
(4, 4, "left", [None, None, None, None, None, None, None, None]),
704+
(4, 4, "right", [None, None, None, 5.0, None, None, None, 5.0]),
705+
],
706+
)
707+
def test_groupby_rolling_var(self, window, min_periods, closed, expected):
708+
df = DataFrame([1, 2, 3, 4, 5, 6, 7, 8])
709+
result = (
710+
df.groupby([1, 2, 1, 2, 1, 2, 1, 2])
711+
.rolling(window=window, min_periods=min_periods, closed=closed)
712+
.var(0)
713+
)
714+
expected_result = DataFrame(
715+
np.array(expected, dtype="float64"),
716+
index=MultiIndex(
717+
levels=[[1, 2], [0, 1, 2, 3, 4, 5, 6, 7]],
718+
codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 2, 4, 6, 1, 3, 5, 7]],
719+
),
720+
)
721+
tm.assert_frame_equal(result, expected_result)
722+
698723
@pytest.mark.parametrize(
699724
"columns", [MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]]
700725
)

0 commit comments

Comments
 (0)