Skip to content

Commit 20c9ad8

Browse files
phofl and JulianWgs
authored and committed
BUG: rolling mean and sum not numerically stable for all-NaN window (pandas-dev#41106)
1 parent cc2b837 commit 20c9ad8

File tree

3 files changed

+60
-2
lines changed

3 files changed

+60
-2
lines changed

doc/source/whatsnew/v1.3.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,8 @@ Groupby/resample/rolling
848848
- Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementation bounds (:issue:`40767`)
849849
- Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`)
850850
- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`)
851+
- Bug in :meth:`DataFrame.rolling` returning a mean of zero for an all-``NaN`` window with ``min_periods=0`` when the calculation is not numerically stable (:issue:`41053`)
852+
- Bug in :meth:`DataFrame.rolling` returning a non-zero sum for an all-``NaN`` window with ``min_periods=0`` when the calculation is not numerically stable (:issue:`41053`)
851853

852854
Reshaping
853855
^^^^^^^^^

pandas/_libs/window/aggregations.pyx

+4-2
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,9 @@ cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogi
7373
cdef:
7474
float64_t result
7575

76-
if nobs >= minp:
76+
if nobs == 0 == minp:
77+
result = 0
78+
elif nobs >= minp:
7779
result = sum_x
7880
else:
7981
result = NaN
@@ -170,7 +172,7 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
170172
cdef:
171173
float64_t result
172174

173-
if nobs >= minp:
175+
if nobs >= minp and nobs > 0:
174176
result = sum_x / <float64_t>nobs
175177
if neg_ct == 0 and result < 0:
176178
# all positive

pandas/tests/window/test_rolling.py

+54
Original file line numberDiff line numberDiff line change
@@ -1357,3 +1357,57 @@ def test_rolling_std_small_values():
13571357
result = s.rolling(2).std()
13581358
expected = Series([np.nan, 7.071068e-9, 7.071068e-9])
13591359
tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15)
1360+
1361+
1362+
@pytest.mark.parametrize(
1363+
"start, exp_values",
1364+
[
1365+
(1, [0.03, 0.0155, 0.0155, 0.011, 0.01025]),
1366+
(2, [0.001, 0.001, 0.0015, 0.00366666]),
1367+
],
1368+
)
1369+
def test_rolling_mean_all_nan_window_floating_artifacts(start, exp_values):
1370+
# GH#41053
1371+
df = DataFrame(
1372+
[
1373+
0.03,
1374+
0.03,
1375+
0.001,
1376+
np.NaN,
1377+
0.002,
1378+
0.008,
1379+
np.NaN,
1380+
np.NaN,
1381+
np.NaN,
1382+
np.NaN,
1383+
np.NaN,
1384+
np.NaN,
1385+
0.005,
1386+
0.2,
1387+
]
1388+
)
1389+
1390+
values = exp_values + [
1391+
0.00366666,
1392+
0.005,
1393+
0.005,
1394+
0.008,
1395+
np.NaN,
1396+
np.NaN,
1397+
0.005,
1398+
0.102500,
1399+
]
1400+
expected = DataFrame(
1401+
values,
1402+
index=list(range(start, len(values) + start)),
1403+
)
1404+
result = df.iloc[start:].rolling(5, min_periods=0).mean()
1405+
tm.assert_frame_equal(result, expected)
1406+
1407+
1408+
def test_rolling_sum_all_nan_window_floating_artifacts():
1409+
# GH#41053
1410+
df = DataFrame([0.002, 0.008, 0.005, np.NaN, np.NaN, np.NaN])
1411+
result = df.rolling(3, min_periods=0).sum()
1412+
expected = DataFrame([0.002, 0.010, 0.015, 0.013, 0.005, 0.0])
1413+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)