Skip to content

Commit 20601a9

Browse files
mroeschke authored and JulianWgs committed
BUG: Remove artificial precision limit in rolling var & std (pandas-dev#40505)
1 parent ed18983 commit 20601a9

File tree

4 files changed

+62
-20
lines changed

4 files changed

+62
-20
lines changed

doc/source/whatsnew/v1.3.0.rst

+18
Original file line number | Diff line number | Diff line change
@@ -299,6 +299,24 @@ cast to ``dtype=object`` (:issue:`38709`)
299299
ser
300300
ser2
301301
302+
303+
.. _whatsnew_130.notable_bug_fixes.rolling_var_precision:
304+
305+
Removed artificial truncation in rolling variance and standard deviation
306+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
307+
308+
:meth:`core.window.Rolling.std` and :meth:`core.window.Rolling.var` will no longer
309+
artificially truncate results that are less than ``~1e-8`` and ``~1e-15`` respectively to
310+
zero (:issue:`37051`, :issue:`40448`, :issue:`39872`).
311+
312+
However, floating point artifacts may now exist in the results when rolling over larger values.
313+
314+
.. ipython:: python
315+
316+
s = pd.Series([7, 5, 5, 5])
317+
s.rolling(3).var()
318+
319+
302320
.. _whatsnew_130.api_breaking.deps:
303321

304322
Increased minimum versions for dependencies

pandas/_libs/window/aggregations.pyx

-4
Original file line number | Diff line number | Diff line change
@@ -283,10 +283,6 @@ cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs,
283283
result = 0
284284
else:
285285
result = ssqdm_x / (nobs - <float64_t>ddof)
286-
# Fix for numerical imprecision.
287-
# Can be result < 0 once Kahan Summation is implemented
288-
if result < 1e-14:
289-
result = 0
290286
else:
291287
result = NaN
292288

pandas/core/window/rolling.py

+22-16
Original file line number | Diff line number | Diff line change
@@ -1882,21 +1882,24 @@ def median(
18821882
The default ``ddof`` of 1 used in :meth:`Series.std` is different
18831883
than the default ``ddof`` of 0 in :func:`numpy.std`.
18841884
1885-
A minimum of one period is required for the rolling calculation.\n
1885+
A minimum of one period is required for the rolling calculation.
1886+
1887+
The implementation is susceptible to floating point imprecision as
1888+
shown in the example below.\n
18861889
"""
18871890
).replace("\n", "", 1),
18881891
create_section_header("Examples"),
18891892
dedent(
18901893
"""
18911894
>>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
18921895
>>> s.rolling(3).std()
1893-
0 NaN
1894-
1 NaN
1895-
2 0.577350
1896-
3 1.000000
1897-
4 1.000000
1898-
5 1.154701
1899-
6 0.000000
1896+
0 NaN
1897+
1 NaN
1898+
2 5.773503e-01
1899+
3 1.000000e+00
1900+
4 1.000000e+00
1901+
5 1.154701e+00
1902+
6 2.580957e-08
19001903
dtype: float64
19011904
"""
19021905
).replace("\n", "", 1),
@@ -1931,21 +1934,24 @@ def std(self, ddof: int = 1, *args, **kwargs):
19311934
The default ``ddof`` of 1 used in :meth:`Series.var` is different
19321935
than the default ``ddof`` of 0 in :func:`numpy.var`.
19331936
1934-
A minimum of one period is required for the rolling calculation.\n
1937+
A minimum of one period is required for the rolling calculation.
1938+
1939+
The implementation is susceptible to floating point imprecision as
1940+
shown in the example below.\n
19351941
"""
19361942
).replace("\n", "", 1),
19371943
create_section_header("Examples"),
19381944
dedent(
19391945
"""
19401946
>>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
19411947
>>> s.rolling(3).var()
1942-
0 NaN
1943-
1 NaN
1944-
2 0.333333
1945-
3 1.000000
1946-
4 1.000000
1947-
5 1.333333
1948-
6 0.000000
1948+
0 NaN
1949+
1 NaN
1950+
2 3.333333e-01
1951+
3 1.000000e+00
1952+
4 1.000000e+00
1953+
5 1.333333e+00
1954+
6 6.661338e-16
19491955
dtype: float64
19501956
"""
19511957
).replace("\n", "", 1),

pandas/tests/window/test_rolling.py

+22
Original file line number | Diff line number | Diff line change
@@ -1150,3 +1150,25 @@ def test_rolling_descending_date_order_with_offset(window, frame_or_series):
11501150
idx = date_range(start="2020-01-03", end="2020-01-01", freq="-1d")
11511151
expected = frame_or_series([np.nan, 3, 2], index=idx)
11521152
tm.assert_equal(result, expected)
1153+
1154+
1155+
def test_rolling_var_floating_artifact_precision():
1156+
# GH 37051
1157+
s = Series([7, 5, 5, 5])
1158+
result = s.rolling(3).var()
1159+
expected = Series([np.nan, np.nan, 4 / 3, 0])
1160+
tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15)
1161+
1162+
1163+
def test_rolling_std_small_values():
1164+
# GH 37051
1165+
s = Series(
1166+
[
1167+
0.00000054,
1168+
0.00000053,
1169+
0.00000054,
1170+
]
1171+
)
1172+
result = s.rolling(2).std()
1173+
expected = Series([np.nan, 7.071068e-9, 7.071068e-9])
1174+
tm.assert_series_equal(result, expected, atol=1.0e-15, rtol=1.0e-15)

0 commit comments

Comments
 (0)