diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 05996efb6d332..ff25c4f2551f2 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -499,6 +499,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby.rolling` returning wrong values with partial centered window (:issue:`36040`). - Bug in :meth:`DataFrameGroupBy.rolling` returned wrong values with timeaware window containing ``NaN``. Raises ``ValueError`` because windows are not monotonic now (:issue:`34617`) - Bug in :meth:`Rolling.__iter__` where a ``ValueError`` was not raised when ``min_periods`` was larger than ``window`` (:issue:`37156`) +- Using :meth:`Rolling.var()` instead of :meth:`Rolling.std()` avoids numerical issues for :meth:`Rolling.corr()` when :meth:`Rolling.var()` is still within floating point precision while :meth:`Rolling.std()` is not (:issue:`31286`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9136f9398799b..bfc31021a8f87 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1906,8 +1906,10 @@ def _get_corr(a, b): b = b.rolling( window=window, min_periods=self.min_periods, center=self.center ) - - return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) + # GH 31286: Through using var instead of std we can avoid numerical + # issues when the result of var is within floating point precision + # while std is not. 
+ return a.cov(b, **kwargs) / (a.var(**kwargs) * b.var(**kwargs)) ** 0.5 return flex_binary_moment( self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 02365906c55bb..2c8439aae75e5 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1073,3 +1073,17 @@ def get_window_bounds(self, num_values, min_periods, center, closed): result = getattr(df.rolling(indexer), method)() expected = DataFrame({"values": expected}) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + ("index", "window"), + [([0, 1, 2, 3, 4], 2), (pd.date_range("2001-01-01", freq="D", periods=5), "2D")], +) +def test_rolling_corr_timedelta_index(index, window): + # GH: 31286 + x = Series([1, 2, 3, 4, 5], index=index) + y = x.copy() + x[0:2] = 0.0 + result = x.rolling(window).corr(y) + expected = Series([np.nan, np.nan, 1, 1, 1], index=index) + tm.assert_almost_equal(result, expected)