From 57d50a2bd9678dd02864148a41219162a7f8fba7 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 16 Sep 2020 22:55:36 +0200 Subject: [PATCH 1/8] Add warning to userguide about rolling sums --- doc/source/user_guide/groupby.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index f745dab00bab8..0738266c08a1e 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -851,6 +851,12 @@ and that the transformed data contains no NAs. Window and resample operations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. warning:: + + When using ``rolling()`` and an associated function the results are calculated using rolling sums. This means in case + of having values differing with magnitude ``1/np.finfo(np.double).eps`` results in truncation. It must be noted, that + large value may have an impact on windows, which do not include these values. + It is possible to use ``resample()``, ``expanding()`` and ``rolling()`` as methods on groupbys. From 0456939c407e841dc5cd0addef53f98b7b8f2e4d Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 17 Sep 2020 20:46:37 +0200 Subject: [PATCH 2/8] Include var --- doc/source/user_guide/groupby.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 0738266c08a1e..baf033dbf0347 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -853,9 +853,10 @@ Window and resample operations .. warning:: - When using ``rolling()`` and an associated function the results are calculated using rolling sums. This means in case - of having values differing with magnitude ``1/np.finfo(np.double).eps`` results in truncation. It must be noted, that - large value may have an impact on windows, which do not include these values. + When using ``rolling()`` and an associated function the results are calculated with rolling sums. 
As a consequence + when having values differing with magnitude :math:`1/np.finfo(np.double).eps` this results in truncation. It must be + noted, that large values may have an impact on windows, which do not include these values. The same holds true for + ``Rolling.var()`` for values differing with magnitude :math:`(1/np.finfo(np.double).eps)^{0.5}`. It is possible to use ``resample()``, ``expanding()`` and ``rolling()`` as methods on groupbys. From d2ae8c645f921f581a8a0fc1660648cdac7c7d13 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 17 Sep 2020 23:15:42 +0200 Subject: [PATCH 3/8] Move warning note --- doc/source/user_guide/computation.rst | 7 +++++++ doc/source/user_guide/groupby.rst | 9 +-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index 151ef36be7c98..4d86602ff656f 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -229,6 +229,13 @@ see the :ref:`groupby docs `. The API for window statistics is quite similar to the way one works with ``GroupBy`` objects, see the documentation :ref:`here `. +.. warning:: + + When using ``rolling()`` and an associated function the results are calculated with rolling sums. As a consequence + when having values differing with magnitude :math:`1/np.finfo(np.double).eps` this results in truncation. It must be + noted, that large values may have an impact on windows, which do not include these values. The same holds true for + ``Rolling.var()`` for values differing with magnitude :math:`(1/np.finfo(np.double).eps)^{0.5}`. + We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.ExponentialMovingWindow`. 
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index baf033dbf0347..eb9a4725c607c 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -849,14 +849,7 @@ and that the transformed data contains no NAs. .. _groupby.transform.window_resample: Window and resample operations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. warning:: - - When using ``rolling()`` and an associated function the results are calculated with rolling sums. As a consequence - when having values differing with magnitude :math:`1/np.finfo(np.double).eps` this results in truncation. It must be - noted, that large values may have an impact on windows, which do not include these values. The same holds true for - ``Rolling.var()`` for values differing with magnitude :math:`(1/np.finfo(np.double).eps)^{0.5}`. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It is possible to use ``resample()``, ``expanding()`` and ``rolling()`` as methods on groupbys. From c0fc2c4d379e93a4ff6b56918b2949eb57520391 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 09:38:07 +0200 Subject: [PATCH 4/8] Fix title underline --- doc/source/user_guide/groupby.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index eb9a4725c607c..f745dab00bab8 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -849,7 +849,7 @@ and that the transformed data contains no NAs. .. _groupby.transform.window_resample: Window and resample operations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It is possible to use ``resample()``, ``expanding()`` and ``rolling()`` as methods on groupbys. 
From 4d2fa29dab3a3beeade4841c9d0d4f217e33e534 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 17:42:22 +0200 Subject: [PATCH 5/8] Add comment about kahan summation --- doc/source/user_guide/computation.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index 4d86602ff656f..adc5feaaf1dc4 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -233,8 +233,9 @@ see the :ref:`groupby docs `. When using ``rolling()`` and an associated function the results are calculated with rolling sums. As a consequence when having values differing with magnitude :math:`1/np.finfo(np.double).eps` this results in truncation. It must be - noted, that large values may have an impact on windows, which do not include these values. The same holds true for - ``Rolling.var()`` for values differing with magnitude :math:`(1/np.finfo(np.double).eps)^{0.5}`. + noted, that large values may have an impact on windows, which do not include these values. Kahan summation is used + to compute the rolling sums to preserve accuracy as much as possible.The same holds true for ``Rolling.var()`` for + values differing with magnitude :math:`(1/np.finfo(np.double).eps)^{0.5}`. We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.ExponentialMovingWindow`. 
From 9c0870660213e0ed0e2b401e0997cadae187b6e8 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 17:45:37 +0200 Subject: [PATCH 6/8] Add wikipedia link --- doc/source/user_guide/computation.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index adc5feaaf1dc4..10e27606a1415 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -233,8 +233,9 @@ see the :ref:`groupby docs <groupby.transform.window_resample>`. When using ``rolling()`` and an associated function the results are calculated with rolling sums. As a consequence when having values differing with magnitude :math:`1/np.finfo(np.double).eps` this results in truncation. It must be - noted, that large values may have an impact on windows, which do not include these values. Kahan summation is used - to compute the rolling sums to preserve accuracy as much as possible.The same holds true for ``Rolling.var()`` for + noted, that large values may have an impact on windows, which do not include these values. `Kahan summation + <https://en.wikipedia.org/wiki/Kahan_summation_algorithm>`__ is used + to compute the rolling sums to preserve accuracy as much as possible. The same holds true for ``Rolling.var()`` for values differing with magnitude :math:`(1/np.finfo(np.double).eps)^{0.5}`.
We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding From 88e05894e5acc8f5497e26675473121b81751e60 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 21:40:34 +0200 Subject: [PATCH 7/8] Replace std with var --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 00fdf0813b027..6668e31188d57 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1908,7 +1908,7 @@ def _get_corr(a, b): window=window, min_periods=self.min_periods, center=self.center ) - return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) + return a.cov(b, **kwargs) / (a.var(**kwargs) * b.var(**kwargs)) ** 0.5 return flex_binary_moment( self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) From b5f911133187e746fac956f7ac8c8bc14b5af985 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Sep 2020 21:42:49 +0200 Subject: [PATCH 8/8] Revert "Replace std with var" This reverts commit 88e05894 --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6668e31188d57..00fdf0813b027 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1908,7 +1908,7 @@ def _get_corr(a, b): window=window, min_periods=self.min_periods, center=self.center ) - return a.cov(b, **kwargs) / (a.var(**kwargs) * b.var(**kwargs)) ** 0.5 + return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) return flex_binary_moment( self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)