pandas-dev · ihsansecer · Sep 15, 2023 · Sep 15, 2023 · Sep 15, 2023 · Sep 15, 2023
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -222,6 +222,7 @@ Bug fixes
 - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`)
 - Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)
 - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
+- Bug in :meth:`pandas.core.window.Window.var` and :meth:`pandas.core.window.Window.std` where implementation of the algorithm was incorrect (:issue:`53273` and :issue:`54333`)
 
 Categorical
 ^^^^^^^^^^^

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -1558,7 +1558,7 @@ cdef float64_t calc_weighted_var(float64_t t,
         if nobs == 1:
             result = 0
         else:
-            result = t * win_n / ((win_n - ddof) * sum_w)
+            result = t * nobs / ((nobs - ddof) * sum_w)
             if result < 0:
                 result = 0
     else:
@@ -1599,68 +1599,21 @@ cdef void add_weighted_var(float64_t val,
     cdef:
         float64_t temp, q, r
 
-    if val != val:
-        return
-
     nobs[0] = nobs[0] + 1
 
-    q = val - mean[0]
-    temp = sum_w[0] + w
-    r = q * w / temp
-
-    mean[0] = mean[0] + r
-    t[0] = t[0] + r * sum_w[0] * q
-    sum_w[0] = temp
-
-
-cdef void remove_weighted_var(float64_t val,
-                              float64_t w,
-                              float64_t *t,
-                              float64_t *sum_w,
-                              float64_t *mean,
-                              float64_t *nobs) noexcept nogil:
-    """
-    Update weighted mean, sum of weights and sum of weighted squared
-    differences to remove value and weight pair from weighted variance
-    calculation using West's method.
-
-    Paper: https://dl.acm.org/citation.cfm?id=359153
-
-    Parameters
-    ----------
-    val: float64_t
-        window values
-    w: float64_t
-        window weights
-    t: float64_t
-        sum of weighted squared differences
-    sum_w: float64_t
-        sum of weights
-    mean: float64_t
-        weighted mean
-    nobs: float64_t
-        number of observations
-    """
-
-    cdef:
-        float64_t temp, q, r
-
-    if val == val:
-        nobs[0] = nobs[0] - 1
-
-        if nobs[0]:
-            q = val - mean[0]
-            temp = sum_w[0] - w
-            r = q * w / temp
+    if nobs[0] == 1:
+        sum_w[0] = w
+        mean[0] = val
+        t[0] = 0
 
-            mean[0] = mean[0] - r
-            t[0] = t[0] - r * sum_w[0] * q
-            sum_w[0] = temp
+    else:
+        q = val - mean[0]
+        temp = sum_w[0] + w
+        r = q * w / temp
 
-        else:
-            t[0] = 0
-            sum_w[0] = 0
-            mean[0] = 0
+        mean[0] = mean[0] + r
+        t[0] = t[0] + r * sum_w[0] * q
+        sum_w[0] = temp
 
 
 def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
@@ -1690,44 +1643,26 @@ def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
     """
 
     cdef:
-        float64_t t = 0, sum_w = 0, mean = 0, nobs = 0
-        float64_t val, pre_val, w, pre_w
-        Py_ssize_t i, n, win_n
+        float64_t t, sum_w, mean, nobs
+        float64_t val, w
+        Py_ssize_t in_i, win_i, n, win_n
         float64_t[:] output
 
     n = len(values)
     win_n = len(weights)
-    output = np.empty(n, dtype=np.float64)
-
-    with nogil:
-
-        for i in range(min(win_n, n)):
-            add_weighted_var(values[i], weights[i], &t,
-                             &sum_w, &mean, &nobs)
 
-            output[i] = calc_weighted_var(t, sum_w, win_n,
-                                          ddof, nobs, minp)
-
-        for i in range(win_n, n):
-            val = values[i]
-            pre_val = values[i - win_n]
+    output = np.empty(n, dtype=np.float64)
 
-            w = weights[i % win_n]
-            pre_w = weights[(i - win_n) % win_n]
-
-            if val == val:
-                if pre_val == pre_val:
-                    remove_weighted_var(pre_val, pre_w, &t,
-                                        &sum_w, &mean, &nobs)
+    for in_i in range(n):
+        nobs = 0
+        for win_i in range(max(0, win_n - in_i - 1), win_n):
+            w = weights[win_i]
+            val = values[win_i - win_n + in_i + 1]
 
+            if val == val and w == w:
                 add_weighted_var(val, w, &t, &sum_w, &mean, &nobs)
 
-            elif pre_val == pre_val:
-                remove_weighted_var(pre_val, pre_w, &t,
-                                    &sum_w, &mean, &nobs)
-
-            output[i] = calc_weighted_var(t, sum_w, win_n,
-                                          ddof, nobs, minp)
+        output[in_i] = calc_weighted_var(t, sum_w, win_n, ddof, nobs, minp)
 
     return output
 

diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py
@@ -661,6 +661,23 @@ def test_cmov_window_special_linear_range(win_types_special, step):
     tm.assert_series_equal(xp, rs)
 
 
+@pytest.mark.parametrize("size", [10, 100, 1000])
+@pytest.mark.parametrize("scale", [0, 0.1, 0.01, 0.001, 0.0001])
+def test_weighted_std_through_mean(win_types, step, scale, size):
+    # GH 53273
+    pytest.importorskip("scipy")
+    window = 5
+    s = Series(np.random.default_rng(0).normal(loc=1, scale=scale, size=size))
+
+    squared_mean = (s**2).rolling(window, win_type=win_types).mean()
+    mean = s.rolling(window, win_type=win_types).mean()
+    expected = (squared_mean - mean**2).pow(0.5)
+
+    result = s.rolling(window, win_type=win_types).std(ddof=0)
+
+    tm.assert_series_equal(expected, result)
+
+
 def test_weighted_var_big_window_no_segfault(win_types, center):
     # GitHub Issue #46772
     pytest.importorskip("scipy")