Skip to content

Commit 9cf6269

Browse files
ihsansecerjreback
authored andcommitted
ENH: Implement weighted rolling var and std (#27682)
1 parent 947bb8e commit 9cf6269

File tree

7 files changed

+522
-220
lines changed

7 files changed

+522
-220
lines changed

doc/source/reference/window.rst

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ Standard moving window functions
3434
Rolling.quantile
3535
Window.mean
3636
Window.sum
37+
Window.var
38+
Window.std
3739

3840
.. _api.functions_expanding:
3941

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Other enhancements
110110
(depending on the presence of missing data) or object dtype column. (:issue:`28368`)
111111
- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`)
112112
- :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`)
113+
- Implemented :meth:`pandas.core.window.Window.var` and :meth:`pandas.core.window.Window.std` functions (:issue:`26597`)
113114
- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`)
114115
- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
115116
- :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`)

pandas/_libs/window.pyx

+220
Original file line numberDiff line numberDiff line change
@@ -1752,6 +1752,226 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
17521752
return np.asarray(output)
17531753

17541754

1755+
# ----------------------------------------------------------------------
1756+
# Rolling var for weighted window
1757+
1758+
1759+
cdef inline float64_t calc_weighted_var(float64_t t,
1760+
float64_t sum_w,
1761+
Py_ssize_t win_n,
1762+
unsigned int ddof,
1763+
float64_t nobs,
1764+
int64_t minp) nogil:
1765+
"""
1766+
Calculate weighted variance for a window using West's method.
1767+
1768+
Paper: https://dl.acm.org/citation.cfm?id=359153
1769+
1770+
Parameters
1771+
----------
1772+
t: float64_t
1773+
sum of weighted squared differences
1774+
sum_w: float64_t
1775+
sum of weights
1776+
win_n: Py_ssize_t
1777+
window size
1778+
ddof: unsigned int
1779+
delta degrees of freedom
1780+
nobs: float64_t
1781+
number of observations
1782+
minp: int64_t
1783+
minimum number of observations
1784+
1785+
Returns
1786+
-------
1787+
result : float64_t
1788+
weighted variance of the window
1789+
"""
1790+
1791+
cdef:
1792+
float64_t result
1793+
1794+
# Variance is unchanged if no observation is added or removed
1795+
if (nobs >= minp) and (nobs > ddof):
1796+
1797+
# pathological case
1798+
if nobs == 1:
1799+
result = 0
1800+
else:
1801+
result = t * win_n / ((win_n - ddof) * sum_w)
1802+
if result < 0:
1803+
result = 0
1804+
else:
1805+
result = NaN
1806+
1807+
return result
1808+
1809+
1810+
cdef inline void add_weighted_var(float64_t val,
1811+
float64_t w,
1812+
float64_t *t,
1813+
float64_t *sum_w,
1814+
float64_t *mean,
1815+
float64_t *nobs) nogil:
1816+
"""
1817+
Update weighted mean, sum of weights and sum of weighted squared
1818+
differences to include value and weight pair in weighted variance
1819+
calculation using West's method.
1820+
1821+
Paper: https://dl.acm.org/citation.cfm?id=359153
1822+
1823+
Parameters
1824+
----------
1825+
val: float64_t
1826+
window values
1827+
w: float64_t
1828+
window weights
1829+
t: float64_t
1830+
sum of weighted squared differences
1831+
sum_w: float64_t
1832+
sum of weights
1833+
mean: float64_t
1834+
weighted mean
1835+
nobs: float64_t
1836+
number of observations
1837+
"""
1838+
1839+
cdef:
1840+
float64_t temp, q, r
1841+
1842+
if isnan(val):
1843+
return
1844+
1845+
nobs[0] = nobs[0] + 1
1846+
1847+
q = val - mean[0]
1848+
temp = sum_w[0] + w
1849+
r = q * w / temp
1850+
1851+
mean[0] = mean[0] + r
1852+
t[0] = t[0] + r * sum_w[0] * q
1853+
sum_w[0] = temp
1854+
1855+
1856+
cdef inline void remove_weighted_var(float64_t val,
1857+
float64_t w,
1858+
float64_t *t,
1859+
float64_t *sum_w,
1860+
float64_t *mean,
1861+
float64_t *nobs) nogil:
1862+
"""
1863+
Update weighted mean, sum of weights and sum of weighted squared
1864+
differences to remove value and weight pair from weighted variance
1865+
calculation using West's method.
1866+
1867+
Paper: https://dl.acm.org/citation.cfm?id=359153
1868+
1869+
Parameters
1870+
----------
1871+
val: float64_t
1872+
window values
1873+
w: float64_t
1874+
window weights
1875+
t: float64_t
1876+
sum of weighted squared differences
1877+
sum_w: float64_t
1878+
sum of weights
1879+
mean: float64_t
1880+
weighted mean
1881+
nobs: float64_t
1882+
number of observations
1883+
"""
1884+
1885+
cdef:
1886+
float64_t temp, q, r
1887+
1888+
if notnan(val):
1889+
nobs[0] = nobs[0] - 1
1890+
1891+
if nobs[0]:
1892+
q = val - mean[0]
1893+
temp = sum_w[0] - w
1894+
r = q * w / temp
1895+
1896+
mean[0] = mean[0] - r
1897+
t[0] = t[0] - r * sum_w[0] * q
1898+
sum_w[0] = temp
1899+
1900+
else:
1901+
t[0] = 0
1902+
sum_w[0] = 0
1903+
mean[0] = 0
1904+
1905+
1906+
def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
1907+
int64_t minp, unsigned int ddof):
1908+
"""
1909+
Calculates weighted rolling variance using West's online algorithm.
1910+
1911+
Paper: https://dl.acm.org/citation.cfm?id=359153
1912+
1913+
Parameters
1914+
----------
1915+
values: float64_t[:]
1916+
values to roll window over
1917+
weights: float64_t[:]
1918+
array of weights whose lenght is window size
1919+
minp: int64_t
1920+
minimum number of observations to calculate
1921+
variance of a window
1922+
ddof: unsigned int
1923+
the divisor used in variance calculations
1924+
is the window size - ddof
1925+
1926+
Returns
1927+
-------
1928+
output: float64_t[:]
1929+
weighted variances of windows
1930+
"""
1931+
1932+
cdef:
1933+
float64_t t = 0, sum_w = 0, mean = 0, nobs = 0
1934+
float64_t val, pre_val, w, pre_w
1935+
Py_ssize_t i, n, win_n
1936+
float64_t[:] output
1937+
1938+
n = len(values)
1939+
win_n = len(weights)
1940+
output = np.empty(n, dtype=float)
1941+
1942+
with nogil:
1943+
1944+
for i in range(win_n):
1945+
add_weighted_var(values[i], weights[i], &t,
1946+
&sum_w, &mean, &nobs)
1947+
1948+
output[i] = calc_weighted_var(t, sum_w, win_n,
1949+
ddof, nobs, minp)
1950+
1951+
for i in range(win_n, n):
1952+
val = values[i]
1953+
pre_val = values[i - win_n]
1954+
1955+
w = weights[i % win_n]
1956+
pre_w = weights[(i - win_n) % win_n]
1957+
1958+
if notnan(val):
1959+
if pre_val == pre_val:
1960+
remove_weighted_var(pre_val, pre_w, &t,
1961+
&sum_w, &mean, &nobs)
1962+
1963+
add_weighted_var(val, w, &t, &sum_w, &mean, &nobs)
1964+
1965+
elif pre_val == pre_val:
1966+
remove_weighted_var(pre_val, pre_w, &t,
1967+
&sum_w, &mean, &nobs)
1968+
1969+
output[i] = calc_weighted_var(t, sum_w, win_n,
1970+
ddof, nobs, minp)
1971+
1972+
return output
1973+
1974+
17551975
# ----------------------------------------------------------------------
17561976
# Exponentially weighted moving average
17571977

pandas/core/window/expanding.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -181,13 +181,13 @@ def mean(self, *args, **kwargs):
181181
def median(self, **kwargs):
182182
return super().median(**kwargs)
183183

184-
@Substitution(name="expanding")
184+
@Substitution(name="expanding", versionadded="")
185185
@Appender(_shared_docs["std"])
186186
def std(self, ddof=1, *args, **kwargs):
187187
nv.validate_expanding_func("std", args, kwargs)
188188
return super().std(ddof=ddof, **kwargs)
189189

190-
@Substitution(name="expanding")
190+
@Substitution(name="expanding", versionadded="")
191191
@Appender(_shared_docs["var"])
192192
def var(self, ddof=1, *args, **kwargs):
193193
nv.validate_expanding_func("var", args, kwargs)

0 commit comments

Comments
 (0)