Skip to content

Commit 1d2a591

Browse files
committed
initial implementation of min_weight
1 parent 837db72 commit 1d2a591

File tree

3 files changed

+117
-25
lines changed

3 files changed

+117
-25
lines changed

pandas/core/generic.py

+14-11
Original file line numberDiff line numberDiff line change
@@ -5488,31 +5488,34 @@ def _add_series_or_dataframe_operations(cls):
54885488
from pandas.core import window as rwindow
54895489

54905490
@Appender(rwindow.rolling.__doc__)
5491-
def rolling(self, window, min_periods=None, freq=None, center=False,
5492-
win_type=None, on=None, axis=0):
5491+
def rolling(self, window, min_periods=None, min_weight=None,
5492+
freq=None, center=False, win_type=None, on=None, axis=0):
54935493
axis = self._get_axis_number(axis)
5494-
return rwindow.rolling(self, window=window,
5495-
min_periods=min_periods, freq=freq,
5496-
center=center, win_type=win_type,
5497-
on=on, axis=axis)
5494+
return rwindow.rolling(
5495+
self, window=window, min_periods=min_periods,
5496+
min_weight=min_weight, freq=freq, center=center,
5497+
win_type=win_type, on=on, axis=axis)
54985498

54995499
cls.rolling = rolling
55005500

55015501
@Appender(rwindow.expanding.__doc__)
5502-
def expanding(self, min_periods=1, freq=None, center=False, axis=0):
5502+
def expanding(self, min_periods=1, min_weight=None, freq=None,
5503+
center=False, axis=0):
55035504
axis = self._get_axis_number(axis)
5504-
return rwindow.expanding(self, min_periods=min_periods, freq=freq,
5505+
return rwindow.expanding(self, min_periods=min_periods,
5506+
min_weight=min_weight, freq=freq,
55055507
center=center, axis=axis)
55065508

55075509
cls.expanding = expanding
55085510

55095511
@Appender(rwindow.ewm.__doc__)
55105512
def ewm(self, com=None, span=None, halflife=None, alpha=None,
5511-
min_periods=0, freq=None, adjust=True, ignore_na=False,
5512-
axis=0):
5513+
min_periods=0, min_weight=None, freq=None, adjust=True,
5514+
ignore_na=False, axis=0):
55135515
axis = self._get_axis_number(axis)
55145516
return rwindow.ewm(self, com=com, span=span, halflife=halflife,
5515-
alpha=alpha, min_periods=min_periods, freq=freq,
5517+
alpha=alpha, min_periods=min_periods,
5518+
min_weight=min_weight, freq=freq,
55165519
adjust=adjust, ignore_na=ignore_na, axis=axis)
55175520

55185521
cls.ewm = ewm

pandas/core/window.py

+49-13
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,11 @@
5353

5454

5555
class _Window(PandasObject, SelectionMixin):
56-
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
57-
'axis', 'on']
56+
_attributes = ['window', 'min_periods', 'min_weight', 'freq', 'center',
57+
'win_type', 'axis', 'on']
5858
exclusions = set()
5959

60-
def __init__(self, obj, window=None, min_periods=None, freq=None,
60+
def __init__(self, obj, window=None, min_periods=None, min_weight=None, freq=None,
6161
center=False, win_type=None, axis=0, on=None, **kwargs):
6262

6363
if freq is not None:
@@ -71,6 +71,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None,
7171
self.on = on
7272
self.window = window
7373
self.min_periods = min_periods
74+
self.min_weight = min_weight
7475
self.freq = freq
7576
self.center = center
7677
self.win_type = win_type
@@ -744,7 +745,12 @@ def calc(x):
744745

745746
results.append(result)
746747

747-
return self._wrap_results(results, blocks, obj)
748+
result = self._wrap_results(results, blocks, obj)
749+
750+
if self.min_weight:
751+
result = result.where(_min_weight_mask(self, self.min_weight))
752+
753+
return result
748754

749755

750756
class _Rolling_and_Expanding(_Rolling):
@@ -1187,6 +1193,9 @@ class Expanding(_Rolling_and_Expanding):
11871193
min_periods : int, default None
11881194
Minimum number of observations in window required to have a value
11891195
(otherwise result is NA).
1196+
min_weight : float, default None
1197+
Minimum proportion of weight in available values in window required
1198+
to have a value (otherwise result is NA).
11901199
freq : string or DateOffset object, optional (default None) (DEPRECATED)
11911200
Frequency to conform the data to before computing the statistic.
11921201
Specified as a frequency string or DateOffset object.
@@ -1227,12 +1236,13 @@ class Expanding(_Rolling_and_Expanding):
12271236
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
12281237
"""
12291238

1230-
_attributes = ['min_periods', 'freq', 'center', 'axis']
1239+
_attributes = ['min_periods', 'min_weight', 'freq', 'center', 'axis']
12311240

1232-
def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0,
1233-
**kwargs):
1234-
super(Expanding, self).__init__(obj=obj, min_periods=min_periods,
1235-
freq=freq, center=center, axis=axis)
1241+
def __init__(self, obj, min_periods=1, min_weight=None, freq=None,
1242+
center=False, axis=0, **kwargs):
1243+
super(Expanding, self).__init__(
1244+
obj=obj, min_periods=min_periods, min_weight=min_weight,
1245+
freq=freq, center=center, axis=axis)
12361246

12371247
@property
12381248
def _constructor(self):
@@ -1473,14 +1483,16 @@ class EWM(_Rolling):
14731483
More details can be found at
14741484
http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
14751485
"""
1476-
_attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis']
1486+
_attributes = ['com', 'min_periods', 'min_weight', 'freq', 'adjust',
1487+
'ignore_na', 'axis']
14771488

14781489
def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
1479-
min_periods=0, freq=None, adjust=True, ignore_na=False,
1480-
axis=0):
1490+
min_periods=0, min_weight=None, freq=None, adjust=True,
1491+
ignore_na=False, axis=0):
14811492
self.obj = obj
14821493
self.com = _get_center_of_mass(com, span, halflife, alpha)
14831494
self.min_periods = min_periods
1495+
self.min_weight = min_weight
14841496
self.freq = freq
14851497
self.adjust = adjust
14861498
self.ignore_na = ignore_na
@@ -1540,7 +1552,12 @@ def func(arg):
15401552

15411553
results.append(np.apply_along_axis(func, self.axis, values))
15421554

1543-
return self._wrap_results(results, blocks, obj)
1555+
result = self._wrap_results(results, blocks, obj)
1556+
1557+
if self.min_weight:
1558+
result = result.where(_min_weight_mask(self, self.min_weight))
1559+
1560+
return result
15441561

15451562
@Substitution(name='ewm')
15461563
@Appender(_doc_template)
@@ -1751,6 +1768,25 @@ def _check_func(minp, window):
17511768
return _check_func
17521769

17531770

1771+
def _min_weight_mask(rolling, min_weight):
    """
    Build a boolean mask that is True wherever a window holds enough
    valid (non-null) weight.

    Parameters
    ----------
    rolling : window object
        Its ``obj`` attribute supplies the data, and its ``_shallow_copy``
        method is used to re-run the same window over a validity indicator.
    min_weight : float
        Threshold compared against the per-window mean of the validity
        indicator.

    Returns
    -------
    pandas bool object
        True where the mean of the validity indicator is at least
        ``min_weight``.
    """
    # True for every non-null observation, False for every missing one
    is_valid = rolling.obj.notnull()

    # Re-apply the same window to the indicator instead of the data.
    # min_periods=0 and min_weight=None are passed so the copy carries
    # neither of those settings over from the original window.
    indicator_window = rolling._shallow_copy(
        obj=is_valid, min_periods=0, min_weight=None)

    # The mean of the indicator is the proportion of weight coming from
    # values that _do_ contribute out of those that _could_
    weight_share = indicator_window.mean()

    return weight_share >= min_weight
1788+
1789+
17541790
def _use_window(minp, window):
17551791
if minp is None:
17561792
return window

pandas/tests/test_window.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pandas.stats.moments as mom
1616
import pandas.core.window as rwindow
1717
import pandas.tseries.offsets as offsets
18+
from pandas.core.window import _min_weight_mask
1819
from pandas.core.base import SpecificationError
1920
from pandas.core.common import UnsupportedFunctionCall
2021
import pandas.util.testing as tm
@@ -495,7 +496,7 @@ def test_deprecations(self):
495496
# make sure rolling functions works for different dtypes
496497
#
497498
# NOTE that these are yielded tests and so _create_data is
498-
# explicity called, nor do these inherit from unittest.TestCase
499+
# explicitly called, nor do these inherit from unittest.TestCase
499500
#
500501
# further note that we are only checking rolling for fully dtype
501502
# compliance (though both expanding and ewm inherit)
@@ -1619,6 +1620,58 @@ def _check_ew_structures(self, func, name):
16191620
frame_result = getattr(self.frame.ewm(com=10), name)()
16201621
self.assertEqual(type(frame_result), DataFrame)
16211622

1623+
def test_min_weight_mask_series(self):
    """Check the mask returned by _min_weight_mask at three thresholds."""
    values = [pd.np.NaN, -8, 3, 10, pd.np.NaN, 5]
    window = Series(values).rolling(3)

    # (threshold, expected mask) pairs, checked in order: 30%, 60%, 70%
    cases = [
        (0.3, [False, True, True, True, True, True]),
        (0.6, [False, False, True, True, True, True]),
        (0.7, [False, False, False, True, False, False]),
    ]

    for threshold, flags in cases:
        mask = _min_weight_mask(window, threshold)
        tm.assert_series_equal(mask, Series(flags))
1641+
1642+
def test_min_weight_rolling(self):
    """Rolling sum over a window of 3 with min_periods=1, min_weight=0.6."""
    data = Series([pd.np.NaN, -8, 3, 10, pd.np.NaN, 5])
    # the first two positions are expected to be NaN, the rest are sums
    wanted = Series([pd.np.NaN, pd.np.NaN, -5, 5, 13, 15])

    summed = data.rolling(3, min_periods=1, min_weight=0.6).sum()

    tm.assert_series_equal(summed, wanted)
1651+
1652+
def test_min_weight_expanding(self):
    """Expanding sum with min_periods=1 and min_weight=0.51."""
    data = Series([pd.np.NaN, -8, 3, pd.np.NaN, 10, 5])
    # positions 0, 1 and 3 are expected to be NaN, the rest are sums
    wanted = Series([pd.np.NaN, pd.np.NaN, -5, pd.np.NaN, 5, 10])

    summed = data.expanding(min_periods=1, min_weight=0.51).sum()

    tm.assert_series_equal(summed, wanted)
1661+
1662+
def test_min_weight_ewm(self):
    """
    An ewm mean with min_weight=0.5 should be NaN deep inside a long
    run of missing values.
    """
    from itertools import chain

    # Build a series with a big gap in the middle: 9 leading values,
    # 80 NaN, then 9 trailing values. The trailing part must count down
    # with an explicit step of -1; range(9, 0) is empty and would leave
    # the series with no values after the gap.
    series = Series(list(chain(range(9), [pd.np.NaN] * 80,
                               range(9, 0, -1))))
    ewm = series.ewm(span=10, min_weight=0.5)

    result = ewm.mean()

    # positions 24 through 88 (0-based) all fall inside the NaN gap,
    # which spans positions 9 through 88
    self.assertTrue(result.iloc[24:89].isnull().all())
1674+
16221675

16231676
# create the data only once as we are not setting it
16241677
def _create_consistency_data():

0 commit comments

Comments
 (0)