Skip to content

Commit 3c42153

Browse files
committed
initial implementation of min_weight
1 parent 4c3d4d4 commit 3c42153

File tree

4 files changed

+120
-26
lines changed

4 files changed

+120
-26
lines changed

pandas/core/generic.py

+14-11
Original file line numberDiff line numberDiff line change
@@ -5567,31 +5567,34 @@ def _add_series_or_dataframe_operations(cls):
55675567
from pandas.core import window as rwindow
55685568

55695569
@Appender(rwindow.rolling.__doc__)
5570-
def rolling(self, window, min_periods=None, freq=None, center=False,
5571-
win_type=None, on=None, axis=0):
5570+
def rolling(self, window, min_periods=None, min_weight=None,
5571+
freq=None, center=False, win_type=None, on=None, axis=0):
55725572
axis = self._get_axis_number(axis)
5573-
return rwindow.rolling(self, window=window,
5574-
min_periods=min_periods, freq=freq,
5575-
center=center, win_type=win_type,
5576-
on=on, axis=axis)
5573+
return rwindow.rolling(
5574+
self, window=window, min_periods=min_periods,
5575+
min_weight=min_weight, freq=freq, center=center,
5576+
win_type=win_type, on=on, axis=axis)
55775577

55785578
cls.rolling = rolling
55795579

55805580
@Appender(rwindow.expanding.__doc__)
5581-
def expanding(self, min_periods=1, freq=None, center=False, axis=0):
5581+
def expanding(self, min_periods=1, min_weight=None, freq=None,
5582+
center=False, axis=0):
55825583
axis = self._get_axis_number(axis)
5583-
return rwindow.expanding(self, min_periods=min_periods, freq=freq,
5584+
return rwindow.expanding(self, min_periods=min_periods,
5585+
min_weight=min_weight, freq=freq,
55845586
center=center, axis=axis)
55855587

55865588
cls.expanding = expanding
55875589

55885590
@Appender(rwindow.ewm.__doc__)
55895591
def ewm(self, com=None, span=None, halflife=None, alpha=None,
5590-
min_periods=0, freq=None, adjust=True, ignore_na=False,
5591-
axis=0):
5592+
min_periods=0, min_weight=None, freq=None, adjust=True,
5593+
ignore_na=False, axis=0):
55925594
axis = self._get_axis_number(axis)
55935595
return rwindow.ewm(self, com=com, span=span, halflife=halflife,
5594-
alpha=alpha, min_periods=min_periods, freq=freq,
5596+
alpha=alpha, min_periods=min_periods,
5597+
min_weight=min_weight, freq=freq,
55955598
adjust=adjust, ignore_na=ignore_na, axis=axis)
55965599

55975600
cls.ewm = ewm

pandas/core/window.py

+51-14
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,13 @@
5353

5454

5555
class _Window(PandasObject, SelectionMixin):
56-
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
57-
'axis', 'on']
56+
_attributes = ['window', 'min_periods', 'min_weight', 'freq', 'center',
57+
'win_type', 'axis', 'on']
5858
exclusions = set()
5959

60-
def __init__(self, obj, window=None, min_periods=None, freq=None,
61-
center=False, win_type=None, axis=0, on=None, **kwargs):
60+
def __init__(self, obj, window=None, min_periods=None, min_weight=None,
61+
freq=None, center=False, win_type=None, axis=0, on=None,
62+
**kwargs):
6263

6364
if freq is not None:
6465
warnings.warn("The freq kw is deprecated and will be removed in a "
@@ -71,6 +72,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None,
7172
self.on = on
7273
self.window = window
7374
self.min_periods = min_periods
75+
self.min_weight = min_weight
7476
self.freq = freq
7577
self.center = center
7678
self.win_type = win_type
@@ -744,7 +746,12 @@ def calc(x):
744746

745747
results.append(result)
746748

747-
return self._wrap_results(results, blocks, obj)
749+
result = self._wrap_results(results, blocks, obj)
750+
751+
if self.min_weight:
752+
result = result.where(_min_weight_mask(self, self.min_weight))
753+
754+
return result
748755

749756

750757
class _Rolling_and_Expanding(_Rolling):
@@ -1187,6 +1194,9 @@ class Expanding(_Rolling_and_Expanding):
11871194
min_periods : int, default None
11881195
Minimum number of observations in window required to have a value
11891196
(otherwise result is NA).
1197+
min_weight : float, default None
1198+
Minimum proportion of weight in available values in window required
1199+
to have a value (otherwise result is NA).
11901200
freq : string or DateOffset object, optional (default None) (DEPRECATED)
11911201
Frequency to conform the data to before computing the statistic.
11921202
Specified as a frequency string or DateOffset object.
@@ -1227,12 +1237,13 @@ class Expanding(_Rolling_and_Expanding):
12271237
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
12281238
"""
12291239

1230-
_attributes = ['min_periods', 'freq', 'center', 'axis']
1240+
_attributes = ['min_periods', 'min_weight', 'freq', 'center', 'axis']
12311241

1232-
def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0,
1233-
**kwargs):
1234-
super(Expanding, self).__init__(obj=obj, min_periods=min_periods,
1235-
freq=freq, center=center, axis=axis)
1242+
def __init__(self, obj, min_periods=1, min_weight=None, freq=None,
1243+
center=False, axis=0, **kwargs):
1244+
super(Expanding, self).__init__(
1245+
obj=obj, min_periods=min_periods, min_weight=min_weight,
1246+
freq=freq, center=center, axis=axis)
12361247

12371248
@property
12381249
def _constructor(self):
@@ -1473,14 +1484,16 @@ class EWM(_Rolling):
14731484
More details can be found at
14741485
http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
14751486
"""
1476-
_attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis']
1487+
_attributes = ['com', 'min_periods', 'min_weight', 'freq', 'adjust',
1488+
'ignore_na', 'axis']
14771489

14781490
def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
1479-
min_periods=0, freq=None, adjust=True, ignore_na=False,
1480-
axis=0):
1491+
min_periods=0, min_weight=None, freq=None, adjust=True,
1492+
ignore_na=False, axis=0):
14811493
self.obj = obj
14821494
self.com = _get_center_of_mass(com, span, halflife, alpha)
14831495
self.min_periods = min_periods
1496+
self.min_weight = min_weight
14841497
self.freq = freq
14851498
self.adjust = adjust
14861499
self.ignore_na = ignore_na
@@ -1540,7 +1553,12 @@ def func(arg):
15401553

15411554
results.append(np.apply_along_axis(func, self.axis, values))
15421555

1543-
return self._wrap_results(results, blocks, obj)
1556+
result = self._wrap_results(results, blocks, obj)
1557+
1558+
if self.min_weight:
1559+
result = result.where(_min_weight_mask(self, self.min_weight))
1560+
1561+
return result
15441562

15451563
@Substitution(name='ewm')
15461564
@Appender(_doc_template)
@@ -1751,6 +1769,25 @@ def _check_func(minp, window):
17511769
return _check_func
17521770

17531771

1772+
def _min_weight_mask(rolling, min_weight):
    """
    Build a boolean mask that is True where a window holds enough weight.

    Parameters
    ----------
    rolling : window object
        Must expose ``obj`` (the underlying data) and ``_shallow_copy``;
        the copy must provide ``mean()``.
    min_weight : float
        Minimum proportion, between 0 and 1 inclusive, of the window's
        weight that has to come from non-null values.

    Returns
    -------
    Boolean pandas object
        Result of comparing the windowed mean of the validity flags
        against ``min_weight``; True where the proportion of weight
        carried by non-null values is at least ``min_weight``.

    Raises
    ------
    ValueError
        If ``min_weight`` is not within [0, 1] (this includes NaN). Such a
        threshold would otherwise silently mask everything or nothing.
    """
    if not 0 <= min_weight <= 1:
        raise ValueError("min_weight must be between 0 and 1, got "
                         "{0!r}".format(min_weight))

    # every non-null observation is flagged True (counted as 1 by the mean)
    valid_data = rolling.obj.notnull()

    # Re-apply the same window to the validity flags instead of the data.
    # min_periods=0 keeps a value at every position, and min_weight=None
    # stops the copy from masking itself again. The mean is then the share
    # of weight from values that _do_ contribute out of those that _could_.
    valid_proportion = rolling._shallow_copy(
        obj=valid_data, min_periods=0, min_weight=None).mean()

    return valid_proportion >= min_weight
1789+
1790+
17541791
def _use_window(minp, window):
17551792
if minp is None:
17561793
return window

pandas/tests/test_window.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pandas.stats.moments as mom
1616
import pandas.core.window as rwindow
1717
import pandas.tseries.offsets as offsets
18+
from pandas.core.window import _min_weight_mask
1819
from pandas.core.base import SpecificationError
1920
from pandas.core.common import UnsupportedFunctionCall
2021
import pandas.util.testing as tm
@@ -495,7 +496,7 @@ def test_deprecations(self):
495496
# make sure rolling functions works for different dtypes
496497
#
497498
# NOTE that these are yielded tests and so _create_data is
498-
# explicity called, nor do these inherit from unittest.TestCase
499+
# explicitly called, nor do these inherit from unittest.TestCase
499500
#
500501
# further note that we are only checking rolling for fully dtype
501502
# compliance (though both expanding and ewm inherit)
@@ -1619,6 +1620,58 @@ def _check_ew_structures(self, func, name):
16191620
frame_result = getattr(self.frame.ewm(com=10), name)()
16201621
self.assertEqual(type(frame_result), DataFrame)
16211622

1623+
def test_min_weight_mask_series(self):
    """Check _min_weight_mask on a 3-wide rolling window at three thresholds."""
    window = Series([pd.np.NaN, -8, 3, 10, pd.np.NaN, 5]).rolling(3)

    # (min_weight threshold, expected mask) pairs
    cases = [
        # 30%
        (0.3, [False, True, True, True, True, True]),
        # 60%
        (0.6, [False, False, True, True, True, True]),
        # 70%
        (0.7, [False, False, False, True, False, False]),
    ]

    for threshold, flags in cases:
        result = _min_weight_mask(window, threshold)
        tm.assert_series_equal(result, Series(flags))
1641+
1642+
def test_min_weight_rolling(self):
    """Rolling sum with min_weight=0.6 yields NaN at the first two positions."""
    values = [pd.np.NaN, -8, 3, 10, pd.np.NaN, 5]
    expected = Series([pd.np.NaN, pd.np.NaN, -5, 5, 13, 15])

    # min_periods=1 alone would already give a value at position 1;
    # the expected NaN there comes from the min_weight threshold
    result = Series(values).rolling(3, min_periods=1, min_weight=0.6).sum()

    tm.assert_series_equal(result, expected)
1651+
1652+
def test_min_weight_expanding(self):
    """Expanding sum with min_weight=0.51 yields NaN at positions 0, 1 and 3."""
    values = [pd.np.NaN, -8, 3, pd.np.NaN, 10, 5]
    expected = Series([pd.np.NaN, pd.np.NaN, -5, pd.np.NaN, 5, 10])

    # 0.51 sits just above one half, so a window that is exactly
    # half non-null is expected to be NaN
    result = Series(values).expanding(min_periods=1, min_weight=0.51).sum()

    tm.assert_series_equal(result, expected)
1661+
1662+
def test_min_weight_ewm(self):
    """EWM mean with min_weight=0.5 must be NaN deep inside a long gap."""
    from itertools import chain

    # Nine rising values, an 80-point gap, then nine falling values.
    # range(9, 0) would be empty, so the descending run needs step -1;
    # without it the gap sits at the end of the series, not the middle.
    series = Series(list(chain(range(9), [pd.np.NaN] * 80,
                               range(9, 0, -1))))
    ewm = series.ewm(span=10, min_weight=0.5)

    result = ewm.mean()

    # positions 24..88 (the 25th through 89th points) all lie inside
    # the gap and must be NaN
    self.assertTrue(result.iloc[24:89].isnull().all())
1674+
16221675

16231676
# create the data only once as we are not setting it
16241677
def _create_consistency_data():

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ parentdir_prefix = pandas-
1313

1414
[flake8]
1515
ignore = E731
16+
max-line-length = 79
1617

1718
[yapf]
1819
based_on_style = pep8

0 commit comments

Comments
 (0)