Skip to content

Commit 0ca3efc

Browse files
committed
initial implementation of min_weight
1 parent 4e4a7d9 commit 0ca3efc

File tree

3 files changed

+111
-28
lines changed

3 files changed

+111
-28
lines changed

pandas/core/generic.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -5175,30 +5175,34 @@ def _add_series_or_dataframe_operations(cls):
51755175
from pandas.core import window as rwindow
51765176

51775177
@Appender(rwindow.rolling.__doc__)
5178-
def rolling(self, window, min_periods=None, freq=None, center=False,
5179-
win_type=None, axis=0):
5178+
def rolling(self, window, min_periods=None, min_weight=None,
5179+
freq=None, center=False, win_type=None, axis=0):
51805180
axis = self._get_axis_number(axis)
5181-
return rwindow.rolling(self, window=window,
5182-
min_periods=min_periods, freq=freq,
5183-
center=center, win_type=win_type, axis=axis)
5181+
return rwindow.rolling(
5182+
self, window=window, min_periods=min_periods,
5183+
min_weight=min_weight, freq=freq, center=center,
5184+
win_type=win_type, axis=axis)
51845185

51855186
cls.rolling = rolling
51865187

51875188
@Appender(rwindow.expanding.__doc__)
5188-
def expanding(self, min_periods=1, freq=None, center=False, axis=0):
5189+
def expanding(self, min_periods=1, min_weight=None, freq=None,
5190+
center=False, axis=0):
51895191
axis = self._get_axis_number(axis)
5190-
return rwindow.expanding(self, min_periods=min_periods, freq=freq,
5192+
return rwindow.expanding(self, min_periods=min_periods,
5193+
min_weight=min_weight, freq=freq,
51915194
center=center, axis=axis)
51925195

51935196
cls.expanding = expanding
51945197

51955198
@Appender(rwindow.ewm.__doc__)
51965199
def ewm(self, com=None, span=None, halflife=None, alpha=None,
5197-
min_periods=0, freq=None, adjust=True, ignore_na=False,
5198-
axis=0):
5200+
min_periods=0, min_weight=None, freq=None, adjust=True,
5201+
ignore_na=False, axis=0):
51995202
axis = self._get_axis_number(axis)
52005203
return rwindow.ewm(self, com=com, span=span, halflife=halflife,
5201-
alpha=alpha, min_periods=min_periods, freq=freq,
5204+
alpha=alpha, min_periods=min_periods,
5205+
min_weight=min_weight, freq=freq,
52025206
adjust=adjust, ignore_na=ignore_na, axis=axis)
52035207

52045208
cls.ewm = ewm

pandas/core/window.py

+43-17
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@
3636

3737

3838
class _Window(PandasObject, SelectionMixin):
39-
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
40-
'axis']
39+
_attributes = ['window', 'min_periods', 'min_weight', 'freq', 'center',
40+
'win_type', 'axis']
4141
exclusions = set()
4242

43-
def __init__(self, obj, window=None, min_periods=None, freq=None,
44-
center=False, win_type=None, axis=0, **kwargs):
43+
def __init__(self, obj, window=None, min_periods=None, min_weight=None,
44+
freq=None, center=False, win_type=None, axis=0, **kwargs):
4545

4646
if freq is not None:
4747
warnings.warn("The freq kw is deprecated and will be removed in a "
@@ -52,6 +52,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None,
5252
self.obj = obj
5353
self.window = window
5454
self.min_periods = min_periods
55+
self.min_weight = min_weight
5556
self.freq = freq
5657
self.center = center
5758
self.win_type = win_type
@@ -564,7 +565,12 @@ def calc(x):
564565

565566
results.append(result)
566567

567-
return self._wrap_results(results, blocks, obj)
568+
result = self._wrap_results(results, blocks, obj)
569+
570+
if self.min_weight:
571+
result = result.where(_min_weight_mask(self, self.min_weight))
572+
573+
return result
568574

569575

570576
class _Rolling_and_Expanding(_Rolling):
@@ -804,7 +810,7 @@ def _get_corr(a, b):
804810

805811
class Rolling(_Rolling_and_Expanding):
806812
"""
807-
Provides rolling window calculcations.
813+
Provides rolling window calculations.
808814
809815
.. versionadded:: 0.18.0
810816
@@ -981,14 +987,14 @@ class Expanding(_Rolling_and_Expanding):
981987
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
982988
"""
983989

984-
_attributes = ['min_periods', 'freq', 'center', 'axis']
990+
_attributes = ['min_periods', 'min_weight', 'freq', 'center', 'axis']
985991

986-
def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0,
987-
**kwargs):
988-
return super(Expanding, self).__init__(obj=obj,
989-
min_periods=min_periods,
990-
freq=freq, center=center,
991-
axis=axis)
992+
def __init__(self, obj, min_periods=1, min_weight=None, freq=None,
993+
center=False, axis=0, **kwargs):
994+
super(Expanding, self).__init__(
995+
obj=obj, min_periods=min_periods,
996+
min_weight=min_weight, freq=freq, center=center,
997+
axis=axis)
992998

993999
@property
9941000
def _constructor(self):
@@ -1204,14 +1210,16 @@ class EWM(_Rolling):
12041210
More details can be found at
12051211
http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
12061212
"""
1207-
_attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis']
1213+
_attributes = ['com', 'min_periods', 'min_weight', 'freq', 'adjust',
1214+
'ignore_na', 'axis']
12081215

12091216
def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
1210-
min_periods=0, freq=None, adjust=True, ignore_na=False,
1211-
axis=0):
1217+
min_periods=0, min_weight=None, freq=None, adjust=True,
1218+
ignore_na=False, axis=0):
12121219
self.obj = obj
12131220
self.com = _get_center_of_mass(com, span, halflife, alpha)
12141221
self.min_periods = min_periods
1222+
self.min_weight = min_weight
12151223
self.freq = freq
12161224
self.adjust = adjust
12171225
self.ignore_na = ignore_na
@@ -1271,7 +1279,12 @@ def func(arg):
12711279

12721280
results.append(np.apply_along_axis(func, self.axis, values))
12731281

1274-
return self._wrap_results(results, blocks, obj)
1282+
result = self._wrap_results(results, blocks, obj)
1283+
1284+
if self.min_weight:
1285+
result = result.where(_min_weight_mask(self, self.min_weight))
1286+
1287+
return result
12751288

12761289
@Substitution(name='ewm')
12771290
@Appender(_doc_template)
@@ -1477,6 +1490,19 @@ def _check_func(minp, window):
14771490
return _check_func
14781491

14791492

1493+
def _min_weight_mask(rolling, min_weight):
1494+
data = rolling.obj
1495+
valid_data = data.notnull()
1496+
# fill the object with 1s
1497+
potential_valid = rolling.obj.copy()
1498+
potential_valid[:] = 1
1499+
1500+
valid_proportion = rolling._shallow_copy(obj=valid_data, min_periods=0,
1501+
min_weight=None).mean()
1502+
1503+
return valid_proportion >= min_weight
1504+
1505+
14801506
def _use_window(minp, window):
14811507
if minp is None:
14821508
return window

pandas/tests/test_window.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import pandas.core.datetools as datetools
2020
import pandas.stats.moments as mom
2121
import pandas.core.window as rwindow
22+
from pandas.core.window import _min_weight_mask
2223
from pandas.core.base import SpecificationError
2324
import pandas.util.testing as tm
2425
from pandas.compat import range, zip, PY3
@@ -365,7 +366,7 @@ def test_deprecations(self):
365366
# make sure rolling functions works for different dtypes
366367
#
367368
# NOTE that these are yielded tests and so _create_data is
368-
# explicity called, nor do these inherit from unittest.TestCase
369+
# explicitly called, nor do these inherit from unittest.TestCase
369370
#
370371
# further note that we are only checking rolling for fully dtype
371372
# compliance (though both expanding and ewm inherit)
@@ -1482,6 +1483,58 @@ def _check_ew_structures(self, func, name):
14821483
frame_result = getattr(self.frame.ewm(com=10), name)()
14831484
self.assertEqual(type(frame_result), DataFrame)
14841485

1486+
def test_min_weight_mask_series(self):
    # The mask is True wherever the share of non-null points in the
    # 3-wide window is at least the requested threshold.
    rolling = Series([pd.np.NaN, -8, 3, 10, pd.np.NaN, 5]).rolling(3)

    # (threshold, expected mask) pairs: 0.3, 0.6 and 0.7
    cases = [
        (0.3, [False, True, True, True, True, True]),
        (0.6, [False, False, True, True, True, True]),
        (0.7, [False, False, False, True, False, False]),
    ]

    for min_weight, mask in cases:
        result = _min_weight_mask(rolling, min_weight)
        expected = Series(mask)
        assert_series_equal(result, expected)
1504+
1505+
def test_min_weight_rolling(self):
    # Rolling sums are kept only where the window meets min_weight=0.6;
    # the remaining positions are expected to be NaN.
    nan = pd.np.NaN
    data = Series([nan, -8, 3, 10, nan, 5])
    expected = Series([nan, nan, -5, 5, 13, 15])

    result = data.rolling(3, min_periods=1, min_weight=0.6).sum()

    assert_series_equal(result, expected)
1514+
1515+
def test_min_weight_expanding(self):
    # Expanding sums are kept only where the window meets
    # min_weight=0.51; the remaining positions are expected to be NaN.
    nan = pd.np.NaN
    data = Series([nan, -8, 3, nan, 10, 5])
    expected = Series([nan, nan, -5, nan, 5, 10])

    expanding = data.expanding(min_periods=1, min_weight=0.51)
    result = expanding.sum()

    assert_series_equal(result, expected)
1524+
1525+
def test_min_weight_ewm(self):
    # An exponentially weighted mean over a long run of missing data
    # should be masked once min_weight is no longer met.
    from itertools import chain

    # create a series with a big gap in the middle: 9 values, 80 NaNs,
    # then 9 more values. The tail needs an explicit negative step;
    # range(9, 0) is empty and would leave no data after the gap.
    series = Series(list(chain(range(9), [pd.np.NaN] * 80,
                               range(9, 0, -1))))
    rolling = series.ewm(span=10, min_weight=0.5)

    result = rolling.mean()

    # positions 24 through 88 all lie inside the NaN gap (positions 9-88),
    # so every one of them must be NaN
    self.assertTrue(result.iloc[24:89].isnull().all())
1537+
14851538

14861539
# create the data only once as we are not setting it
14871540
def _create_consistency_data():

0 commit comments

Comments
 (0)