diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 3678168890444..8d11340ecaaf5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5567,31 +5567,34 @@ def _add_series_or_dataframe_operations(cls):
         from pandas.core import window as rwindow
 
         @Appender(rwindow.rolling.__doc__)
-        def rolling(self, window, min_periods=None, freq=None, center=False,
-                    win_type=None, on=None, axis=0):
+        def rolling(self, window, min_periods=None, min_weight=None,
+                    freq=None, center=False, win_type=None, on=None, axis=0):
             axis = self._get_axis_number(axis)
-            return rwindow.rolling(self, window=window,
-                                   min_periods=min_periods, freq=freq,
-                                   center=center, win_type=win_type,
-                                   on=on, axis=axis)
+            return rwindow.rolling(
+                self, window=window, min_periods=min_periods,
+                min_weight=min_weight, freq=freq, center=center,
+                win_type=win_type, on=on, axis=axis)
 
         cls.rolling = rolling
 
         @Appender(rwindow.expanding.__doc__)
-        def expanding(self, min_periods=1, freq=None, center=False, axis=0):
+        def expanding(self, min_periods=1, min_weight=None, freq=None,
+                      center=False, axis=0):
             axis = self._get_axis_number(axis)
-            return rwindow.expanding(self, min_periods=min_periods, freq=freq,
+            return rwindow.expanding(self, min_periods=min_periods,
+                                     min_weight=min_weight, freq=freq,
                                      center=center, axis=axis)
 
         cls.expanding = expanding
 
         @Appender(rwindow.ewm.__doc__)
         def ewm(self, com=None, span=None, halflife=None, alpha=None,
-                min_periods=0, freq=None, adjust=True, ignore_na=False,
-                axis=0):
+                min_periods=0, min_weight=None, freq=None, adjust=True,
+                ignore_na=False, axis=0):
             axis = self._get_axis_number(axis)
             return rwindow.ewm(self, com=com, span=span, halflife=halflife,
-                               alpha=alpha, min_periods=min_periods, freq=freq,
+                               alpha=alpha, min_periods=min_periods,
+                               min_weight=min_weight, freq=freq,
                                adjust=adjust, ignore_na=ignore_na, axis=axis)
 
         cls.ewm = ewm
diff --git a/pandas/core/window.py b/pandas/core/window.py
index b7276aed506de..e472caf81c8dd 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -53,12 +53,13 @@
 
 
 class _Window(PandasObject, SelectionMixin):
-    _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
-                   'axis', 'on']
+    _attributes = ['window', 'min_periods', 'min_weight', 'freq', 'center',
+                   'win_type', 'axis', 'on']
     exclusions = set()
 
-    def __init__(self, obj, window=None, min_periods=None, freq=None,
-                 center=False, win_type=None, axis=0, on=None, **kwargs):
+    def __init__(self, obj, window=None, min_periods=None, min_weight=None,
+                 freq=None, center=False, win_type=None, axis=0, on=None,
+                 **kwargs):
 
         if freq is not None:
             warnings.warn("The freq kw is deprecated and will be removed in a "
@@ -71,6 +72,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None,
         self.on = on
         self.window = window
         self.min_periods = min_periods
+        self.min_weight = min_weight
         self.freq = freq
         self.center = center
         self.win_type = win_type
@@ -744,7 +746,12 @@ def calc(x):
 
             results.append(result)
 
-        return self._wrap_results(results, blocks, obj)
+        result = self._wrap_results(results, blocks, obj)
+
+        if self.min_weight:
+            result = result.where(_min_weight_mask(self, self.min_weight))
+
+        return result
 
 
 class _Rolling_and_Expanding(_Rolling):
@@ -1187,6 +1194,9 @@ class Expanding(_Rolling_and_Expanding):
     min_periods : int, default None
         Minimum number of observations in window required to have a value
         (otherwise result is NA).
+    min_weight : float, default None
+        Minimum proportion of weight in available values in window required
+        to have a value (otherwise result is NA).
     freq : string or DateOffset object, optional (default None) (DEPRECATED)
         Frequency to conform the data to before computing the statistic.
         Specified as a frequency string or DateOffset object.
@@ -1227,12 +1237,13 @@ class Expanding(_Rolling_and_Expanding):
     of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
     """
 
-    _attributes = ['min_periods', 'freq', 'center', 'axis']
+    _attributes = ['min_periods', 'min_weight', 'freq', 'center', 'axis']
 
-    def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0,
-                 **kwargs):
-        super(Expanding, self).__init__(obj=obj, min_periods=min_periods,
-                                        freq=freq, center=center, axis=axis)
+    def __init__(self, obj, min_periods=1, min_weight=None, freq=None,
+                 center=False, axis=0, **kwargs):
+        super(Expanding, self).__init__(
+            obj=obj, min_periods=min_periods, min_weight=min_weight,
+            freq=freq, center=center, axis=axis)
 
     @property
     def _constructor(self):
@@ -1473,14 +1484,16 @@ class EWM(_Rolling):
     More details can be found at
     http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
     """
-    _attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis']
+    _attributes = ['com', 'min_periods', 'min_weight', 'freq', 'adjust',
+                   'ignore_na', 'axis']
 
     def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
-                 min_periods=0, freq=None, adjust=True, ignore_na=False,
-                 axis=0):
+                 min_periods=0, min_weight=None, freq=None, adjust=True,
+                 ignore_na=False, axis=0):
         self.obj = obj
         self.com = _get_center_of_mass(com, span, halflife, alpha)
         self.min_periods = min_periods
+        self.min_weight = min_weight
         self.freq = freq
         self.adjust = adjust
         self.ignore_na = ignore_na
@@ -1540,7 +1553,12 @@ def func(arg):
 
             results.append(np.apply_along_axis(func, self.axis, values))
 
-        return self._wrap_results(results, blocks, obj)
+        result = self._wrap_results(results, blocks, obj)
+
+        if self.min_weight:
+            result = result.where(_min_weight_mask(self, self.min_weight))
+
+        return result
 
     @Substitution(name='ewm')
     @Appender(_doc_template)
@@ -1751,6 +1769,25 @@ def _check_func(minp, window):
     return _check_func
 
 
+def _min_weight_mask(rolling, min_weight):
+    """
+    Takes a rolling object and a min_weight proportion, and returns
+    a pandas bool object with True where enough weight exists
+    """
+
+    data = rolling.obj
+    # all valid values have a value of 1 in valid_data
+    valid_data = data.notnull()
+
+    # This copies the rolling object, replacing obj with valid_data
+    # The resulting values are the proportion of weight from values that _do_
+    # contribute out of those that _could_
+    valid_proportion = rolling._shallow_copy(
+        obj=valid_data, min_periods=0, min_weight=None).mean()
+
+    return valid_proportion >= min_weight
+
+
 def _use_window(minp, window):
     if minp is None:
         return window
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 929ff43bfaaad..752c97d5c5178 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -15,6 +15,7 @@
 import pandas.stats.moments as mom
 import pandas.core.window as rwindow
 import pandas.tseries.offsets as offsets
+from pandas.core.window import _min_weight_mask
 from pandas.core.base import SpecificationError
 from pandas.core.common import UnsupportedFunctionCall
 import pandas.util.testing as tm
@@ -495,7 +496,7 @@ def test_deprecations(self):
 # make sure rolling functions works for different dtypes
 #
 # NOTE that these are yielded tests and so _create_data is
-# explicity called, nor do these inherit from unittest.TestCase
+# explicitly called, nor do these inherit from unittest.TestCase
 #
 # further note that we are only checking rolling for fully dtype
 # compliance (though both expanding and ewm inherit)
@@ -1619,6 +1620,59 @@ def _check_ew_structures(self, func, name):
         frame_result = getattr(self.frame.ewm(com=10), name)()
         self.assertEqual(type(frame_result), DataFrame)
 
+    def test_min_weight_mask_series(self):
+
+        rolling = Series([pd.np.NaN, -8, 3, 10, pd.np.NaN, 5]).rolling(3)
+
+        # 30%
+        result = _min_weight_mask(rolling, 0.3)
+        expected = Series([False, True, True, True, True, True])
+        tm.assert_series_equal(result, expected)
+
+        # 60%
+        result = _min_weight_mask(rolling, 0.6)
+        expected = Series([False, False, True, True, True, True])
+        tm.assert_series_equal(result, expected)
+
+        # 70%
+        result = _min_weight_mask(rolling, 0.7)
+        expected = Series([False, False, False, True, False, False])
+        tm.assert_series_equal(result, expected)
+
+    def test_min_weight_rolling(self):
+
+        series = Series([pd.np.NaN, -8, 3, 10, pd.np.NaN, 5])
+        rolling = series.rolling(3, min_periods=1, min_weight=0.6)
+
+        result = rolling.sum()
+        expected = Series([pd.np.NaN, pd.np.NaN, -5, 5, 13, 15])
+
+        tm.assert_series_equal(result, expected)
+
+    def test_min_weight_expanding(self):
+
+        series = Series([pd.np.NaN, -8, 3, pd.np.NaN, 10, 5])
+        rolling = series.expanding(min_periods=1, min_weight=0.51)
+
+        result = rolling.sum()
+        expected = Series([pd.np.NaN, pd.np.NaN, -5, pd.np.NaN, 5, 10])
+
+        tm.assert_series_equal(result, expected)
+
+    def test_min_weight_ewm(self):
+
+        from itertools import chain
+
+        # create a series with a big gap in the middle
+        series = Series(list(chain(range(9), [pd.np.NaN] * 80,
+                                   range(9, 0, -1))))
+        rolling = series.ewm(span=10, min_weight=0.5)
+
+        result = rolling.mean()
+
+        # check that all points between 25 and 90 are NaN
+        self.assertTrue(result.iloc[24:89].isnull().all())
+
 
 # create the data only once as we are not setting it
 def _create_consistency_data():
diff --git a/setup.cfg b/setup.cfg
index f69e256b80869..f08e4b1590c15 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,6 +13,7 @@ parentdir_prefix = pandas-
 
 [flake8]
 ignore = E731
+max-line-length = 79
 
 [yapf]
 based_on_style = pep8