Skip to content

ENH: Min_weight for Rolling #12750

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5567,31 +5567,34 @@ def _add_series_or_dataframe_operations(cls):
from pandas.core import window as rwindow

@Appender(rwindow.rolling.__doc__)
def rolling(self, window, min_periods=None, freq=None, center=False,
win_type=None, on=None, axis=0):
def rolling(self, window, min_periods=None, min_weight=None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add to the doc-string min_weight with a versionadded tag (I don't remember exactly where these are defined)

freq=None, center=False, win_type=None, on=None, axis=0):
axis = self._get_axis_number(axis)
return rwindow.rolling(self, window=window,
min_periods=min_periods, freq=freq,
center=center, win_type=win_type,
on=on, axis=axis)
return rwindow.rolling(
self, window=window, min_periods=min_periods,
min_weight=min_weight, freq=freq, center=center,
win_type=win_type, on=on, axis=axis)

cls.rolling = rolling

@Appender(rwindow.expanding.__doc__)
def expanding(self, min_periods=1, freq=None, center=False, axis=0):
def expanding(self, min_periods=1, min_weight=None, freq=None,
center=False, axis=0):
axis = self._get_axis_number(axis)
return rwindow.expanding(self, min_periods=min_periods, freq=freq,
return rwindow.expanding(self, min_periods=min_periods,
min_weight=min_weight, freq=freq,
center=center, axis=axis)

cls.expanding = expanding

@Appender(rwindow.ewm.__doc__)
def ewm(self, com=None, span=None, halflife=None, alpha=None,
min_periods=0, freq=None, adjust=True, ignore_na=False,
axis=0):
min_periods=0, min_weight=None, freq=None, adjust=True,
ignore_na=False, axis=0):
axis = self._get_axis_number(axis)
return rwindow.ewm(self, com=com, span=span, halflife=halflife,
alpha=alpha, min_periods=min_periods, freq=freq,
alpha=alpha, min_periods=min_periods,
min_weight=min_weight, freq=freq,
adjust=adjust, ignore_na=ignore_na, axis=axis)

cls.ewm = ewm
Expand Down
65 changes: 51 additions & 14 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@


class _Window(PandasObject, SelectionMixin):
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
'axis', 'on']
_attributes = ['window', 'min_periods', 'min_weight', 'freq', 'center',
'win_type', 'axis', 'on']
exclusions = set()

def __init__(self, obj, window=None, min_periods=None, freq=None,
center=False, win_type=None, axis=0, on=None, **kwargs):
def __init__(self, obj, window=None, min_periods=None, min_weight=None,
freq=None, center=False, win_type=None, axis=0, on=None,
**kwargs):

if freq is not None:
warnings.warn("The freq kw is deprecated and will be removed in a "
Expand All @@ -71,6 +72,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None,
self.on = on
self.window = window
self.min_periods = min_periods
self.min_weight = min_weight
self.freq = freq
self.center = center
self.win_type = win_type
Expand Down Expand Up @@ -744,7 +746,12 @@ def calc(x):

results.append(result)

return self._wrap_results(results, blocks, obj)
result = self._wrap_results(results, blocks, obj)

if self.min_weight:
result = result.where(_min_weight_mask(self, self.min_weight))

return result


class _Rolling_and_Expanding(_Rolling):
Expand Down Expand Up @@ -1187,6 +1194,9 @@ class Expanding(_Rolling_and_Expanding):
min_periods : int, default None
Minimum number of observations in window required to have a value
(otherwise result is NA).
min_weight : float, default None
Minimum proportion of weight in available values in window required
to have a value (otherwise result is NA).
freq : string or DateOffset object, optional (default None) (DEPRECATED)
Frequency to conform the data to before computing the statistic.
Specified as a frequency string or DateOffset object.
Expand Down Expand Up @@ -1227,12 +1237,13 @@ class Expanding(_Rolling_and_Expanding):
of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
"""

_attributes = ['min_periods', 'freq', 'center', 'axis']
_attributes = ['min_periods', 'min_weight', 'freq', 'center', 'axis']

def __init__(self, obj, min_periods=1, freq=None, center=False, axis=0,
**kwargs):
super(Expanding, self).__init__(obj=obj, min_periods=min_periods,
freq=freq, center=center, axis=axis)
def __init__(self, obj, min_periods=1, min_weight=None, freq=None,
center=False, axis=0, **kwargs):
super(Expanding, self).__init__(
obj=obj, min_periods=min_periods, min_weight=min_weight,
freq=freq, center=center, axis=axis)

@property
def _constructor(self):
Expand Down Expand Up @@ -1473,14 +1484,16 @@ class EWM(_Rolling):
More details can be found at
http://pandas.pydata.org/pandas-docs/stable/computation.html#exponentially-weighted-windows
"""
_attributes = ['com', 'min_periods', 'freq', 'adjust', 'ignore_na', 'axis']
_attributes = ['com', 'min_periods', 'min_weight', 'freq', 'adjust',
'ignore_na', 'axis']

def __init__(self, obj, com=None, span=None, halflife=None, alpha=None,
min_periods=0, freq=None, adjust=True, ignore_na=False,
axis=0):
min_periods=0, min_weight=None, freq=None, adjust=True,
ignore_na=False, axis=0):
self.obj = obj
self.com = _get_center_of_mass(com, span, halflife, alpha)
self.min_periods = min_periods
self.min_weight = min_weight
self.freq = freq
self.adjust = adjust
self.ignore_na = ignore_na
Expand Down Expand Up @@ -1540,7 +1553,12 @@ def func(arg):

results.append(np.apply_along_axis(func, self.axis, values))

return self._wrap_results(results, blocks, obj)
result = self._wrap_results(results, blocks, obj)

if self.min_weight:
result = result.where(_min_weight_mask(self, self.min_weight))

return result

@Substitution(name='ewm')
@Appender(_doc_template)
Expand Down Expand Up @@ -1751,6 +1769,25 @@ def _check_func(minp, window):
return _check_func


def _min_weight_mask(rolling, min_weight):
    """
    Build a boolean mask flagging where a window holds enough valid weight.

    Parameters
    ----------
    rolling : window object
        Its ``obj`` attribute supplies the data, and its ``_shallow_copy``
        method is used to re-apply the same windowing to the validity flags.
    min_weight : float
        Threshold compared against the proportion of weight coming from
        non-null values.

    Returns
    -------
    pandas bool object
        True where the valid proportion is greater than or equal to
        ``min_weight``.
    """
    # True (counted as 1) for every non-null observation, False (0) otherwise
    is_valid = rolling.obj.notnull()

    # Clone the window object so that it runs over the validity flags rather
    # than the data itself. min_periods=0 and min_weight=None are passed so
    # the clone does not apply those thresholds of its own.
    validity_window = rolling._shallow_copy(
        obj=is_valid, min_periods=0, min_weight=None)

    # The windowed mean of the flags is the share of weight contributed by
    # values that _do_ count out of all those that _could_.
    weight_share = validity_window.mean()

    return weight_share >= min_weight


def _use_window(minp, window):
if minp is None:
return window
Expand Down
55 changes: 54 additions & 1 deletion pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pandas.stats.moments as mom
import pandas.core.window as rwindow
import pandas.tseries.offsets as offsets
from pandas.core.window import _min_weight_mask
from pandas.core.base import SpecificationError
from pandas.core.common import UnsupportedFunctionCall
import pandas.util.testing as tm
Expand Down Expand Up @@ -495,7 +496,7 @@ def test_deprecations(self):
# make sure rolling functions works for different dtypes
#
# NOTE that these are yielded tests and so _create_data is
# explicity called, nor do these inherit from unittest.TestCase
# explicitly called, nor do these inherit from unittest.TestCase
#
# further note that we are only checking rolling for fully dtype
# compliance (though both expanding and ewm inherit)
Expand Down Expand Up @@ -1619,6 +1620,58 @@ def _check_ew_structures(self, func, name):
frame_result = getattr(self.frame.ewm(com=10), name)()
self.assertEqual(type(frame_result), DataFrame)

def test_min_weight_mask_series(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make this a separate class (you can inherit from Base)


rolling = Series([pd.np.NaN, -8, 3, 10, pd.np.NaN, 5]).rolling(3)

# 30%
result = _min_weight_mask(rolling, 0.3)
expected = Series([False, True, True, True, True, True])
tm.assert_series_equal(result, expected)

# 60%
result = _min_weight_mask(rolling, 0.6)
expected = Series([False, False, True, True, True, True])
tm.assert_series_equal(result, expected)

# 70%
result = _min_weight_mask(rolling, 0.7)
expected = Series([False, False, False, True, False, False])
tm.assert_series_equal(result, expected)

def test_min_weight_rolling(self):
    """Rolling sum is NaN where less than 60% of the window is valid."""
    nan = pd.np.NaN
    data = Series([nan, -8, 3, 10, nan, 5])

    # Only the first two positions are expected to be masked; every later
    # window of three holds at least two valid observations.
    actual = data.rolling(3, min_periods=1, min_weight=0.6).sum()

    tm.assert_series_equal(actual, Series([nan, nan, -5, 5, 13, 15]))

def test_min_weight_expanding(self):
    """Expanding sum is NaN where under 51% of values so far are valid."""
    nan = pd.np.NaN
    data = Series([nan, -8, 3, nan, 10, 5])

    expanding = data.expanding(min_periods=1, min_weight=0.51)
    actual = expanding.sum()

    tm.assert_series_equal(actual, Series([nan, nan, -5, nan, 5, 10]))

def test_min_weight_ewm(self):
    """EWM mean is masked to NaN deep inside a long run of missing values."""
    from itertools import chain

    nan = pd.np.NaN

    # Nine ascending values, a gap of 80 NaNs, then nine descending values.
    # The explicit step of -1 matters: range(9, 0) is empty, which would
    # drop the trailing values and leave the gap at the end of the series
    # instead of in the middle.
    series = Series(list(chain(range(9), [nan] * 80, range(9, 0, -1))))
    rolling = series.ewm(span=10, min_weight=0.5)

    result = rolling.mean()

    # 0-based positions 24 through 88 all lie inside the gap (which spans
    # positions 9 through 88) and are expected to be masked to NaN
    self.assertTrue(result.iloc[24:89].isnull().all())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

construct the actual result



# create the data only once as we are not setting it
def _create_consistency_data():
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ parentdir_prefix = pandas-

[flake8]
ignore = E731
max-line-length = 79

[yapf]
based_on_style = pep8
Expand Down