Skip to content

BUG: rolling_count() and expanding_*() with zero-length args; rolling/expanding_apply with min_periods=0 #8059

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 28, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ API changes
:func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, and :func:`rolling_quantile`,
:func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`,
:func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same
structure as the input ``arg`` with ``NaN``s in the final ``(window-1)/2`` entries.
structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries.
Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed
by ``(window-1)/2`` ``NaN``s. (:issue:`7925`)
by ``(window-1)/2`` ``NaN`` values. (:issue:`7925`)

Prior behavior (note final value is ``NaN``):

Expand Down Expand Up @@ -556,8 +556,8 @@ Bug Fixes
returning results with columns sorted by name and producing an error for non-unique columns;
now handles non-unique columns and returns columns in original order
(except for the case of two DataFrames with ``pairwise=False``, where behavior is unchanged) (:issue:`7542`)


- Bug in :func:`rolling_count` and ``expanding_*`` functions unnecessarily producing an error message for zero-length data (:issue:`8056`)
- Bug in :func:`rolling_apply` and :func:`expanding_apply` interpreting ``min_periods=0`` as ``min_periods=1`` (:issue:`8080`)
- Bug in ``DataFrame.plot`` and ``Series.plot`` may ignore ``rot`` and ``fontsize`` keywords (:issue:`7844`)


Expand Down
8 changes: 3 additions & 5 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -712,17 +712,15 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
#
# -

def _check_minp(win, minp, N):
def _check_minp(win, minp, N, floor=1):
if minp > win:
raise ValueError('min_periods (%d) must be <= window (%d)'
% (minp, win))
elif minp > N:
minp = N + 1
elif minp == 0:
minp = 1
elif minp < 0:
raise ValueError('min_periods must be >= 0')
return minp
return max(minp, floor)

# original C implementation by N. Devillard.
# This code in public domain.
Expand Down Expand Up @@ -1766,7 +1764,7 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
if n == 0:
return input

minp = _check_minp(win, minp, n)
minp = _check_minp(win, minp, n, floor=0)
output = np.empty(n, dtype=float)
counts = roll_sum(np.isfinite(input).astype(float), win, minp)

Expand Down
49 changes: 30 additions & 19 deletions pandas/stats/moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def rolling_count(arg, window, freq=None, center=False, how=None):
return_hook, values = _process_data_structure(arg, kill_inf=False)

converted = np.isfinite(values).astype(float)
result = rolling_sum(converted, window, min_periods=1,
result = rolling_sum(converted, window, min_periods=0,
center=center) # already converted

# putmask here?
Expand Down Expand Up @@ -280,7 +280,8 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False):
elif isinstance(arg1, DataFrame):
def dataframe_from_int_dict(data, frame_template):
result = DataFrame(data, index=frame_template.index)
result.columns = frame_template.columns[result.columns]
if len(result.columns) > 0:
result.columns = frame_template.columns[result.columns]
return result

results = {}
Expand Down Expand Up @@ -314,8 +315,10 @@ def dataframe_from_int_dict(data, frame_template):
else:
results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]))
p = Panel.from_dict(results).swapaxes('items', 'major')
p.major_axis = arg1.columns[p.major_axis]
p.minor_axis = arg2.columns[p.minor_axis]
if len(p.major_axis) > 0:
p.major_axis = arg1.columns[p.major_axis]
if len(p.minor_axis) > 0:
p.minor_axis = arg2.columns[p.minor_axis]
return p
else:
raise ValueError("'pairwise' is not True/False")
Expand Down Expand Up @@ -372,17 +375,22 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
y : type of input
"""
arg = _conv_timerule(arg, freq, how)
offset = int((window - 1) / 2.) if center else 0
additional_nans = np.array([np.NaN] * offset)
calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
window, minp=minp, args=args, kwargs=kwargs,
**kwds)

return_hook, values = _process_data_structure(arg)
# actually calculate the moment. Faster way to do this?
if values.ndim > 1:
result = np.apply_along_axis(calc, axis, values)

if values.size == 0:
result = values.copy()
else:
result = calc(values)
# actually calculate the moment. Faster way to do this?
offset = int((window - 1) / 2.) if center else 0
additional_nans = np.array([np.NaN] * offset)
calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
window, minp=minp, args=args, kwargs=kwargs,
**kwds)
if values.ndim > 1:
result = np.apply_along_axis(calc, axis, values)
else:
result = calc(values)

if center:
result = _center_window(result, window, axis)
Expand Down Expand Up @@ -817,11 +825,14 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
arg = _conv_timerule(arg, freq, how)
return_hook, values = _process_data_structure(arg)

offset = int((len(window) - 1) / 2.) if center else 0
additional_nans = np.array([np.NaN] * offset)
f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x,
window, minp, avg=mean)
result = np.apply_along_axis(f, axis, values)
if values.size == 0:
result = values.copy()
else:
offset = int((len(window) - 1) / 2.) if center else 0
additional_nans = np.array([np.NaN] * offset)
f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x,
window, minp, avg=mean)
result = np.apply_along_axis(f, axis, values)

if center:
result = _center_window(result, len(window), axis)
Expand Down Expand Up @@ -856,7 +867,7 @@ def _expanding_func(func, desc, check_minp=_use_window):
@Appender(_doc_template)
@wraps(func)
def f(arg, min_periods=1, freq=None, **kwargs):
window = len(arg)
window = max(len(arg), min_periods) if min_periods else len(arg)

def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
minp = check_minp(minp, window)
Expand Down
83 changes: 83 additions & 0 deletions pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,12 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False):
center=center)
self._check_moment_func(roll_mean, np.mean)

# GH 8080
s = Series([None, None, None])
result = mom.rolling_apply(s, 2, lambda x: len(x), min_periods=0)
expected = Series([1., 2., 2.])
assert_series_equal(result, expected)

def test_rolling_apply_out_of_bounds(self):
# #1850
arr = np.arange(4)
Expand Down Expand Up @@ -814,6 +820,12 @@ def expanding_mean(x, min_periods=1, freq=None):
freq=freq)
self._check_expanding(expanding_mean, np.mean)

# GH 8080
s = Series([None, None, None])
result = mom.expanding_apply(s, lambda x: len(x), min_periods=0)
expected = Series([1., 2., 3.])
assert_series_equal(result, expected)

def test_expanding_apply_args_kwargs(self):
def mean_w_arg(x, const):
return np.mean(x) + const
Expand Down Expand Up @@ -989,6 +1001,77 @@ def test_rolling_functions_window_non_shrinkage(self):
df_result_panel = f(df)
assert_panel_equal(df_result_panel, df_expected_panel)

def test_moment_functions_zero_length(self):
# GH 8056
s = Series()
s_expected = s
df1 = DataFrame()
df1_expected = df1
df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, minor_axis=df1.columns)
df2 = DataFrame(columns=['a'])
df2_expected = df2
df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns)

functions = [lambda x: mom.expanding_count(x),
lambda x: mom.expanding_cov(x, x, pairwise=False, min_periods=5),
lambda x: mom.expanding_corr(x, x, pairwise=False, min_periods=5),
lambda x: mom.expanding_max(x, min_periods=5),
lambda x: mom.expanding_min(x, min_periods=5),
lambda x: mom.expanding_sum(x, min_periods=5),
lambda x: mom.expanding_mean(x, min_periods=5),
lambda x: mom.expanding_std(x, min_periods=5),
lambda x: mom.expanding_var(x, min_periods=5),
lambda x: mom.expanding_skew(x, min_periods=5),
lambda x: mom.expanding_kurt(x, min_periods=5),
lambda x: mom.expanding_quantile(x, quantile=0.5, min_periods=5),
lambda x: mom.expanding_median(x, min_periods=5),
lambda x: mom.expanding_apply(x, func=sum, min_periods=5),
lambda x: mom.rolling_count(x, window=10),
lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5),
lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5),
lambda x: mom.rolling_max(x, window=10, min_periods=5),
lambda x: mom.rolling_min(x, window=10, min_periods=5),
lambda x: mom.rolling_sum(x, window=10, min_periods=5),
lambda x: mom.rolling_mean(x, window=10, min_periods=5),
lambda x: mom.rolling_std(x, window=10, min_periods=5),
lambda x: mom.rolling_var(x, window=10, min_periods=5),
lambda x: mom.rolling_skew(x, window=10, min_periods=5),
lambda x: mom.rolling_kurt(x, window=10, min_periods=5),
lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5),
lambda x: mom.rolling_median(x, window=10, min_periods=5),
lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5),
lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5),
]
for f in functions:
try:
s_result = f(s)
assert_series_equal(s_result, s_expected)

df1_result = f(df1)
assert_frame_equal(df1_result, df1_expected)

df2_result = f(df2)
assert_frame_equal(df2_result, df2_expected)
except (ImportError):

# scipy needed for rolling_window
continue

functions = [lambda x: mom.expanding_cov(x, x, pairwise=True, min_periods=5),
lambda x: mom.expanding_corr(x, x, pairwise=True, min_periods=5),
lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5),
lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5),
# rolling_corr_pairwise is deprecated, so the following line should be deleted
# when rolling_corr_pairwise is removed.
lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5),
]
for f in functions:
df1_result_panel = f(df1)
assert_panel_equal(df1_result_panel, df1_expected_panel)

df2_result_panel = f(df2)
assert_panel_equal(df2_result_panel, df2_expected_panel)

def test_expanding_cov_pairwise_diff_length(self):
# GH 7512
df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])
Expand Down