Skip to content

Commit b70928a

Browse files
committed
Merge pull request #8059 from seth-p/rolling_count
BUG: rolling_count() and expanding_*() with zero-length args; rolling/expanding_apply with min_periods=0
2 parents a4a4fef + 9e2b335 commit b70928a

File tree

4 files changed

+120
-28
lines changed

4 files changed

+120
-28
lines changed

doc/source/v0.15.0.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ API changes
104104
:func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, and :func:`rolling_quantile`,
105105
:func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`,
106106
:func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same
107-
structure as the input ``arg`` with ``NaN``s in the final ``(window-1)/2`` entries.
107+
structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries.
108108
Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed
109-
by ``(window-1)/2`` ``NaN``s. (:issue:`7925`)
109+
by ``(window-1)/2`` ``NaN`` values. (:issue:`7925`)
110110

111111
Prior behavior (note final value is ``NaN``):
112112

@@ -564,8 +564,8 @@ Bug Fixes
564564
returning results with columns sorted by name and producing an error for non-unique columns;
565565
now handles non-unique columns and returns columns in original order
566566
(except for the case of two DataFrames with ``pairwise=False``, where behavior is unchanged) (:issue:`7542`)
567-
568-
567+
- Bug in :func:`rolling_count` and ``expanding_*`` functions unnecessarily producing an error message for zero-length data (:issue:`8056`)
568+
- Bug in :func:`rolling_apply` and :func:`expanding_apply` interpreting ``min_periods=0`` as ``min_periods=1`` (:issue:`8080`)
569569
- Bug in ``DataFrame.plot`` and ``Series.plot`` may ignore ``rot`` and ``fontsize`` keywords (:issue:`7844`)
570570

571571

pandas/algos.pyx

+3-5
Original file line numberDiff line numberDiff line change
@@ -712,17 +712,15 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
712712
#
713713
# -
714714

715-
def _check_minp(win, minp, N):
715+
def _check_minp(win, minp, N, floor=1):
716716
if minp > win:
717717
raise ValueError('min_periods (%d) must be <= window (%d)'
718718
% (minp, win))
719719
elif minp > N:
720720
minp = N + 1
721-
elif minp == 0:
722-
minp = 1
723721
elif minp < 0:
724722
raise ValueError('min_periods must be >= 0')
725-
return minp
723+
return max(minp, floor)
726724

727725
# original C implementation by N. Devillard.
728726
# This code in public domain.
@@ -1766,7 +1764,7 @@ def roll_generic(ndarray[float64_t, cast=True] input, int win,
17661764
if n == 0:
17671765
return input
17681766

1769-
minp = _check_minp(win, minp, n)
1767+
minp = _check_minp(win, minp, n, floor=0)
17701768
output = np.empty(n, dtype=float)
17711769
counts = roll_sum(np.isfinite(input).astype(float), win, minp)
17721770

pandas/stats/moments.py

+30-19
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ def rolling_count(arg, window, freq=None, center=False, how=None):
206206
return_hook, values = _process_data_structure(arg, kill_inf=False)
207207

208208
converted = np.isfinite(values).astype(float)
209-
result = rolling_sum(converted, window, min_periods=1,
209+
result = rolling_sum(converted, window, min_periods=0,
210210
center=center) # already converted
211211

212212
# putmask here?
@@ -280,7 +280,8 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False):
280280
elif isinstance(arg1, DataFrame):
281281
def dataframe_from_int_dict(data, frame_template):
282282
result = DataFrame(data, index=frame_template.index)
283-
result.columns = frame_template.columns[result.columns]
283+
if len(result.columns) > 0:
284+
result.columns = frame_template.columns[result.columns]
284285
return result
285286

286287
results = {}
@@ -314,8 +315,10 @@ def dataframe_from_int_dict(data, frame_template):
314315
else:
315316
results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]))
316317
p = Panel.from_dict(results).swapaxes('items', 'major')
317-
p.major_axis = arg1.columns[p.major_axis]
318-
p.minor_axis = arg2.columns[p.minor_axis]
318+
if len(p.major_axis) > 0:
319+
p.major_axis = arg1.columns[p.major_axis]
320+
if len(p.minor_axis) > 0:
321+
p.minor_axis = arg2.columns[p.minor_axis]
319322
return p
320323
else:
321324
raise ValueError("'pairwise' is not True/False")
@@ -372,17 +375,22 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
372375
y : type of input
373376
"""
374377
arg = _conv_timerule(arg, freq, how)
375-
offset = int((window - 1) / 2.) if center else 0
376-
additional_nans = np.array([np.NaN] * offset)
377-
calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
378-
window, minp=minp, args=args, kwargs=kwargs,
379-
**kwds)
378+
380379
return_hook, values = _process_data_structure(arg)
381-
# actually calculate the moment. Faster way to do this?
382-
if values.ndim > 1:
383-
result = np.apply_along_axis(calc, axis, values)
380+
381+
if values.size == 0:
382+
result = values.copy()
384383
else:
385-
result = calc(values)
384+
# actually calculate the moment. Faster way to do this?
385+
offset = int((window - 1) / 2.) if center else 0
386+
additional_nans = np.array([np.NaN] * offset)
387+
calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
388+
window, minp=minp, args=args, kwargs=kwargs,
389+
**kwds)
390+
if values.ndim > 1:
391+
result = np.apply_along_axis(calc, axis, values)
392+
else:
393+
result = calc(values)
386394

387395
if center:
388396
result = _center_window(result, window, axis)
@@ -817,11 +825,14 @@ def rolling_window(arg, window=None, win_type=None, min_periods=None,
817825
arg = _conv_timerule(arg, freq, how)
818826
return_hook, values = _process_data_structure(arg)
819827

820-
offset = int((len(window) - 1) / 2.) if center else 0
821-
additional_nans = np.array([np.NaN] * offset)
822-
f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x,
823-
window, minp, avg=mean)
824-
result = np.apply_along_axis(f, axis, values)
828+
if values.size == 0:
829+
result = values.copy()
830+
else:
831+
offset = int((len(window) - 1) / 2.) if center else 0
832+
additional_nans = np.array([np.NaN] * offset)
833+
f = lambda x: algos.roll_window(np.concatenate((x, additional_nans)) if center else x,
834+
window, minp, avg=mean)
835+
result = np.apply_along_axis(f, axis, values)
825836

826837
if center:
827838
result = _center_window(result, len(window), axis)
@@ -856,7 +867,7 @@ def _expanding_func(func, desc, check_minp=_use_window):
856867
@Appender(_doc_template)
857868
@wraps(func)
858869
def f(arg, min_periods=1, freq=None, **kwargs):
859-
window = len(arg)
870+
window = max(len(arg), min_periods) if min_periods else len(arg)
860871

861872
def call_cython(arg, window, minp, args=(), kwargs={}, **kwds):
862873
minp = check_minp(minp, window)

pandas/stats/tests/test_moments.py

+83
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,12 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False):
244244
center=center)
245245
self._check_moment_func(roll_mean, np.mean)
246246

247+
# GH 8080
248+
s = Series([None, None, None])
249+
result = mom.rolling_apply(s, 2, lambda x: len(x), min_periods=0)
250+
expected = Series([1., 2., 2.])
251+
assert_series_equal(result, expected)
252+
247253
def test_rolling_apply_out_of_bounds(self):
248254
# #1850
249255
arr = np.arange(4)
@@ -814,6 +820,12 @@ def expanding_mean(x, min_periods=1, freq=None):
814820
freq=freq)
815821
self._check_expanding(expanding_mean, np.mean)
816822

823+
# GH 8080
824+
s = Series([None, None, None])
825+
result = mom.expanding_apply(s, lambda x: len(x), min_periods=0)
826+
expected = Series([1., 2., 3.])
827+
assert_series_equal(result, expected)
828+
817829
def test_expanding_apply_args_kwargs(self):
818830
def mean_w_arg(x, const):
819831
return np.mean(x) + const
@@ -989,6 +1001,77 @@ def test_rolling_functions_window_non_shrinkage(self):
9891001
df_result_panel = f(df)
9901002
assert_panel_equal(df_result_panel, df_expected_panel)
9911003

1004+
def test_moment_functions_zero_length(self):
1005+
# GH 8056
1006+
s = Series()
1007+
s_expected = s
1008+
df1 = DataFrame()
1009+
df1_expected = df1
1010+
df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, minor_axis=df1.columns)
1011+
df2 = DataFrame(columns=['a'])
1012+
df2_expected = df2
1013+
df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns)
1014+
1015+
functions = [lambda x: mom.expanding_count(x),
1016+
lambda x: mom.expanding_cov(x, x, pairwise=False, min_periods=5),
1017+
lambda x: mom.expanding_corr(x, x, pairwise=False, min_periods=5),
1018+
lambda x: mom.expanding_max(x, min_periods=5),
1019+
lambda x: mom.expanding_min(x, min_periods=5),
1020+
lambda x: mom.expanding_sum(x, min_periods=5),
1021+
lambda x: mom.expanding_mean(x, min_periods=5),
1022+
lambda x: mom.expanding_std(x, min_periods=5),
1023+
lambda x: mom.expanding_var(x, min_periods=5),
1024+
lambda x: mom.expanding_skew(x, min_periods=5),
1025+
lambda x: mom.expanding_kurt(x, min_periods=5),
1026+
lambda x: mom.expanding_quantile(x, quantile=0.5, min_periods=5),
1027+
lambda x: mom.expanding_median(x, min_periods=5),
1028+
lambda x: mom.expanding_apply(x, func=sum, min_periods=5),
1029+
lambda x: mom.rolling_count(x, window=10),
1030+
lambda x: mom.rolling_cov(x, x, pairwise=False, window=10, min_periods=5),
1031+
lambda x: mom.rolling_corr(x, x, pairwise=False, window=10, min_periods=5),
1032+
lambda x: mom.rolling_max(x, window=10, min_periods=5),
1033+
lambda x: mom.rolling_min(x, window=10, min_periods=5),
1034+
lambda x: mom.rolling_sum(x, window=10, min_periods=5),
1035+
lambda x: mom.rolling_mean(x, window=10, min_periods=5),
1036+
lambda x: mom.rolling_std(x, window=10, min_periods=5),
1037+
lambda x: mom.rolling_var(x, window=10, min_periods=5),
1038+
lambda x: mom.rolling_skew(x, window=10, min_periods=5),
1039+
lambda x: mom.rolling_kurt(x, window=10, min_periods=5),
1040+
lambda x: mom.rolling_quantile(x, quantile=0.5, window=10, min_periods=5),
1041+
lambda x: mom.rolling_median(x, window=10, min_periods=5),
1042+
lambda x: mom.rolling_apply(x, func=sum, window=10, min_periods=5),
1043+
lambda x: mom.rolling_window(x, win_type='boxcar', window=10, min_periods=5),
1044+
]
1045+
for f in functions:
1046+
try:
1047+
s_result = f(s)
1048+
assert_series_equal(s_result, s_expected)
1049+
1050+
df1_result = f(df1)
1051+
assert_frame_equal(df1_result, df1_expected)
1052+
1053+
df2_result = f(df2)
1054+
assert_frame_equal(df2_result, df2_expected)
1055+
except (ImportError):
1056+
1057+
# scipy needed for rolling_window
1058+
continue
1059+
1060+
functions = [lambda x: mom.expanding_cov(x, x, pairwise=True, min_periods=5),
1061+
lambda x: mom.expanding_corr(x, x, pairwise=True, min_periods=5),
1062+
lambda x: mom.rolling_cov(x, x, pairwise=True, window=10, min_periods=5),
1063+
lambda x: mom.rolling_corr(x, x, pairwise=True, window=10, min_periods=5),
1064+
# rolling_corr_pairwise is deprecated, so the following line should be deleted
1065+
# when rolling_corr_pairwise is removed.
1066+
lambda x: mom.rolling_corr_pairwise(x, x, window=10, min_periods=5),
1067+
]
1068+
for f in functions:
1069+
df1_result_panel = f(df1)
1070+
assert_panel_equal(df1_result_panel, df1_expected_panel)
1071+
1072+
df2_result_panel = f(df2)
1073+
assert_panel_equal(df2_result_panel, df2_expected_panel)
1074+
9921075
def test_expanding_cov_pairwise_diff_length(self):
9931076
# GH 7512
9941077
df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])

0 commit comments

Comments
 (0)