Skip to content

Commit 627da5a

Browse files
authored
DEPR: min_periods=None behavior for Rolling.count (#36649)
1 parent 3db38fb commit 627da5a

10 files changed

+139
-141
lines changed

doc/source/whatsnew/v1.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ Deprecations
265265
- Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`36179`)
266266
- Deprecated casting an object-dtype index of ``datetime`` objects to :class:`DatetimeIndex` in the :class:`Series` constructor (:issue:`23598`)
267267
- Deprecated :meth:`Index.is_all_dates` (:issue:`27744`)
268+
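- Deprecated the ``min_periods=None`` behavior of :meth:`Rolling.count`: in a future version ``min_periods=None`` will default to the size of the window, consistent with other rolling methods. Specify ``min_periods=0`` to keep the current behavior (:issue:`31302`)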
268269

269270
.. ---------------------------------------------------------------------------
270271
@@ -404,6 +405,7 @@ Groupby/resample/rolling
404405
- Bug in :meth:`DataFrame.groupby` where the column index name was not always maintained for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
405406
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
406407
- Bug in :meth:`Rolling.sum()` returning wrong values when dtypes were mixed between float and integer and axis was equal to one (:issue:`20649`, :issue:`35596`)
408+
- Bug in :meth:`Rolling.count` returning ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in the window (:issue:`35579`)
407409

408410
Reshaping
409411
^^^^^^^^^

pandas/_libs/window/aggregations.pyx

-56
Original file line numberDiff line numberDiff line change
@@ -89,62 +89,6 @@ cdef bint is_monotonic_start_end_bounds(
8989
# Physical description: 366 p.
9090
# Series: Prentice-Hall Series in Automatic Computation
9191

92-
# ----------------------------------------------------------------------
93-
# Rolling count
94-
# this is only an impl for index not None, IOW, freq aware
95-
96-
97-
def roll_count(
98-
ndarray[float64_t] values,
99-
ndarray[int64_t] start,
100-
ndarray[int64_t] end,
101-
int64_t minp,
102-
):
103-
cdef:
104-
float64_t val, count_x = 0.0
105-
int64_t s, e, nobs, N = len(values)
106-
Py_ssize_t i, j
107-
ndarray[float64_t] output
108-
109-
output = np.empty(N, dtype=float)
110-
111-
with nogil:
112-
113-
for i in range(0, N):
114-
s = start[i]
115-
e = end[i]
116-
117-
if i == 0:
118-
119-
# setup
120-
count_x = 0.0
121-
for j in range(s, e):
122-
val = values[j]
123-
if notnan(val):
124-
count_x += 1.0
125-
126-
else:
127-
128-
# calculate deletes
129-
for j in range(start[i - 1], s):
130-
val = values[j]
131-
if notnan(val):
132-
count_x -= 1.0
133-
134-
# calculate adds
135-
for j in range(end[i - 1], e):
136-
val = values[j]
137-
if notnan(val):
138-
count_x += 1.0
139-
140-
if count_x >= minp:
141-
output[i] = count_x
142-
else:
143-
output[i] = NaN
144-
145-
return output
146-
147-
14892
# ----------------------------------------------------------------------
14993
# Rolling sum
15094

pandas/core/window/rolling.py

+26-40
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
Type,
1818
Union,
1919
)
20+
import warnings
2021

2122
import numpy as np
2223

@@ -469,31 +470,39 @@ def _get_window_indexer(self, window: int) -> BaseIndexer:
469470
return VariableWindowIndexer(index_array=self._on.asi8, window_size=window)
470471
return FixedWindowIndexer(window_size=window)
471472

472-
def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series":
473+
def _apply_series(
474+
self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None
475+
) -> "Series":
473476
"""
474477
Series version of _apply_blockwise
475478
"""
476479
obj = self._create_data(self._selected_obj)
477480

478481
try:
479-
values = self._prep_values(obj.values)
482+
# GH 12541: Special case for count where we support date-like types
483+
input = obj.values if name != "count" else notna(obj.values).astype(int)
484+
values = self._prep_values(input)
480485
except (TypeError, NotImplementedError) as err:
481486
raise DataError("No numeric types to aggregate") from err
482487

483488
result = homogeneous_func(values)
484489
return obj._constructor(result, index=obj.index, name=obj.name)
485490

486491
def _apply_blockwise(
487-
self, homogeneous_func: Callable[..., ArrayLike]
492+
self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None
488493
) -> FrameOrSeriesUnion:
489494
"""
490495
Apply the given function to the DataFrame broken down into homogeneous
491496
sub-frames.
492497
"""
493498
if self._selected_obj.ndim == 1:
494-
return self._apply_series(homogeneous_func)
499+
return self._apply_series(homogeneous_func, name)
495500

496501
obj = self._create_data(self._selected_obj)
502+
if name == "count":
503+
# GH 12541: Special case for count where we support date-like types
504+
obj = notna(obj).astype(int)
505+
obj._mgr = obj._mgr.consolidate()
497506
mgr = obj._mgr
498507

499508
def hfunc(bvalues: ArrayLike) -> ArrayLike:
@@ -606,7 +615,7 @@ def calc(x):
606615

607616
return result
608617

609-
return self._apply_blockwise(homogeneous_func)
618+
return self._apply_blockwise(homogeneous_func, name)
610619

611620
def aggregate(self, func, *args, **kwargs):
612621
result, how = self._aggregate(func, *args, **kwargs)
@@ -1265,33 +1274,8 @@ class RollingAndExpandingMixin(BaseWindow):
12651274
)
12661275

12671276
def count(self):
1268-
# GH 32865. Using count with custom BaseIndexer subclass
1269-
# implementations shouldn't end up here
1270-
assert not isinstance(self.window, BaseIndexer)
1271-
1272-
obj = self._create_data(self._selected_obj)
1273-
1274-
def hfunc(values: np.ndarray) -> np.ndarray:
1275-
result = notna(values)
1276-
result = result.astype(int)
1277-
frame = type(obj)(result.T)
1278-
result = self._constructor(
1279-
frame,
1280-
window=self._get_window(),
1281-
min_periods=self.min_periods or 0,
1282-
center=self.center,
1283-
axis=self.axis,
1284-
closed=self.closed,
1285-
).sum()
1286-
return result.values.T
1287-
1288-
new_mgr = obj._mgr.apply(hfunc)
1289-
out = obj._constructor(new_mgr)
1290-
if obj.ndim == 1:
1291-
out.name = obj.name
1292-
else:
1293-
self._insert_on_column(out, obj)
1294-
return out
1277+
window_func = self._get_cython_func_type("roll_sum")
1278+
return self._apply(window_func, center=self.center, name="count")
12951279

12961280
_shared_docs["apply"] = dedent(
12971281
r"""
@@ -2050,14 +2034,16 @@ def aggregate(self, func, *args, **kwargs):
20502034
@Substitution(name="rolling")
20512035
@Appender(_shared_docs["count"])
20522036
def count(self):
2053-
2054-
# different impl for freq counting
2055-
# GH 32865. Use a custom count function implementation
2056-
# when using a BaseIndexer subclass as a window
2057-
if self.is_freq_type or isinstance(self.window, BaseIndexer):
2058-
window_func = self._get_roll_func("roll_count")
2059-
return self._apply(window_func, center=self.center, name="count")
2060-
2037+
if self.min_periods is None:
2038+
warnings.warn(
2039+
(
2040+
"min_periods=None will default to the size of window "
2041+
"consistent with other methods in a future version. "
2042+
"Specify min_periods=0 instead."
2043+
),
2044+
FutureWarning,
2045+
)
2046+
self.min_periods = 0
20612047
return super().count()
20622048

20632049
@Substitution(name="rolling")

pandas/tests/window/moments/test_moments_consistency_rolling.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ def test_moment_functions_zero_length():
452452
df2_expected = df2
453453

454454
functions = [
455-
lambda x: x.rolling(window=10).count(),
455+
lambda x: x.rolling(window=10, min_periods=0).count(),
456456
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
457457
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
458458
lambda x: x.rolling(window=10, min_periods=5).max(),

pandas/tests/window/moments/test_moments_rolling_functions.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
[
1313
[np.mean, "mean", {}],
1414
[np.nansum, "sum", {}],
15-
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}],
15+
pytest.param(
16+
lambda x: np.isfinite(x).astype(float).sum(),
17+
"count",
18+
{},
19+
marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"),
20+
),
1621
[np.median, "median", {}],
1722
[np.min, "min", {}],
1823
[np.max, "max", {}],
@@ -33,7 +38,12 @@ def test_series(series, compare_func, roll_func, kwargs):
3338
[
3439
[np.mean, "mean", {}],
3540
[np.nansum, "sum", {}],
36-
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}],
41+
pytest.param(
42+
lambda x: np.isfinite(x).astype(float).sum(),
43+
"count",
44+
{},
45+
marks=pytest.mark.filterwarnings("ignore:min_periods:FutureWarning"),
46+
),
3747
[np.median, "median", {}],
3848
[np.min, "min", {}],
3949
[np.max, "max", {}],

pandas/tests/window/test_base_indexer.py

+10
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
138138
),
139139
],
140140
)
141+
@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning")
141142
def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs):
142143
# GH 32865
143144
values = np.arange(10.0)
@@ -253,3 +254,12 @@ def test_non_fixed_variable_window_indexer(closed, expected_data):
253254
result = df.rolling(indexer, closed=closed).sum()
254255
expected = DataFrame(expected_data, index=index)
255256
tm.assert_frame_equal(result, expected)
257+
258+
259+
def test_fixed_forward_indexer_count():
260+
# GH: 35579
261+
df = DataFrame({"b": [None, None, None, 7]})
262+
indexer = FixedForwardWindowIndexer(window_size=2)
263+
result = df.rolling(window=indexer, min_periods=0).count()
264+
expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})
265+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)