Skip to content

Commit 93af2f0

Browse files
authored
DEPR: dropping nuisance columns in rolling aggregations (#42834)
1 parent 8e73c30 commit 93af2f0

File tree

6 files changed

+71
-28
lines changed

6 files changed

+71
-28
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ Deprecations
162162
- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
163163
- Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
164164
- Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`)
165+
- Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`)
165166
- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
166167
-
167168

pandas/core/window/rolling.py

+13
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from pandas.compat._optional import import_optional_dependency
3333
from pandas.compat.numpy import function as nv
3434
from pandas.util._decorators import doc
35+
from pandas.util._exceptions import find_stack_level
3536

3637
from pandas.core.dtypes.common import (
3738
ensure_float64,
@@ -436,6 +437,18 @@ def hfunc2d(values: ArrayLike) -> ArrayLike:
436437
new_mgr = mgr.apply_2d(hfunc2d, ignore_failures=True)
437438
else:
438439
new_mgr = mgr.apply(hfunc, ignore_failures=True)
440+
441+
if 0 != len(new_mgr.items) != len(mgr.items):
442+
# GH#42738 ignore_failures dropped nuisance columns
443+
dropped = mgr.items.difference(new_mgr.items)
444+
warnings.warn(
445+
"Dropping of nuisance columns in rolling operations "
446+
"is deprecated; in a future version this will raise TypeError. "
447+
"Select only valid columns before calling the operation. "
448+
f"Dropped columns were {dropped}",
449+
FutureWarning,
450+
stacklevel=find_stack_level(),
451+
)
439452
out = obj._constructor(new_mgr)
440453

441454
return self._resolve_output(out, obj)

pandas/tests/window/test_api.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,10 @@ def tests_skip_nuisance():
6868
def test_skip_sum_object_raises():
6969
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
7070
r = df.rolling(window=3)
71-
result = r.sum()
71+
msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)"
72+
with tm.assert_produces_warning(FutureWarning, match=msg):
73+
# GH#42738
74+
result = r.sum()
7275
expected = DataFrame(
7376
{"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
7477
columns=list("AB"),

pandas/tests/window/test_ewm.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
116116
data = np.arange(10.0)
117117
data[::2] = np.nan
118118
df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)})
119-
result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
120-
expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
119+
with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
120+
# GH#42738
121+
result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
122+
expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
121123
tm.assert_frame_equal(result, expected)
122124

123125

pandas/tests/window/test_groupby.py

+23-15
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,12 @@ def test_methods(self, method, expected_data):
923923
)
924924
tm.assert_frame_equal(result, expected)
925925

926-
expected = df.groupby("A").apply(lambda x: getattr(x.ewm(com=1.0), method)())
926+
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
927+
# GH#42738
928+
expected = df.groupby("A").apply(
929+
lambda x: getattr(x.ewm(com=1.0), method)()
930+
)
931+
927932
# There may be a bug in the above statement; not returning the correct index
928933
tm.assert_frame_equal(result.reset_index(drop=True), expected)
929934

@@ -955,7 +960,9 @@ def test_pairwise_methods(self, method, expected_data):
955960
def test_times(self, times_frame):
956961
# GH 40951
957962
halflife = "23 days"
958-
result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
963+
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
964+
# GH#42738
965+
result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
959966
expected = DataFrame(
960967
{
961968
"B": [
@@ -992,22 +999,23 @@ def test_times(self, times_frame):
992999
def test_times_vs_apply(self, times_frame):
9931000
# GH 40951
9941001
halflife = "23 days"
995-
result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
996-
expected = (
997-
times_frame.groupby("A")
998-
.apply(lambda x: x.ewm(halflife=halflife, times="C").mean())
999-
.iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]]
1000-
.reset_index(drop=True)
1001-
)
1002+
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
1003+
# GH#42738
1004+
result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
1005+
expected = (
1006+
times_frame.groupby("A")
1007+
.apply(lambda x: x.ewm(halflife=halflife, times="C").mean())
1008+
.iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]]
1009+
.reset_index(drop=True)
1010+
)
10021011
tm.assert_frame_equal(result.reset_index(drop=True), expected)
10031012

10041013
def test_times_array(self, times_frame):
10051014
# GH 40951
10061015
halflife = "23 days"
1007-
result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
1008-
expected = (
1009-
times_frame.groupby("A")
1010-
.ewm(halflife=halflife, times=times_frame["C"].values)
1011-
.mean()
1012-
)
1016+
gb = times_frame.groupby("A")
1017+
with tm.assert_produces_warning(FutureWarning, match="nuisance"):
1018+
# GH#42738
1019+
result = gb.ewm(halflife=halflife, times="C").mean()
1020+
expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean()
10131021
tm.assert_frame_equal(result, expected)

pandas/tests/window/test_numba.py

+26-10
Original file line numberDiff line numberDiff line change
@@ -170,26 +170,39 @@ def test_invalid_engine_kwargs(self, grouper):
170170
engine="cython", engine_kwargs={"nopython": True}
171171
)
172172

173-
@pytest.mark.parametrize(
174-
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
175-
)
173+
@pytest.mark.parametrize("grouper", ["None", "groupby"])
176174
def test_cython_vs_numba(
177175
self, grouper, nogil, parallel, nopython, ignore_na, adjust
178176
):
177+
if grouper == "None":
178+
grouper = lambda x: x
179+
warn = FutureWarning
180+
else:
181+
grouper = lambda x: x.groupby("A")
182+
warn = None
183+
179184
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
180185
ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
181186

182187
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
183-
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
184-
expected = ewm.mean(engine="cython")
188+
with tm.assert_produces_warning(warn, match="nuisance"):
189+
# GH#42738
190+
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
191+
expected = ewm.mean(engine="cython")
185192

186193
tm.assert_frame_equal(result, expected)
187194

188-
@pytest.mark.parametrize(
189-
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
190-
)
195+
@pytest.mark.parametrize("grouper", ["None", "groupby"])
191196
def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
192197
# GH 40951
198+
199+
if grouper == "None":
200+
grouper = lambda x: x
201+
warn = FutureWarning
202+
else:
203+
grouper = lambda x: x.groupby("A")
204+
warn = None
205+
193206
halflife = "23 days"
194207
times = to_datetime(
195208
[
@@ -207,8 +220,11 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_
207220
)
208221

209222
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
210-
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
211-
expected = ewm.mean(engine="cython")
223+
224+
with tm.assert_produces_warning(warn, match="nuisance"):
225+
# GH#42738
226+
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
227+
expected = ewm.mean(engine="cython")
212228

213229
tm.assert_frame_equal(result, expected)
214230

0 commit comments

Comments
 (0)