DEPR: dropping nuisance columns in rolling aggregations (#42834)

jbrockmendel · web-flow · commit 93af2f0d4de0 · 2021-08-10T14:59:42.000-04:00
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -162,6 +162,7 @@ Deprecations
 - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
 - Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
 - Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`)
+- Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`)
 - Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
 -
 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -32,6 +32,7 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import doc
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     ensure_float64,
@@ -436,6 +437,18 @@ def hfunc2d(values: ArrayLike) -> ArrayLike:
             new_mgr = mgr.apply_2d(hfunc2d, ignore_failures=True)
         else:
             new_mgr = mgr.apply(hfunc, ignore_failures=True)
+
+        if 0 != len(new_mgr.items) != len(mgr.items):
+            # GH#42738 ignore_failures dropped nuisance columns
+            dropped = mgr.items.difference(new_mgr.items)
+            warnings.warn(
+                "Dropping of nuisance columns in rolling operations "
+                "is deprecated; in a future version this will raise TypeError. "
+                "Select only valid columns before calling the operation. "
+                f"Dropped columns were {dropped}",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
         out = obj._constructor(new_mgr)
 
         return self._resolve_output(out, obj)
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
@@ -68,7 +68,10 @@ def tests_skip_nuisance():
 def test_skip_sum_object_raises():
     df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
     r = df.rolling(window=3)
-    result = r.sum()
+    msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        # GH#42738
+        result = r.sum()
     expected = DataFrame(
         {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
         columns=list("AB"),
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
@@ -116,8 +116,10 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
     data = np.arange(10.0)
     data[::2] = np.nan
     df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)})
-    result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
-    expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
+    with tm.assert_produces_warning(FutureWarning, match="nuisance columns"):
+        # GH#42738
+        result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
+        expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
@@ -923,7 +923,12 @@ def test_methods(self, method, expected_data):
         )
         tm.assert_frame_equal(result, expected)
 
-        expected = df.groupby("A").apply(lambda x: getattr(x.ewm(com=1.0), method)())
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
+            expected = df.groupby("A").apply(
+                lambda x: getattr(x.ewm(com=1.0), method)()
+            )
+
         # There may be a bug in the above statement; not returning the correct index
         tm.assert_frame_equal(result.reset_index(drop=True), expected)
 
@@ -955,7 +960,9 @@ def test_pairwise_methods(self, method, expected_data):
     def test_times(self, times_frame):
         # GH 40951
         halflife = "23 days"
-        result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
+            result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
         expected = DataFrame(
             {
                 "B": [
@@ -992,22 +999,23 @@ def test_times(self, times_frame):
     def test_times_vs_apply(self, times_frame):
         # GH 40951
         halflife = "23 days"
-        result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
-        expected = (
-            times_frame.groupby("A")
-            .apply(lambda x: x.ewm(halflife=halflife, times="C").mean())
-            .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]]
-            .reset_index(drop=True)
-        )
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
+            result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
+            expected = (
+                times_frame.groupby("A")
+                .apply(lambda x: x.ewm(halflife=halflife, times="C").mean())
+                .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]]
+                .reset_index(drop=True)
+            )
         tm.assert_frame_equal(result.reset_index(drop=True), expected)
 
     def test_times_array(self, times_frame):
         # GH 40951
         halflife = "23 days"
-        result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean()
-        expected = (
-            times_frame.groupby("A")
-            .ewm(halflife=halflife, times=times_frame["C"].values)
-            .mean()
-        )
+        gb = times_frame.groupby("A")
+        with tm.assert_produces_warning(FutureWarning, match="nuisance"):
+            # GH#42738
+            result = gb.ewm(halflife=halflife, times="C").mean()
+            expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean()
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
@@ -170,26 +170,39 @@ def test_invalid_engine_kwargs(self, grouper):
                 engine="cython", engine_kwargs={"nopython": True}
             )
 
-    @pytest.mark.parametrize(
-        "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
-    )
+    @pytest.mark.parametrize("grouper", ["None", "groupby"])
     def test_cython_vs_numba(
         self, grouper, nogil, parallel, nopython, ignore_na, adjust
     ):
+        if grouper == "None":
+            grouper = lambda x: x
+            warn = FutureWarning
+        else:
+            grouper = lambda x: x.groupby("A")
+            warn = None
+
         df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
         ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
 
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
-        result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
-        expected = ewm.mean(engine="cython")
+        with tm.assert_produces_warning(warn, match="nuisance"):
+            # GH#42738
+            result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
+            expected = ewm.mean(engine="cython")
 
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
-    )
+    @pytest.mark.parametrize("grouper", ["None", "groupby"])
     def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
         # GH 40951
+
+        if grouper == "None":
+            grouper = lambda x: x
+            warn = FutureWarning
+        else:
+            grouper = lambda x: x.groupby("A")
+            warn = None
+
         halflife = "23 days"
         times = to_datetime(
             [
@@ -207,8 +220,11 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_
         )
 
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
-        result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
-        expected = ewm.mean(engine="cython")
+
+        with tm.assert_produces_warning(warn, match="nuisance"):
+            # GH#42738
+            result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
+            expected = ewm.mean(engine="cython")
 
         tm.assert_frame_equal(result, expected)
 

Original file line number	Diff line number	Diff line change
`@@ -162,6 +162,7 @@ Deprecations`
`162`	`162`	- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
`163`	`163`	- Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
`164`	`164`	- Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`)
	`165`	+- Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`)
`165`	`166`	- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
`166`	`167`	`-`
`167`	`168`