
BUG: Rolling groupby should not maintain the by column in the resulting DataFrame #32332


Closed
wants to merge 41 commits

41 commits
a6ad6c5
make sure exclusions are applied before the groupby object reaches ro…
Feb 28, 2020
54903a7
pass object with exclusions earlier on
MarcoGorelli Feb 29, 2020
d81c572
revert accident
MarcoGorelli Feb 29, 2020
e049205
check for side effects in obj
MarcoGorelli Feb 29, 2020
5f43f3a
wip
MarcoGorelli Mar 1, 2020
3b1c3ff
Wip
MarcoGorelli Mar 1, 2020
207a507
change _selected_obj according to self.mutated
MarcoGorelli Mar 1, 2020
c9b34b2
sanitize
MarcoGorelli Mar 1, 2020
b2ce758
update old tests
MarcoGorelli Mar 1, 2020
c307fdc
fix old docstring
MarcoGorelli Mar 1, 2020
35f2b2d
finish correcting old docstring
MarcoGorelli Mar 1, 2020
2bb7932
use getattr
MarcoGorelli Mar 1, 2020
eed4122
old fix
Mar 3, 2020
26d9dec
simpler fix
Mar 3, 2020
68b4299
try making Window's own _shallow_copy
Mar 3, 2020
159a3d6
more wip
Mar 3, 2020
0b393b6
typeerror!
Mar 3, 2020
9fc50ee
add some types, comment tests
Mar 3, 2020
7fe2fcf
fix typing
Mar 3, 2020
8a34ff4
better fix
Mar 3, 2020
8953bda
correct return annotation
Mar 3, 2020
b2b8a42
simplify code
Mar 3, 2020
d476191
fix upstream, remove try except
Mar 4, 2020
63e3d85
document change as api-breaking
Mar 4, 2020
17373ad
Merge remote-tracking branch 'upstream/master' into rolling-groupby
MarcoGorelli Mar 15, 2020
a7ca8eb
remove elements from exclusions within _obj_with_exclusions
MarcoGorelli Mar 15, 2020
35e7340
remove no-longer-necessary performance warning
MarcoGorelli Mar 15, 2020
17090da
simplify _obj_with_exclusions
MarcoGorelli Mar 15, 2020
7c7f79c
correct comment
MarcoGorelli Mar 15, 2020
9b98dad
Merge remote-tracking branch 'upstream/master' into rolling-groupby
Mar 19, 2020
4d1c5a6
deal with multiindexed columns case
Mar 21, 2020
0dde4a8
use elif in _obj_with_exclusions, add test for column multiindex, dis…
Mar 21, 2020
1e4ba56
simplify unique_column_names
Mar 21, 2020
6312725
rewrite using get_level_values
Mar 21, 2020
d9748d1
revert 'or' which was accidentally changed to 'and'
Mar 21, 2020
1623902
only patch _apply, cov and corr
Mar 21, 2020
5d7a477
reinstate change to _obj_with_exclusions
Mar 21, 2020
d117624
exclude 'by' column in Rolling.count
Mar 22, 2020
367e671
don't modify _selected_obj - instead, patch obj before we reach apply
Mar 22, 2020
d7cf9f1
slight simplification to _shallow_copy
Mar 22, 2020
d251715
comment on patch in corr and cov
Mar 22, 2020
29 changes: 29 additions & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -167,6 +167,35 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss
...
KeyError: Timestamp('1970-01-01 00:00:00')

GroupBy.rolling no longer returns grouped-by column in values
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Contributor: Add an explanation here, a couple of sentences, and the issue number.

The column on which the rolling operation is grouped is no longer returned in
the values of the result of ``groupby(...).rolling(...)``; it now appears only
in the index. Suppose we start with:

.. ipython:: python

    df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
    df

*Previous behavior*:

.. code-block:: ipython

    In [1]: df.groupby("A").rolling(2).sum()
    Out[1]:
             A    B
    A
    1 0    NaN  NaN
      1    2.0  1.0
    2 2    NaN  NaN
    3 3    NaN  NaN

*New behavior*:

.. ipython:: python

    df.groupby("A").rolling(2).sum()

.. ---------------------------------------------------------------------------

.. _whatsnew_110.api_breaking.assignment_to_multiple_columns:
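For reference, a self-contained version of the example in the whatsnew entry above; this is a sketch assuming pandas >= 1.1 (i.e. with this change applied), showing that the grouped-by column ends up in the index only.

import pandas as pd

df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})

# "A" is used for grouping and indexing only;
# the values of the result contain just "B"
result = df.groupby("A").rolling(2).sum()
print(result.columns.tolist())  # ['B']
print(result)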
13 changes: 10 additions & 3 deletions pandas/core/base.py
@@ -209,11 +209,18 @@ def _obj_with_exclusions(self):
if self._selection is not None and isinstance(self.obj, ABCDataFrame):
return self.obj.reindex(columns=self._selection_list)

if len(self.exclusions) > 0:
return self.obj.drop(self.exclusions, axis=1)
else:
elif not self.exclusions or not isinstance(self.obj, ABCDataFrame):
return self.obj

# there may be elements in self.exclusions that are no longer
# in self.obj, see GH 32468
nlevels = self.obj.columns.nlevels
Contributor: huh? why are you doing all of this

unique_column_names = {
j for i in range(nlevels) for j in self.obj.columns.get_level_values(i)
}
exclusions = self.exclusions.intersection(unique_column_names)
return self.obj.drop(exclusions, axis=1)

def __getitem__(self, key):
if self._selection is not None:
raise IndexError(f"Column(s) {self._selection} already selected")
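A minimal standalone sketch of the idea behind the _obj_with_exclusions change above, under the assumption that exclusions may name labels from any column level and may contain labels that are no longer present in the object (cf. GH 32468): gather the labels from every level, intersect with the exclusions, and drop only those. The name "stale_label" is purely illustrative.

import pandas as pd

columns = pd.MultiIndex.from_tuples([("A", ""), ("B", "C")])
df = pd.DataFrame([[1, 0], [2, 3]], columns=columns)
# "stale_label" stands in for an exclusion that is no longer among the columns
exclusions = {"A", "stale_label"}

# gather every label that appears on any column level
unique_column_names = {
    label
    for level in range(df.columns.nlevels)
    for label in df.columns.get_level_values(level)
}
# only drop the exclusions that actually exist among the column labels
to_drop = sorted(exclusions & unique_column_names)
print(df.drop(to_drop, axis=1))  # only the ("B", "C") column remains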
1 change: 1 addition & 0 deletions pandas/core/groupby/groupby.py
@@ -1577,6 +1577,7 @@ def rolling(self, *args, **kwargs):
"""
from pandas.core.window import RollingGroupby

kwargs["exclusions"] = self.exclusions
return RollingGroupby(self, *args, **kwargs)

@Substitution(name="groupby")
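The one-line change above forwards the groupby object's exclusions (the grouped-by column names) to the RollingGroupby constructor via kwargs. A rough standalone sketch of that forwarding pattern, using hypothetical stand-in classes (FakeGroupBy, FakeRollingGroupby) rather than pandas internals:

class FakeGroupBy:
    """Stand-in for a groupby object that remembers its grouped-by columns."""

    def __init__(self, exclusions):
        self.exclusions = set(exclusions)

    def rolling(self, *args, **kwargs):
        # hand the exclusions to the window object at construction time,
        # so it can drop those columns from its results later on
        kwargs["exclusions"] = self.exclusions
        return FakeRollingGroupby(self, *args, **kwargs)


class FakeRollingGroupby:
    """Stand-in for a grouped rolling window object."""

    def __init__(self, groupby, window=None, **kwargs):
        self.window = window
        self.exclusions = kwargs.get("exclusions", set())


r = FakeGroupBy(exclusions={"A"}).rolling(4)
print(r.exclusions)  # {'A'}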
4 changes: 4 additions & 0 deletions pandas/core/window/common.py
@@ -35,6 +35,8 @@ def _dispatch(name: str, *args, **kwargs):
def outer(self, *args, **kwargs):
def f(x):
x = self._shallow_copy(x, groupby=self._groupby)
# patch for GH 32332
x.obj = x._obj_with_exclusions
Contributor: I really don't like this

return getattr(x, name)(*args, **kwargs)

return self._groupby.apply(f)
@@ -82,6 +84,8 @@ def _apply(
# TODO: can we de-duplicate with _dispatch?
def f(x, name=name, *args):
x = self._shallow_copy(x)
# patch for GH 32332
x.obj = x._obj_with_exclusions

if isinstance(name, str):
return getattr(x, name)(*args, **kwargs)
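A simplified, standalone illustration of what the x.obj = x._obj_with_exclusions patch achieves, using only the public pandas API rather than the internal _dispatch/_apply machinery: for each group, the grouped-by column is removed before the rolling function runs, so it never shows up in the values. The helper name rolling_sum_without_by is hypothetical.

import pandas as pd

df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})


def rolling_sum_without_by(group):
    # mimic _obj_with_exclusions: drop the grouped-by column before rolling
    return group.drop("A", axis=1).rolling(2).sum()


result = df.groupby("A").apply(rolling_sum_without_by)
print(result)  # only column "B" appears in the values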
1 change: 1 addition & 0 deletions pandas/core/window/ewm.py
@@ -142,6 +142,7 @@ def __init__(
adjust=True,
ignore_na=False,
axis=0,
**kwargs,
):
self.obj = obj
self.com = _get_center_of_mass(com, span, halflife, alpha)
11 changes: 9 additions & 2 deletions pandas/core/window/rolling.py
@@ -93,6 +93,10 @@ def __init__(
self.axis = obj._get_axis_number(axis) if axis is not None else None
self.validate()
self._numba_func_cache: Dict[Optional[str], Callable] = dict()
self.exclusions = kwargs.get("exclusions", set())

def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin:
return super()._shallow_copy(obj, exclusions=self.exclusions, **kwargs)

@property
def _constructor(self):
@@ -1187,8 +1191,7 @@ def count(self):
closed=self.closed,
).sum()
results.append(result)

return self._wrap_results(results, blocks, obj)
return self._wrap_results(results, blocks, obj, exclude=self.exclusions)

_shared_docs["apply"] = dedent(
r"""
@@ -1632,6 +1635,8 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
# only default unset
pairwise = True if pairwise is None else pairwise
other = self._shallow_copy(other)
# patch for GH 32332
other.obj = other._obj_with_exclusions

# GH 16058: offset window
if self.is_freq_type:
@@ -1775,6 +1780,8 @@ def corr(self, other=None, pairwise=None, **kwargs):
# only default unset
pairwise = True if pairwise is None else pairwise
other = self._shallow_copy(other)
# patch for GH 32332
other.obj = other._obj_with_exclusions
window = self._get_window(other) if not self.is_freq_type else self.win_freq

def _get_corr(a, b):
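A rough sketch (hypothetical Window stand-in class, not pandas internals) of why _shallow_copy is overridden above to forward exclusions: the per-group dispatch and the cov/corr paths all shallow-copy the window object, and each copy needs to remember which column was grouped by in order to drop it.

class Window:
    """Stand-in for a window object that carries a set of excluded columns."""

    def __init__(self, obj, exclusions=frozenset()):
        self.obj = obj
        self.exclusions = set(exclusions)

    def _shallow_copy(self, obj, **kwargs):
        # forward the exclusions so they survive every copy
        kwargs.setdefault("exclusions", self.exclusions)
        return type(self)(obj, **kwargs)


w = Window(obj="original frame", exclusions={"A"})
copy = w._shallow_copy(obj="per-group frame")
assert copy.exclusions == {"A"}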
10 changes: 10 additions & 0 deletions pandas/tests/window/test_grouper.py
@@ -62,11 +62,15 @@ def test_rolling(self):
for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]:
result = getattr(r, f)()
expected = g.apply(lambda x: getattr(x.rolling(4), f)())
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
tm.assert_frame_equal(result, expected)

for f in ["std", "var"]:
result = getattr(r, f)(ddof=1)
expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
@@ -79,6 +83,8 @@ def test_rolling_quantile(self, interpolation):
expected = g.apply(
lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation)
)
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
tm.assert_frame_equal(result, expected)

def test_rolling_corr_cov(self):
@@ -92,6 +98,8 @@ def func(x):
return getattr(x.rolling(4), f)(self.frame)

expected = g.apply(func)
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
tm.assert_frame_equal(result, expected)

result = getattr(r.B, f)(pairwise=True)
@@ -109,6 +117,8 @@ def test_rolling_apply(self, raw):
# reduction
result = r.apply(lambda x: x.sum(), raw=raw)
expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
# groupby.apply doesn't drop the grouped-by column
expected = expected.drop("A", axis=1)
tm.assert_frame_equal(result, expected)

def test_rolling_apply_mutability(self):
15 changes: 15 additions & 0 deletions pandas/tests/window/test_rolling.py
@@ -465,3 +465,18 @@ def test_rolling_count_default_min_periods_with_null_values(constructor):
result = constructor(values).rolling(3).count()
expected = constructor(expected_counts)
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
"columns", [pd.MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]]
)
def test_by_column_not_in_values(columns):
# GH 32262
df = pd.DataFrame([[1, 0]] * 20 + [[2, 0]] * 12 + [[3, 0]] * 8, columns=columns)

g = df.groupby("A")
original_obj = g.obj.copy(deep=True)
r = g.rolling(4)
result = r.sum()
assert "A" not in result.columns
tm.assert_frame_equal(g.obj, original_obj) # check for side-effects