From 2abd61362b33900b529c1bbe39895684622f0d04 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 May 2018 15:24:24 -0700 Subject: [PATCH 1/5] Added test for issue --- pandas/tests/test_window.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 304e3d02466a5..b45ddf8ebdc94 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3174,6 +3174,28 @@ def test_rolling_apply(self, raw): lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) tm.assert_frame_equal(result, expected) + def test_rolling_apply_mutability(self): + # GH 14013 + df = pd.DataFrame({'A': ['foo'] * 3 + ['bar'] * 3, 'B': [1] * 6}) + g = df.groupby('A') + + # First ensure that the grouped column is not part of the output + mi = pd.MultiIndex.from_tuples([('bar', 3), ('bar', 4), ('bar', 5), + ('foo', 0), ('foo', 1), ('foo', 2)]) + + mi.names = ['A', None] + expected = pd.DataFrame([np.nan, 2., 2.] * 2, columns=['B'], index=mi) + + result = g.rolling(window=2).sum() + tm.assert_frame_equal(result, expected) + + # Call an arbitrary function on the groupby + g.sum() + + # Make sure nothing has been mutated + result = g.rolling(window=2).sum() + tm.assert_frame_equal(result, expected) + def test_expanding(self): g = self.frame.groupby('A') r = g.expanding() From ff2dd8fb138df66cf8293f7c1989cd1759b1f900 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 May 2018 16:59:05 -0700 Subject: [PATCH 2/5] Un-suppressed exception in Rolling apply --- pandas/core/window.py | 8 ++------ pandas/tests/test_window.py | 9 ++++++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index d7f9f7c85fbbc..1c68b18588071 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -791,12 +791,12 @@ def _apply(self, func, name, window=None, center=None, def f(x, name=name, *args): x = self._shallow_copy(x) - if isinstance(name, compat.string_types): 
return getattr(x, name)(*args, **kwargs) return x.apply(name, *args, **kwargs) + return self._groupby.apply(f) @@ -837,11 +837,7 @@ def _apply(self, func, name=None, window=None, center=None, index, indexi = self._get_index(index=index) results = [] for b in blocks: - try: - values = self._prep_values(b.values) - except TypeError: - results.append(b.values.copy()) - continue + values = self._prep_values(b.values) if values.size == 0: results.append(values.copy()) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index b45ddf8ebdc94..7ccc7f1534d44 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -105,7 +105,6 @@ def test_attribute_access(self): def tests_skip_nuisance(self): df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'}) - r = df.rolling(window=3) result = r[['A', 'B']].sum() expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9], @@ -113,9 +112,13 @@ def tests_skip_nuisance(self): columns=list('AB')) tm.assert_frame_equal(result, expected) + def test_skip_sum_object_raises(self): + df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'}) + r = df.rolling(window=3) expected = concat([r[['A', 'B']].sum(), df[['C']]], axis=1) - result = r.sum() - tm.assert_frame_equal(result, expected, check_like=True) + + with tm.assert_raises_regex(TypeError, 'cannot handle this type'): + result = r.sum() def test_agg(self): df = DataFrame({'A': range(5), 'B': range(0, 10, 2)}) From d73ff86cebd79f350416d0bcaf67f987a136c931 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 May 2018 17:02:23 -0700 Subject: [PATCH 3/5] LINT fixup --- pandas/core/window.py | 1 - pandas/tests/test_window.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 1c68b18588071..a23ee4f725289 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -796,7 +796,6 @@ def f(x, name=name, *args): return x.apply(name, *args, **kwargs) - return self._groupby.apply(f) 
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 7ccc7f1534d44..23c8f9c96189a 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -115,10 +115,9 @@ def tests_skip_nuisance(self): def test_skip_sum_object_raises(self): df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'}) r = df.rolling(window=3) - expected = concat([r[['A', 'B']].sum(), df[['C']]], axis=1) with tm.assert_raises_regex(TypeError, 'cannot handle this type'): - result = r.sum() + r.sum() def test_agg(self): df = DataFrame({'A': range(5), 'B': range(0, 10, 2)}) From 9bc29d0ac31bdca294256a3656afa4c6885bd3c6 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 May 2018 17:08:35 -0700 Subject: [PATCH 4/5] Reverted errant whitespace, cleaned up comment --- pandas/core/window.py | 1 + pandas/tests/test_window.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index a23ee4f725289..5fd054b1930e6 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -791,6 +791,7 @@ def _apply(self, func, name, window=None, center=None, def f(x, name=name, *args): x = self._shallow_copy(x) + if isinstance(name, compat.string_types): return getattr(x, name)(*args, **kwargs) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 23c8f9c96189a..93f637a561718 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3181,11 +3181,11 @@ def test_rolling_apply_mutability(self): df = pd.DataFrame({'A': ['foo'] * 3 + ['bar'] * 3, 'B': [1] * 6}) g = df.groupby('A') - # First ensure that the grouped column is not part of the output mi = pd.MultiIndex.from_tuples([('bar', 3), ('bar', 4), ('bar', 5), ('foo', 0), ('foo', 1), ('foo', 2)]) mi.names = ['A', None] + # Grouped column should not be a part of the output expected = pd.DataFrame([np.nan, 2., 2.] 
* 2, columns=['B'], index=mi) result = g.rolling(window=2).sum() From 878e640c16f0396e0302cb98b7c25e0ff46d12f3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 8 May 2018 20:45:07 -0700 Subject: [PATCH 5/5] Updated whatsnew --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c6991bc016868..32f7447e5ef77 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1328,6 +1328,7 @@ Groupby/Resample/Rolling - Bug in :func:`DataFrame.groupby` where transformations using ``np.all`` and ``np.any`` were raising a ``ValueError`` (:issue:`20653`) - Bug in :func:`DataFrame.resample` where ``ffill``, ``bfill``, ``pad``, ``backfill``, ``fillna``, ``interpolate``, and ``asfreq`` were ignoring ``loffset``. (:issue:`20744`) - Bug in :func:`DataFrame.groupby` when applying a function that has mixed data types and the user supplied function can fail on the grouping column (:issue:`20949`) +- Bug in :func:`DataFrameGroupBy.rolling().apply() <pandas.core.window.Rolling.apply>` where operations performed against the associated :class:`DataFrameGroupBy` object could impact the inclusion of the grouped item(s) in the result (:issue:`14013`) Sparse ^^^^^^