From a6ad6c516d1f1bfc7084cf9ccaa22df5e50d17b5 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 28 Feb 2020 13:58:29 +0000 Subject: [PATCH 01/39] make sure exclusions are applied before the groupby object reaches rolling --- pandas/core/window/common.py | 3 ++- pandas/tests/window/test_rolling.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index ed0b816f64800..162db7857b162 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -52,7 +52,8 @@ def __init__(self, obj, *args, **kwargs): kwargs.pop("parent", None) groupby = kwargs.pop("groupby", None) if groupby is None: - groupby, obj = obj, obj.obj + groupby, obj = obj, obj._obj_with_exclusions + groupby.obj = obj self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index ab2c7fcb7a0dc..d4693f250aeda 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -465,3 +465,13 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +def test_by_column_not_in_values(): + # GH 32262 + df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}) + + g = df.groupby("A") + r = g.rolling(4) + result = r.sum() + assert "A" not in result.columns From 54903a70f4957c8b33aff1bd8e45f497af0123cd Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 29 Feb 2020 11:40:03 +0000 Subject: [PATCH 02/39] pass object with exclusions earlier on --- pandas/core/groupby/groupby.py | 1 + pandas/core/window/common.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6362f11a3e032..62329a96c1701 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1577,6 +1577,7 @@ def rolling(self, *args, **kwargs): """ from pandas.core.window import RollingGroupby + self.obj = self._obj_with_exclusions return RollingGroupby(self, *args, **kwargs) @Substitution(name="groupby") diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 162db7857b162..ed0b816f64800 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -52,8 +52,7 @@ def __init__(self, obj, *args, **kwargs): kwargs.pop("parent", None) groupby = kwargs.pop("groupby", None) if groupby is None: - groupby, obj = obj, obj._obj_with_exclusions - groupby.obj = obj + groupby, obj = obj, obj.obj self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True From d81c5725be6614f3f885a5e0afa50323ded45ee5 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 29 Feb 2020 22:30:57 +0000 Subject: [PATCH 03/39] revert accident --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/groupby/groupby.py | 1 - pandas/core/window/common.py | 3 ++- pandas/tests/window/test_rolling.py | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 44deab25db695..d27f6c5181a8f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -315,6 +315,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate 
passed in objects (:issue:`30667`) - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) +- Bug in :meth:`pandas.core.groupby.RollingGroupby.apply` was including the grouped-by column in its values, rather than just in the index (:issue:`32262`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 62329a96c1701..6362f11a3e032 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1577,7 +1577,6 @@ def rolling(self, *args, **kwargs): """ from pandas.core.window import RollingGroupby - self.obj = self._obj_with_exclusions return RollingGroupby(self, *args, **kwargs) @Substitution(name="groupby") diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index ed0b816f64800..d44416b95aed6 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -81,7 +81,8 @@ def _apply( # TODO: can we de-duplicate with _dispatch? def f(x, name=name, *args): - x = self._shallow_copy(x) + x = self._shallow_copy(x, exclusions=self._groupby.exclusions) + x.obj = x._obj_with_exclusions if isinstance(name, str): return getattr(x, name)(*args, **kwargs) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index d4693f250aeda..7b36bcfc34b80 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -475,3 +475,4 @@ def test_by_column_not_in_values(): r = g.rolling(4) result = r.sum() assert "A" not in result.columns + assert "A" in g.obj.columns # check for side-effects From e049205cdc99e15ab1317501398d195b4ae7828f Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 29 Feb 2020 22:32:54 +0000 Subject: [PATCH 04/39] check for side effects in obj --- pandas/tests/window/test_rolling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 7b36bcfc34b80..6ef0e995edb62 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -472,7 +472,8 @@ def test_by_column_not_in_values(): df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}) g = df.groupby("A") + original_obj = g.obj.copy(deep=True) r = g.rolling(4) result = r.sum() assert "A" not in result.columns - assert "A" in g.obj.columns # check for side-effects + tm.assert_frame_equal(g.obj, original_obj) # check for side-effects From 5f43f3a702abd6a016c70c92c064050b7fe22888 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 11:49:23 +0000 Subject: [PATCH 05/39] wip --- pandas/core/base.py | 3 ++- pandas/core/groupby/groupby.py | 11 ++++++++--- pandas/core/groupby/ops.py | 3 +++ pandas/core/window/common.py | 8 ++++---- pandas/core/window/rolling.py | 4 +++- pandas/tests/groupby/test_apply.py | 3 ++- pandas/tests/window/test_grouper.py | 3 +++ pandas/tests/window/test_rolling.py | 10 ++++++++++ 8 files changed, 35 insertions(+), 10 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f55d9f905945d..c3deedba8cfac 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -214,10 +214,11 @@ def ndim(self) -> int: @cache_readonly def _obj_with_exclusions(self): + # breakpoint() if self._selection is not None and isinstance(self.obj, ABCDataFrame): return self.obj.reindex(columns=self._selection_list) - if len(self.exclusions) > 0: + if len(self.exclusions) > 0 and isinstance(self.obj, ABCDataFrame): return self.obj.drop(self.exclusions, 
axis=1) else: return self.obj diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6362f11a3e032..ceb7bbf60ca17 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -704,7 +704,7 @@ def __iter__(self): ) ) def apply(self, func, *args, **kwargs): - + # breakpoint() func = self._is_builtin_func(func) # this is needed so we don't try and wrap strings. If we could @@ -732,6 +732,7 @@ def f(g): # ignore SettingWithCopy here in case the user mutates with option_context("mode.chained_assignment", None): try: + # breakpoint() result = self._python_apply_general(f) except TypeError: # gh-20949 @@ -748,7 +749,11 @@ def f(g): return result def _python_apply_general(self, f): - keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis) + breakpoint() + if self.group_keys: + keys, values, mutated = self.grouper.apply(f, self._obj_with_exclusions, self.axis) + else: + keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis) return self._wrap_applied_output( keys, values, not_indexed_same=mutated or self.mutated @@ -1576,7 +1581,7 @@ def rolling(self, *args, **kwargs): Return a rolling grouper, providing rolling functionality per group. """ from pandas.core.window import RollingGroupby - + kwargs['exclusions'] = self.exclusions return RollingGroupby(self, *args, **kwargs) @Substitution(name="groupby") diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 7259268ac3f2b..44bc1f420ea72 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -149,6 +149,7 @@ def _get_group_keys(self): return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) def apply(self, f, data: FrameOrSeries, axis: int = 0): + # breakpoint() mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() @@ -169,6 +170,7 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0): and not sdata.index._has_complex_internals ): try: + # breakpoint() result_values, mutated = splitter.fast_apply(f, sdata, group_keys) except libreduction.InvalidApply as err: @@ -927,6 +929,7 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: class FrameSplitter(DataSplitter): def fast_apply(self, f, sdata: FrameOrSeries, names): # must return keys::list, values::list, mutated::bool + # breakpoint() starts, ends = lib.generate_slices(self.slabels, self.ngroups) return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index d44416b95aed6..aa5c2ac08235d 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -52,7 +52,7 @@ def __init__(self, obj, *args, **kwargs): kwargs.pop("parent", None) groupby = kwargs.pop("groupby", None) if groupby is None: - groupby, obj = obj, obj.obj + groupby, obj = obj, obj._obj_with_exclusions self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True @@ -81,14 +81,14 @@ def _apply( # TODO: can we de-duplicate with _dispatch? 
def f(x, name=name, *args): - x = self._shallow_copy(x, exclusions=self._groupby.exclusions) - x.obj = x._obj_with_exclusions + x = self._shallow_copy(x) + # x.obj = x._obj_with_exclusions if isinstance(name, str): return getattr(x, name)(*args, **kwargs) return x.apply(name, *args, **kwargs) - + # breakpoint() return self._groupby.apply(f) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3784989de10ab..d9e493db80ae5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -93,6 +93,7 @@ def __init__( self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() self._numba_func_cache: Dict[Optional[str], Callable] = dict() + self.exclusions = kwargs.get('exclusions', set()) @property def _constructor(self): @@ -1971,7 +1972,8 @@ def apply( engine_kwargs=None, args=None, kwargs=None, - ): + ): + # breakpoint() return super().apply( func, raw=raw, diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 18ad5d90b3f60..d614c0c710cc0 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -370,6 +370,7 @@ def test_apply_chunk_view(): result = df.groupby("key", group_keys=False).apply(lambda x: x[:2]) expected = df.take([0, 1, 3, 4, 6, 7]) + breakpoint() tm.assert_frame_equal(result, expected) @@ -405,7 +406,7 @@ def f(group): expected = df.copy() expected["v2"] = np.tile([0.0, 0.5, 1], 2) - + breakpoint() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 5b2687271f9d6..3b62bb1a8f22b 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -30,6 +30,7 @@ def test_getitem(self): expected = g_mutated.B.apply(lambda x: x.rolling(2).mean()) result = g.rolling(2).mean().B + # breakpoint() tm.assert_series_equal(result, expected) result = g.rolling(2).B.mean() @@ -61,7 +62,9 @@ def test_rolling(self): for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]: result = getattr(r, f)() + # breakpoint() expected = g.apply(lambda x: getattr(x.rolling(4), f)()) + # breakpoint() tm.assert_frame_equal(result, expected) for f in ["std", "var"]: diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 6ef0e995edb62..4763f33118573 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -477,3 +477,13 @@ def test_by_column_not_in_values(): result = r.sum() assert "A" not in result.columns tm.assert_frame_equal(g.obj, original_obj) # check for side-effects + + # g = df.groupby("A") + # original_obj = g.obj.copy(deep=True) + # r = g.rolling(4) + # def cust(x): + # breakpoint() + # return getattr(x.rolling(4), 'sum')() + # result = r.apply(cust) + # assert "A" not in result.columns + # tm.assert_frame_equal(g.obj, original_obj) # check for side-effects From 3b1c3ffb06d107e4423119c9e555bd36eae65093 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 15:21:43 +0000 Subject: [PATCH 06/39] Wip --- pandas/core/base.py | 3 +-- pandas/core/frame.py | 1 + pandas/core/groupby/groupby.py | 27 +++++++++++++-------------- pandas/core/groupby/ops.py | 3 --- pandas/core/window/common.py | 5 ++--- pandas/core/window/rolling.py | 7 +------ pandas/tests/groupby/test_apply.py | 3 +-- pandas/tests/window/test_grouper.py | 3 --- 8 files changed, 19 insertions(+), 33 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index c3deedba8cfac..f55d9f905945d 100644 
--- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -214,11 +214,10 @@ def ndim(self) -> int: @cache_readonly def _obj_with_exclusions(self): - # breakpoint() if self._selection is not None and isinstance(self.obj, ABCDataFrame): return self.obj.reindex(columns=self._selection_list) - if len(self.exclusions) > 0 and isinstance(self.obj, ABCDataFrame): + if len(self.exclusions) > 0: return self.obj.drop(self.exclusions, axis=1) else: return self.obj diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 61641bfb24293..ef085d035bd62 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7731,6 +7731,7 @@ def count(self, axis=0, level=None, numeric_only=False): Lewis 1 Myla 1 """ + breakpoint() axis = self._get_axis_number(axis) if level is not None: return self._count_level(level, axis=axis, numeric_only=numeric_only) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ceb7bbf60ca17..eda500e48ecb3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -6,6 +6,7 @@ class providing the base-class of operations. (defined in pandas.core.groupby.generic) expose these user-facing objects to provide specific functionality. """ +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from contextlib import contextmanager import datetime @@ -408,7 +409,6 @@ def __init__( observed=observed, mutated=self.mutated, ) - self.obj = obj self.axis = obj._get_axis_number(axis) self.grouper = grouper @@ -508,13 +508,17 @@ def _get_index(self, name): @cache_readonly def _selected_obj(self): # Note: _selected_obj is always just `self.obj` for SeriesGroupBy - - if self._selection is None or isinstance(self.obj, Series): + breakpoint() + if self.as_index: + obj = self._obj_with_exclusions + else: + obj = self.obj + if self._selection is None or isinstance(obj, Series): if self._group_selection is not None: - return self.obj[self._group_selection] - return self.obj + return obj[self._group_selection] + return obj else: - return self.obj[self._selection] + return obj[self._selection] def _reset_group_selection(self): """ @@ -704,7 +708,7 @@ def __iter__(self): ) ) def apply(self, func, *args, **kwargs): - # breakpoint() + func = self._is_builtin_func(func) # this is needed so we don't try and wrap strings. If we could @@ -732,7 +736,6 @@ def f(g): # ignore SettingWithCopy here in case the user mutates with option_context("mode.chained_assignment", None): try: - # breakpoint() result = self._python_apply_general(f) except TypeError: # gh-20949 @@ -749,11 +752,7 @@ def f(g): return result def _python_apply_general(self, f): - breakpoint() - if self.group_keys: - keys, values, mutated = self.grouper.apply(f, self._obj_with_exclusions, self.axis) - else: - keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis) + keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis) return self._wrap_applied_output( keys, values, not_indexed_same=mutated or self.mutated @@ -1581,7 +1580,7 @@ def rolling(self, *args, **kwargs): Return a rolling grouper, providing rolling functionality per group. 
""" from pandas.core.window import RollingGroupby - kwargs['exclusions'] = self.exclusions + return RollingGroupby(self, *args, **kwargs) @Substitution(name="groupby") diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 44bc1f420ea72..7259268ac3f2b 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -149,7 +149,6 @@ def _get_group_keys(self): return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes) def apply(self, f, data: FrameOrSeries, axis: int = 0): - # breakpoint() mutated = self.mutated splitter = self._get_splitter(data, axis=axis) group_keys = self._get_group_keys() @@ -170,7 +169,6 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0): and not sdata.index._has_complex_internals ): try: - # breakpoint() result_values, mutated = splitter.fast_apply(f, sdata, group_keys) except libreduction.InvalidApply as err: @@ -929,7 +927,6 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: class FrameSplitter(DataSplitter): def fast_apply(self, f, sdata: FrameOrSeries, names): # must return keys::list, values::list, mutated::bool - # breakpoint() starts, ends = lib.generate_slices(self.slabels, self.ngroups) return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index aa5c2ac08235d..ed0b816f64800 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -52,7 +52,7 @@ def __init__(self, obj, *args, **kwargs): kwargs.pop("parent", None) groupby = kwargs.pop("groupby", None) if groupby is None: - groupby, obj = obj, obj._obj_with_exclusions + groupby, obj = obj, obj.obj self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True @@ -82,13 +82,12 @@ def _apply( # TODO: can we de-duplicate with _dispatch? 
def f(x, name=name, *args): x = self._shallow_copy(x) - # x.obj = x._obj_with_exclusions if isinstance(name, str): return getattr(x, name)(*args, **kwargs) return x.apply(name, *args, **kwargs) - # breakpoint() + return self._groupby.apply(f) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d9e493db80ae5..641c78ea63c01 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -93,7 +93,6 @@ def __init__( self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() self._numba_func_cache: Dict[Optional[str], Callable] = dict() - self.exclusions = kwargs.get('exclusions', set()) @property def _constructor(self): @@ -1174,7 +1173,6 @@ class _Rolling_and_Expanding(_Rolling): ) def count(self): - blocks, obj = self._create_blocks() results = [] for b in blocks: @@ -1188,7 +1186,6 @@ def count(self): closed=self.closed, ).sum() results.append(result) - return self._wrap_results(results, blocks, obj) _shared_docs["apply"] = dedent( @@ -1954,7 +1951,6 @@ def aggregate(self, func, *args, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["count"]) def count(self): - # different impl for freq counting if self.is_freq_type: window_func = self._get_roll_func("roll_count") @@ -1972,8 +1968,7 @@ def apply( engine_kwargs=None, args=None, kwargs=None, - ): - # breakpoint() + ): return super().apply( func, raw=raw, diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index d614c0c710cc0..44ab7e33c3c2d 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -370,7 +370,6 @@ def test_apply_chunk_view(): result = df.groupby("key", group_keys=False).apply(lambda x: x[:2]) expected = df.take([0, 1, 3, 4, 6, 7]) - breakpoint() tm.assert_frame_equal(result, expected) @@ -406,7 +405,6 @@ def f(group): expected = df.copy() expected["v2"] = np.tile([0.0, 0.5, 1], 2) - breakpoint() tm.assert_frame_equal(result, expected) @@ -828,6 +826,7 @@ def test_apply_index_has_complex_internals(index): # GH 31248 df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) result = df.groupby("group").apply(lambda x: x) + breakpoint() tm.assert_frame_equal(result, df) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 3b62bb1a8f22b..5b2687271f9d6 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -30,7 +30,6 @@ def test_getitem(self): expected = g_mutated.B.apply(lambda x: x.rolling(2).mean()) result = g.rolling(2).mean().B - # breakpoint() tm.assert_series_equal(result, expected) result = g.rolling(2).B.mean() @@ -62,9 +61,7 @@ def test_rolling(self): for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]: result = getattr(r, f)() - # breakpoint() expected = g.apply(lambda x: getattr(x.rolling(4), f)()) - # breakpoint() tm.assert_frame_equal(result, expected) for f in ["std", "var"]: From 207a507f89b645ee7715527144c8d4a91207f265 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 17:44:10 +0000 Subject: [PATCH 07/39] change _selected_obj according to self.mutated --- pandas/core/frame.py | 1 - pandas/core/groupby/groupby.py | 5 ++--- pandas/core/window/rolling.py | 3 +++ pandas/tests/groupby/test_apply.py | 2 +- pandas/tests/window/test_rolling.py | 10 ---------- 5 files changed, 6 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ef085d035bd62..61641bfb24293 100644 --- a/pandas/core/frame.py +++ 
b/pandas/core/frame.py @@ -7731,7 +7731,6 @@ def count(self, axis=0, level=None, numeric_only=False): Lewis 1 Myla 1 """ - breakpoint() axis = self._get_axis_number(axis) if level is not None: return self._count_level(level, axis=axis, numeric_only=numeric_only) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index eda500e48ecb3..59987b6ac6e1c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -6,7 +6,6 @@ class providing the base-class of operations. (defined in pandas.core.groupby.generic) expose these user-facing objects to provide specific functionality. """ -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from contextlib import contextmanager import datetime @@ -409,6 +408,7 @@ def __init__( observed=observed, mutated=self.mutated, ) + self.obj = obj self.axis = obj._get_axis_number(axis) self.grouper = grouper @@ -508,8 +508,7 @@ def _get_index(self, name): @cache_readonly def _selected_obj(self): # Note: _selected_obj is always just `self.obj` for SeriesGroupBy - breakpoint() - if self.as_index: + if self.mutated: obj = self._obj_with_exclusions else: obj = self.obj diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 641c78ea63c01..3784989de10ab 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1173,6 +1173,7 @@ class _Rolling_and_Expanding(_Rolling): ) def count(self): + blocks, obj = self._create_blocks() results = [] for b in blocks: @@ -1186,6 +1187,7 @@ def count(self): closed=self.closed, ).sum() results.append(result) + return self._wrap_results(results, blocks, obj) _shared_docs["apply"] = dedent( @@ -1951,6 +1953,7 @@ def aggregate(self, func, *args, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["count"]) def count(self): + # different impl for freq counting if self.is_freq_type: window_func = self._get_roll_func("roll_count") diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 44ab7e33c3c2d..18ad5d90b3f60 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -405,6 +405,7 @@ def f(group): expected = df.copy() expected["v2"] = np.tile([0.0, 0.5, 1], 2) + tm.assert_frame_equal(result, expected) @@ -826,7 +827,6 @@ def test_apply_index_has_complex_internals(index): # GH 31248 df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) result = df.groupby("group").apply(lambda x: x) - breakpoint() tm.assert_frame_equal(result, df) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4763f33118573..6ef0e995edb62 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -477,13 +477,3 @@ def test_by_column_not_in_values(): result = r.sum() assert "A" not in result.columns tm.assert_frame_equal(g.obj, original_obj) # check for side-effects - - # g = df.groupby("A") - # original_obj = g.obj.copy(deep=True) - # r = g.rolling(4) - # def cust(x): - # breakpoint() - # return getattr(x.rolling(4), 'sum')() - # result = r.apply(cust) - # assert "A" not in result.columns - # tm.assert_frame_equal(g.obj, original_obj) # check for side-effects From c9b34b2bec2116f8a0201df9ed8dbd71aee055e0 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 18:05:51 +0000 Subject: [PATCH 08/39] sanitize --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py 
index 59987b6ac6e1c..1cc679fa12130 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -508,7 +508,7 @@ def _get_index(self, name): @cache_readonly def _selected_obj(self): # Note: _selected_obj is always just `self.obj` for SeriesGroupBy - if self.mutated: + if hasattr(self, "mutated") and self.mutated: obj = self._obj_with_exclusions else: obj = self.obj From b2ce75816d8506141ba1035b99c5710d64fac998 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 18:26:13 +0000 Subject: [PATCH 09/39] update old tests --- pandas/tests/resample/test_datetime_index.py | 1 + pandas/tests/resample/test_resampler_grouper.py | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 3ad82b9e075a8..e3ae343bfa1e8 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -857,6 +857,7 @@ def test_resample_segfault(): ).set_index("timestamp") result = df.groupby("ID").resample("5min").sum() expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) + expected = expected.drop("ID", axis=1) # GH 32332 tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 03c1445e099a0..9f1b83f5c7724 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -57,6 +57,7 @@ def f(x): return x.set_index("date").resample("D").asfreq() expected = df.groupby("id").apply(f) + expected = expected.drop("id", axis=1) # GH 32332 result = df.set_index("date").groupby("id").resample("D").asfreq() tm.assert_frame_equal(result, expected) @@ -72,6 +73,7 @@ def f(x): return x.resample("1D").ffill() expected = df.groupby("group").apply(f) + expected = expected.drop("group", axis=1) # GH 32332 result = df.groupby("group").resample("1D").ffill() tm.assert_frame_equal(result, expected) @@ -260,10 +262,7 @@ def test_resample_groupby_with_label(): ), ] mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None]) - expected = DataFrame( - data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex - ) - + expected = DataFrame(data={"col1": [1, 1, 2, 1]}, index=mindex) tm.assert_frame_equal(result, expected) From c307fdc1ae1697ef3a1d26e73bdfa4b4fd692135 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 18:46:18 +0000 Subject: [PATCH 10/39] fix old docstring --- pandas/core/groupby/groupby.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1cc679fa12130..7ce6f8d96041e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1510,11 +1510,11 @@ def resample(self, rule, *args, **kwargs): the timestamps falling into a bin. >>> df.groupby('a').resample('3T').sum() - a b + b a - 0 2000-01-01 00:00:00 0 2 - 2000-01-01 00:03:00 0 1 - 5 2000-01-01 00:00:00 5 1 + 0 2000-01-01 00:00:00 2 + 2000-01-01 00:03:00 1 + 5 2000-01-01 00:00:00 1 Upsample the series into 30 second bins. 
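For context on what the patches in this series are converging toward, here is a minimal sketch of the intended ``groupby(...).rolling(...)`` behaviour, adapted from the regression test added in PATCH 01 for GH 32262: the grouping key should appear only in the result's index, not among its value columns, and the original grouped object should be left untouched. This assumes the fixed semantics introduced by this series, so the first assertion would fail on releases prior to the fix.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
    g = df.groupby("A")
    original_obj = g.obj.copy(deep=True)

    result = g.rolling(4).sum()

    # The grouping key "A" lives only in the result's index levels,
    # not in its value columns.
    assert "A" not in result.columns
    assert "A" in result.index.names

    # Computing the rolling aggregation must not mutate the groupby's
    # underlying object (the side-effect check from PATCH 04).
    pd.testing.assert_frame_equal(g.obj, original_obj)
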
From 35f2b2da74b4f778ac2ad275cba99990f8a63bb2 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 19:03:21 +0000 Subject: [PATCH 11/39] finish correcting old docstring --- pandas/core/groupby/groupby.py | 48 +++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7ce6f8d96041e..628f7534923df 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1519,54 +1519,54 @@ def resample(self, rule, *args, **kwargs): Upsample the series into 30 second bins. >>> df.groupby('a').resample('30S').sum() - a b + b a - 0 2000-01-01 00:00:00 0 1 - 2000-01-01 00:00:30 0 0 - 2000-01-01 00:01:00 0 1 - 2000-01-01 00:01:30 0 0 - 2000-01-01 00:02:00 0 0 - 2000-01-01 00:02:30 0 0 - 2000-01-01 00:03:00 0 1 - 5 2000-01-01 00:02:00 5 1 + 0 2000-01-01 00:00:00 1 + 2000-01-01 00:00:30 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 0 + 2000-01-01 00:02:00 0 + 2000-01-01 00:02:30 0 + 2000-01-01 00:03:00 1 + 5 2000-01-01 00:02:00 1 Resample by month. Values are assigned to the month of the period. >>> df.groupby('a').resample('M').sum() - a b + b a - 0 2000-01-31 0 3 - 5 2000-01-31 5 1 + 0 2000-01-31 3 + 5 2000-01-31 1 Downsample the series into 3 minute bins as above, but close the right side of the bin interval. >>> df.groupby('a').resample('3T', closed='right').sum() - a b + b a - 0 1999-12-31 23:57:00 0 1 - 2000-01-01 00:00:00 0 2 - 5 2000-01-01 00:00:00 5 1 + 0 1999-12-31 23:57:00 1 + 2000-01-01 00:00:00 2 + 5 2000-01-01 00:00:00 1 Downsample the series into 3 minute bins and close the right side of the bin interval, but label each bin using the right edge instead of the left. >>> df.groupby('a').resample('3T', closed='right', label='right').sum() - a b + b a - 0 2000-01-01 00:00:00 0 1 - 2000-01-01 00:03:00 0 2 - 5 2000-01-01 00:03:00 5 1 + 0 2000-01-01 00:00:00 1 + 2000-01-01 00:03:00 2 + 5 2000-01-01 00:03:00 1 Add an offset of twenty seconds. 
>>> df.groupby('a').resample('3T', loffset='20s').sum() - a b + b a - 0 2000-01-01 00:00:20 0 2 - 2000-01-01 00:03:20 0 1 - 5 2000-01-01 00:00:20 5 1 + 0 2000-01-01 00:00:20 2 + 2000-01-01 00:03:20 1 + 5 2000-01-01 00:00:20 1 """ from pandas.core.resample import get_resampler_for_grouping From 2bb79324cfea9c928546410c4661fa93b477ae74 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sun, 1 Mar 2020 20:10:17 +0000 Subject: [PATCH 12/39] use getattr --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 628f7534923df..391bd7c8f766b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -508,7 +508,7 @@ def _get_index(self, name): @cache_readonly def _selected_obj(self): # Note: _selected_obj is always just `self.obj` for SeriesGroupBy - if hasattr(self, "mutated") and self.mutated: + if getattr(self, "mutated", False): obj = self._obj_with_exclusions else: obj = self.obj From eed4122950c14dbf4add4a86b865f2e85152cf40 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 10:28:37 +0000 Subject: [PATCH 13/39] old fix --- pandas/core/groupby/groupby.py | 5 +---- pandas/core/window/common.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 391bd7c8f766b..7e60e1beb5207 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -508,10 +508,7 @@ def _get_index(self, name): @cache_readonly def _selected_obj(self): # Note: _selected_obj is always just `self.obj` for SeriesGroupBy - if getattr(self, "mutated", False): - obj = self._obj_with_exclusions - else: - obj = self.obj + obj = self.obj if self._selection is None or isinstance(obj, Series): if self._group_selection is not None: return obj[self._group_selection] diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index ed0b816f64800..04c6aa21e7e17 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -81,7 +81,7 @@ def _apply( # TODO: can we de-duplicate with _dispatch? 
def f(x, name=name, *args): - x = self._shallow_copy(x) + x = self._shallow_copy(x, exclusions=self._groupby.obj.exclusions.intersection(x.columns)) if isinstance(name, str): return getattr(x, name)(*args, **kwargs) From 26d9dec1c6c02243ec291147673b69c1b38b0288 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 12:32:10 +0000 Subject: [PATCH 14/39] simpler fix --- pandas/core/groupby/groupby.py | 66 +++++++++---------- pandas/core/window/common.py | 8 ++- pandas/tests/resample/test_datetime_index.py | 1 - .../tests/resample/test_resampler_grouper.py | 7 +- 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7e60e1beb5207..6362f11a3e032 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -508,13 +508,13 @@ def _get_index(self, name): @cache_readonly def _selected_obj(self): # Note: _selected_obj is always just `self.obj` for SeriesGroupBy - obj = self.obj - if self._selection is None or isinstance(obj, Series): + + if self._selection is None or isinstance(self.obj, Series): if self._group_selection is not None: - return obj[self._group_selection] - return obj + return self.obj[self._group_selection] + return self.obj else: - return obj[self._selection] + return self.obj[self._selection] def _reset_group_selection(self): """ @@ -1507,63 +1507,63 @@ def resample(self, rule, *args, **kwargs): the timestamps falling into a bin. >>> df.groupby('a').resample('3T').sum() - b + a b a - 0 2000-01-01 00:00:00 2 - 2000-01-01 00:03:00 1 - 5 2000-01-01 00:00:00 1 + 0 2000-01-01 00:00:00 0 2 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:00:00 5 1 Upsample the series into 30 second bins. >>> df.groupby('a').resample('30S').sum() - b + a b a - 0 2000-01-01 00:00:00 1 - 2000-01-01 00:00:30 0 - 2000-01-01 00:01:00 1 - 2000-01-01 00:01:30 0 - 2000-01-01 00:02:00 0 - 2000-01-01 00:02:30 0 - 2000-01-01 00:03:00 1 - 5 2000-01-01 00:02:00 1 + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:00:30 0 0 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:01:30 0 0 + 2000-01-01 00:02:00 0 0 + 2000-01-01 00:02:30 0 0 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:02:00 5 1 Resample by month. Values are assigned to the month of the period. >>> df.groupby('a').resample('M').sum() - b + a b a - 0 2000-01-31 3 - 5 2000-01-31 1 + 0 2000-01-31 0 3 + 5 2000-01-31 5 1 Downsample the series into 3 minute bins as above, but close the right side of the bin interval. >>> df.groupby('a').resample('3T', closed='right').sum() - b + a b a - 0 1999-12-31 23:57:00 1 - 2000-01-01 00:00:00 2 - 5 2000-01-01 00:00:00 1 + 0 1999-12-31 23:57:00 0 1 + 2000-01-01 00:00:00 0 2 + 5 2000-01-01 00:00:00 5 1 Downsample the series into 3 minute bins and close the right side of the bin interval, but label each bin using the right edge instead of the left. >>> df.groupby('a').resample('3T', closed='right', label='right').sum() - b + a b a - 0 2000-01-01 00:00:00 1 - 2000-01-01 00:03:00 2 - 5 2000-01-01 00:03:00 1 + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:03:00 0 2 + 5 2000-01-01 00:03:00 5 1 Add an offset of twenty seconds. 
>>> df.groupby('a').resample('3T', loffset='20s').sum() - b + a b a - 0 2000-01-01 00:00:20 2 - 2000-01-01 00:03:20 1 - 5 2000-01-01 00:00:20 1 + 0 2000-01-01 00:00:20 0 2 + 2000-01-01 00:03:20 0 1 + 5 2000-01-01 00:00:20 5 1 """ from pandas.core.resample import get_resampler_for_grouping diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 04c6aa21e7e17..c54636b2ddb52 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -81,7 +81,13 @@ def _apply( # TODO: can we de-duplicate with _dispatch? def f(x, name=name, *args): - x = self._shallow_copy(x, exclusions=self._groupby.obj.exclusions.intersection(x.columns)) + if isinstance(x, ABCDataFrame): + x = self._shallow_copy( + x, exclusions=self._groupby.exclusions.intersection(x.columns) + ) + x.obj = x._obj_with_exclusions + else: + x = self._shallow_copy(x) if isinstance(name, str): return getattr(x, name)(*args, **kwargs) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index e3ae343bfa1e8..3ad82b9e075a8 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -857,7 +857,6 @@ def test_resample_segfault(): ).set_index("timestamp") result = df.groupby("ID").resample("5min").sum() expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) - expected = expected.drop("ID", axis=1) # GH 32332 tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 9f1b83f5c7724..03c1445e099a0 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -57,7 +57,6 @@ def f(x): return x.set_index("date").resample("D").asfreq() expected = df.groupby("id").apply(f) - expected = expected.drop("id", axis=1) # GH 32332 result = df.set_index("date").groupby("id").resample("D").asfreq() tm.assert_frame_equal(result, expected) @@ -73,7 +72,6 @@ def f(x): return x.resample("1D").ffill() expected = df.groupby("group").apply(f) - expected = expected.drop("group", axis=1) # GH 32332 result = df.groupby("group").resample("1D").ffill() tm.assert_frame_equal(result, expected) @@ -262,7 +260,10 @@ def test_resample_groupby_with_label(): ), ] mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None]) - expected = DataFrame(data={"col1": [1, 1, 2, 1]}, index=mindex) + expected = DataFrame( + data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex + ) + tm.assert_frame_equal(result, expected) From 68b4299e78ee1b57f8db70dfe150adfef4e62d86 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 14:02:58 +0000 Subject: [PATCH 15/39] try making Window's own _shallow_copy --- pandas/core/groupby/groupby.py | 1 + pandas/core/window/common.py | 9 +-------- pandas/core/window/rolling.py | 15 +++++++++++---- pandas/tests/window/test_grouper.py | 11 +++++++++++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6362f11a3e032..3213f644a87f6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1577,6 +1577,7 @@ def rolling(self, *args, **kwargs): """ from pandas.core.window import RollingGroupby + kwargs["exclusions"] = self.exclusions return RollingGroupby(self, *args, **kwargs) @Substitution(name="groupby") diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index c54636b2ddb52..5f19274eb4a97 100644 --- 
a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -81,17 +81,10 @@ def _apply( # TODO: can we de-duplicate with _dispatch? def f(x, name=name, *args): - if isinstance(x, ABCDataFrame): - x = self._shallow_copy( - x, exclusions=self._groupby.exclusions.intersection(x.columns) - ) - x.obj = x._obj_with_exclusions - else: - x = self._shallow_copy(x) + x = self._shallow_copy(x) if isinstance(name, str): return getattr(x, name)(*args, **kwargs) - return x.apply(name, *args, **kwargs) return self._groupby.apply(f) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3784989de10ab..63c47648cb21e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -93,6 +93,17 @@ def __init__( self.axis = obj._get_axis_number(axis) if axis is not None else None self.validate() self._numba_func_cache: Dict[Optional[str], Callable] = dict() + self.exclusions = kwargs.get("exclusions", set()) + + def _shallow_copy(self, obj, **kwargs): + if isinstance(obj, ABCDataFrame): + obj = super()._shallow_copy( + obj, exclusions=exclusions.intersection(obj.columns), **kwargs + ) + obj.obj = obj._obj_with_exclusions + else: + obj = super()._shallow_copy(obj, **kwargs) + return obj @property def _constructor(self): @@ -156,7 +167,6 @@ def _create_blocks(self): if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) blocks = obj._to_dict_of_blocks(copy=False).values() - return blocks, obj def _gotitem(self, key, ndim, subset=None): @@ -509,7 +519,6 @@ def calc(x): result = self._center_window(result, window) results.append(result) - return self._wrap_results(results, block_list, obj, exclude) def aggregate(self, func, *args, **kwargs): @@ -1187,7 +1196,6 @@ def count(self): closed=self.closed, ).sum() results.append(result) - return self._wrap_results(results, blocks, obj) _shared_docs["apply"] = dedent( @@ -1953,7 +1961,6 @@ def aggregate(self, func, *args, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["count"]) def count(self): - # different impl for freq counting if self.is_freq_type: window_func = self._get_roll_func("roll_count") diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 5b2687271f9d6..2fdd05e3f4fc7 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -62,11 +62,15 @@ def test_rolling(self): for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]: result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.rolling(4), f)()) + expected = expected.drop("A", axis=1) # groupby.apply returns + # grouped-by column tm.assert_frame_equal(result, expected) for f in ["std", "var"]: result = getattr(r, f)(ddof=1) expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) + expected = expected.drop("A", axis=1) # groupby.apply returns + # grouped-by column tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -79,6 +83,9 @@ def test_rolling_quantile(self, interpolation): expected = g.apply( lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) ) + expected = expected.drop( + "A", axis=1 + ) # groupby.apply returns the grouped-by column tm.assert_frame_equal(result, expected) def test_rolling_corr_cov(self): @@ -92,6 +99,7 @@ def func(x): return getattr(x.rolling(4), f)(self.frame) expected = g.apply(func) + expected = expected.drop("A", axis=1) tm.assert_frame_equal(result, expected) result = getattr(r.B, f)(pairwise=True) @@ -109,6 +117,9 @@ def test_rolling_apply(self, raw): 
# reduction result = r.apply(lambda x: x.sum(), raw=raw) expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) + expected = expected.drop( + "A", axis=1 + ) # rolling.apply returns the grouping column. tm.assert_frame_equal(result, expected) def test_rolling_apply_mutability(self): From 159a3d64e594cac9d1981ca5afe7eec1e992f53b Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 14:24:00 +0000 Subject: [PATCH 16/39] more wip --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 63c47648cb21e..af157a679f989 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -98,7 +98,7 @@ def __init__( def _shallow_copy(self, obj, **kwargs): if isinstance(obj, ABCDataFrame): obj = super()._shallow_copy( - obj, exclusions=exclusions.intersection(obj.columns), **kwargs + obj, exclusions=self.exclusions.intersection(obj.columns), **kwargs ) obj.obj = obj._obj_with_exclusions else: From 0b393b66ddc7c3602d44323a3b14f6f6c38cb4b4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 14:55:48 +0000 Subject: [PATCH 17/39] typeerror! --- pandas/core/window/rolling.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index af157a679f989..e528bcb2ecf8b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -97,10 +97,13 @@ def __init__( def _shallow_copy(self, obj, **kwargs): if isinstance(obj, ABCDataFrame): - obj = super()._shallow_copy( - obj, exclusions=self.exclusions.intersection(obj.columns), **kwargs - ) - obj.obj = obj._obj_with_exclusions + try: + obj = super()._shallow_copy( + obj, exclusions=self.exclusions.intersection(obj.columns), **kwargs + ) + obj.obj = obj._obj_with_exclusions + except TypeError: # Some _shallow_copy don't take `exclusions` as argument + obj = super()._shallow_copy(obj, **kwargs) else: obj = super()._shallow_copy(obj, **kwargs) return obj From 9fc50ee03afed772ce907700266a2212c6e5be83 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 16:38:24 +0000 Subject: [PATCH 18/39] add some types, comment tests --- pandas/core/window/common.py | 1 + pandas/core/window/rolling.py | 6 +++++- pandas/tests/window/test_grouper.py | 11 ++++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 5f19274eb4a97..ed0b816f64800 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -85,6 +85,7 @@ def f(x, name=name, *args): if isinstance(name, str): return getattr(x, name)(*args, **kwargs) + return x.apply(name, *args, **kwargs) return self._groupby.apply(f) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e528bcb2ecf8b..0ee62803d1646 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -95,7 +95,7 @@ def __init__( self._numba_func_cache: Dict[Optional[str], Callable] = dict() self.exclusions = kwargs.get("exclusions", set()) - def _shallow_copy(self, obj, **kwargs): + def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> "_Window": if isinstance(obj, ABCDataFrame): try: obj = super()._shallow_copy( @@ -170,6 +170,7 @@ def _create_blocks(self): if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) blocks = obj._to_dict_of_blocks(copy=False).values() + return blocks, obj def 
_gotitem(self, key, ndim, subset=None): @@ -522,6 +523,7 @@ def calc(x): result = self._center_window(result, window) results.append(result) + return self._wrap_results(results, block_list, obj, exclude) def aggregate(self, func, *args, **kwargs): @@ -1199,6 +1201,7 @@ def count(self): closed=self.closed, ).sum() results.append(result) + return self._wrap_results(results, blocks, obj) _shared_docs["apply"] = dedent( @@ -1964,6 +1967,7 @@ def aggregate(self, func, *args, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["count"]) def count(self): + # different impl for freq counting if self.is_freq_type: window_func = self._get_roll_func("roll_count") diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 2fdd05e3f4fc7..b736284f86493 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -62,15 +62,15 @@ def test_rolling(self): for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]: result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.rolling(4), f)()) - expected = expected.drop("A", axis=1) # groupby.apply returns - # grouped-by column + expected = expected.drop("A", axis=1) # groupby.apply doesn't + # drop the grouped-by column tm.assert_frame_equal(result, expected) for f in ["std", "var"]: result = getattr(r, f)(ddof=1) expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) - expected = expected.drop("A", axis=1) # groupby.apply returns - # grouped-by column + expected = expected.drop("A", axis=1) # groupby.apply doesn't + # drop the grouped-by column tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -99,7 +99,8 @@ def func(x): return getattr(x.rolling(4), f)(self.frame) expected = g.apply(func) - expected = expected.drop("A", axis=1) + expected = expected.drop("A", axis=1) # groupby.apply doesn't + # drop the grouped-by column tm.assert_frame_equal(result, expected) result = getattr(r.B, f)(pairwise=True) From 7fe2fcf5a8c753afc019ab7698e4909d06b34383 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 17:11:42 +0000 Subject: [PATCH 19/39] fix typing --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 0ee62803d1646..b7cbf55d77d77 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -95,7 +95,7 @@ def __init__( self._numba_func_cache: Dict[Optional[str], Callable] = dict() self.exclusions = kwargs.get("exclusions", set()) - def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> "_Window": + def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> FrameOrSeries: if isinstance(obj, ABCDataFrame): try: obj = super()._shallow_copy( From 8a34ff436f58b0329e895129053070c4484b027c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 17:14:22 +0000 Subject: [PATCH 20/39] better fix --- pandas/core/window/rolling.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b7cbf55d77d77..2a880642cae31 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -95,18 +95,18 @@ def __init__( self._numba_func_cache: Dict[Optional[str], Callable] = dict() self.exclusions = kwargs.get("exclusions", set()) - def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> FrameOrSeries: + def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> "_Window": if isinstance(obj, ABCDataFrame): try: 
- obj = super()._shallow_copy( + new_obj = super()._shallow_copy( obj, exclusions=self.exclusions.intersection(obj.columns), **kwargs ) - obj.obj = obj._obj_with_exclusions + new_obj.obj = new_obj._obj_with_exclusions except TypeError: # Some _shallow_copy don't take `exclusions` as argument - obj = super()._shallow_copy(obj, **kwargs) + new_obj = super()._shallow_copy(obj, **kwargs) else: - obj = super()._shallow_copy(obj, **kwargs) - return obj + new_obj = super()._shallow_copy(obj, **kwargs) + return new_obj @property def _constructor(self): From 8953bdad93d5a6fb6427413d92cd10c909e2ad9f Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 17:39:17 +0000 Subject: [PATCH 21/39] correct return annotation --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2a880642cae31..6665b577c32c5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -95,7 +95,7 @@ def __init__( self._numba_func_cache: Dict[Optional[str], Callable] = dict() self.exclusions = kwargs.get("exclusions", set()) - def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> "_Window": + def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin: if isinstance(obj, ABCDataFrame): try: new_obj = super()._shallow_copy( From b2b8a424cc287f333ab97fc5806ef854821eb6cc Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 3 Mar 2020 17:48:07 +0000 Subject: [PATCH 22/39] simplify code --- pandas/core/window/rolling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6665b577c32c5..047db83b9c54d 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -102,10 +102,10 @@ def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin: obj, exclusions=self.exclusions.intersection(obj.columns), **kwargs ) new_obj.obj = new_obj._obj_with_exclusions + return new_obj except TypeError: # Some _shallow_copy don't take `exclusions` as argument - new_obj = super()._shallow_copy(obj, **kwargs) - else: - new_obj = super()._shallow_copy(obj, **kwargs) + pass + new_obj = super()._shallow_copy(obj, **kwargs) return new_obj @property From d47619184e762d7cef8a92a6126a84cf620dc434 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Mar 2020 15:17:12 +0000 Subject: [PATCH 23/39] fix upstream, remove try except --- doc/source/whatsnew/v1.1.0.rst | 14 ++++++++++++++ pandas/core/window/ewm.py | 1 + pandas/core/window/rolling.py | 14 ++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d27f6c5181a8f..c0b175f7bd1d0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -162,6 +162,20 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss ... KeyError: Timestamp('1970-01-01 00:00:00') +groupby.rolling no longer returns grouped-by column in values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +*Previous behavior*: + +.. code-block:: ipython + +*New behavior*: + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]}) + df.groupby("A").rolling(2).sum() + .. --------------------------------------------------------------------------- .. 
_whatsnew_110.deprecations: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index c6096c24ecbc9..de48bd2a065ed 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -129,6 +129,7 @@ def __init__( adjust=True, ignore_na=False, axis=0, + **kwargs, ): self.obj = obj self.com = _get_center_of_mass(com, span, halflife, alpha) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 047db83b9c54d..70eb5dfc14e4e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -97,15 +97,13 @@ def __init__( def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin: if isinstance(obj, ABCDataFrame): - try: - new_obj = super()._shallow_copy( - obj, exclusions=self.exclusions.intersection(obj.columns), **kwargs + exclusions = self.exclusions.intersection(obj.columns) + new_obj = super()._shallow_copy( + obj, exclusions=exclusions, **kwargs ) - new_obj.obj = new_obj._obj_with_exclusions - return new_obj - except TypeError: # Some _shallow_copy don't take `exclusions` as argument - pass - new_obj = super()._shallow_copy(obj, **kwargs) + new_obj.obj = new_obj._obj_with_exclusions + else: + new_obj = super()._shallow_copy(obj, **kwargs) return new_obj @property From 63e3d8562f6b515b1b1c6d8f17310baf03bada97 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Mar 2020 15:25:13 +0000 Subject: [PATCH 24/39] document change as api-breaking lint wip clean re-write add comment --- doc/source/whatsnew/v1.1.0.rst | 18 ++++++++++++++---- pandas/core/window/rolling.py | 6 +++--- pandas/tests/window/test_grouper.py | 22 ++++++++++------------ 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c0b175f7bd1d0..b215fd2a52fb7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -162,19 +162,30 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss ... KeyError: Timestamp('1970-01-01 00:00:00') -groupby.rolling no longer returns grouped-by column in values +GroupBy.rolling no longer returns grouped-by column in values ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ *Previous behavior*: .. code-block:: ipython + In [1]: df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]}) + + In [2]: df.groupby("A").rolling(2).sum() + Out[2]: + A B + A + 1 0 NaN NaN + 1 2.0 1.0 + 2 2 NaN NaN + 3 3 NaN NaN + *New behavior*: .. ipython:: python - df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]}) - df.groupby("A").rolling(2).sum() + df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]}) + df.groupby("A").rolling(2).sum() .. 
--------------------------------------------------------------------------- @@ -329,7 +340,6 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`) - Bug in :meth:`DataFrameGroupby.transform` produces incorrect result with transformation functions (:issue:`30918`) -- Bug in :meth:`pandas.core.groupby.RollingGroupby.apply` was including the grouped-by column in its values, rather than just in the index (:issue:`32262`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 70eb5dfc14e4e..cd08f62745ab4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -97,10 +97,10 @@ def __init__( def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin: if isinstance(obj, ABCDataFrame): + # there may be elements in self.exclusions that are no longer + # in obj, see GH 32468 exclusions = self.exclusions.intersection(obj.columns) - new_obj = super()._shallow_copy( - obj, exclusions=exclusions, **kwargs - ) + new_obj = super()._shallow_copy(obj, exclusions=exclusions, **kwargs) new_obj.obj = new_obj._obj_with_exclusions else: new_obj = super()._shallow_copy(obj, **kwargs) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index b736284f86493..f979099e21cb6 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -62,15 +62,15 @@ def test_rolling(self): for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]: result = getattr(r, f)() expected = g.apply(lambda x: getattr(x.rolling(4), f)()) - expected = expected.drop("A", axis=1) # groupby.apply doesn't - # drop the grouped-by column + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) tm.assert_frame_equal(result, expected) for f in ["std", "var"]: result = getattr(r, f)(ddof=1) expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) - expected = expected.drop("A", axis=1) # groupby.apply doesn't - # drop the grouped-by column + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -83,9 +83,8 @@ def test_rolling_quantile(self, interpolation): expected = g.apply( lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) ) - expected = expected.drop( - "A", axis=1 - ) # groupby.apply returns the grouped-by column + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) tm.assert_frame_equal(result, expected) def test_rolling_corr_cov(self): @@ -99,8 +98,8 @@ def func(x): return getattr(x.rolling(4), f)(self.frame) expected = g.apply(func) - expected = expected.drop("A", axis=1) # groupby.apply doesn't - # drop the grouped-by column + # groupby.apply doesn't drop the grouped-by column + expected = expected.drop("A", axis=1) tm.assert_frame_equal(result, expected) result = getattr(r.B, f)(pairwise=True) @@ -118,9 +117,8 @@ def test_rolling_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) - expected = expected.drop( - "A", axis=1 - ) # rolling.apply returns the grouping column. 
+        # groupby.apply doesn't drop the grouped-by column
+        expected = expected.drop("A", axis=1)
         tm.assert_frame_equal(result, expected)
 
     def test_rolling_apply_mutability(self):

From a7ca8eb95279a0031038af1646a6c6898623f18f Mon Sep 17 00:00:00 2001
From: MarcoGorelli
Date: Sun, 15 Mar 2020 11:28:13 +0000
Subject: [PATCH 25/39] remove elements from exclusions within _obj_with_exclusions

---
 pandas/core/base.py           | 11 +++++++++--
 pandas/core/window/rolling.py | 11 +++--------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 40ff0640a5bc4..a6af57dddac54 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -207,8 +207,15 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj.reindex(columns=self._selection_list)
 
-        if len(self.exclusions) > 0:
-            return self.obj.drop(self.exclusions, axis=1)
+        if not isinstance(self.obj, ABCDataFrame):
+            return self.obj
+
+        # there may be elements in self.exclusions that are no longer
+        # in obj, see GH 32468
+        exclusions = self.exclusions.intersection(self.obj.columns)
+
+        if len(exclusions) > 0:
+            return self.obj.drop(exclusions, axis=1)
         else:
             return self.obj
 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index cd08f62745ab4..887edb0b35d30 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -96,14 +96,9 @@ def __init__(
         self.exclusions = kwargs.get("exclusions", set())
 
     def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin:
-        if isinstance(obj, ABCDataFrame):
-            # there may be elements in self.exclusions that are no longer
-            # in obj, see GH 32468
-            exclusions = self.exclusions.intersection(obj.columns)
-            new_obj = super()._shallow_copy(obj, exclusions=exclusions, **kwargs)
-            new_obj.obj = new_obj._obj_with_exclusions
-        else:
-            new_obj = super()._shallow_copy(obj, **kwargs)
+        exclusions = self.exclusions
+        new_obj = super()._shallow_copy(obj, exclusions=exclusions, **kwargs)
+        new_obj.obj = new_obj._obj_with_exclusions
         return new_obj
 
     @property

From 35e7340681edbce3451d7ed6c908f23537a1f203 Mon Sep 17 00:00:00 2001
From: MarcoGorelli
Date: Sun, 15 Mar 2020 11:51:16 +0000
Subject: [PATCH 26/39] remove no-longer-necessary performance warning

---
 pandas/tests/groupby/test_groupby.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 5662d41e19885..103772db7c365 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -5,8 +5,6 @@
 import numpy as np
 import pytest
 
-from pandas.errors import PerformanceWarning
-
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv
 import pandas._testing as tm
@@ -1565,8 +1563,7 @@ def test_groupby_multiindex_not_lexsorted():
     tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
 
     expected = lexsorted_df.groupby("a").mean()
-    with tm.assert_produces_warning(PerformanceWarning):
-        result = not_lexsorted_df.groupby("a").mean()
+    result = not_lexsorted_df.groupby("a").mean()
     tm.assert_frame_equal(expected, result)
 
     # a transforming function should work regardless of sort

From 17090da8b1c69da105d4a7b889847072a4e73351 Mon Sep 17 00:00:00 2001
From: MarcoGorelli
Date: Sun, 15 Mar 2020 12:35:11 +0000
Subject: [PATCH 27/39] simplify _obj_with_exclusions

---
 pandas/core/base.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index a6af57dddac54..98bed6756acb4 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -207,17 +207,13 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj.reindex(columns=self._selection_list)
 
-        if not isinstance(self.obj, ABCDataFrame):
+        if not isinstance(self.obj, ABCDataFrame) or not self.exclusions:
            return self.obj
 
         # there may be elements in self.exclusions that are no longer
         # in obj, see GH 32468
         exclusions = self.exclusions.intersection(self.obj.columns)
-
-        if len(exclusions) > 0:
-            return self.obj.drop(exclusions, axis=1)
-        else:
-            return self.obj
+        return self.obj.drop(exclusions, axis=1)
 
     def __getitem__(self, key):

From 7c7f79ca507da4f6958452173751c0ac02d783d9 Mon Sep 17 00:00:00 2001
From: MarcoGorelli
Date: Sun, 15 Mar 2020 12:36:19 +0000
Subject: [PATCH 28/39] correct comment

---
 pandas/core/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 98bed6756acb4..0e916b1cb8547 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -211,7 +211,7 @@ def _obj_with_exclusions(self):
             return self.obj
 
         # there may be elements in self.exclusions that are no longer
-        # in obj, see GH 32468
+        # in self.obj, see GH 32468
         exclusions = self.exclusions.intersection(self.obj.columns)
         return self.obj.drop(exclusions, axis=1)
 

From 4d1c5a6ac5a34c8f68d4f4f065ea74cbfc693182 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sat, 21 Mar 2020 12:01:49 +0000
Subject: [PATCH 29/39] deal with multiindexed columns case

---
 pandas/core/base.py                  | 7 ++++++-
 pandas/tests/groupby/test_groupby.py | 5 ++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 3db556ae5c167..9d2a2fb81bfd1 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -214,7 +214,12 @@ def _obj_with_exclusions(self):
 
         # there may be elements in self.exclusions that are no longer
         # in self.obj, see GH 32468
-        exclusions = self.exclusions.intersection(self.obj.columns)
+        unique_column_names = {
+            j
+            for i in range(self.obj.columns.nlevels)
+            for j in self.obj.columns.get_level_values(i)
+        }
+        exclusions = self.exclusions.intersection(unique_column_names)
         return self.obj.drop(exclusions, axis=1)
 
     def __getitem__(self, key):
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 103772db7c365..5662d41e19885 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import PerformanceWarning
+
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv
 import pandas._testing as tm
@@ -1563,7 +1565,8 @@ def test_groupby_multiindex_not_lexsorted():
     tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
 
     expected = lexsorted_df.groupby("a").mean()
-    result = not_lexsorted_df.groupby("a").mean()
+    with tm.assert_produces_warning(PerformanceWarning):
+        result = not_lexsorted_df.groupby("a").mean()
     tm.assert_frame_equal(expected, result)
 
     # a transforming function should work regardless of sort

From 0dde4a8ea9d517013abfb34501e3a92d040177eb Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sat, 21 Mar 2020 12:49:52 +0000
Subject: [PATCH 30/39] use elif in _obj_with_exclusions, add test for column multiindex, display dataframe in whatsnew entry separately from before vs after

---
 doc/source/whatsnew/v1.1.0.rst      | 14 +++++++++-----
 pandas/core/base.py                 |  2 +-
 pandas/tests/window/test_rolling.py |  7 +++++--
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 63ad59ae72f0f..60b1805abbdb4 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -170,14 +170,19 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss
 GroupBy.rolling no longer returns grouped-by column in values
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+Suppose we start with
+
+.. ipython:: python
+
+    df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
+    df
+
 *Previous behavior*:
 
 .. code-block:: ipython
 
-    In [1]: df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
-
-    In [2]: df.groupby("A").rolling(2).sum()
-    Out[2]:
+    In [1]: df.groupby("A").rolling(2).sum()
+    Out[1]:
             A    B
     A
     1 0   NaN  NaN
@@ -189,7 +194,6 @@ GroupBy.rolling no longer returns grouped-by column in values
 
 .. ipython:: python
 
-    df = pd.DataFrame({"A": [1, 1, 2, 3], "B": [0, 1, 2, 3]})
     df.groupby("A").rolling(2).sum()
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 9d2a2fb81bfd1..16e04485b4b5e 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -209,7 +209,7 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj.reindex(columns=self._selection_list)
 
-        if not isinstance(self.obj, ABCDataFrame) or not self.exclusions:
+        elif not self.exclusions and not isinstance(self.obj, ABCDataFrame):
             return self.obj
 
         # there may be elements in self.exclusions that are no longer
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index 6ef0e995edb62..2e8d5a6772e08 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -467,9 +467,12 @@ def test_rolling_count_default_min_periods_with_null_values(constructor):
     tm.assert_equal(result, expected)
 
 
-def test_by_column_not_in_values():
+@pytest.mark.parametrize(
+    "columns", [pd.MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]]
+)
+def test_by_column_not_in_values(columns):
     # GH 32262
-    df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
+    df = pd.DataFrame([[1, 0]] * 20 + [[2, 0]] * 12 + [[3, 0]] * 8, columns=columns)
 
     g = df.groupby("A")
     original_obj = g.obj.copy(deep=True)

From 1e4ba56fc6a2e760dff7b6bc76f91951bca7ecf9 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sat, 21 Mar 2020 13:01:12 +0000
Subject: [PATCH 31/39] simplify unique_column_names

---
 pandas/core/base.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 16e04485b4b5e..f8edd35c7a356 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -214,11 +214,7 @@ def _obj_with_exclusions(self):
 
         # there may be elements in self.exclusions that are no longer
         # in self.obj, see GH 32468
-        unique_column_names = {
-            j
-            for i in range(self.obj.columns.nlevels)
-            for j in self.obj.columns.get_level_values(i)
-        }
+        unique_column_names = {j for i in self.obj.columns for j in i}
         exclusions = self.exclusions.intersection(unique_column_names)
         return self.obj.drop(exclusions, axis=1)
 

From 63127251a09433a8cfa5a997b7c76668185daf79 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sat, 21 Mar 2020 17:25:56 +0000
Subject: [PATCH 32/39] rewrite using get_level_values

---
 pandas/core/base.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index f8edd35c7a356..ad4bc0cfb9728 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -214,7 +214,10 @@ def _obj_with_exclusions(self):
 
         # there may be elements in self.exclusions that are no longer
         # in self.obj, see GH 32468
-        unique_column_names = {j for i in self.obj.columns for j in i}
+        nlevels = self.obj.columns.nlevels
+        unique_column_names = {
+            j for i in range(nlevels) for j in self.obj.columns.get_level_values(i)
+        }
         exclusions = self.exclusions.intersection(unique_column_names)
         return self.obj.drop(exclusions, axis=1)
 

From d9748d1160d9a8837f6bd1fbe663243229e23ca7 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sat, 21 Mar 2020 20:50:29 +0000
Subject: [PATCH 33/39] revert 'or' which was accidentally changed to 'and'

---
 pandas/core/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index ad4bc0cfb9728..a7aa6b39ea96d 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -209,7 +209,7 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj.reindex(columns=self._selection_list)
 
-        elif not self.exclusions and not isinstance(self.obj, ABCDataFrame):
+        elif not self.exclusions or not isinstance(self.obj, ABCDataFrame):
             return self.obj
 
         # there may be elements in self.exclusions that are no longer

From 16239024a74851b369ed2daad3559c17b2cb7000 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sat, 21 Mar 2020 22:04:23 +0000
Subject: [PATCH 34/39] only patch _apply, cov and corr

---
 pandas/core/base.py           | 13 +++----------
 pandas/core/window/common.py  |  2 ++
 pandas/core/window/rolling.py | 13 +++++++++----
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index a7aa6b39ea96d..e1c6bef66239d 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -209,18 +209,11 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj.reindex(columns=self._selection_list)
 
-        elif not self.exclusions or not isinstance(self.obj, ABCDataFrame):
+        if len(self.exclusions) > 0:
+            return self.obj.drop(self.exclusions, axis=1)
+        else:
             return self.obj
 
-        # there may be elements in self.exclusions that are no longer
-        # in self.obj, see GH 32468
-        nlevels = self.obj.columns.nlevels
-        unique_column_names = {
-            j for i in range(nlevels) for j in self.obj.columns.get_level_values(i)
-        }
-        exclusions = self.exclusions.intersection(unique_column_names)
-        return self.obj.drop(exclusions, axis=1)
-
     def __getitem__(self, key):
         if self._selection is not None:
             raise IndexError(f"Column(s) {self._selection} already selected")
diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
index ed0b816f64800..321630dfe0ee2 100644
--- a/pandas/core/window/common.py
+++ b/pandas/core/window/common.py
@@ -82,6 +82,8 @@ def _apply(
         # TODO: can we de-duplicate with _dispatch?
         def f(x, name=name, *args):
             x = self._shallow_copy(x)
+            # patch for GH 32332
+            x.obj = x._obj_with_exclusions
 
             if isinstance(name, str):
                 return getattr(x, name)(*args, **kwargs)
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 887edb0b35d30..59a2e550be8a4 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -98,7 +98,7 @@ def __init__(
     def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin:
         exclusions = self.exclusions
         new_obj = super()._shallow_copy(obj, exclusions=exclusions, **kwargs)
-        new_obj.obj = new_obj._obj_with_exclusions
+        # new_obj.obj = new_obj._obj_with_exclusions
         return new_obj
 
     @property
@@ -1194,7 +1194,6 @@ def count(self):
                 closed=self.closed,
             ).sum()
             results.append(result)
-
         return self._wrap_results(results, blocks, obj)
 
     _shared_docs["apply"] = dedent(
@@ -1663,7 +1662,10 @@ def _get_cov(X, Y):
             return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
 
         return _flex_binary_moment(
-            self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)
+            self._obj_with_exclusions,
+            other._obj_with_exclusions,
+            _get_cov,
+            pairwise=bool(pairwise),
         )
 
     _shared_docs["corr"] = dedent(
@@ -1795,7 +1797,10 @@ def _get_corr(a, b):
            return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs))
 
         return _flex_binary_moment(
-            self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)
+            self._obj_with_exclusions,
+            other._obj_with_exclusions,
+            _get_corr,
+            pairwise=bool(pairwise),
         )

From 5d7a4772566a845ca6b3719b7e81ef85a1e99166 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sat, 21 Mar 2020 22:41:38 +0000
Subject: [PATCH 35/39] reinstate change to _obj_with_exclusions

---
 pandas/core/base.py           | 13 ++++++++++---
 pandas/core/window/rolling.py |  4 +---
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index e1c6bef66239d..a7aa6b39ea96d 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -209,11 +209,18 @@ def _obj_with_exclusions(self):
         if self._selection is not None and isinstance(self.obj, ABCDataFrame):
             return self.obj.reindex(columns=self._selection_list)
 
-        if len(self.exclusions) > 0:
-            return self.obj.drop(self.exclusions, axis=1)
-        else:
+        elif not self.exclusions or not isinstance(self.obj, ABCDataFrame):
            return self.obj
 
+        # there may be elements in self.exclusions that are no longer
+        # in self.obj, see GH 32468
+        nlevels = self.obj.columns.nlevels
+        unique_column_names = {
+            j for i in range(nlevels) for j in self.obj.columns.get_level_values(i)
+        }
+        exclusions = self.exclusions.intersection(unique_column_names)
+        return self.obj.drop(exclusions, axis=1)
+
     def __getitem__(self, key):
         if self._selection is not None:
             raise IndexError(f"Column(s) {self._selection} already selected")
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 59a2e550be8a4..30b494bfb537f 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -96,9 +96,7 @@ def __init__(
         self.exclusions = kwargs.get("exclusions", set())
 
     def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin:
-        exclusions = self.exclusions
-        new_obj = super()._shallow_copy(obj, exclusions=exclusions, **kwargs)
-        # new_obj.obj = new_obj._obj_with_exclusions
+        new_obj = super()._shallow_copy(obj, exclusions=self.exclusions, **kwargs)
         return new_obj
 
     @property

From d117624904a74c473a8d3973f6ea3ddba4fecf4b Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sun, 22 Mar 2020 10:26:52 +0000
Subject: [PATCH 36/39] exclude 'by' column in Rolling.count

---
 pandas/core/window/rolling.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 30b494bfb537f..d96e82432aef4 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1178,8 +1178,8 @@ class _Rolling_and_Expanding(_Rolling):
     )
 
     def count(self):
-
-        blocks, obj = self._create_blocks()
+        obj = self._obj_with_exclusions
+        blocks = obj._to_dict_of_blocks(copy=False).values()
         results = []
         for b in blocks:
             result = b.notna().astype(int)
@@ -1192,7 +1192,7 @@ def count(self):
                 closed=self.closed,
             ).sum()
             results.append(result)
-        return self._wrap_results(results, blocks, obj)
+        return self._wrap_results(results, blocks, obj, exclude=self.exclusions)
 
     _shared_docs["apply"] = dedent(
         r"""

From 367e67179cfad077b0edfd5d6ba4932fbdb85148 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sun, 22 Mar 2020 11:08:36 +0000
Subject: [PATCH 37/39] don't modify _selected_obj - instead, patch obj before we reach apply

---
 pandas/core/window/common.py  |  2 ++
 pandas/core/window/rolling.py | 16 ++++++----------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
index 321630dfe0ee2..bbaa345c85970 100644
--- a/pandas/core/window/common.py
+++ b/pandas/core/window/common.py
@@ -35,6 +35,8 @@ def _dispatch(name: str, *args, **kwargs):
     def outer(self, *args, **kwargs):
         def f(x):
             x = self._shallow_copy(x, groupby=self._groupby)
+            # patch for GH 32332
+            x.obj = x._obj_with_exclusions
             return getattr(x, name)(*args, **kwargs)
 
         return self._groupby.apply(f)
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index d96e82432aef4..4d88736d3b004 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1178,8 +1178,8 @@ class _Rolling_and_Expanding(_Rolling):
     )
 
     def count(self):
-        obj = self._obj_with_exclusions
-        blocks = obj._to_dict_of_blocks(copy=False).values()
+
+        blocks, obj = self._create_blocks()
         results = []
         for b in blocks:
             result = b.notna().astype(int)
@@ -1636,6 +1636,7 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
         # only default unset
         pairwise = True if pairwise is None else pairwise
         other = self._shallow_copy(other)
+        other.obj = other._obj_with_exclusions
 
         # GH 16058: offset window
         if self.is_freq_type:
@@ -1660,10 +1661,7 @@ def _get_cov(X, Y):
             return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
 
         return _flex_binary_moment(
-            self._obj_with_exclusions,
-            other._obj_with_exclusions,
-            _get_cov,
-            pairwise=bool(pairwise),
+            self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise),
         )
 
     _shared_docs["corr"] = dedent(
@@ -1782,6 +1780,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
         # only default unset
         pairwise = True if pairwise is None else pairwise
         other = self._shallow_copy(other)
+        other.obj = other._obj_with_exclusions
         window = self._get_window(other) if not self.is_freq_type else self.win_freq
 
         def _get_corr(a, b):
@@ -1795,10 +1794,7 @@ def _get_corr(a, b):
             return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs))
 
         return _flex_binary_moment(
-            self._obj_with_exclusions,
-            other._obj_with_exclusions,
-            _get_corr,
-            pairwise=bool(pairwise),
+            self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise),
         )

From d7cf9f1495893833b49e90aee01cdfd0055684f6 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sun, 22 Mar 2020 11:10:16 +0000
Subject: [PATCH 38/39] slight simplification to _shallow_copy

---
 pandas/core/window/rolling.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 4d88736d3b004..f4c643273ab7f 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -96,8 +96,7 @@ def __init__(
         self.exclusions = kwargs.get("exclusions", set())
 
     def _shallow_copy(self, obj: FrameOrSeries, **kwargs) -> ShallowMixin:
-        new_obj = super()._shallow_copy(obj, exclusions=self.exclusions, **kwargs)
-        return new_obj
+        return super()._shallow_copy(obj, exclusions=self.exclusions, **kwargs)
 
     @property
     def _constructor(self):

From d2517155211ca1751fe20533d054793c003b3b55 Mon Sep 17 00:00:00 2001
From: Marco Gorelli/DI /SRUK/Engineer/Samsung Electronics
Date: Sun, 22 Mar 2020 11:13:57 +0000
Subject: [PATCH 39/39] comment on patch in corr and cov

---
 pandas/core/window/rolling.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index f4c643273ab7f..de20e61c304e3 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -1635,6 +1635,7 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
         # only default unset
         pairwise = True if pairwise is None else pairwise
         other = self._shallow_copy(other)
+        # patch for GH 32332
         other.obj = other._obj_with_exclusions
 
         # GH 16058: offset window
@@ -1660,7 +1661,7 @@ def _get_cov(X, Y):
             return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
 
         return _flex_binary_moment(
-            self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise),
+            self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)
         )
 
     _shared_docs["corr"] = dedent(
@@ -1779,6 +1780,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
         # only default unset
         pairwise = True if pairwise is None else pairwise
         other = self._shallow_copy(other)
+        # patch for GH 32332
         other.obj = other._obj_with_exclusions
         window = self._get_window(other) if not self.is_freq_type else self.win_freq
 
@@ -1793,7 +1795,7 @@ def _get_corr(a, b):
             return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs))
 
         return _flex_binary_moment(
-            self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise),
+            self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)
        )
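
The user-facing behaviour the series converges on can be summarised with a short, self-contained Python sketch. This is illustrative only and not part of the patch series; it assumes the pandas >= 1.1 semantics described in the whatsnew entry above, and it mirrors the frame used in test_by_column_not_in_values.

    import numpy as np
    import pandas as pd

    # "A" is the grouping key, "B" holds the values being rolled over.
    df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})

    g = df.groupby("A")
    original_obj = g.obj.copy(deep=True)

    result = g.rolling(4).sum()

    # The grouped-by column ends up in the index only, not in the values.
    assert "A" not in result.columns

    # Building the rolling result leaves the groupby's underlying frame untouched.
    pd.testing.assert_frame_equal(g.obj, original_obj)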
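
The column-dropping idea that patches 25, 29 and 32 iterate on can also be shown standalone. The helper below is a hypothetical, simplified re-implementation for illustration, not the pandas-internal _obj_with_exclusions itself: intersect the exclusions with the labels actually present on the column levels before dropping, so stale exclusions and MultiIndex columns do not raise.

    import pandas as pd

    def drop_exclusions(obj: pd.DataFrame, exclusions: set) -> pd.DataFrame:
        # Collect every label appearing on any column level, so a flat Index
        # and a MultiIndex are handled the same way.
        nlevels = obj.columns.nlevels
        unique_column_names = {
            j for i in range(nlevels) for j in obj.columns.get_level_values(i)
        }
        # Only drop labels that are still present; stale exclusions are ignored.
        present = exclusions.intersection(unique_column_names)
        return obj.drop(list(present), axis=1) if present else obj

    # MultiIndex columns, as exercised by test_by_column_not_in_values above.
    columns = pd.MultiIndex.from_tuples([("A", ""), ("B", "C")])
    df = pd.DataFrame([[1, 0], [2, 0], [3, 0]], columns=columns)

    print(drop_exclusions(df, {"A"}).columns)      # only ("B", "C") remains
    print(drop_exclusions(df, {"missing"}).shape)  # unchanged: (3, 2)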