From 9c7d69540f06a49b5070f590106420a393f98cd5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 16 Jul 2020 08:36:49 -0500 Subject: [PATCH] Revert BUG: Ensure same index is returned for slow and fast path in groupby.apply #31613 xref https://github.com/pandas-dev/pandas/pull/34998 --- doc/source/whatsnew/v1.1.0.rst | 4 ---- pandas/_libs/reduction.pyx | 2 +- pandas/tests/groupby/test_apply.py | 8 ++++++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index de3a05a2ccdfb..cee3680b4bf65 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1114,10 +1114,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`) - Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`) - Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`) -- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` where the output index shape for functions returning a DataFrame which is equally indexed - to the input DataFrame is inconsistent. An internal heuristic to detect index mutation would behave differently for equal but not identical - indices. In particular, the result index shape might change if a copy of the input would be returned. - The behaviour now is consistent, independent of internal heuristics. (:issue:`31612`, :issue:`14927`, :issue:`13056`) - Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of ``SeriesGroupBy`` previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`) - Bug in :meth:`DataFrame.groupby` lost index, when one of the ``agg`` keys referenced an empty list (:issue:`32580`) - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 97c491776f831..a01e0c5705dcf 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -366,7 +366,7 @@ def apply_frame_axis0(object frame, object f, object names, # Need to infer if low level index slider will cause segfaults require_slow_apply = i == 0 and piece is chunk try: - if not piece.index.equals(chunk.index): + if not piece.index is chunk.index: mutated = True except AttributeError: # `piece` might not have an index, could be e.g. an int diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index aa10f44670361..5a1268bfb03db 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -211,6 +211,7 @@ def test_group_apply_once_per_group2(capsys): assert result == expected +@pytest.mark.xfail(reason="GH-34998") def test_apply_fast_slow_identical(): # GH 31613 @@ -234,9 +235,11 @@ def fast(group): "func", [ lambda x: x, - lambda x: x[:], + pytest.param(lambda x: x[:], marks=pytest.mark.xfail(reason="GH-34998")), lambda x: x.copy(deep=False), - lambda x: x.copy(deep=True), + pytest.param( + lambda x: x.copy(deep=True), marks=pytest.mark.xfail(reason="GH-34998") + ), ], ) def test_groupby_apply_identity_maybecopy_index_identical(func): @@ -997,6 +1000,7 @@ def test_apply_function_with_indexing_return_column(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail(reason="GH-34998") def test_apply_with_timezones_aware(): # GH: 27212