From da5693b7e8c76e05d1e1c19bce48adbc74d1ddc3 Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Mon, 3 Feb 2020 14:13:17 +0100
Subject: [PATCH 1/4] groupy apply: Ensure same index is returned for slow and
 fast path

---
 doc/source/whatsnew/v1.1.0.rst     |  1 +
 pandas/_libs/reduction.pyx         |  2 +-
 pandas/tests/groupby/test_apply.py | 19 +++++++++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index bfe2dcee40d5e..88a1da7db8ffa 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -827,6 +827,7 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`)
 - Bug in :meth:`GroupBy.rolling.apply` ignores args and kwargs parameters (:issue:`33433`)
 - Bug in :meth:`DataFrameGroupby.std` and :meth:`DataFrameGroupby.sem` would modify grouped-by columns when ``as_index=False`` (:issue:`10355`)
+- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` where the result shape was incorrect when the output index was not identical to the input index (:issue:`31612`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 0988cd7ff0dde..18422c2f86129 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -502,7 +502,7 @@ def apply_frame_axis0(object frame, object f, object names,
             # Need to infer if low level index slider will cause segfaults
             require_slow_apply = i == 0 and piece is chunk
             try:
-                if piece.index is not chunk.index:
+                if not piece.index.equals(chunk.index):
                     mutated = True
             except AttributeError:
                 # `piece` might not have an index, could be e.g. an int
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index e2b5118922a5a..cc1a74fd8cbaf 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -921,3 +921,22 @@ def fn(x):
         name="col2",
     )
     tm.assert_series_equal(result, expected)
+
+
+def test_apply_fast_slow_identical():
+    # GH 31613
+
+    df = DataFrame({"A": [0, 0, 1], "b": range(3)})
+
+    # For simple index structures we check for fast/slow apply using
+    # an identity check on in/output
+    def slow(group):
+        return group
+
+    def fast(group):
+        return group.copy()
+
+    fast_df = df.groupby("A").apply(fast)
+    slow_df = df.groupby("A").apply(slow)
+
+    tm.assert_frame_equal(fast_df, slow_df)

From 95ac998088f09a9b7e5422da675205d775d8ae85 Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Tue, 4 Feb 2020 09:33:48 +0100
Subject: [PATCH 2/4] Add test case for GH14927

---
 pandas/tests/groupby/test_apply.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index cc1a74fd8cbaf..d3298493b0dc7 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -940,3 +940,20 @@ def fast(group):
     slow_df = df.groupby("A").apply(slow)
 
     tm.assert_frame_equal(fast_df, slow_df)
+
+
+def test_gh14927():
+    # GH 14927
+    df = pd.DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
+
+    df1 = df.groupby("g").apply(lambda x: x)
+
+    df2 = df.groupby("g").apply(lambda x: x[:])
+
+    df3 = df.groupby("g").apply(lambda x: x.copy(deep=False))
+
+    df4 = df.groupby("g").apply(lambda x: x.copy(deep=True))
+
+    tm.assert_frame_equal(df1, df2)
+    tm.assert_frame_equal(df2, df3)
+    tm.assert_frame_equal(df3, df4)

From 8503c04591d322946a438a326d2bce9d23fe9ffb Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Wed, 15 Apr 2020 11:14:20 +0200
Subject: [PATCH 3/4] Move tests

---
 pandas/tests/groupby/test_apply.py | 76 ++++++++++++++++--------------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index d3298493b0dc7..bc8067212d60e 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -190,6 +190,46 @@ def f_constant_df(group):
         assert names == group_names
 
 
+def test_apply_fast_slow_identical():
+    # GH 31613
+
+    df = DataFrame({"A": [0, 0, 1], "b": range(3)})
+
+    # For simple index structures we check for fast/slow apply using
+    # an identity check on in/output
+    def slow(group):
+        return group
+
+    def fast(group):
+        return group.copy()
+
+    fast_df = df.groupby("A").apply(fast)
+    slow_df = df.groupby("A").apply(slow)
+
+    tm.assert_frame_equal(fast_df, slow_df)
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda x: x,
+        lambda x: x[:],
+        lambda x: x.copy(deep=False),
+        lambda x: x.copy(deep=True),
+    ],
+)
+def test_groupby_apply_identity_maybecopy_index_identical(func):
+    # GH 14927
+    # Whether the function returns a copy of the input data or not should not
+    # have an impact on the index structure of the result since this is not
+    # transparent to the user
+
+    df = pd.DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
+
+    result = df.groupby("g").apply(func)
+    tm.assert_frame_equal(result, df)
+
+
 def test_apply_with_mixed_dtype():
     # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
     df = DataFrame(
@@ -921,39 +961,3 @@ def fn(x):
         name="col2",
     )
     tm.assert_series_equal(result, expected)
-
-
-def test_apply_fast_slow_identical():
-    # GH 31613
-
-    df = DataFrame({"A": [0, 0, 1], "b": range(3)})
-
-    # For simple index structures we check for fast/slow apply using
-    # an identity check on in/output
-    def slow(group):
-        return group
-
-    def fast(group):
-        return group.copy()
-
-    fast_df = df.groupby("A").apply(fast)
-    slow_df = df.groupby("A").apply(slow)
-
-    tm.assert_frame_equal(fast_df, slow_df)
-
-
-def test_gh14927():
-    # GH 14927
-    df = pd.DataFrame({"g": [1, 2, 2, 2], "a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
-
-    df1 = df.groupby("g").apply(lambda x: x)
-
-    df2 = df.groupby("g").apply(lambda x: x[:])
-
-    df3 = df.groupby("g").apply(lambda x: x.copy(deep=False))
-
-    df4 = df.groupby("g").apply(lambda x: x.copy(deep=True))
-
-    tm.assert_frame_equal(df1, df2)
-    tm.assert_frame_equal(df2, df3)
-    tm.assert_frame_equal(df3, df4)

From bdec31737cbb2e09606cdff99eb613553678f435 Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Wed, 27 May 2020 14:26:10 +0200
Subject: [PATCH 4/4] More descriptive bug report in whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 88a1da7db8ffa..0495b21d2cb93 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -827,7 +827,10 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`)
 - Bug in :meth:`GroupBy.rolling.apply` ignores args and kwargs parameters (:issue:`33433`)
 - Bug in :meth:`DataFrameGroupby.std` and :meth:`DataFrameGroupby.sem` would modify grouped-by columns when ``as_index=False`` (:issue:`10355`)
-- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` where the result shape was incorrect when the output index was not identical to the input index (:issue:`31612`)
+- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` where the output index shape for functions returning a DataFrame which is equally indexed
+  to the input DataFrame is inconsistent. An internal heuristic to detect index mutation would behave differently for equal but not identical
+  indices. In particular, the result index shape might change if a copy of the input would be returned.
+  The behaviour now is consistent, independent of internal heuristics. (:issue:`31612`, :issue:`14927`, :issue:`13056`)
 
 Reshaping
 ^^^^^^^^^