From ea88b2f1c631dc3bae6d1cdb30409b1eaec85e72 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 4 Jan 2020 22:44:49 +0800
Subject: [PATCH 1/7] BUG: groupby apply raises ValueError when groupby axis
 has duplicates (GH30667)

---
 doc/source/whatsnew/v1.0.0.rst     |  1 +
 pandas/core/groupby/groupby.py     | 24 +++++++++---------------
 pandas/tests/groupby/test_apply.py | 23 +++++++++++++++++++++++
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index b9cc1dad53674..b593bd6b003c7 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -973,6 +973,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
 - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
 - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
+- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates (:issue:`30667`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 1ba4938d45fc9..d16c755ac86e0 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -969,22 +969,16 @@ def reset_identity(values):
             result = concat(values, axis=self.axis)
             ax = self._selected_obj._get_axis(self.axis)
 
-            if isinstance(result, Series):
-                result = result.reindex(ax)
+            # this is a very unfortunate situation
+            # we can't use reindex to restore the original order
+            # when the ax has duplicates
+            # so we resort to this
+            # GH 14776, 30667
+            if ax.has_duplicates:
+                indexer = algorithms.unique1d(result.index.get_indexer_for(ax.values))
+                result = result.take(indexer, axis=self.axis)
             else:
-
-                # this is a very unfortunate situation
-                # we have a multi-index that is NOT lexsorted
-                # and we have a result which is duplicated
-                # we can't reindex, so we resort to this
-                # GH 14776
-                if isinstance(ax, MultiIndex) and not ax.is_unique:
-                    indexer = algorithms.unique1d(
-                        result.index.get_indexer_for(ax.values)
-                    )
-                    result = result.take(indexer, axis=self.axis)
-                else:
-                    result = result.reindex(ax, axis=self.axis)
+                result = result.reindex(ax, axis=self.axis)
 
         elif self.group_keys:
 
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 17e5d3efe850f..e7eec7895e4cf 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -752,3 +752,26 @@ def most_common_values(df):
         ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId"
     )
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("test_series", [True, False])
+def test_apply_with_duplicated_non_sorted_axis(test_series):
+    # GH 30667
+    df = pd.DataFrame(
+        [["x", "p"], ["x", "p"], ["x", "o"]], columns=["X", "Y"], index=[1, 2, 2]
+    )
+    if test_series:
+        ser = df.set_index("Y")["X"]
+        result = ser.groupby(level=0).apply(lambda x: x)
+
+        # not expecting the order to remain the same for duplicated axis
+        result = result.sort_index()
+        expected = ser.sort_index()
+        tm.assert_series_equal(result, expected)
+    else:
+        result = df.groupby("Y").apply(lambda x: x)
+
+        # not expecting the order to remain the same for duplicated axis
+        result = result.sort_values("Y")
+        expected = df.sort_values("Y")
+        tm.assert_frame_equal(result, expected)

From a7ebc613512282b6ade6565248a64fd5706e3963 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sun, 5 Jan 2020 15:18:10 +0800
Subject: [PATCH 2/7] CLN: cleaned up code (GH30667)

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d16c755ac86e0..09e59b1f603e7 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -975,7 +975,7 @@ def reset_identity(values):
             # so we resort to this
             # GH 14776, 30667
             if ax.has_duplicates:
-                indexer = algorithms.unique1d(result.index.get_indexer_for(ax.values))
+                indexer = algorithms.unique1d(result.index.get_indexer_non_unique(ax.values))
                 result = result.take(indexer, axis=self.axis)
             else:
                 result = result.reindex(ax, axis=self.axis)

From 94be16be2157a28eb5874c323e2101c701b64304 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sun, 5 Jan 2020 15:19:25 +0800
Subject: [PATCH 3/7] CLN: reformatted code (GH30667)

---
 pandas/core/groupby/groupby.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 09e59b1f603e7..dc7135b0339ce 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -975,7 +975,9 @@ def reset_identity(values):
             # so we resort to this
             # GH 14776, 30667
             if ax.has_duplicates:
-                indexer = algorithms.unique1d(result.index.get_indexer_non_unique(ax.values))
+                indexer = algorithms.unique1d(
+                    result.index.get_indexer_non_unique(ax.values)
+                )
                 result = result.take(indexer, axis=self.axis)
             else:
                 result = result.reindex(ax, axis=self.axis)

From c1e8038a7f0c673b843e01880ba867b92942704f Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sun, 5 Jan 2020 15:46:49 +0800
Subject: [PATCH 4/7] TST: moved test case next to other similar tests
 (GH30667)

---
 pandas/tests/groupby/test_apply.py | 46 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index e7eec7895e4cf..220281d29c9e3 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -467,6 +467,29 @@ def filt2(x):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("test_series", [True, False])
+def test_apply_with_duplicated_non_sorted_axis(test_series):
+    # GH 30667
+    df = pd.DataFrame(
+        [["x", "p"], ["x", "p"], ["x", "o"]], columns=["X", "Y"], index=[1, 2, 2]
+    )
+    if test_series:
+        ser = df.set_index("Y")["X"]
+        result = ser.groupby(level=0).apply(lambda x: x)
+
+        # not expecting the order to remain the same for duplicated axis
+        result = result.sort_index()
+        expected = ser.sort_index()
+        tm.assert_series_equal(result, expected)
+    else:
+        result = df.groupby("Y").apply(lambda x: x)
+
+        # not expecting the order to remain the same for duplicated axis
+        result = result.sort_values("Y")
+        expected = df.sort_values("Y")
+        tm.assert_frame_equal(result, expected)
+
+
 def test_apply_corner_cases():
     # #535, can't use sliding iterator
 
@@ -752,26 +775,3 @@ def most_common_values(df):
         ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId"
     )
     tm.assert_series_equal(result, expected)
-
-
-@pytest.mark.parametrize("test_series", [True, False])
-def test_apply_with_duplicated_non_sorted_axis(test_series):
-    # GH 30667
-    df = pd.DataFrame(
-        [["x", "p"], ["x", "p"], ["x", "o"]], columns=["X", "Y"], index=[1, 2, 2]
-    )
-    if test_series:
-        ser = df.set_index("Y")["X"]
-        result = ser.groupby(level=0).apply(lambda x: x)
-
-        # not expecting the order to remain the same for duplicated axis
-        result = result.sort_index()
-        expected = ser.sort_index()
-        tm.assert_series_equal(result, expected)
-    else:
-        result = df.groupby("Y").apply(lambda x: x)
-
-        # not expecting the order to remain the same for duplicated axis
-        result = result.sort_values("Y")
-        expected = df.sort_values("Y")
-        tm.assert_frame_equal(result, expected)

From cbbeb09caead820079d0f2695c7c7cc88f79fe82 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sun, 5 Jan 2020 15:55:17 +0800
Subject: [PATCH 5/7] BUG: correction to previous logic(GH30667)

---
 pandas/core/groupby/groupby.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index dc7135b0339ce..ef56e4d89dcb2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -975,9 +975,8 @@ def reset_identity(values):
             # so we resort to this
             # GH 14776, 30667
             if ax.has_duplicates:
-                indexer = algorithms.unique1d(
-                    result.index.get_indexer_non_unique(ax.values)
-                )
+                indexer, _ = result.index.get_indexer_non_unique(ax.values)
+                indexer = algorithms.unique1d(indexer)
                 result = result.take(indexer, axis=self.axis)
             else:
                 result = result.reindex(ax, axis=self.axis)

From 4502015cf2332742db10329dbc070d308c5d99ac Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 17 Jan 2020 09:17:13 +0800
Subject: [PATCH 6/7] DOC: Further qualified the bug fix entry in whatsnew
 (GH30667)

---
 doc/source/whatsnew/v1.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 2714b4347c2bb..737c4d6a8e07e 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -1112,7 +1112,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
 - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
 - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
-- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates (:issue:`30667`)
+- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
 
 Reshaping
 ^^^^^^^^^

From 358e673e4af45bc0fe71394296057333a0f7a48b Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 18 Jan 2020 23:46:51 +0800
Subject: [PATCH 7/7] DOC: moved whatsnew to V1.1.0 (GH30667)

---
 doc/source/whatsnew/v1.0.0.rst | 1 -
 doc/source/whatsnew/v1.1.0.rst | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 737c4d6a8e07e..fa562838c8f7c 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -1112,7 +1112,6 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
 - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
 - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
-- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index b5a7b19f160a4..daf78ee851341 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -133,9 +133,7 @@ Plotting
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
--
-
+- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates and the applied ``func`` does not mutate passed in objects (:issue:`30667`)
 
 Reshaping
 ^^^^^^^^^