
BUG: groupby apply raises ValueError when groupby axis has duplicates and applied identity function #30679


Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
@@ -976,6 +976,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
 - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
 - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
+- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted and has duplicates (:issue:`30667`)

Reshaping
^^^^^^^^^
25 changes: 10 additions & 15 deletions pandas/core/groupby/groupby.py
@@ -969,22 +969,17 @@ def reset_identity(values):
             result = concat(values, axis=self.axis)
             ax = self._selected_obj._get_axis(self.axis)
 
-            if isinstance(result, Series):
-                result = result.reindex(ax)
-            else:
-                # this is a very unfortunate situation
-                # we have a multi-index that is NOT lexsorted
-                # and we have a result which is duplicated
-                # we can't reindex, so we resort to this
-                # GH 14776
-                if isinstance(ax, MultiIndex) and not ax.is_unique:
-                    indexer = algorithms.unique1d(
-                        result.index.get_indexer_for(ax.values)
-                    )
-                    result = result.take(indexer, axis=self.axis)
-                else:
-                    result = result.reindex(ax, axis=self.axis)
+            # this is a very unfortunate situation
+            # we can't use reindex to restore the original order
+            # when the ax has duplicates
+            # so we resort to this
+            # GH 14776, 30667
+            if ax.has_duplicates:
+                indexer, _ = result.index.get_indexer_non_unique(ax.values)
+                indexer = algorithms.unique1d(indexer)
+                result = result.take(indexer, axis=self.axis)
+            else:
+                result = result.reindex(ax, axis=self.axis)

elif self.group_keys:

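The reordering in the new branch can be illustrated standalone. This is a sketch, not the pandas internals: `ax` stands in for the original duplicated, unsorted axis, `result` for the group-wise concatenation, and the `np.unique` step mimics `pandas.core.algorithms.unique1d` (which deduplicates while preserving first-seen order):

```python
import numpy as np
import pandas as pd

# Original axis: duplicated and unsorted, so a plain reindex() would raise
# "cannot reindex from a duplicate axis".
ax = pd.Index(["p", "p", "o"])

# Concatenated per-group output: groups come back in group-key order.
result = pd.concat(
    [pd.Series(["x3"], index=["o"]), pd.Series(["x1", "x2"], index=["p", "p"])]
)

# For every label of ax, collect all matching positions in result;
# duplicate labels in ax produce repeated runs of positions.
indexer, _ = result.index.get_indexer_non_unique(ax.values)  # [1, 2, 1, 2, 0]

# Deduplicate while keeping first-seen order (unique1d's behaviour),
# done here with plain numpy for illustration.
_, first_seen = np.unique(indexer, return_index=True)
indexer = indexer[np.sort(first_seen)]  # [1, 2, 0]

# take() works with positional indices, so duplicates in ax are harmless.
reordered = result.take(indexer)
assert list(reordered.index) == ["p", "p", "o"]
```

The key design point is that positional `take` sidesteps label-based alignment entirely, which is exactly what fails on a duplicated axis.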
23 changes: 23 additions & 0 deletions pandas/tests/groupby/test_apply.py
@@ -467,6 +467,29 @@ def filt2(x):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("test_series", [True, False])
+def test_apply_with_duplicated_non_sorted_axis(test_series):
+    # GH 30667
+    df = pd.DataFrame(
+        [["x", "p"], ["x", "p"], ["x", "o"]], columns=["X", "Y"], index=[1, 2, 2]
+    )
+    if test_series:
+        ser = df.set_index("Y")["X"]
+        result = ser.groupby(level=0).apply(lambda x: x)
+
+        # not expecting the order to remain the same for duplicated axis
+        result = result.sort_index()
+        expected = ser.sort_index()
+        tm.assert_series_equal(result, expected)
+    else:
+        result = df.groupby("Y").apply(lambda x: x)
+
+        # not expecting the order to remain the same for duplicated axis
+        result = result.sort_values("Y")
+        expected = df.sort_values("Y")
+        tm.assert_frame_equal(result, expected)


def test_apply_corner_cases():
# #535, can't use sliding iterator
