
Commit c4f6c1b

Revert "CLN/BUG: Clean/Simplify _wrap_applied_output (pandas-dev#35792)"
This reverts commit 1dc0795.
Parent: 1dc0795

4 files changed (+71, -34 lines)


doc/source/whatsnew/v1.1.4.rst (-1)

@@ -35,7 +35,6 @@ Bug fixes
 - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` raising a ``ValueError`` when the target was read-only (:issue:`37174`)
 - Bug in :meth:`GroupBy.fillna` that introduced a performance regression after 1.0.5 (:issue:`36757`)
 - Bug in :meth:`DataFrame.info` was raising a ``KeyError`` when the DataFrame has integer column names (:issue:`37245`)
-- Bug in :meth:`DataFrameGroupby.apply` would drop a :class:`CategoricalIndex` when grouped on (:issue:`35792`)
 
 .. ---------------------------------------------------------------------------
 
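For context, the entry removed above describes the behaviour exercised by the test updated at the bottom of this commit: grouping on a categorical index level and calling apply. A minimal sketch of that scenario (not part of the commit; written against pandas 1.1, and the resulting index type depends on whether the reverted change is present):

import numpy as np
import pandas as pd

# Mirrors test_apply_multi_level_name with category=True.
b = pd.Categorical([1, 2] * 5, categories=[1, 2, 3])
df = pd.DataFrame(
    {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
).set_index(["A", "B"])

result = df.groupby("B").apply(lambda x: x.sum())
# With the reverted change in place, the test expected a
# CategoricalIndex([1, 2], categories=[1, 2, 3], name="B") here; after this
# revert it expects a plain Index([1, 2], name="B") again.
print(result.index)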

pandas/core/groupby/generic.py (+65, -25)

@@ -1219,25 +1219,57 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         if len(keys) == 0:
             return self.obj._constructor(index=keys)
 
+        key_names = self.grouper.names
+
         # GH12824
         first_not_none = next(com.not_none(*values), None)
 
         if first_not_none is None:
-            # GH9684 - All values are None, return an empty frame.
+            # GH9684. If all values are None, then this will throw an error.
+            # We'd prefer it return an empty dataframe.
             return self.obj._constructor()
         elif isinstance(first_not_none, DataFrame):
             return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
         else:
-            key_index = self.grouper.result_index if self.as_index else None
+            if len(self.grouper.groupings) > 1:
+                key_index = self.grouper.result_index
+
+            else:
+                ping = self.grouper.groupings[0]
+                if len(keys) == ping.ngroups:
+                    key_index = ping.group_index
+                    key_index.name = key_names[0]
+
+                    key_lookup = Index(keys)
+                    indexer = key_lookup.get_indexer(key_index)
+
+                    # reorder the values
+                    values = [values[i] for i in indexer]
+
+                    # update due to the potential reorder
+                    first_not_none = next(com.not_none(*values), None)
+                else:
+
+                    key_index = Index(keys, name=key_names[0])
+
+                # don't use the key indexer
+                if not self.as_index:
+                    key_index = None
 
-            if isinstance(first_not_none, Series):
+            # make Nones an empty object
+            if first_not_none is None:
+                return self.obj._constructor()
+            elif isinstance(first_not_none, NDFrame):
 
                 # this is to silence a DeprecationWarning
                 # TODO: Remove when default dtype of empty Series is object
                 kwargs = first_not_none._construct_axes_dict()
-                backup = create_series_with_explicit_dtype(
-                    **kwargs, dtype_if_empty=object
-                )
+                if isinstance(first_not_none, Series):
+                    backup = create_series_with_explicit_dtype(
+                        **kwargs, dtype_if_empty=object
+                    )
+                else:
+                    backup = first_not_none._constructor(**kwargs)
 
                 values = [x if (x is not None) else backup for x in values]
 
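The hunk above restores the realignment of the applied results against the grouper's group_index. A minimal sketch of that get_indexer idiom (a standalone illustration with made-up names, not code from the commit):

import pandas as pd

# Order of groups as the grouper reports them (ping.group_index in the code
# above) versus the order in which apply happened to produce results.
key_index = pd.CategoricalIndex(["a", "b", "c"], name="key")
keys = ["b", "c", "a"]
values = ["result_b", "result_c", "result_a"]  # one applied result per key

# Build an Index over the observed keys, find where each entry of key_index
# sits in it, and reorder the values to match key_index.
indexer = pd.Index(keys).get_indexer(key_index)
values = [values[i] for i in indexer]
print(list(zip(key_index, values)))
# [('a', 'result_a'), ('b', 'result_b'), ('c', 'result_c')]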
@@ -1246,7 +1278,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
             if isinstance(v, (np.ndarray, Index, Series)) or not self.as_index:
                 if isinstance(v, Series):
                     applied_index = self._selected_obj._get_axis(self.axis)
-                    all_indexed_same = all_indexes_same((x.index for x in values))
+                    all_indexed_same = all_indexes_same([x.index for x in values])
                     singular_series = len(values) == 1 and applied_index.nlevels == 1
 
                     # GH3596
@@ -1278,6 +1310,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                     # GH 8467
                     return self._concat_objects(keys, values, not_indexed_same=True)
 
+                if self.axis == 0 and isinstance(v, ABCSeries):
                     # GH6124 if the list of Series have a consistent name,
                     # then propagate that name to the result.
                     index = v.index.copy()
@@ -1290,27 +1323,34 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
                         if len(names) == 1:
                             index.name = list(names)[0]
 
-                    # Combine values
-                    # vstack+constructor is faster than concat and handles MI-columns
-                    stacked_values = np.vstack([np.asarray(v) for v in values])
-
-                    if self.axis == 0:
-                        index = key_index
-                        columns = v.index.copy()
-                        if columns.name is None:
-                            # GH6124 - propagate name of Series when it's consistent
-                            names = {v.name for v in values}
-                            if len(names) == 1:
-                                columns.name = list(names)[0]
+                    # normally use vstack as its faster than concat
+                    # and if we have mi-columns
+                    if (
+                        isinstance(v.index, MultiIndex)
+                        or key_index is None
+                        or isinstance(key_index, MultiIndex)
+                    ):
+                        stacked_values = np.vstack([np.asarray(v) for v in values])
+                        result = self.obj._constructor(
+                            stacked_values, index=key_index, columns=index
+                        )
                     else:
-                        index = v.index
-                        columns = key_index
-                        stacked_values = stacked_values.T
-
+                        # GH5788 instead of stacking; concat gets the
+                        # dtypes correct
+                        from pandas.core.reshape.concat import concat
+
+                        result = concat(
+                            values,
+                            keys=key_index,
+                            names=key_index.names,
+                            axis=self.axis,
+                        ).unstack()
+                        result.columns = index
+                elif isinstance(v, ABCSeries):
+                    stacked_values = np.vstack([np.asarray(v) for v in values])
                     result = self.obj._constructor(
-                        stacked_values, index=index, columns=columns
+                        stacked_values.T, index=v.index, columns=key_index
                     )
-
             elif not self.as_index:
                 # We add grouping column below, so create a frame here
                 result = DataFrame(
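The last hunk restores the branch that combines per-group Series results with concat(...).unstack() when both the per-group index and the key index are regular (non-MultiIndex) indexes (GH5788). A minimal sketch of what that branch computes, reusing the same values as the expected frame in the test further below (illustrative only, not code from the commit):

import pandas as pd

# Two per-group Series, each indexed by the output columns, plus the key index.
values = [
    pd.Series({"C": 20, "D": 20}),
    pd.Series({"C": 25, "D": 25}),
]
key_index = pd.Index([1, 2], name="B")

# concat stacks the per-group Series under key_index; unstack pivots the inner
# level out into columns -> a frame with index B=[1, 2] and columns ["C", "D"].
result = pd.concat(values, keys=key_index, names=key_index.names, axis=0).unstack()
print(result)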

pandas/core/indexes/api.py (+3, -4)

@@ -298,16 +298,15 @@ def all_indexes_same(indexes):
 
     Parameters
     ----------
-    indexes : iterable of Index objects
+    indexes : list of Index objects
 
     Returns
     -------
     bool
         True if all indexes contain the same elements, False otherwise.
     """
-    itr = iter(indexes)
-    first = next(itr)
-    for index in itr:
+    first = indexes[0]
+    for index in indexes[1:]:
         if not first.equals(index):
             return False
     return True
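The restored implementation indexes its argument directly, so the docstring now asks for a list and the call site in generic.py above goes back to a list comprehension; a generator, which the reverted iterator-based version accepted, would no longer work. A small usage sketch (all_indexes_same is internal pandas API, shown only for illustration):

import pandas as pd
from pandas.core.indexes.api import all_indexes_same

idx = pd.Index([1, 2, 3])
print(all_indexes_same([idx, idx.copy()]))        # True
print(all_indexes_same([idx, pd.Index([4, 5])]))  # False
# Passing a generator would fail with this version, since a generator supports
# neither indexes[0] nor indexes[1:].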

pandas/tests/groupby/test_apply.py (+3, -4)

@@ -868,14 +868,13 @@ def test_apply_multi_level_name(category):
     b = [1, 2] * 5
     if category:
         b = pd.Categorical(b, categories=[1, 2, 3])
-        expected_index = pd.CategoricalIndex([1, 2], categories=[1, 2, 3], name="B")
-    else:
-        expected_index = pd.Index([1, 2], name="B")
     df = pd.DataFrame(
         {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))}
     ).set_index(["A", "B"])
     result = df.groupby("B").apply(lambda x: x.sum())
-    expected = pd.DataFrame({"C": [20, 25], "D": [20, 25]}, index=expected_index)
+    expected = pd.DataFrame(
+        {"C": [20, 25], "D": [20, 25]}, index=pd.Index([1, 2], name="B")
+    )
     tm.assert_frame_equal(result, expected)
     assert df.index.names == ["A", "B"]
 
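To check the restored expectation locally (assuming a pandas development checkout with pytest installed), the updated test can be run on its own; it covers both values of its category parametrization:

pytest pandas/tests/groupby/test_apply.py -k test_apply_multi_level_name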