From 523a8620186b41c3fa90a5bbac4a63dc08600ef6 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 23 Dec 2020 00:54:36 +0100
Subject: [PATCH 1/6] BUG: Fix regression for groupby.indices in case of unused
 categories

---
 pandas/core/groupby/grouper.py           |  9 ++-------
 pandas/core/groupby/ops.py               |  5 +++++
 pandas/tests/groupby/test_categorical.py | 20 ++++++++++++++++++++
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index e3196904fa56f..26fb23087ed55 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -556,13 +556,8 @@ def indices(self):
         if isinstance(self.grouper, ops.BaseGrouper):
             return self.grouper.indices
 
-        # Return a dictionary of {group label: [indices belonging to the group label]}
-        # respecting whether sort was specified
-        codes, uniques = algorithms.factorize(self.grouper, sort=self.sort)
-        return {
-            category: np.flatnonzero(codes == i)
-            for i, category in enumerate(Index(uniques))
-        }
+        values = Categorical(self.grouper)
+        return values._reverse_indexer()
 
     @property
     def codes(self) -> np.ndarray:
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index d1a4fc6fc74e5..8e33ae0c30df3 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -53,6 +53,7 @@
     is_timedelta64_dtype,
     needs_i8_conversion,
 )
+from pandas.core.dtypes.generic import ABCCategoricalIndex
 from pandas.core.dtypes.missing import isna, maybe_fill
 
 import pandas.core.algorithms as algorithms
@@ -241,6 +242,10 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
     @cache_readonly
     def indices(self):
         """ dict {group name -> group indices} """
+        if len(self.groupings) == 1 and isinstance(
+            self.result_index, ABCCategoricalIndex
+        ):
+            return self.groupings[0].indices
         codes_list = [ping.codes for ping in self.groupings]
         keys = [ping.group_index for ping in self.groupings]
         return get_indexer_dict(codes_list, keys)
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 8cf77ca6335f4..516d54b6a2924 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1678,3 +1678,23 @@ def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     df_grp = df.groupby(["a", "b"], observed=observed)
     result = getattr(df_grp, func)()
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_categorical_indices_unused_categories():
+    # GH#38642
+    df = DataFrame(
+        {
+            "key": pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]),
+            "col": range(3),
+        }
+    )
+    grouped = df.groupby("key", sort=False)
+    result = grouped.indices
+    expected = {
+        "b": np.array([0, 1]),
+        "a": np.array([2]),
+        "c": np.array([], dtype="int64"),
+    }
+    assert result.keys() == expected.keys()
+    for key in result.keys():
+        tm.assert_numpy_array_equal(result[key], expected[key])

From 80952f85edfd1aea2091d10320b0dbec62f7adb3 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 23 Dec 2020 00:58:26 +0100
Subject: [PATCH 2/6] Add comment

---
 pandas/core/groupby/ops.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 8e33ae0c30df3..d7a3fff5c3898 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -245,6 +245,7 @@ def indices(self):
         if len(self.groupings) == 1 and isinstance(
             self.result_index, ABCCategoricalIndex
         ):
+            # GH#38642: use the grouping's indices so unused categories are included
             return self.groupings[0].indices
         codes_list = [ping.codes for ping in self.groupings]
         keys = [ping.group_index for ping in self.groupings]

From 530361f893f0ff13d80128c4ebbec5d15177d6ca Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 23 Dec 2020 01:01:43 +0100
Subject: [PATCH 3/6] Remove pd. prefix from Categorical in test

---
 pandas/tests/groupby/test_categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 516d54b6a2924..c1a5a7a86b922 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1684,7 +1684,7 @@ def test_groupby_categorical_indices_unused_categories():
     # GH#38642
     df = DataFrame(
         {
-            "key": pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]),
+            "key": Categorical(["b", "b", "a"], categories=["a", "b", "c"]),
             "col": range(3),
         }
     )

From c689f63e77c8ceef35456059baa7676de2e4ea1b Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Tue, 29 Dec 2020 21:55:43 +0100
Subject: [PATCH 4/6] Use explicit int64 dtype in expected test arrays

---
 pandas/tests/groupby/test_categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index c1a5a7a86b922..f0bc58cbf07bf 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -1691,8 +1691,8 @@ def test_groupby_categorical_indices_unused_categories():
     grouped = df.groupby("key", sort=False)
     result = grouped.indices
     expected = {
-        "b": np.array([0, 1]),
-        "a": np.array([2]),
+        "b": np.array([0, 1], dtype="int64"),
+        "a": np.array([2], dtype="int64"),
         "c": np.array([], dtype="int64"),
     }
     assert result.keys() == expected.keys()

From fac89853375e0d6bb130ca884c0c66d7e95eef77 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Tue, 29 Dec 2020 21:57:03 +0100
Subject: [PATCH 5/6] Add whatsnew

---
 doc/source/whatsnew/v1.2.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index a756239ee6798..5ad357a44a6ca 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -18,7 +18,7 @@ Fixed regressions
 - :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (issue:`38714`)
 - Fixed a regression in ``groupby().rolling()`` where :class:`MultiIndex` levels were dropped (:issue:`38523`)
 - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
--
+- Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`36842`)
 
 .. ---------------------------------------------------------------------------
 

From 86df6408703686ea035b77fe57d05571ec2ce162 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Tue, 29 Dec 2020 21:57:28 +0100
Subject: [PATCH 6/6] Fix GH issue reference in whatsnew entry

---
 doc/source/whatsnew/v1.2.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
index 5ad357a44a6ca..804886fb987ad 100644
--- a/doc/source/whatsnew/v1.2.1.rst
+++ b/doc/source/whatsnew/v1.2.1.rst
@@ -18,7 +18,7 @@ Fixed regressions
 - :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (issue:`38714`)
 - Fixed a regression in ``groupby().rolling()`` where :class:`MultiIndex` levels were dropped (:issue:`38523`)
 - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
-- Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`36842`)
+- Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
 
 .. ---------------------------------------------------------------------------