Skip to content

Commit 34e4aea

Browse files
phofl authored and luckyvs1 committed
BUG: Fix regression for groupby.indices in case of unused categories (pandas-dev#38649)
1 parent 3aa3e8f commit 34e4aea

File tree

4 files changed

+29
-8
lines changed

4 files changed

+29
-8
lines changed

doc/source/whatsnew/v1.2.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Fixed regressions
1818
- :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
1919
- Fixed a regression in ``groupby().rolling()`` where :class:`MultiIndex` levels were dropped (:issue:`38523`)
2020
- Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
21-
-
21+
- Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
2222

2323
.. ---------------------------------------------------------------------------
2424

pandas/core/groupby/grouper.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -556,13 +556,8 @@ def indices(self):
556556
if isinstance(self.grouper, ops.BaseGrouper):
557557
return self.grouper.indices
558558

559-
# Return a dictionary of {group label: [indices belonging to the group label]}
560-
# respecting whether sort was specified
561-
codes, uniques = algorithms.factorize(self.grouper, sort=self.sort)
562-
return {
563-
category: np.flatnonzero(codes == i)
564-
for i, category in enumerate(Index(uniques))
565-
}
559+
values = Categorical(self.grouper)
560+
return values._reverse_indexer()
566561

567562
@property
568563
def codes(self) -> np.ndarray:

pandas/core/groupby/ops.py

+6
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
is_timedelta64_dtype,
5454
needs_i8_conversion,
5555
)
56+
from pandas.core.dtypes.generic import ABCCategoricalIndex
5657
from pandas.core.dtypes.missing import isna, maybe_fill
5758

5859
import pandas.core.algorithms as algorithms
@@ -241,6 +242,11 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
241242
@cache_readonly
242243
def indices(self):
243244
""" dict {group name -> group indices} """
245+
if len(self.groupings) == 1 and isinstance(
246+
self.result_index, ABCCategoricalIndex
247+
):
248+
# This shows unused categories in indices GH#38642
249+
return self.groupings[0].indices
244250
codes_list = [ping.codes for ping in self.groupings]
245251
keys = [ping.group_index for ping in self.groupings]
246252
return get_indexer_dict(codes_list, keys)

pandas/tests/groupby/test_categorical.py

+20
Original file line numberDiff line numberDiff line change
@@ -1678,3 +1678,23 @@ def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
16781678
df_grp = df.groupby(["a", "b"], observed=observed)
16791679
result = getattr(df_grp, func)()
16801680
tm.assert_frame_equal(result, expected)
1681+
1682+
1683+
def test_groupby_categorical_indices_unused_categories():
1684+
# GH#38642
1685+
df = DataFrame(
1686+
{
1687+
"key": Categorical(["b", "b", "a"], categories=["a", "b", "c"]),
1688+
"col": range(3),
1689+
}
1690+
)
1691+
grouped = df.groupby("key", sort=False)
1692+
result = grouped.indices
1693+
expected = {
1694+
"b": np.array([0, 1], dtype="int64"),
1695+
"a": np.array([2], dtype="int64"),
1696+
"c": np.array([], dtype="int64"),
1697+
}
1698+
assert result.keys() == expected.keys()
1699+
for key in result.keys():
1700+
tm.assert_numpy_array_equal(result[key], expected[key])

0 commit comments

Comments
 (0)