Skip to content

Commit 4842bc7

Browse files
committed
fixup! BUG: Fix .groupby(categorical, sort=False) failing
1 parent 2aec326 commit 4842bc7

File tree

1 file changed

+22
-14
lines changed

1 file changed

+22
-14
lines changed

pandas/core/categorical.py

+22-14
Original file line number | Diff line number | Diff line change
@@ -604,7 +604,13 @@ def _get_categories(self):
604604

605605
def _codes_for_groupby(self, sort):
606606
"""
607-
Return a Categorical adjusted for groupby
607+
If sort=False, return a copy of self, coded with categories as
608+
returned by .unique(), followed by any categories not appearing in
609+
the data. If sort=True, return self.
610+
611+
This method is needed solely to ensure the categorical index of the
612+
GroupBy result has categories in the order of appearance in the data
613+
(GH-8868).
608614
609615
Parameters
610616
----------
@@ -614,21 +620,23 @@ def _codes_for_groupby(self, sort):
614620
Returns
615621
-------
616622
Categorical
617-
In case of sort=True, self is returned with original categories
618-
preserved. In case of sort=False, the new categories are set
619-
to the order of appearance in codes (unless ordered=True),
620-
followed by any unrepresented categories in original order.
623+
If sort=False, the new categories are set to the order of
624+
appearance in codes (unless ordered=True, in which case the
625+
original order is preserved), followed by any unrepresented
626+
categories in the original order.
621627
"""
622-
cat = self
628+
if sort:
629+
return self
630+
623631
# sort=False should order groups in as-encountered order (GH-8868)
624-
if not sort:
625-
cat = self.unique()
626-
# But all categories should be present, including those missing
627-
# from the data (GH-13179), which .unique() dropped
628-
cat.add_categories(self.categories[
629-
~self.categories.isin(cat.categories)],
630-
inplace=True)
631-
cat = self.reorder_categories(cat.categories)
632+
cat = self.unique()
633+
# But for groupby to work, all categories should be present,
634+
# including those missing from the data (GH-13179), which .unique()
635+
# above dropped
636+
cat.add_categories(self.categories[
637+
~self.categories.isin(cat.categories)],
638+
inplace=True)
639+
cat = self.reorder_categories(cat.categories)
632640
return cat
633641

634642
_ordered = None

0 commit comments

Comments
 (0)