Skip to content

Commit 0c550e6

Browse files
committed
BUG: Fix .groupby(categorical, sort=False) failing
1 parent 29aeffb commit 0c550e6

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,7 @@ Bug Fixes
539539

540540
- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`)
541541

542+
- Bug in ``.groupby`` where ``.groupby(categorical, sort=False)`` would raise ``ValueError`` due to non-matching categories (:issue:`13179`)
542543

543544

544545

pandas/core/groupby.py

+5
Original file line numberDiff line numberDiff line change
@@ -2315,6 +2315,11 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
23152315
# groupby
23162316
else:
23172317
cat = self.grouper.unique()
2318+
all_categories = self.grouper.categories
2319+
cat.add_categories(
2320+
all_categories[
2321+
~all_categories.isin(cat.categories)],
2322+
inplace=True) # GH-13179
23182323
self.grouper = self.grouper.reorder_categories(
23192324
cat.categories)
23202325

pandas/tests/groupby/test_categorical.py

+24
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,30 @@ def test_groupby_multi_categorical_as_index(self):
284284

285285
tm.assert_frame_equal(result, expected, check_index_type=True)
286286

287+
def test_groupby_preserve_categories(self):
288+
# GH-13179
289+
categories = list('abc')
290+
291+
# ordered=True
292+
df = DataFrame({'A': pd.Categorical(list('ba'),
293+
categories=categories,
294+
ordered=True)})
295+
index = pd.CategoricalIndex(categories, categories, ordered=True)
296+
tm.assert_index_equal(df.groupby('A', sort=True).first().index, index)
297+
tm.assert_index_equal(df.groupby('A', sort=False).first().index, index)
298+
299+
# ordered=False
300+
df = DataFrame({'A': pd.Categorical(list('ba'),
301+
categories=categories,
302+
ordered=False)})
303+
sort_index = pd.CategoricalIndex(categories, categories, ordered=False)
304+
nosort_index = pd.CategoricalIndex(list('bac'), list('bac'),
305+
ordered=False)
306+
tm.assert_index_equal(df.groupby('A', sort=True).first().index,
307+
sort_index)
308+
tm.assert_index_equal(df.groupby('A', sort=False).first().index,
309+
nosort_index)
310+
287311
def test_groupby_preserve_categorical_dtype(self):
288312
# GH13743, GH13854
289313
df = DataFrame({'A': [1, 2, 1, 1, 2],

0 commit comments

Comments
 (0)