Skip to content

Commit 8aea45f

Browse files
committed
fixup! BUG: Fix .groupby(categorical, sort=False) failing
1 parent 609a733 commit 8aea45f

File tree

4 files changed

+15
-17
lines changed

4 files changed

+15
-17
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ Bug Fixes
539539

540540
- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`)
541541

542-
- Bug in ``Categorical.unique()`` stripping unused categories resulting in ```.groupby(categorical, sort=False)`` not working (:issue:`13179`)
542+
- Bug in ``.groupby`` where ``.groupby(categorical, sort=False)`` would raise ``ValueError`` due to non-matching categories (:issue:`13179`)
543543

544544

545545

pandas/core/categorical.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -1855,14 +1855,11 @@ def unique(self):
18551855
cat = self.copy()
18561856
# keep nan in codes
18571857
cat._codes = unique_codes
1858-
if not self.ordered:
1859-
take_codes = unique_codes[unique_codes != -1]
1860-
# Sort categories according to codes order, suffixed by
1861-
# unused categories in original order (GH-13179)
1862-
order = (cat.categories[take_codes].tolist() +
1863-
cat.categories.tolist()).index
1864-
cat.set_categories(sorted(cat.categories, key=order), inplace=True)
1865-
return cat
1858+
# exclude nan from indexer for categories
1859+
take_codes = unique_codes[unique_codes != -1]
1860+
if self.ordered:
1861+
take_codes = sorted(take_codes)
1862+
return cat.set_categories(cat.categories.take(take_codes))
18661863

18671864
def equals(self, other):
18681865
"""

pandas/core/groupby.py

+3
Original file line numberDiff line numberDiff line change
@@ -2315,6 +2315,9 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
23152315
# groupby
23162316
else:
23172317
cat = self.grouper.unique()
2318+
cat.add_categories([c for c in self.grouper.categories
2319+
if c not in cat.categories],
2320+
inplace=True) # GH-13179
23182321
self.grouper = self.grouper.reorder_categories(
23192322
cat.categories)
23202323

pandas/tests/test_categorical.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -1287,9 +1287,8 @@ def test_unique(self):
12871287

12881288
cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
12891289
res = cat.unique()
1290-
exp = Index(["a", "b", "c"])
12911290
self.assert_index_equal(res.categories, exp)
1292-
tm.assert_categorical_equal(res, Categorical(["a", "b"], exp))
1291+
tm.assert_categorical_equal(res, Categorical(exp))
12931292

12941293
cat = Categorical(["c", "a", "b", "a", "a"],
12951294
categories=["a", "b", "c"])
@@ -1303,9 +1302,9 @@ def test_unique(self):
13031302
cat = Categorical(["b", np.nan, "b", np.nan, "a"],
13041303
categories=["a", "b", "c"])
13051304
res = cat.unique()
1306-
exp = Index(["b", "a", "c"])
1305+
exp = Index(["b", "a"])
13071306
self.assert_index_equal(res.categories, exp)
1308-
exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a", "c"])
1307+
exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
13091308
tm.assert_categorical_equal(res, exp_cat)
13101309

13111310
def test_unique_ordered(self):
@@ -1325,14 +1324,13 @@ def test_unique_ordered(self):
13251324
cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'],
13261325
ordered=True)
13271326
res = cat.unique()
1328-
exp_cat = Categorical(['b', 'a'], categories=['a', 'b', 'c'],
1329-
ordered=True)
1327+
exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
13301328
tm.assert_categorical_equal(res, exp_cat)
13311329

13321330
cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'],
13331331
ordered=True)
13341332
res = cat.unique()
1335-
exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b', 'c'],
1333+
exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'],
13361334
ordered=True)
13371335
tm.assert_categorical_equal(res, exp_cat)
13381336

@@ -1347,7 +1345,7 @@ def test_unique_index_series(self):
13471345
tm.assert_categorical_equal(pd.Series(c).unique(), exp)
13481346

13491347
c = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
1350-
exp = Categorical([1, 2], categories=[1, 2, 3])
1348+
exp = Categorical([1, 2], categories=[1, 2])
13511349
tm.assert_categorical_equal(c.unique(), exp)
13521350
tm.assert_index_equal(Index(c).unique(), Index(exp))
13531351
tm.assert_categorical_equal(pd.Series(c).unique(), exp)

0 commit comments

Comments
 (0)