Skip to content

Commit b8b2bf7

Browse files
committed
BUG: get_group fails when multi-grouping with a categorical
1 parent f6c7d89 commit b8b2bf7

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ Bug Fixes
6464

6565

6666
- Bug in ``Timestamp``'s ``microsecond``, ``quarter``, ``dayofyear``, ``week`` and ``daysinmonth`` properties returning ``np.int`` type, not built-in ``int``. (:issue:`10050`)
67+
- Bug in ``GroupBy.get_group`` when grouping on multiple keys, one of which is categorical. (:issue:`10132`)
6768
- Bug in ``NaT`` raising ``AttributeError`` when accessing the ``daysinmonth`` and ``dayofweek`` properties. (:issue:`10096`)
6869

6970

pandas/core/groupby.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1297,11 +1297,18 @@ def apply(self, f, data, axis=0):
12971297
@cache_readonly
12981298
def indices(self):
12991299
""" dict {group name -> group indices} """
1300+
1301+
def extract_values(x):
1302+
if isinstance(x, CategoricalIndex):
1303+
return x.values.get_values()
1304+
else:
1305+
return _values_from_object(x)
1306+
13001307
if len(self.groupings) == 1:
13011308
return self.groupings[0].indices
13021309
else:
13031310
label_list = [ping.labels for ping in self.groupings]
1304-
keys = [_values_from_object(ping.group_index) for ping in self.groupings]
1311+
keys = [extract_values(ping.group_index) for ping in self.groupings]
13051312
return _get_indices_dict(label_list, keys)
13061313

13071314
@property

pandas/tests/test_groupby.py

+7
Original file line numberDiff line numberDiff line change
@@ -5098,6 +5098,13 @@ def test_groupby_categorical_two_columns(self):
50985098
"ints": [1,2,1,2,1,2]}).set_index(["cat","ints"])
50995099
tm.assert_frame_equal(res, exp)
51005100

5101+
# GH 10132
5102+
for key in [('a', 1), ('b', 2), ('b', 1), ('a', 2)]:
5103+
c, i = key
5104+
result = groups_double_key.get_group(key)
5105+
expected = test.query('cat == @c & ints == @i')
5106+
assert_frame_equal(result, expected)
5107+
51015108
d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]}
51025109
test = pd.DataFrame(d)
51035110
values = pd.cut(test['C1'], [1, 2, 3, 6])

0 commit comments

Comments
 (0)