Skip to content

Commit 68fd85b

Browse files
ekisslinger authored and TomAugspurger committed
BUG: Fix groupby over a CategoricalIndex in axis=1 (pandas-dev#18525)
(cherry picked from commit 5da3759)
1 parent 5348c6e commit 68fd85b

File tree

3 files changed: 30 additions (+30) and 4 deletions (-4)

doc/source/whatsnew/v0.21.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,7 @@ Categorical
139139
different ``CategoricalDtype`` (:issue:`18069`)
140140
- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
141141
- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`)
142+
- Bug in ``DataFrame.groupby(axis=1)`` with a ``CategoricalIndex`` (:issue:`18432`)
142143

143144
String
144145
^^^^^^

pandas/core/groupby.py

+5 -3
Original file line number | Diff line number | Diff line change
@@ -2931,9 +2931,11 @@ def is_in_obj(gpr):
29312931
else:
29322932
in_axis, name = False, None
29332933

2934-
if is_categorical_dtype(gpr) and len(gpr) != len(obj):
2935-
raise ValueError("Categorical dtype grouper must "
2936-
"have len(grouper) == len(data)")
2934+
if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
2935+
raise ValueError(
2936+
("Length of grouper ({len_gpr}) and axis ({len_axis})"
2937+
" must be same length"
2938+
.format(len_gpr=len(gpr), len_axis=obj.shape[axis])))
29372939

29382940
# create the Grouping
29392941
# allow us to passing the actual Grouping as the gpr

pandas/tests/groupby/test_groupby.py

+24 -1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
from pandas import (date_range, bdate_range, Timestamp,
1212
Index, MultiIndex, DataFrame, Series,
13-
concat, Panel, DatetimeIndex)
13+
concat, Panel, DatetimeIndex, CategoricalIndex)
1414
from pandas.errors import UnsupportedFunctionCall, PerformanceWarning
1515
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
1616
assert_series_equal, assert_almost_equal,
@@ -262,6 +262,29 @@ def test_grouper_column_and_index(self):
262262
expected = df_single.reset_index().groupby(['inner', 'B']).mean()
263263
assert_frame_equal(result, expected)
264264

265+
def test_groupby_categorical_index_and_columns(self):
266+
# GH18432
267+
columns = ['A', 'B', 'A', 'B']
268+
categories = ['B', 'A']
269+
data = np.ones((5, 4), int)
270+
cat_columns = CategoricalIndex(columns,
271+
categories=categories,
272+
ordered=True)
273+
df = DataFrame(data=data, columns=cat_columns)
274+
result = df.groupby(axis=1, level=0).sum()
275+
expected_data = 2 * np.ones((5, 2), int)
276+
expected_columns = CategoricalIndex(categories,
277+
categories=categories,
278+
ordered=True)
279+
expected = DataFrame(data=expected_data, columns=expected_columns)
280+
assert_frame_equal(result, expected)
281+
282+
# test transposed version
283+
df = DataFrame(data.T, index=cat_columns)
284+
result = df.groupby(axis=0, level=0).sum()
285+
expected = DataFrame(data=expected_data.T, index=expected_columns)
286+
assert_frame_equal(result, expected)
287+
265288
def test_grouper_getting_correct_binner(self):
266289

267290
# GH 10063

0 commit comments

Comments
 (0)