Skip to content

Commit 5da3759

Browse files
ekisslinger authored and jreback committed
BUG: Fix groupby over a CategoricalIndex in axis=1 (#18525)
1 parent 67c4d0f commit 5da3759

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

doc/source/whatsnew/v0.21.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ Categorical
137137
- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`)
138138
- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
139139
- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`)
140+
- Bug in ``DataFrame.groupby(axis=1)`` with a ``CategoricalIndex`` raising a spurious ``ValueError`` when the number of columns differed from the number of rows (:issue:`18432`)
140141

141142
String
142143
^^^^^^

pandas/core/groupby.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -2933,9 +2933,11 @@ def is_in_obj(gpr):
29332933
else:
29342934
in_axis, name = False, None
29352935

2936-
if is_categorical_dtype(gpr) and len(gpr) != len(obj):
2937-
raise ValueError("Categorical dtype grouper must "
2938-
"have len(grouper) == len(data)")
2936+
if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
2937+
raise ValueError(
2938+
("Length of grouper ({len_gpr}) and axis ({len_axis})"
2939+
" must be same length"
2940+
.format(len_gpr=len(gpr), len_axis=obj.shape[axis])))
29392941

29402942
# create the Grouping
29412943
# allow us to pass the actual Grouping as the gpr

pandas/tests/groupby/test_grouping.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from warnings import catch_warnings
88
from pandas import (date_range, Timestamp,
9-
Index, MultiIndex, DataFrame, Series)
9+
Index, MultiIndex, DataFrame, Series, CategoricalIndex)
1010
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
1111
assert_series_equal, assert_almost_equal)
1212
from pandas.compat import lrange, long
@@ -251,6 +251,29 @@ def test_groupby_levels_and_columns(self):
251251
by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64)
252252
tm.assert_frame_equal(by_levels, by_columns)
253253

254+
def test_groupby_categorical_index_and_columns(self):
255+
# GH18432
256+
columns = ['A', 'B', 'A', 'B']
257+
categories = ['B', 'A']
258+
data = np.ones((5, 4), int)
259+
cat_columns = CategoricalIndex(columns,
260+
categories=categories,
261+
ordered=True)
262+
df = DataFrame(data=data, columns=cat_columns)
263+
result = df.groupby(axis=1, level=0).sum()
264+
expected_data = 2 * np.ones((5, 2), int)
265+
expected_columns = CategoricalIndex(categories,
266+
categories=categories,
267+
ordered=True)
268+
expected = DataFrame(data=expected_data, columns=expected_columns)
269+
assert_frame_equal(result, expected)
270+
271+
# test transposed version
272+
df = DataFrame(data.T, index=cat_columns)
273+
result = df.groupby(axis=0, level=0).sum()
274+
expected = DataFrame(data=expected_data.T, index=expected_columns)
275+
assert_frame_equal(result, expected)
276+
254277
def test_grouper_getting_correct_binner(self):
255278

256279
# GH 10063

0 commit comments

Comments
 (0)