Skip to content

Commit a05147f

Browse files
author
Eric Kisslinger
committed
BUG: Fix groupby over a CategoricalIndex in axis=1
Closes GH18432. Add multi-index columns test to test_groupby_categorical_columns_index(). Add whatsnew entry for the GH18432 bug fix. Fix ValueError text for the GH18432 bug fix.
1 parent 262e8ff commit a05147f

File tree

3 files changed

+44
-3
lines changed

3 files changed

+44
-3
lines changed

doc/source/whatsnew/v0.21.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ Categorical
137137
- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`)
138138
- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
139139
- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`)
140+
- Bug in ``DataFrame.groupby`` where grouping over a ``CategoricalIndex`` with ``axis=1`` could raise a ``ValueError`` (:issue:`18432`)
140141

141142
String
142143
^^^^^^

pandas/core/groupby.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -2859,9 +2859,10 @@ def is_in_obj(gpr):
28592859
else:
28602860
in_axis, name = False, None
28612861

2862-
if is_categorical_dtype(gpr) and len(gpr) != len(obj):
2863-
raise ValueError("Categorical dtype grouper must "
2864-
"have len(grouper) == len(data)")
2862+
if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
2863+
raise ValueError(
2864+
("Length of grouper ({0}) and axis ({1}) must be same length"
2865+
.format(len(gpr), obj.shape[axis])))
28652866

28662867
# create the Grouping
28672868
# allow us to pass the actual Grouping as the gpr

pandas/tests/groupby/test_categorical.py

+39
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,45 @@ def test_groupby_categorical_index(self):
191191
[0, 1, 2, 3], levels, ordered=True), name='cats')
192192
assert_frame_equal(result, expected)
193193

194+
def test_groupby_categorical_columns_index(self):
195+
# GH18432
196+
s = np.random.RandomState(0)
197+
columns = ['A', 'B', 'A', 'B']
198+
categories = ['B', 'A']
199+
cat_columns = CategoricalIndex(columns,
200+
categories=categories,
201+
ordered=True)
202+
data = s.rand(5, len(columns))
203+
df = DataFrame(data, columns=cat_columns)
204+
result = df.groupby(axis=1, level=0).sum()
205+
df = DataFrame(data, columns=columns)
206+
expected = df.groupby(axis=1, level=0).sum()
207+
expected_cat_columns = CategoricalIndex(expected.columns,
208+
categories=categories,
209+
ordered=True)
210+
expected.columns = expected_cat_columns
211+
expected = expected.sort_index(axis=1)
212+
assert_frame_equal(result, expected)
213+
214+
# test multi-index version
215+
levels = [['a', 'b', 'c', 'd'],
216+
Categorical(['A', 'B'], categories=categories, ordered=True)]
217+
mixed_multi_index = MultiIndex.from_product(levels)
218+
data = s.rand(5, len(mixed_multi_index))
219+
df = DataFrame(data=data, columns=mixed_multi_index)
220+
result = df.groupby(axis=1, level=1).sum()
221+
vanilla_multi_index = MultiIndex.from_product([
222+
levels[0],
223+
levels[1].get_values()])
224+
df = DataFrame(data=data, columns=vanilla_multi_index)
225+
expected = df.groupby(axis=1, level=1).sum()
226+
expected_cat_index = CategoricalIndex(expected.columns,
227+
categories=levels[1],
228+
ordered=True)
229+
expected.columns = expected_cat_index
230+
expected = expected.sort_index(axis=1)
231+
assert_frame_equal(result, expected)
232+
194233
def test_groupby_describe_categorical_columns(self):
195234
# GH 11558
196235
cats = pd.CategoricalIndex(['qux', 'foo', 'baz', 'bar'],

0 commit comments

Comments
 (0)