Skip to content

Commit 3a4821e

Browse files
authored
Group by a categorical Series of unequal length (#44180)
1 parent 50b4df3 commit 3a4821e

File tree

3 files changed

+25
-9
lines changed

3 files changed

+25
-9
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,7 @@ Groupby/resample/rolling
809809
- Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
810810
- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contains duplicates (:issue:`3944`)
811811
- Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`)
812+
- Fixed bug where grouping by a :class:`Series` that has a categorical data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`)
812813

813814
Reshaping
814815
^^^^^^^^^

pandas/core/groupby/grouper.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -887,12 +887,6 @@ def is_in_obj(gpr) -> bool:
887887
else:
888888
in_axis = False
889889

890-
if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
891-
raise ValueError(
892-
f"Length of grouper ({len(gpr)}) and axis ({obj.shape[axis]}) "
893-
"must be same length"
894-
)
895-
896890
# create the Grouping
897891
# allow passing the actual Grouping as the gpr
898892
ping = (
@@ -938,7 +932,7 @@ def _convert_grouper(axis: Index, grouper):
938932
return grouper.reindex(axis)._values
939933
elif isinstance(grouper, MultiIndex):
940934
return grouper._values
941-
elif isinstance(grouper, (list, tuple, Series, Index, np.ndarray)):
935+
elif isinstance(grouper, (list, tuple, Index, Categorical, np.ndarray)):
942936
if len(grouper) != len(axis):
943937
raise ValueError("Grouper and axis must be same length")
944938

pandas/tests/groupby/test_categorical.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -664,11 +664,32 @@ def test_bins_unequal_len():
664664
bins = pd.cut(series.dropna().values, 4)
665665

666666
# len(bins) != len(series) here
667-
msg = r"Length of grouper \(8\) and axis \(10\) must be same length"
668-
with pytest.raises(ValueError, match=msg):
667+
with pytest.raises(ValueError, match="Grouper and axis must be same length"):
669668
series.groupby(bins).mean()
670669

671670

671+
@pytest.mark.parametrize(
672+
["series", "data"],
673+
[
674+
# Group a series with length and index equal to those of the grouper.
675+
(Series(range(4)), {"A": [0, 3], "B": [1, 2]}),
676+
# Group a series with length equal to that of the grouper and index unequal to
677+
# that of the grouper.
678+
(Series(range(4)).rename(lambda idx: idx + 1), {"A": [2], "B": [0, 1]}),
679+
# GH44179: Group a series with length unequal to that of the grouper.
680+
(Series(range(7)), {"A": [0, 3], "B": [1, 2]}),
681+
],
682+
)
683+
def test_categorical_series(series, data):
684+
# Group the given series by a series with categorical data type such that group A
685+
# takes indices 0 and 3 and group B indices 1 and 2, obtaining the values mapped in
686+
# the given data.
687+
groupby = series.groupby(Series(list("ABBA"), dtype="category"))
688+
result = groupby.aggregate(list)
689+
expected = Series(data, index=CategoricalIndex(data.keys()))
690+
tm.assert_series_equal(result, expected)
691+
692+
672693
def test_as_index():
673694
# GH13204
674695
df = DataFrame(

0 commit comments

Comments
 (0)