diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index a047ad46e4887..43ddfcfe74fcb 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -73,7 +73,7 @@ Bug Fixes **Reshaping** -- +- Fixed bug in :meth:`DataFrame.groupby` where grouping by a categorical with ``observed=True`` returned the groups in order of appearance instead of sorted by category (:issue:`25167`) - - diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index 85f51323a97b5..36f0ecce9f9aa 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -44,8 +44,7 @@ def recode_for_groupby(c, sort, observed): unique_codes = unique1d(c.codes) take_codes = unique_codes[unique_codes != -1] - if c.ordered: - take_codes = np.sort(take_codes) + take_codes = np.sort(take_codes) # we recode according to the uniques categories = c.categories.take(take_codes) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 260417bc0d598..1cf9fc173e8a7 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -300,6 +300,8 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, if observed: codes = algorithms.unique1d(self.grouper.codes) codes = codes[codes != -1] + if sort: + codes = np.sort(codes) else: codes = np.arange(len(categories)) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index e118135ccc75d..e75694aa52293 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -360,6 +360,14 @@ def test_observed(observed): expected = groups2.agg('mean').reset_index() tm.assert_frame_equal(result, expected) + # GH 25167 + df = pd.DataFrame({'A': pd.Categorical(['b', 'a']), 'B': [1, 2]}) + expected = pd.DataFrame([2, 1], + index=pd.CategoricalIndex(['a', 'b'], name='A'), + columns=['B']) + result = df.groupby('A', observed=observed).sum() + tm.assert_frame_equal(result, expected) + def 
test_observed_codes_remap(observed): d = {'C1': [3, 3, 4, 5], 'C2': [1, 2, 3, 4], 'C3': [10, 100, 200, 34]} diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index a509a7cb57c97..36527a3c5b389 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -269,7 +269,7 @@ def test_groupby_categorical_index_and_columns(self, observed): # if we are not-observed we undergo a reindex # so need to adjust the output as our expected sets us up # to be non-observed - expected_columns = CategoricalIndex(['A', 'B'], + expected_columns = CategoricalIndex(['B', 'A'], categories=categories, ordered=True) else: