Skip to content

Commit 04b83e0

Browse files
jreback authored and jorisvandenbossche committed
[Backport #14777] BUG: Bug in a groupby of a non-lexsorted MultiIndex
closes #14776

Author: Jeff Reback <[email protected]>

Closes #14777 from jreback/mi_sort and squashes the following commits:

cf31905 [Jeff Reback] BUG: Bug in a groupby of a non-lexsorted MultiIndex and multiple grouping levels

(cherry picked from commit f23010a)
1 parent 7814a66 commit 04b83e0

File tree

3 files changed: +31 -2 lines changed

3 files changed

+31
-2
lines changed

doc/source/whatsnew/v0.19.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Bug Fixes
3636
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)
3737

3838

39-
39+
- Bug in ``.groupby(..., sort=True)`` of a non-lexsorted MultiIndex when grouping with multiple levels (:issue:`14776`)
4040

4141

4242

pandas/core/groupby.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,17 @@ def reset_identity(values):
861861
if isinstance(result, Series):
862862
result = result.reindex(ax)
863863
else:
864-
result = result.reindex_axis(ax, axis=self.axis)
864+
865+
# this is a very unfortunate situation
866+
# we have a multi-index that is NOT lexsorted
867+
# and we have a result which is duplicated
868+
# we can't reindex, so we resort to this
869+
# GH 14776
870+
if isinstance(ax, MultiIndex) and not ax.is_unique:
871+
result = result.take(result.index.get_indexer_for(
872+
ax.values).unique(), axis=self.axis)
873+
else:
874+
result = result.reindex_axis(ax, axis=self.axis)
865875

866876
elif self.group_keys:
867877

pandas/tests/test_groupby.py

+19
Original file line numberDiff line numberDiff line change
@@ -4736,6 +4736,25 @@ def test_groupby_multiindex_not_lexsorted(self):
47364736
result = not_lexsorted_df.groupby('a').mean()
47374737
tm.assert_frame_equal(expected, result)
47384738

4739+
# a transforming function should work regardless of sort
4740+
# GH 14776
4741+
df = DataFrame({'x': ['a', 'a', 'b', 'a'],
4742+
'y': [1, 1, 2, 2],
4743+
'z': [1, 2, 3, 4]}).set_index(['x', 'y'])
4744+
self.assertFalse(df.index.is_lexsorted())
4745+
4746+
for level in [0, 1, [0, 1]]:
4747+
for sort in [False, True]:
4748+
result = df.groupby(level=level, sort=sort).apply(
4749+
DataFrame.drop_duplicates)
4750+
expected = df
4751+
tm.assert_frame_equal(expected, result)
4752+
4753+
result = df.sort_index().groupby(level=level, sort=sort).apply(
4754+
DataFrame.drop_duplicates)
4755+
expected = df.sort_index()
4756+
tm.assert_frame_equal(expected, result)
4757+
47394758
def test_groupby_levels_and_columns(self):
47404759
# GH9344, GH9049
47414760
idx_names = ['x', 'y']

0 commit comments

Comments
 (0)