Skip to content

Commit b291dd6

Browse files
nbonnotte authored and jreback committed
TST drop and groupby on dataframes with non-lexsorted multi-index
closes #11640 closes #11717
1 parent 41abbe5 commit b291dd6

File tree

3 files changed

+47
-1
lines changed

3 files changed

+47
-1
lines changed

pandas/indexes/multi.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1092,7 +1092,7 @@ def drop(self, labels, level=None, errors='raise'):
10921092
elif is_bool_indexer(loc):
10931093
if self.lexsort_depth == 0:
10941094
warnings.warn('dropping on a non-lexsorted multi-index'
1095-
'without a level parameter may impact '
1095+
' without a level parameter may impact '
10961096
'performance.',
10971097
PerformanceWarning,
10981098
stacklevel=2)

pandas/tests/frame/test_axis_select_reindex.py

+23
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,29 @@ def test_drop(self):
114114
df.drop(labels=df[df.b > 0].index, inplace=True)
115115
assert_frame_equal(df, expected)
116116

117+
def test_drop_multiindex_not_lexsorted(self):
118+
# GH 11640
119+
120+
# define the lexsorted version
121+
lexsorted_mi = MultiIndex.from_tuples(
122+
[('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
123+
lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
124+
self.assertTrue(lexsorted_df.columns.is_lexsorted())
125+
126+
# define the non-lexsorted version
127+
not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'],
128+
data=[[1, 'b1', 'c1', 3],
129+
[1, 'b2', 'c2', 4]])
130+
not_lexsorted_df = not_lexsorted_df.pivot_table(
131+
index='a', columns=['b', 'c'], values='d')
132+
not_lexsorted_df = not_lexsorted_df.reset_index()
133+
self.assertFalse(not_lexsorted_df.columns.is_lexsorted())
134+
135+
# compare the results
136+
tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
137+
tm.assert_frame_equal(lexsorted_df.drop('a', axis=1),
138+
not_lexsorted_df.drop('a', axis=1))
139+
117140
def test_reindex(self):
118141
newFrame = self.frame.reindex(self.ts1.index)
119142

pandas/tests/test_groupby.py

+23
Original file line number | Diff line number | Diff line change
@@ -4198,6 +4198,29 @@ def test_groupby_multiindex_missing_pair(self):
41984198

41994199
tm.assert_frame_equal(res, exp)
42004200

4201+
def test_groupby_multiindex_not_lexsorted(self):
4202+
# GH 11640
4203+
4204+
# define the lexsorted version
4205+
lexsorted_mi = MultiIndex.from_tuples(
4206+
[('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
4207+
lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
4208+
self.assertTrue(lexsorted_df.columns.is_lexsorted())
4209+
4210+
# define the non-lexsorted version
4211+
not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'],
4212+
data=[[1, 'b1', 'c1', 3],
4213+
[1, 'b2', 'c2', 4]])
4214+
not_lexsorted_df = not_lexsorted_df.pivot_table(
4215+
index='a', columns=['b', 'c'], values='d')
4216+
not_lexsorted_df = not_lexsorted_df.reset_index()
4217+
self.assertFalse(not_lexsorted_df.columns.is_lexsorted())
4218+
4219+
# compare the results
4220+
tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
4221+
tm.assert_frame_equal(lexsorted_df.groupby('a').mean(),
4222+
not_lexsorted_df.groupby('a').mean())
4223+
42014224
def test_groupby_levels_and_columns(self):
42024225
# GH9344, GH9049
42034226
idx_names = ['x', 'y']

0 commit comments

Comments
 (0)