Skip to content

Commit 1b23b6f

Browse files
committed
BUG: make NDFrame.drop work with non-unique indexes. close #2101
1 parent d823bce commit 1b23b6f

File tree

3 files changed

+40
-20
lines changed

3 files changed

+40
-20
lines changed

pandas/core/generic.py

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from pandas.tseries.index import DatetimeIndex
88
from pandas.tseries.offsets import DateOffset
99
import pandas.core.common as com
10+
import pandas.lib as lib
1011

1112

1213
class PandasError(Exception):
@@ -337,13 +338,26 @@ def drop(self, labels, axis=0, level=None):
337338
axis_name = self._get_axis_name(axis)
338339
axis = self._get_axis(axis)
339340

340-
if level is not None:
341-
assert(isinstance(axis, MultiIndex))
342-
new_axis = axis.drop(labels, level=level)
341+
if axis.is_unique:
342+
if level is not None:
343+
assert(isinstance(axis, MultiIndex))
344+
new_axis = axis.drop(labels, level=level)
345+
else:
346+
new_axis = axis.drop(labels)
347+
348+
return self.reindex(**{axis_name: new_axis})
343349
else:
344-
new_axis = axis.drop(labels)
350+
if level is not None:
351+
assert(isinstance(axis, MultiIndex))
352+
indexer = -lib.ismember(axis.get_level_values(level),
353+
set(labels))
354+
else:
355+
indexer = -axis.isin(labels)
345356

346-
return self.reindex(**{axis_name: new_axis})
357+
slicer = [slice(None)] * self.ndim
358+
slicer[self._get_axis_number(axis_name)] = indexer
359+
360+
return self.ix[tuple(slicer)]
347361

348362
def sort_index(self, axis=0, ascending=True):
349363
"""

pandas/tests/test_frame.py

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -4366,21 +4366,6 @@ def test_drop_col_still_multiindex(self):
43664366
del df[('a','','')]
43674367
assert(isinstance(df.columns, MultiIndex))
43684368

4369-
def test_drop_nonuq_multiindex(self):
4370-
df = DataFrame([["x-a", "x", "a", 1.5],["x-a", "x", "a", 1.2],
4371-
["z-c", "z", "c", 3.1], ["x-a", "x", "a", 4.1],
4372-
["x-b", "x", "b", 5.1],["x-b", "x", "b", 4.1],
4373-
["x-b", "x", "b", 2.2],
4374-
["y-a", "y", "a", 1.2],["z-b", "z", "b", 2.1]],
4375-
columns=["var1", "var2", "var3", "var4"])
4376-
4377-
grp_size = df.groupby("var1").size()
4378-
drop_idx = grp_size.ix[grp_size == 1]
4379-
4380-
df = df.set_index(["var1", "var2", "var3"])
4381-
4382-
self.assertRaises(Exception, df.drop, drop_idx.index, level=0)
4383-
43844369
def test_fillna(self):
43854370
self.tsframe['A'][:5] = nan
43864371
self.tsframe['A'][-5:] = nan

pandas/tests/test_multilevel.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1326,6 +1326,27 @@ def test_mixed_depth_drop(self):
13261326
('routine2', 'result1', '')], axis=1)
13271327
assert_frame_equal(expected, result)
13281328

1329+
def test_drop_nonunique(self):
1330+
df = DataFrame([["x-a", "x", "a", 1.5],["x-a", "x", "a", 1.2],
1331+
["z-c", "z", "c", 3.1], ["x-a", "x", "a", 4.1],
1332+
["x-b", "x", "b", 5.1],["x-b", "x", "b", 4.1],
1333+
["x-b", "x", "b", 2.2],
1334+
["y-a", "y", "a", 1.2],["z-b", "z", "b", 2.1]],
1335+
columns=["var1", "var2", "var3", "var4"])
1336+
1337+
grp_size = df.groupby("var1").size()
1338+
drop_idx = grp_size.ix[grp_size == 1]
1339+
1340+
idf = df.set_index(["var1", "var2", "var3"])
1341+
1342+
# it works! #2101
1343+
result = idf.drop(drop_idx.index, level=0).reset_index()
1344+
expected = df[-df.var1.isin(drop_idx.index)]
1345+
1346+
result.index = expected.index
1347+
1348+
assert_frame_equal(result, expected)
1349+
13291350
def test_mixed_depth_pop(self):
13301351
arrays = [[ 'a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
13311352
[ '', 'OD', 'OD', 'result1', 'result2', 'result1'],

0 commit comments

Comments
 (0)