Skip to content

Commit a4ae0cf

Browse files
committed
Merge pull request pandas-dev#6736 from sinhrks/drop
ENH: drop function now has errors keyword for non-existing column handling
2 parents fa06aeb + a2620d7 commit a4ae0cf

File tree

8 files changed

+142
-22
lines changed

8 files changed

+142
-22
lines changed

doc/source/whatsnew/v0.15.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ API changes
4949
In [3]: cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c'])
5050

5151
In [4]: cat
52-
Out[4]:
52+
Out[4]:
5353
[a, b, a]
5454
Categories (3, object): [a < b < c]
5555

doc/source/whatsnew/v0.16.1.txt

+7
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ Enhancements
2323

2424

2525

26+
- ``drop`` function can now accept ``errors`` keyword to suppress the ``ValueError`` raised when any of the labels does not exist in the target data. (:issue:`6736`)
27+
28+
.. ipython:: python
29+
30+
df = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C'])
31+
df.drop(['A', 'X'], axis=1, errors='ignore')
32+
2633

2734
.. _whatsnew_0161.api:
2835

pandas/core/generic.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1557,7 +1557,7 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
15571557

15581558
return self.reindex(**d)
15591559

1560-
def drop(self, labels, axis=0, level=None, inplace=False):
1560+
def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
15611561
"""
15621562
Return new object with labels in requested axis removed
15631563
@@ -1569,6 +1569,8 @@ def drop(self, labels, axis=0, level=None, inplace=False):
15691569
For MultiIndex
15701570
inplace : bool, default False
15711571
If True, do operation inplace and return None.
1572+
errors : {'ignore', 'raise'}, default 'raise'
1573+
If 'ignore', suppress the error and drop only the labels that exist.
15721574
15731575
Returns
15741576
-------
@@ -1582,9 +1584,9 @@ def drop(self, labels, axis=0, level=None, inplace=False):
15821584
if level is not None:
15831585
if not isinstance(axis, MultiIndex):
15841586
raise AssertionError('axis must be a MultiIndex')
1585-
new_axis = axis.drop(labels, level=level)
1587+
new_axis = axis.drop(labels, level=level, errors=errors)
15861588
else:
1587-
new_axis = axis.drop(labels)
1589+
new_axis = axis.drop(labels, errors=errors)
15881590
dropped = self.reindex(**{axis_name: new_axis})
15891591
try:
15901592
dropped.axes[axis_].set_names(axis.names, inplace=True)

pandas/core/index.py

+20-11
Original file line numberDiff line numberDiff line change
@@ -2325,13 +2325,15 @@ def insert(self, loc, item):
23252325
(_self[:loc], item_idx, _self[loc:]))
23262326
return Index(idx, name=self.name)
23272327

2328-
def drop(self, labels):
2328+
def drop(self, labels, errors='raise'):
23292329
"""
23302330
Make new Index with passed list of labels deleted
23312331
23322332
Parameters
23332333
----------
23342334
labels : array-like
2335+
errors : {'ignore', 'raise'}, default 'raise'
2336+
If 'ignore', suppress the error and drop only the labels that exist.
23352337
23362338
Returns
23372339
-------
@@ -2341,7 +2343,9 @@ def drop(self, labels):
23412343
indexer = self.get_indexer(labels)
23422344
mask = indexer == -1
23432345
if mask.any():
2344-
raise ValueError('labels %s not contained in axis' % labels[mask])
2346+
if errors != 'ignore':
2347+
raise ValueError('labels %s not contained in axis' % labels[mask])
2348+
indexer = indexer[~mask]
23452349
return self.delete(indexer)
23462350

23472351
@Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs)
@@ -3847,7 +3851,7 @@ def repeat(self, n):
38473851
sortorder=self.sortorder,
38483852
verify_integrity=False)
38493853

3850-
def drop(self, labels, level=None):
3854+
def drop(self, labels, level=None, errors='raise'):
38513855
"""
38523856
Make new MultiIndex with passed list of labels deleted
38533857
@@ -3870,19 +3874,24 @@ def drop(self, labels, level=None):
38703874
indexer = self.get_indexer(labels)
38713875
mask = indexer == -1
38723876
if mask.any():
3873-
raise ValueError('labels %s not contained in axis'
3874-
% labels[mask])
3875-
return self.delete(indexer)
3877+
if errors != 'ignore':
3878+
raise ValueError('labels %s not contained in axis'
3879+
% labels[mask])
3880+
indexer = indexer[~mask]
38763881
except Exception:
38773882
pass
38783883

38793884
inds = []
38803885
for label in labels:
3881-
loc = self.get_loc(label)
3882-
if isinstance(loc, int):
3883-
inds.append(loc)
3884-
else:
3885-
inds.extend(lrange(loc.start, loc.stop))
3886+
try:
3887+
loc = self.get_loc(label)
3888+
if isinstance(loc, int):
3889+
inds.append(loc)
3890+
else:
3891+
inds.extend(lrange(loc.start, loc.stop))
3892+
except KeyError:
3893+
if errors != 'ignore':
3894+
raise
38863895

38873896
return self.delete(inds)
38883897

pandas/tests/test_frame.py

+33
Original file line numberDiff line numberDiff line change
@@ -7423,6 +7423,26 @@ def test_drop_names(self):
74237423
self.assertEqual(obj.columns.name, 'second')
74247424
self.assertEqual(list(df.columns), ['d', 'e', 'f'])
74257425

7426+
self.assertRaises(ValueError, df.drop, ['g'])
7427+
self.assertRaises(ValueError, df.drop, ['g'], 1)
7428+
7429+
# errors = 'ignore'
7430+
dropped = df.drop(['g'], errors='ignore')
7431+
expected = Index(['a', 'b', 'c'])
7432+
self.assert_index_equal(dropped.index, expected)
7433+
7434+
dropped = df.drop(['b', 'g'], errors='ignore')
7435+
expected = Index(['a', 'c'])
7436+
self.assert_index_equal(dropped.index, expected)
7437+
7438+
dropped = df.drop(['g'], axis=1, errors='ignore')
7439+
expected = Index(['d', 'e', 'f'])
7440+
self.assert_index_equal(dropped.columns, expected)
7441+
7442+
dropped = df.drop(['d', 'g'], axis=1, errors='ignore')
7443+
expected = Index(['e', 'f'])
7444+
self.assert_index_equal(dropped.columns, expected)
7445+
74267446
def test_dropEmptyRows(self):
74277447
N = len(self.frame.index)
74287448
mat = randn(N)
@@ -7801,6 +7821,19 @@ def test_drop(self):
78017821
assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.ix[[2], :])
78027822
assert_frame_equal(simple.drop([0, 3], axis='index'), simple.ix[[1, 2], :])
78037823

7824+
self.assertRaises(ValueError, simple.drop, 5)
7825+
self.assertRaises(ValueError, simple.drop, 'C', 1)
7826+
self.assertRaises(ValueError, simple.drop, [1, 5])
7827+
self.assertRaises(ValueError, simple.drop, ['A', 'C'], 1)
7828+
7829+
# errors = 'ignore'
7830+
assert_frame_equal(simple.drop(5, errors='ignore'), simple)
7831+
assert_frame_equal(simple.drop([0, 5], errors='ignore'),
7832+
simple.ix[[1, 2, 3], :])
7833+
assert_frame_equal(simple.drop('C', axis=1, errors='ignore'), simple)
7834+
assert_frame_equal(simple.drop(['A', 'C'], axis=1, errors='ignore'),
7835+
simple[['B']])
7836+
78047837
#non-unique - wheee!
78057838
nu_df = DataFrame(lzip(range(3), range(-3, 1), list('abc')),
78067839
columns=['a', 'a', 'b'])

pandas/tests/test_index.py

+59-7
Original file line numberDiff line numberDiff line change
@@ -1036,20 +1036,43 @@ def check_slice(in_slice, expected):
10361036
def test_drop(self):
10371037
n = len(self.strIndex)
10381038

1039-
dropped = self.strIndex.drop(self.strIndex[lrange(5, 10)])
1039+
drop = self.strIndex[lrange(5, 10)]
1040+
dropped = self.strIndex.drop(drop)
10401041
expected = self.strIndex[lrange(5) + lrange(10, n)]
10411042
self.assertTrue(dropped.equals(expected))
10421043

10431044
self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar'])
1045+
self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar'])
1046+
1047+
# errors='ignore'
1048+
mixed = drop.tolist() + ['foo']
1049+
dropped = self.strIndex.drop(mixed, errors='ignore')
1050+
expected = self.strIndex[lrange(5) + lrange(10, n)]
1051+
self.assert_index_equal(dropped, expected)
1052+
1053+
dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore')
1054+
expected = self.strIndex[lrange(n)]
1055+
self.assert_index_equal(dropped, expected)
10441056

10451057
dropped = self.strIndex.drop(self.strIndex[0])
10461058
expected = self.strIndex[1:]
1047-
self.assertTrue(dropped.equals(expected))
1059+
self.assert_index_equal(dropped, expected)
10481060

10491061
ser = Index([1, 2, 3])
10501062
dropped = ser.drop(1)
10511063
expected = Index([2, 3])
1052-
self.assertTrue(dropped.equals(expected))
1064+
self.assert_index_equal(dropped, expected)
1065+
1066+
# errors='ignore'
1067+
self.assertRaises(ValueError, ser.drop, [3, 4])
1068+
1069+
dropped = ser.drop(4, errors='ignore')
1070+
expected = Index([1, 2, 3])
1071+
self.assert_index_equal(dropped, expected)
1072+
1073+
dropped = ser.drop([3, 4, 5], errors='ignore')
1074+
expected = Index([1, 2])
1075+
self.assert_index_equal(dropped, expected)
10531076

10541077
def test_tuple_union_bug(self):
10551078
import pandas
@@ -3529,21 +3552,50 @@ def test_drop(self):
35293552
dropped2 = self.index.drop(index)
35303553

35313554
expected = self.index[[0, 2, 3, 5]]
3532-
self.assertTrue(dropped.equals(expected))
3533-
self.assertTrue(dropped2.equals(expected))
3555+
self.assert_index_equal(dropped, expected)
3556+
self.assert_index_equal(dropped2, expected)
35343557

35353558
dropped = self.index.drop(['bar'])
35363559
expected = self.index[[0, 1, 3, 4, 5]]
3537-
self.assertTrue(dropped.equals(expected))
3560+
self.assert_index_equal(dropped, expected)
3561+
3562+
dropped = self.index.drop('foo')
3563+
expected = self.index[[2, 3, 4, 5]]
3564+
self.assert_index_equal(dropped, expected)
35383565

35393566
index = MultiIndex.from_tuples([('bar', 'two')])
35403567
self.assertRaises(KeyError, self.index.drop, [('bar', 'two')])
35413568
self.assertRaises(KeyError, self.index.drop, index)
3569+
self.assertRaises(KeyError, self.index.drop, ['foo', 'two'])
3570+
3571+
# partially correct argument
3572+
mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')])
3573+
self.assertRaises(KeyError, self.index.drop, mixed_index)
3574+
3575+
# errors='ignore'
3576+
dropped = self.index.drop(index, errors='ignore')
3577+
expected = self.index[[0, 1, 2, 3, 4, 5]]
3578+
self.assert_index_equal(dropped, expected)
3579+
3580+
dropped = self.index.drop(mixed_index, errors='ignore')
3581+
expected = self.index[[0, 1, 2, 3, 5]]
3582+
self.assert_index_equal(dropped, expected)
3583+
3584+
dropped = self.index.drop(['foo', 'two'], errors='ignore')
3585+
expected = self.index[[2, 3, 4, 5]]
3586+
self.assert_index_equal(dropped, expected)
35423587

35433588
# mixed partial / full drop
35443589
dropped = self.index.drop(['foo', ('qux', 'one')])
35453590
expected = self.index[[2, 3, 5]]
3546-
self.assertTrue(dropped.equals(expected))
3591+
self.assert_index_equal(dropped, expected)
3592+
3593+
# mixed partial / full drop / errors='ignore'
3594+
mixed_index = ['foo', ('qux', 'one'), 'two']
3595+
self.assertRaises(KeyError, self.index.drop, mixed_index)
3596+
dropped = self.index.drop(mixed_index, errors='ignore')
3597+
expected = self.index[[2, 3, 5]]
3598+
self.assert_index_equal(dropped, expected)
35473599

35483600
def test_droplevel_with_names(self):
35493601
index = self.index[self.index.get_loc('foo')]

pandas/tests/test_panel.py

+9
Original file line numberDiff line numberDiff line change
@@ -1984,6 +1984,15 @@ def check_drop(drop_val, axis_number, aliases, expected):
19841984
expected = Panel({"One": df})
19851985
check_drop('Two', 0, ['items'], expected)
19861986

1987+
self.assertRaises(ValueError, panel.drop, 'Three')
1988+
1989+
# errors = 'ignore'
1990+
dropped = panel.drop('Three', errors='ignore')
1991+
assert_panel_equal(dropped, panel)
1992+
dropped = panel.drop(['Two', 'Three'], errors='ignore')
1993+
expected = Panel({"One": df})
1994+
assert_panel_equal(dropped, expected)
1995+
19871996
# Major
19881997
exp_df = DataFrame({"A": [2], "B": [4]}, index=[1])
19891998
expected = Panel({"One": exp_df, "Two": exp_df})

pandas/tests/test_series.py

+8
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,14 @@ def test_drop(self):
19541954
self.assertRaises(ValueError, s.drop, 'bc')
19551955
self.assertRaises(ValueError, s.drop, ('a',))
19561956

1957+
# errors='ignore'
1958+
s = Series(range(3),index=list('abc'))
1959+
result = s.drop('bc', errors='ignore')
1960+
assert_series_equal(result, s)
1961+
result = s.drop(['a', 'd'], errors='ignore')
1962+
expected = s.ix[1:]
1963+
assert_series_equal(result, expected)
1964+
19571965
# bad axis
19581966
self.assertRaises(ValueError, s.drop, 'one', axis='columns')
19591967

0 commit comments

Comments
 (0)