Skip to content

Commit d7a2e94

Browse files
aschade authored and jreback committed
Fix no raise dup index when using drop with axis=0 (pandas-dev#19230)
1 parent 6e0927e commit d7a2e94

File tree

8 files changed

+56
-17
lines changed

8 files changed

+56
-17
lines changed

doc/source/whatsnew/v0.23.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ Other API Changes
307307
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
308308
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
309309
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
310+
- A ``KeyError`` is now raised instead of a ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`)
310311
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
311312
- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`)
312313
- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`)
@@ -455,6 +456,8 @@ Indexing
455456
- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`)
456457
- Bug in ``str.extractall`` where an empty :class:`Index` was returned instead of the appropriate :class:`MultiIndex` when there were no matches (:issue:`19034`)
457458
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
459+
- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`)
460+
-
458461

459462
I/O
460463
^^^

pandas/core/generic.py

+8
Original file line numberDiff line numberDiff line change
@@ -2806,6 +2806,11 @@ def drop(self, labels=None, axis=0, index=None, columns=None, level=None,
28062806
-------
28072807
dropped : type of caller
28082808
2809+
Raises
2810+
------
2811+
KeyError
2812+
If none of the labels are found in the selected axis
2813+
28092814
Examples
28102815
--------
28112816
>>> df = pd.DataFrame(np.arange(12).reshape(3,4),
@@ -2909,6 +2914,9 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
29092914
else:
29102915
indexer = ~axis.isin(labels)
29112916

2917+
if errors == 'raise' and indexer.all():
2918+
raise KeyError('{} not found in axis'.format(labels))
2919+
29122920
slicer = [slice(None)] * self.ndim
29132921
slicer[self._get_axis_number(axis_name)] = indexer
29142922

pandas/core/indexes/base.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -3759,15 +3759,20 @@ def drop(self, labels, errors='raise'):
37593759
Returns
37603760
-------
37613761
dropped : Index
3762+
3763+
Raises
3764+
------
3765+
KeyError
3766+
If not all of the labels are found in the selected axis
37623767
"""
37633768
arr_dtype = 'object' if self.dtype == 'object' else None
37643769
labels = _index_labels_to_array(labels, dtype=arr_dtype)
37653770
indexer = self.get_indexer(labels)
37663771
mask = indexer == -1
37673772
if mask.any():
37683773
if errors != 'ignore':
3769-
raise ValueError('labels %s not contained in axis' %
3770-
labels[mask])
3774+
raise KeyError(
3775+
'labels %s not contained in axis' % labels[mask])
37713776
indexer = indexer[~mask]
37723777
return self.delete(indexer)
37733778

pandas/core/reshape/pivot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
7575
for key in keys:
7676
try:
7777
values = values.drop(key)
78-
except (TypeError, ValueError):
78+
except (TypeError, ValueError, KeyError):
7979
pass
8080
values = list(values)
8181

pandas/tests/frame/test_axis_select_reindex.py

+29-6
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def test_drop_names(self):
4141
assert obj.columns.name == 'second'
4242
assert list(df.columns) == ['d', 'e', 'f']
4343

44-
pytest.raises(ValueError, df.drop, ['g'])
45-
pytest.raises(ValueError, df.drop, ['g'], 1)
44+
pytest.raises(KeyError, df.drop, ['g'])
45+
pytest.raises(KeyError, df.drop, ['g'], 1)
4646

4747
# errors = 'ignore'
4848
dropped = df.drop(['g'], errors='ignore')
@@ -87,10 +87,10 @@ def test_drop(self):
8787
assert_frame_equal(simple.drop(
8888
[0, 3], axis='index'), simple.loc[[1, 2], :])
8989

90-
pytest.raises(ValueError, simple.drop, 5)
91-
pytest.raises(ValueError, simple.drop, 'C', 1)
92-
pytest.raises(ValueError, simple.drop, [1, 5])
93-
pytest.raises(ValueError, simple.drop, ['A', 'C'], 1)
90+
pytest.raises(KeyError, simple.drop, 5)
91+
pytest.raises(KeyError, simple.drop, 'C', 1)
92+
pytest.raises(KeyError, simple.drop, [1, 5])
93+
pytest.raises(KeyError, simple.drop, ['A', 'C'], 1)
9494

9595
# errors = 'ignore'
9696
assert_frame_equal(simple.drop(5, errors='ignore'), simple)
@@ -1128,3 +1128,26 @@ def test_reindex_multi(self):
11281128
expected = df.reindex([0, 1]).reindex(columns=['a', 'b'])
11291129

11301130
assert_frame_equal(result, expected)
1131+
1132+
data = [[1, 2, 3], [1, 2, 3]]
1133+
1134+
@pytest.mark.parametrize('actual', [
1135+
DataFrame(data=data, index=['a', 'a']),
1136+
DataFrame(data=data, index=['a', 'b']),
1137+
DataFrame(data=data, index=['a', 'b']).set_index([0, 1]),
1138+
DataFrame(data=data, index=['a', 'a']).set_index([0, 1])
1139+
])
1140+
def test_raise_on_drop_duplicate_index(self, actual):
1141+
1142+
# issue 19186
1143+
level = 0 if isinstance(actual.index, MultiIndex) else None
1144+
with pytest.raises(KeyError):
1145+
actual.drop('c', level=level, axis=0)
1146+
with pytest.raises(KeyError):
1147+
actual.T.drop('c', level=level, axis=1)
1148+
expected_no_err = actual.drop('c', axis=0, level=level,
1149+
errors='ignore')
1150+
assert_frame_equal(expected_no_err, actual)
1151+
expected_no_err = actual.T.drop('c', axis=1, level=level,
1152+
errors='ignore')
1153+
assert_frame_equal(expected_no_err.T, actual)

pandas/tests/indexes/test_base.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1396,8 +1396,8 @@ def test_drop(self):
13961396
expected = self.strIndex[lrange(5) + lrange(10, n)]
13971397
tm.assert_index_equal(dropped, expected)
13981398

1399-
pytest.raises(ValueError, self.strIndex.drop, ['foo', 'bar'])
1400-
pytest.raises(ValueError, self.strIndex.drop, ['1', 'bar'])
1399+
pytest.raises(KeyError, self.strIndex.drop, ['foo', 'bar'])
1400+
pytest.raises(KeyError, self.strIndex.drop, ['1', 'bar'])
14011401

14021402
# errors='ignore'
14031403
mixed = drop.tolist() + ['foo']
@@ -1419,7 +1419,7 @@ def test_drop(self):
14191419
tm.assert_index_equal(dropped, expected)
14201420

14211421
# errors='ignore'
1422-
pytest.raises(ValueError, ser.drop, [3, 4])
1422+
pytest.raises(KeyError, ser.drop, [3, 4])
14231423

14241424
dropped = ser.drop(4, errors='ignore')
14251425
expected = Index([1, 2, 3])
@@ -1448,7 +1448,7 @@ def test_drop_tuple(self, values, to_drop):
14481448

14491449
removed = index.drop(to_drop[1])
14501450
for drop_me in to_drop[1], [to_drop[1]]:
1451-
pytest.raises(ValueError, removed.drop, drop_me)
1451+
pytest.raises(KeyError, removed.drop, drop_me)
14521452

14531453
def test_tuple_union_bug(self):
14541454
import pandas

pandas/tests/series/test_indexing.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1838,8 +1838,8 @@ def test_drop(self):
18381838

18391839
# single string/tuple-like
18401840
s = Series(range(3), index=list('abc'))
1841-
pytest.raises(ValueError, s.drop, 'bc')
1842-
pytest.raises(ValueError, s.drop, ('a', ))
1841+
pytest.raises(KeyError, s.drop, 'bc')
1842+
pytest.raises(KeyError, s.drop, ('a', ))
18431843

18441844
# errors='ignore'
18451845
s = Series(range(3), index=list('abc'))
@@ -1861,7 +1861,7 @@ def test_drop(self):
18611861

18621862
# GH 16877
18631863
s = Series([2, 3], index=[0, 1])
1864-
with tm.assert_raises_regex(ValueError, 'not contained in axis'):
1864+
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
18651865
s.drop([False, True])
18661866

18671867
def test_align(self):

pandas/tests/test_panel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2302,7 +2302,7 @@ def check_drop(drop_val, axis_number, aliases, expected):
23022302
expected = Panel({"One": df})
23032303
check_drop('Two', 0, ['items'], expected)
23042304

2305-
pytest.raises(ValueError, panel.drop, 'Three')
2305+
pytest.raises(KeyError, panel.drop, 'Three')
23062306

23072307
# errors = 'ignore'
23082308
dropped = panel.drop('Three', errors='ignore')

0 commit comments

Comments
 (0)