Skip to content

Fix no raise dup index when using drop with axis=0 #19230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jan 18, 2018
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,8 @@ Indexing
- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`)
- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`)
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
- Bug in :func:`DataFrame.drop` where no ``KeyError`` was raised when dropping a non-existent label from an index or columns containing duplicates (:issue:`19186`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add another entry in the API-breaking changes section saying that ``.drop()`` for Series, Index, DataFrame, and Panel now raises KeyError rather than ValueError if labels are missing (use this PR number).

-

I/O
^^^
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2909,6 +2909,9 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
else:
indexer = ~axis.isin(labels)

if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))

slicer = [slice(None)] * self.ndim
slicer[self._get_axis_number(axis_name)] = indexer

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3767,8 +3767,8 @@ def drop(self, labels, errors='raise'):
mask = indexer == -1
if mask.any():
if errors != 'ignore':
raise ValueError('labels %s not contained in axis' %
labels[mask])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to update the docstrings of drop for all of Index, Series, DataFrame, and Panel to change ValueError -> KeyError in the Raises section (or add that section if it's not there).

raise KeyError(
'labels %s not contained in axis' % labels[mask])
indexer = indexer[~mask]
return self.delete(indexer)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
for key in keys:
try:
values = values.drop(key)
except (TypeError, ValueError):
except (TypeError, ValueError, KeyError):
pass
values = list(values)

Expand Down
34 changes: 28 additions & 6 deletions pandas/tests/frame/test_axis_select_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def test_drop_names(self):
assert obj.columns.name == 'second'
assert list(df.columns) == ['d', 'e', 'f']

pytest.raises(ValueError, df.drop, ['g'])
pytest.raises(ValueError, df.drop, ['g'], 1)
pytest.raises(KeyError, df.drop, ['g'])
pytest.raises(KeyError, df.drop, ['g'], 1)

# errors = 'ignore'
dropped = df.drop(['g'], errors='ignore')
Expand Down Expand Up @@ -87,10 +87,10 @@ def test_drop(self):
assert_frame_equal(simple.drop(
[0, 3], axis='index'), simple.loc[[1, 2], :])

pytest.raises(ValueError, simple.drop, 5)
pytest.raises(ValueError, simple.drop, 'C', 1)
pytest.raises(ValueError, simple.drop, [1, 5])
pytest.raises(ValueError, simple.drop, ['A', 'C'], 1)
pytest.raises(KeyError, simple.drop, 5)
pytest.raises(KeyError, simple.drop, 'C', 1)
pytest.raises(KeyError, simple.drop, [1, 5])
pytest.raises(KeyError, simple.drop, ['A', 'C'], 1)

# errors = 'ignore'
assert_frame_equal(simple.drop(5, errors='ignore'), simple)
Expand Down Expand Up @@ -1128,3 +1128,25 @@ def test_reindex_multi(self):
expected = df.reindex([0, 1]).reindex(columns=['a', 'b'])

assert_frame_equal(result, expected)

data = [[1, 2, 3], [1, 2, 3]]

@pytest.mark.parametrize('actual', [
DataFrame(data=data, index=['a', 'a']),
DataFrame(data=data, index=['a', 'b']),
DataFrame(data=data, index=['a', 'b']).set_index([0, 1]),
DataFrame(data=data, index=['a', 'a']).set_index([0, 1])
])
def test_raise_on_drop_duplicate_index(self, actual):

# issue 19186
level = 0 if isinstance(actual.index, MultiIndex) else None
with pytest.raises(KeyError):
actual.drop('c', level=level, axis=0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Split this into multiple with statements, one per exception.

actual.T.drop('c', level=level, axis=1)
expected_no_err = actual.drop('c', axis=0, level=level,
errors='ignore')
assert_frame_equal(expected_no_err, actual)
expected_no_err = actual.T.drop('c', axis=1, level=level,
errors='ignore')
assert_frame_equal(expected_no_err.T, actual)
8 changes: 4 additions & 4 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1396,8 +1396,8 @@ def test_drop(self):
expected = self.strIndex[lrange(5) + lrange(10, n)]
tm.assert_index_equal(dropped, expected)

pytest.raises(ValueError, self.strIndex.drop, ['foo', 'bar'])
pytest.raises(ValueError, self.strIndex.drop, ['1', 'bar'])
pytest.raises(KeyError, self.strIndex.drop, ['foo', 'bar'])
pytest.raises(KeyError, self.strIndex.drop, ['1', 'bar'])

# errors='ignore'
mixed = drop.tolist() + ['foo']
Expand All @@ -1419,7 +1419,7 @@ def test_drop(self):
tm.assert_index_equal(dropped, expected)

# errors='ignore'
pytest.raises(ValueError, ser.drop, [3, 4])
pytest.raises(KeyError, ser.drop, [3, 4])

dropped = ser.drop(4, errors='ignore')
expected = Index([1, 2, 3])
Expand Down Expand Up @@ -1448,7 +1448,7 @@ def test_drop_tuple(self, values, to_drop):

removed = index.drop(to_drop[1])
for drop_me in to_drop[1], [to_drop[1]]:
pytest.raises(ValueError, removed.drop, drop_me)
pytest.raises(KeyError, removed.drop, drop_me)

def test_tuple_union_bug(self):
import pandas
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/series/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,8 +1838,8 @@ def test_drop(self):

# single string/tuple-like
s = Series(range(3), index=list('abc'))
pytest.raises(ValueError, s.drop, 'bc')
pytest.raises(ValueError, s.drop, ('a', ))
pytest.raises(KeyError, s.drop, 'bc')
pytest.raises(KeyError, s.drop, ('a', ))

# errors='ignore'
s = Series(range(3), index=list('abc'))
Expand All @@ -1861,7 +1861,7 @@ def test_drop(self):

# GH 16877
s = Series([2, 3], index=[0, 1])
with tm.assert_raises_regex(ValueError, 'not contained in axis'):
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
s.drop([False, True])

def test_align(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2302,7 +2302,7 @@ def check_drop(drop_val, axis_number, aliases, expected):
expected = Panel({"One": df})
check_drop('Two', 0, ['items'], expected)

pytest.raises(ValueError, panel.drop, 'Three')
pytest.raises(KeyError, panel.drop, 'Three')

# errors = 'ignore'
dropped = panel.drop('Three', errors='ignore')
Expand Down