-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Fix no raise dup index when using drop with axis=0 #19230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
d0b39ea
f43dbf8
a4618bf
b49f78d
43fe5b0
7ceafa1
5f7e6a2
6c7bcf2
fa4c9fe
adf2283
c02c400
85e0094
556f959
5776bf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -272,6 +272,7 @@ Other API Changes | |
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) | ||
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) | ||
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) | ||
- ``KeyError`` now raises instead of ``ValueError`` when using :meth:`drop()` to remove a non-existent element in an axis of ``Series``, ``Index``, ``DataFrame`` and ``Panel`` (:issue:`19186`) | ||
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) | ||
|
||
.. _whatsnew_0230.deprecations: | ||
|
@@ -415,6 +416,8 @@ Indexing | |
- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`) | ||
- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) | ||
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) | ||
- Bug in :func:`Index.drop()`, where no ``Exception`` is raised when dropping a non-existent element from an axis in ``Index`` (:issue:`19186`) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Say that this will now raise a ``KeyError``. |
||
- | ||
|
||
I/O | ||
^^^ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2806,6 +2806,11 @@ def drop(self, labels=None, axis=0, index=None, columns=None, level=None, | |
------- | ||
dropped : type of caller | ||
|
||
Raises | ||
------ | ||
KeyError | ||
* If labels are not found in the selected axis | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't think you need the |
||
|
||
Examples | ||
-------- | ||
>>> df = pd.DataFrame(np.arange(12).reshape(3,4), | ||
|
@@ -2909,6 +2914,9 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): | |
else: | ||
indexer = ~axis.isin(labels) | ||
|
||
if errors == 'raise' and indexer.all(): | ||
raise KeyError('{} not found in axis'.format(labels)) | ||
|
||
slicer = [slice(None)] * self.ndim | ||
slicer[self._get_axis_number(axis_name)] = indexer | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3760,15 +3760,20 @@ def drop(self, labels, errors='raise'): | |
Returns | ||
------- | ||
dropped : Index | ||
|
||
Raises | ||
------ | ||
KeyError | ||
* If labels are not found in the selected axis | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing |
||
""" | ||
arr_dtype = 'object' if self.dtype == 'object' else None | ||
labels = _index_labels_to_array(labels, dtype=arr_dtype) | ||
indexer = self.get_indexer(labels) | ||
mask = indexer == -1 | ||
if mask.any(): | ||
if errors != 'ignore': | ||
raise ValueError('labels %s not contained in axis' % | ||
labels[mask]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You need to update the docstrings of ``drop`` for all of them (Index, Series, DataFrame, Panel) to change ValueError -> KeyError in the Raises section (or add that section if it's not there). |
||
raise KeyError( | ||
'labels %s not contained in axis' % labels[mask]) | ||
indexer = indexer[~mask] | ||
return self.delete(indexer) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,8 +41,8 @@ def test_drop_names(self): | |
assert obj.columns.name == 'second' | ||
assert list(df.columns) == ['d', 'e', 'f'] | ||
|
||
pytest.raises(ValueError, df.drop, ['g']) | ||
pytest.raises(ValueError, df.drop, ['g'], 1) | ||
pytest.raises(KeyError, df.drop, ['g']) | ||
pytest.raises(KeyError, df.drop, ['g'], 1) | ||
|
||
# errors = 'ignore' | ||
dropped = df.drop(['g'], errors='ignore') | ||
|
@@ -87,10 +87,10 @@ def test_drop(self): | |
assert_frame_equal(simple.drop( | ||
[0, 3], axis='index'), simple.loc[[1, 2], :]) | ||
|
||
pytest.raises(ValueError, simple.drop, 5) | ||
pytest.raises(ValueError, simple.drop, 'C', 1) | ||
pytest.raises(ValueError, simple.drop, [1, 5]) | ||
pytest.raises(ValueError, simple.drop, ['A', 'C'], 1) | ||
pytest.raises(KeyError, simple.drop, 5) | ||
pytest.raises(KeyError, simple.drop, 'C', 1) | ||
pytest.raises(KeyError, simple.drop, [1, 5]) | ||
pytest.raises(KeyError, simple.drop, ['A', 'C'], 1) | ||
|
||
# errors = 'ignore' | ||
assert_frame_equal(simple.drop(5, errors='ignore'), simple) | ||
|
@@ -1128,3 +1128,25 @@ def test_reindex_multi(self): | |
expected = df.reindex([0, 1]).reindex(columns=['a', 'b']) | ||
|
||
assert_frame_equal(result, expected) | ||
|
||
data = [[1, 2, 3], [1, 2, 3]] | ||
|
||
@pytest.mark.parametrize('actual', [ | ||
DataFrame(data=data, index=['a', 'a']), | ||
DataFrame(data=data, index=['a', 'b']), | ||
DataFrame(data=data, index=['a', 'b']).set_index([0, 1]), | ||
DataFrame(data=data, index=['a', 'a']).set_index([0, 1]) | ||
]) | ||
def test_raise_on_drop_duplicate_index(self, actual): | ||
|
||
# issue 19186 | ||
level = 0 if isinstance(actual.index, MultiIndex) else None | ||
with pytest.raises(KeyError): | ||
actual.drop('c', level=level, axis=0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Split this into multiple |
||
actual.T.drop('c', level=level, axis=1) | ||
expected_no_err = actual.drop('c', axis=0, level=level, | ||
errors='ignore') | ||
assert_frame_equal(expected_no_err, actual) | ||
expected_no_err = actual.T.drop('c', axis=1, level=level, | ||
errors='ignore') | ||
assert_frame_equal(expected_no_err.T, actual) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you re-word, start with
:meth:`drop()`
(side issue: does this render?) You may have to say :meth:`DataFrame.drop`
(and probably list all of them: Index, Series, Panel).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I don't think that'll render / link. I usually do
When I want to refer to a generic `.drop` method, which renders as `.drop()` but links to DataFrame.