-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Fix passing empty label to df drop #21515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
35742fc
832a50b
bb80ded
a81c74a
394b384
93bbf05
1b832d0
8119f72
9d5a42f
01f6a9c
13b36c2
d5f67e1
1ffc0d4
454cb7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3138,12 +3138,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): | |
new_axis = axis.drop(labels, level=level, errors=errors) | ||
else: | ||
new_axis = axis.drop(labels, errors=errors) | ||
dropped = self.reindex(**{axis_name: new_axis}) | ||
try: | ||
dropped.axes[axis_].set_names(axis.names, inplace=True) | ||
except AttributeError: | ||
pass | ||
result = dropped | ||
result = self.reindex(**{axis_name: new_axis}) | ||
|
||
else: | ||
labels = _ensure_object(com._index_labels_to_array(labels)) | ||
|
@@ -3154,8 +3149,12 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): | |
else: | ||
indexer = ~axis.isin(labels) | ||
|
||
if errors == 'raise' and indexer.all(): | ||
raise KeyError('{} not found in axis'.format(labels)) | ||
# Check if label doesn't exist along axis | ||
if len(labels): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You shouldn't need to check if the list is (non-)empty. The rule "if any label is missing" is fine (trivially false) with no labels. |
||
labels_missing = (~np.array([label in axis | ||
for label in labels])).any() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if errors == 'raise' and labels_missing: | ||
raise KeyError('{} not found in axis'.format(labels)) | ||
|
||
slicer = [slice(None)] * self.ndim | ||
slicer[self._get_axis_number(axis_name)] = indexer | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4335,13 +4335,13 @@ def drop(self, labels, errors='raise'): | |
Raises | ||
------ | ||
KeyError | ||
If none of the labels are found in the selected axis | ||
If not all of the labels are found in the selected axis | ||
""" | ||
arr_dtype = 'object' if self.dtype == 'object' else None | ||
labels = com._index_labels_to_array(labels, dtype=arr_dtype) | ||
indexer = self.get_indexer(labels) | ||
mask = indexer == -1 | ||
if mask.any(): | ||
if mask.any() and len(mask): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again: this shouldn't be needed. |
||
if errors != 'ignore': | ||
raise KeyError( | ||
'labels %s not contained in axis' % labels[mask]) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -514,3 +514,25 @@ def test_drop(): | |
s = Series([2, 3], index=[0, 1]) | ||
with tm.assert_raises_regex(KeyError, 'not contained in axis'): | ||
s.drop([False, True]) | ||
|
||
|
||
@pytest.mark.parametrize('index, drop_labels, expected_index', [ | ||
([1, 2, 3], [], [1, 2, 3]), | ||
([1, 1, 2], [], [1, 1, 2]), | ||
([1, 2, 3], [2], [1, 3]), | ||
([1, 1, 3], [1], [3]), | ||
]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason not to parametrize separately There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree will do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is already better, but what I meant was something like @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2], [1, 2, 2]])
@pytest.mark.parametrize('drop_labels', [[], [1]]) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
OK, then here the same applies ;-) |
||
def test_drop_empty_list(index, drop_labels, expected_index): | ||
# GH 21494 | ||
df = pd.DataFrame(index=index).drop(drop_labels) | ||
assert (df.index.values == expected_index).all() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. alway do a this test is in |
||
|
||
|
||
@pytest.mark.parametrize('index, drop_labels, error_key', [ | ||
([1, 2, 3], [1, 4], 'not contained in axis'), | ||
([1, 2, 2], [1, 4], 'not found in axis'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest instead to make the two errors the same (e.g. fix the one in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes that makes sense |
||
]) | ||
def test_drop_non_empty_list(index, drop_labels, error_key): | ||
# GH 21494 | ||
with tm.assert_raises_regex(KeyError, error_key): | ||
pd.DataFrame(index=index).drop(drop_labels) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
comment to indicate this is the non-unique case