Skip to content

Fix passing empty label to df drop #21515

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 21, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Bug Fixes

- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`)
-

**I/O**
Expand Down
19 changes: 9 additions & 10 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
"""
axis = self._get_axis_number(axis)
axis_name = self._get_axis_name(axis)
axis, axis_ = self._get_axis(axis), axis
axis = self._get_axis(axis)

if axis.is_unique:
if level is not None:
Expand All @@ -3138,24 +3138,23 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
new_axis = axis.drop(labels, level=level, errors=errors)
else:
new_axis = axis.drop(labels, errors=errors)
dropped = self.reindex(**{axis_name: new_axis})
try:
dropped.axes[axis_].set_names(axis.names, inplace=True)
except AttributeError:
pass
result = dropped
result = self.reindex(**{axis_name: new_axis})

else:
labels = _ensure_object(com._index_labels_to_array(labels))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment to indicate this is the non-unique case

if level is not None:
if not isinstance(axis, MultiIndex):
raise AssertionError('axis must be a MultiIndex')
indexer = ~axis.get_level_values(level).isin(labels)
# GH 18561 MultiIndex.drop should raise if label is absent
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blank line before the comment

if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))
else:
indexer = ~axis.isin(labels)

if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))
# Check if label doesn't exist along axis
labels_missing = (axis.get_indexer_for(labels) == -1).any()
if errors == 'raise' and labels_missing:
raise KeyError('{} not found in axis'.format(labels))

slicer = [slice(None)] * self.ndim
slicer[self._get_axis_number(axis_name)] = indexer
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4335,7 +4335,7 @@ def drop(self, labels, errors='raise'):
Raises
------
KeyError
If none of the labels are found in the selected axis
If not all of the labels are found in the selected axis
"""
arr_dtype = 'object' if self.dtype == 'object' else None
labels = com._index_labels_to_array(labels, dtype=arr_dtype)
Expand All @@ -4344,7 +4344,7 @@ def drop(self, labels, errors='raise'):
if mask.any():
if errors != 'ignore':
raise KeyError(
'labels %s not contained in axis' % labels[mask])
'{} not found in axis'.format(labels[mask]))
indexer = indexer[~mask]
return self.delete(indexer)

Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1719,7 +1719,6 @@ def drop(self, labels, level=None, errors='raise'):
if errors != 'ignore':
raise ValueError('labels %s not contained in axis' %
labels[mask])
indexer = indexer[~mask]
except Exception:
pass

Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3515,3 +3515,24 @@ def test_functions_no_warnings(self):
with tm.assert_produces_warning(False):
df['group'] = pd.cut(df.value, range(0, 105, 10), right=False,
labels=labels)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move with other drop tests in test_axis_select_reindex

@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], []),
([1, 1, 2], []),
([1, 2, 3], [2]),
([1, 1, 3], [1]),
])
def test_drop_empty_list(self, index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
frame = pd.DataFrame(index=index).drop(drop_labels)
tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index))

@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], [1, 4]),
([1, 2, 2], [1, 4]),
])
def test_drop_non_empty_list(self, index, drop_labels):
# GH 21494
with tm.assert_raises_regex(KeyError, 'not found in axis'):
pd.DataFrame(index=index).drop(drop_labels)
25 changes: 24 additions & 1 deletion pandas/tests/series/indexing/test_alter_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,5 +512,28 @@ def test_drop():

# GH 16877
s = Series([2, 3], index=[0, 1])
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
with tm.assert_raises_regex(KeyError, 'not found in axis'):
s.drop([False, True])


@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], []),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to test_base with other test_drop tests

([1, 1, 2], []),
([1, 2, 3], [2]),
([1, 1, 3], [1]),
])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason not to parametrize separately index and drop_labels (and construct expected_index as [l for l in index if not l in drop_labels])?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree will do

Copy link
Member

@toobaz toobaz Jun 18, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already better, but what I meant was something like

@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2], [1, 2, 2]])
@pytest.mark.parametrize('drop_labels', [[], [1]])

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with you, your approach seems more compact. Let me update

OK, then here the same applies ;-)

def test_drop_empty_list(index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
series = pd.Series(index=index).drop(drop_labels)
tm.assert_series_equal(series, pd.Series(index=expected_index))


@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], [1, 4]),
([1, 2, 2], [1, 4]),
])
def test_drop_non_empty_list(index, drop_labels):
# GH 21494
with tm.assert_raises_regex(KeyError, 'not found in axis'):
pd.Series(index=index).drop(drop_labels)