Skip to content

Commit 83d51cd

Browse files
alimcmaster1 authored and jorisvandenbossche committed
BUG: Fix passing empty label to df drop (#21515)
Closes #21494 (cherry picked from commit f4fba9e)
1 parent ff7d84a commit 83d51cd

File tree

6 files changed

+98
-50
lines changed

6 files changed

+98
-50
lines changed

doc/source/whatsnew/v0.23.2.txt

+1
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ Bug Fixes
5858

5959
- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
6060
- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
61+
- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`)
6162
- Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`).
6263
-
6364

pandas/core/generic.py

+11-10
Original file line number | Diff line number | Diff line change
@@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
31293129
"""
31303130
axis = self._get_axis_number(axis)
31313131
axis_name = self._get_axis_name(axis)
3132-
axis, axis_ = self._get_axis(axis), axis
3132+
axis = self._get_axis(axis)
31333133

31343134
if axis.is_unique:
31353135
if level is not None:
@@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
31383138
new_axis = axis.drop(labels, level=level, errors=errors)
31393139
else:
31403140
new_axis = axis.drop(labels, errors=errors)
3141-
dropped = self.reindex(**{axis_name: new_axis})
3142-
try:
3143-
dropped.axes[axis_].set_names(axis.names, inplace=True)
3144-
except AttributeError:
3145-
pass
3146-
result = dropped
3141+
result = self.reindex(**{axis_name: new_axis})
31473142

3143+
# Case for non-unique axis
31483144
else:
31493145
labels = _ensure_object(com._index_labels_to_array(labels))
31503146
if level is not None:
31513147
if not isinstance(axis, MultiIndex):
31523148
raise AssertionError('axis must be a MultiIndex')
31533149
indexer = ~axis.get_level_values(level).isin(labels)
3150+
3151+
# GH 18561 MultiIndex.drop should raise if label is absent
3152+
if errors == 'raise' and indexer.all():
3153+
raise KeyError('{} not found in axis'.format(labels))
31543154
else:
31553155
indexer = ~axis.isin(labels)
3156-
3157-
if errors == 'raise' and indexer.all():
3158-
raise KeyError('{} not found in axis'.format(labels))
3156+
# Check if label doesn't exist along axis
3157+
labels_missing = (axis.get_indexer_for(labels) == -1).any()
3158+
if errors == 'raise' and labels_missing:
3159+
raise KeyError('{} not found in axis'.format(labels))
31593160

31603161
slicer = [slice(None)] * self.ndim
31613162
slicer[self._get_axis_number(axis_name)] = indexer

pandas/core/indexes/base.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -4392,7 +4392,7 @@ def drop(self, labels, errors='raise'):
43924392
Raises
43934393
------
43944394
KeyError
4395-
If none of the labels are found in the selected axis
4395+
If not all of the labels are found in the selected axis
43964396
"""
43974397
arr_dtype = 'object' if self.dtype == 'object' else None
43984398
labels = com._index_labels_to_array(labels, dtype=arr_dtype)
@@ -4401,7 +4401,7 @@ def drop(self, labels, errors='raise'):
44014401
if mask.any():
44024402
if errors != 'ignore':
44034403
raise KeyError(
4404-
'labels %s not contained in axis' % labels[mask])
4404+
'{} not found in axis'.format(labels[mask]))
44054405
indexer = indexer[~mask]
44064406
return self.delete(indexer)
44074407

pandas/core/indexes/multi.py

-1
Original file line number | Diff line number | Diff line change
@@ -1707,7 +1707,6 @@ def drop(self, labels, level=None, errors='raise'):
17071707
if errors != 'ignore':
17081708
raise ValueError('labels %s not contained in axis' %
17091709
labels[mask])
1710-
indexer = indexer[~mask]
17111710
except Exception:
17121711
pass
17131712

pandas/tests/frame/test_axis_select_reindex.py

+15
Original file line number | Diff line number | Diff line change
@@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual):
11511151
expected_no_err = actual.T.drop('c', axis=1, level=level,
11521152
errors='ignore')
11531153
assert_frame_equal(expected_no_err.T, actual)
1154+
1155+
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]])
1156+
@pytest.mark.parametrize('drop_labels', [[], [1], [2]])
1157+
def test_drop_empty_list(self, index, drop_labels):
1158+
# GH 21494
1159+
expected_index = [i for i in index if i not in drop_labels]
1160+
frame = pd.DataFrame(index=index).drop(drop_labels)
1161+
tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index))
1162+
1163+
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]])
1164+
@pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]])
1165+
def test_drop_non_empty_list(self, index, drop_labels):
1166+
# GH 21494
1167+
with tm.assert_raises_regex(KeyError, 'not found in axis'):
1168+
pd.DataFrame(index=index).drop(drop_labels)

pandas/tests/series/indexing/test_alter_index.py

+69-37
Original file line number | Diff line number | Diff line change
@@ -463,54 +463,86 @@ def test_rename():
463463
assert result.name == expected.name
464464

465465

466-
def test_drop():
467-
# unique
468-
s = Series([1, 2], index=['one', 'two'])
469-
expected = Series([1], index=['one'])
470-
result = s.drop(['two'])
471-
assert_series_equal(result, expected)
472-
result = s.drop('two', axis='rows')
473-
assert_series_equal(result, expected)
474-
475-
# non-unique
476-
# GH 5248
477-
s = Series([1, 1, 2], index=['one', 'two', 'one'])
478-
expected = Series([1, 2], index=['one', 'one'])
479-
result = s.drop(['two'], axis=0)
480-
assert_series_equal(result, expected)
481-
result = s.drop('two')
482-
assert_series_equal(result, expected)
483-
484-
expected = Series([1], index=['two'])
485-
result = s.drop(['one'])
486-
assert_series_equal(result, expected)
487-
result = s.drop('one')
488-
assert_series_equal(result, expected)
466+
@pytest.mark.parametrize(
467+
'data, index, drop_labels,'
468+
' axis, expected_data, expected_index',
469+
[
470+
# Unique Index
471+
([1, 2], ['one', 'two'], ['two'],
472+
0, [1], ['one']),
473+
([1, 2], ['one', 'two'], ['two'],
474+
'rows', [1], ['one']),
475+
([1, 1, 2], ['one', 'two', 'one'], ['two'],
476+
0, [1, 2], ['one', 'one']),
477+
478+
# GH 5248 Non-Unique Index
479+
([1, 1, 2], ['one', 'two', 'one'], 'two',
480+
0, [1, 2], ['one', 'one']),
481+
([1, 1, 2], ['one', 'two', 'one'], ['one'],
482+
0, [1], ['two']),
483+
([1, 1, 2], ['one', 'two', 'one'], 'one',
484+
0, [1], ['two'])])
485+
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
486+
expected_data, expected_index):
487+
488+
s = Series(data=data, index=index)
489+
result = s.drop(drop_labels, axis=axis)
490+
expected = Series(data=expected_data, index=expected_index)
491+
tm.assert_series_equal(result, expected)
489492

490-
# single string/tuple-like
491-
s = Series(range(3), index=list('abc'))
492-
pytest.raises(KeyError, s.drop, 'bc')
493-
pytest.raises(KeyError, s.drop, ('a',))
494493

494+
@pytest.mark.parametrize(
495+
'data, index, drop_labels,'
496+
' axis, error_type, error_desc',
497+
[
498+
# single string/tuple-like
499+
(range(3), list('abc'), 'bc',
500+
0, KeyError, 'not found in axis'),
501+
502+
# bad axis
503+
(range(3), list('abc'), ('a',),
504+
0, KeyError, 'not found in axis'),
505+
(range(3), list('abc'), 'one',
506+
'columns', ValueError, 'No axis named columns')])
507+
def test_drop_exception_raised(data, index, drop_labels,
508+
axis, error_type, error_desc):
509+
510+
with tm.assert_raises_regex(error_type, error_desc):
511+
Series(data, index=index).drop(drop_labels, axis=axis)
512+
513+
514+
def test_drop_with_ignore_errors():
495515
# errors='ignore'
496516
s = Series(range(3), index=list('abc'))
497517
result = s.drop('bc', errors='ignore')
498-
assert_series_equal(result, s)
518+
tm.assert_series_equal(result, s)
499519
result = s.drop(['a', 'd'], errors='ignore')
500520
expected = s.iloc[1:]
501-
assert_series_equal(result, expected)
502-
503-
# bad axis
504-
pytest.raises(ValueError, s.drop, 'one', axis='columns')
521+
tm.assert_series_equal(result, expected)
505522

506523
# GH 8522
507524
s = Series([2, 3], index=[True, False])
508525
assert s.index.is_object()
509526
result = s.drop(True)
510527
expected = Series([3], index=[False])
511-
assert_series_equal(result, expected)
528+
tm.assert_series_equal(result, expected)
529+
512530

513-
# GH 16877
514-
s = Series([2, 3], index=[0, 1])
515-
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
516-
s.drop([False, True])
531+
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
532+
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
533+
def test_drop_empty_list(index, drop_labels):
534+
# GH 21494
535+
expected_index = [i for i in index if i not in drop_labels]
536+
series = pd.Series(index=index).drop(drop_labels)
537+
tm.assert_series_equal(series, pd.Series(index=expected_index))
538+
539+
540+
@pytest.mark.parametrize('data, index, drop_labels', [
541+
(None, [1, 2, 3], [1, 4]),
542+
(None, [1, 2, 2], [1, 4]),
543+
([2, 3], [0, 1], [False, True])
544+
])
545+
def test_drop_non_empty_list(data, index, drop_labels):
546+
# GH 21494 and GH 16877
547+
with tm.assert_raises_regex(KeyError, 'not found in axis'):
548+
pd.Series(data=data, index=index).drop(drop_labels)

0 commit comments

Comments
 (0)