Skip to content

Commit f4fba9e

Browse files
alimcmaster1 authored and toobaz committed
BUG: Fix passing empty label to df drop (#21515)
Closes #21494
1 parent f91a704 commit f4fba9e

File tree

6 files changed

+98
-50
lines changed

6 files changed

+98
-50
lines changed

doc/source/whatsnew/v0.23.2.txt

+1
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ Bug Fixes
6161

6262
- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
6363
- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
64+
- Bug in :meth:`DataFrame.drop` where behaviour was not consistent for unique and non-unique indexes (:issue:`21494`)
6465
- Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`).
6566
-
6667

pandas/core/generic.py

+11-10
Original file line number | Diff line number | Diff line change
@@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
31293129
"""
31303130
axis = self._get_axis_number(axis)
31313131
axis_name = self._get_axis_name(axis)
3132-
axis, axis_ = self._get_axis(axis), axis
3132+
axis = self._get_axis(axis)
31333133

31343134
if axis.is_unique:
31353135
if level is not None:
@@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
31383138
new_axis = axis.drop(labels, level=level, errors=errors)
31393139
else:
31403140
new_axis = axis.drop(labels, errors=errors)
3141-
dropped = self.reindex(**{axis_name: new_axis})
3142-
try:
3143-
dropped.axes[axis_].set_names(axis.names, inplace=True)
3144-
except AttributeError:
3145-
pass
3146-
result = dropped
3141+
result = self.reindex(**{axis_name: new_axis})
31473142

3143+
# Case for non-unique axis
31483144
else:
31493145
labels = _ensure_object(com._index_labels_to_array(labels))
31503146
if level is not None:
31513147
if not isinstance(axis, MultiIndex):
31523148
raise AssertionError('axis must be a MultiIndex')
31533149
indexer = ~axis.get_level_values(level).isin(labels)
3150+
3151+
# GH 18561 MultiIndex.drop should raise if label is absent
3152+
if errors == 'raise' and indexer.all():
3153+
raise KeyError('{} not found in axis'.format(labels))
31543154
else:
31553155
indexer = ~axis.isin(labels)
3156-
3157-
if errors == 'raise' and indexer.all():
3158-
raise KeyError('{} not found in axis'.format(labels))
3156+
# Check if label doesn't exist along axis
3157+
labels_missing = (axis.get_indexer_for(labels) == -1).any()
3158+
if errors == 'raise' and labels_missing:
3159+
raise KeyError('{} not found in axis'.format(labels))
31593160

31603161
slicer = [slice(None)] * self.ndim
31613162
slicer[self._get_axis_number(axis_name)] = indexer

pandas/core/indexes/base.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -4341,7 +4341,7 @@ def drop(self, labels, errors='raise'):
43414341
Raises
43424342
------
43434343
KeyError
4344-
If none of the labels are found in the selected axis
4344+
If not all of the labels are found in the selected axis
43454345
"""
43464346
arr_dtype = 'object' if self.dtype == 'object' else None
43474347
labels = com._index_labels_to_array(labels, dtype=arr_dtype)
@@ -4350,7 +4350,7 @@ def drop(self, labels, errors='raise'):
43504350
if mask.any():
43514351
if errors != 'ignore':
43524352
raise KeyError(
4353-
'labels %s not contained in axis' % labels[mask])
4353+
'{} not found in axis'.format(labels[mask]))
43544354
indexer = indexer[~mask]
43554355
return self.delete(indexer)
43564356

pandas/core/indexes/multi.py

-1
Original file line number | Diff line number | Diff line change
@@ -1707,7 +1707,6 @@ def drop(self, labels, level=None, errors='raise'):
17071707
if errors != 'ignore':
17081708
raise ValueError('labels %s not contained in axis' %
17091709
labels[mask])
1710-
indexer = indexer[~mask]
17111710
except Exception:
17121711
pass
17131712

pandas/tests/frame/test_axis_select_reindex.py

+15
Original file line number | Diff line number | Diff line change
@@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual):
11511151
expected_no_err = actual.T.drop('c', axis=1, level=level,
11521152
errors='ignore')
11531153
assert_frame_equal(expected_no_err.T, actual)
1154+
1155+
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]])
1156+
@pytest.mark.parametrize('drop_labels', [[], [1], [2]])
1157+
def test_drop_empty_list(self, index, drop_labels):
1158+
# GH 21494
1159+
expected_index = [i for i in index if i not in drop_labels]
1160+
frame = pd.DataFrame(index=index).drop(drop_labels)
1161+
tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index))
1162+
1163+
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]])
1164+
@pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]])
1165+
def test_drop_non_empty_list(self, index, drop_labels):
1166+
# GH 21494
1167+
with tm.assert_raises_regex(KeyError, 'not found in axis'):
1168+
pd.DataFrame(index=index).drop(drop_labels)

pandas/tests/series/indexing/test_alter_index.py

+69-37
Original file line number | Diff line number | Diff line change
@@ -472,54 +472,86 @@ def test_rename():
472472
assert result.name == expected.name
473473

474474

475-
def test_drop():
476-
# unique
477-
s = Series([1, 2], index=['one', 'two'])
478-
expected = Series([1], index=['one'])
479-
result = s.drop(['two'])
480-
assert_series_equal(result, expected)
481-
result = s.drop('two', axis='rows')
482-
assert_series_equal(result, expected)
483-
484-
# non-unique
485-
# GH 5248
486-
s = Series([1, 1, 2], index=['one', 'two', 'one'])
487-
expected = Series([1, 2], index=['one', 'one'])
488-
result = s.drop(['two'], axis=0)
489-
assert_series_equal(result, expected)
490-
result = s.drop('two')
491-
assert_series_equal(result, expected)
492-
493-
expected = Series([1], index=['two'])
494-
result = s.drop(['one'])
495-
assert_series_equal(result, expected)
496-
result = s.drop('one')
497-
assert_series_equal(result, expected)
475+
@pytest.mark.parametrize(
476+
'data, index, drop_labels,'
477+
' axis, expected_data, expected_index',
478+
[
479+
# Unique Index
480+
([1, 2], ['one', 'two'], ['two'],
481+
0, [1], ['one']),
482+
([1, 2], ['one', 'two'], ['two'],
483+
'rows', [1], ['one']),
484+
([1, 1, 2], ['one', 'two', 'one'], ['two'],
485+
0, [1, 2], ['one', 'one']),
486+
487+
# GH 5248 Non-Unique Index
488+
([1, 1, 2], ['one', 'two', 'one'], 'two',
489+
0, [1, 2], ['one', 'one']),
490+
([1, 1, 2], ['one', 'two', 'one'], ['one'],
491+
0, [1], ['two']),
492+
([1, 1, 2], ['one', 'two', 'one'], 'one',
493+
0, [1], ['two'])])
494+
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels,
495+
expected_data, expected_index):
496+
497+
s = Series(data=data, index=index)
498+
result = s.drop(drop_labels, axis=axis)
499+
expected = Series(data=expected_data, index=expected_index)
500+
tm.assert_series_equal(result, expected)
498501

499-
# single string/tuple-like
500-
s = Series(range(3), index=list('abc'))
501-
pytest.raises(KeyError, s.drop, 'bc')
502-
pytest.raises(KeyError, s.drop, ('a',))
503502

503+
@pytest.mark.parametrize(
504+
'data, index, drop_labels,'
505+
' axis, error_type, error_desc',
506+
[
507+
# single string/tuple-like
508+
(range(3), list('abc'), 'bc',
509+
0, KeyError, 'not found in axis'),
510+
511+
# bad axis
512+
(range(3), list('abc'), ('a',),
513+
0, KeyError, 'not found in axis'),
514+
(range(3), list('abc'), 'one',
515+
'columns', ValueError, 'No axis named columns')])
516+
def test_drop_exception_raised(data, index, drop_labels,
517+
axis, error_type, error_desc):
518+
519+
with tm.assert_raises_regex(error_type, error_desc):
520+
Series(data, index=index).drop(drop_labels, axis=axis)
521+
522+
523+
def test_drop_with_ignore_errors():
504524
# errors='ignore'
505525
s = Series(range(3), index=list('abc'))
506526
result = s.drop('bc', errors='ignore')
507-
assert_series_equal(result, s)
527+
tm.assert_series_equal(result, s)
508528
result = s.drop(['a', 'd'], errors='ignore')
509529
expected = s.iloc[1:]
510-
assert_series_equal(result, expected)
511-
512-
# bad axis
513-
pytest.raises(ValueError, s.drop, 'one', axis='columns')
530+
tm.assert_series_equal(result, expected)
514531

515532
# GH 8522
516533
s = Series([2, 3], index=[True, False])
517534
assert s.index.is_object()
518535
result = s.drop(True)
519536
expected = Series([3], index=[False])
520-
assert_series_equal(result, expected)
537+
tm.assert_series_equal(result, expected)
538+
521539

522-
# GH 16877
523-
s = Series([2, 3], index=[0, 1])
524-
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
525-
s.drop([False, True])
540+
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]])
541+
@pytest.mark.parametrize('drop_labels', [[], [1], [3]])
542+
def test_drop_empty_list(index, drop_labels):
543+
# GH 21494
544+
expected_index = [i for i in index if i not in drop_labels]
545+
series = pd.Series(index=index).drop(drop_labels)
546+
tm.assert_series_equal(series, pd.Series(index=expected_index))
547+
548+
549+
@pytest.mark.parametrize('data, index, drop_labels', [
550+
(None, [1, 2, 3], [1, 4]),
551+
(None, [1, 2, 2], [1, 4]),
552+
([2, 3], [0, 1], [False, True])
553+
])
554+
def test_drop_non_empty_list(data, index, drop_labels):
555+
# GH 21494 and GH 16877
556+
with tm.assert_raises_regex(KeyError, 'not found in axis'):
557+
pd.Series(data=data, index=index).drop(drop_labels)

0 commit comments

Comments
 (0)