Skip to content

Fix passing empty label to df drop #21515

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 21, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Bug Fixes

- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`)
-

**I/O**
Expand Down
21 changes: 11 additions & 10 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
"""
axis = self._get_axis_number(axis)
axis_name = self._get_axis_name(axis)
axis, axis_ = self._get_axis(axis), axis
axis = self._get_axis(axis)

if axis.is_unique:
if level is not None:
Expand All @@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
new_axis = axis.drop(labels, level=level, errors=errors)
else:
new_axis = axis.drop(labels, errors=errors)
dropped = self.reindex(**{axis_name: new_axis})
try:
dropped.axes[axis_].set_names(axis.names, inplace=True)
except AttributeError:
pass
result = dropped
result = self.reindex(**{axis_name: new_axis})

# Case for non-unique axis
else:
labels = _ensure_object(com._index_labels_to_array(labels))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment to indicate this is the non-unique case

if level is not None:
if not isinstance(axis, MultiIndex):
raise AssertionError('axis must be a MultiIndex')
indexer = ~axis.get_level_values(level).isin(labels)

# GH 18561 MultiIndex.drop should raise if label is absent
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blank line before the comment

if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))
else:
indexer = ~axis.isin(labels)

if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))
# Check if label doesn't exist along axis
labels_missing = (axis.get_indexer_for(labels) == -1).any()
if errors == 'raise' and labels_missing:
raise KeyError('{} not found in axis'.format(labels))

slicer = [slice(None)] * self.ndim
slicer[self._get_axis_number(axis_name)] = indexer
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4335,7 +4335,7 @@ def drop(self, labels, errors='raise'):
Raises
------
KeyError
If none of the labels are found in the selected axis
If not all of the labels are found in the selected axis
"""
arr_dtype = 'object' if self.dtype == 'object' else None
labels = com._index_labels_to_array(labels, dtype=arr_dtype)
Expand All @@ -4344,7 +4344,7 @@ def drop(self, labels, errors='raise'):
if mask.any():
if errors != 'ignore':
raise KeyError(
'labels %s not contained in axis' % labels[mask])
'{} not found in axis'.format(labels[mask]))
indexer = indexer[~mask]
return self.delete(indexer)

Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1719,7 +1719,6 @@ def drop(self, labels, level=None, errors='raise'):
if errors != 'ignore':
raise ValueError('labels %s not contained in axis' %
labels[mask])
indexer = indexer[~mask]
except Exception:
pass

Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/frame/test_axis_select_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,3 +1151,24 @@ def test_raise_on_drop_duplicate_index(self, actual):
expected_no_err = actual.T.drop('c', axis=1, level=level,
errors='ignore')
assert_frame_equal(expected_no_err.T, actual)

@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], []),
([1, 1, 2], []),
([1, 2, 3], [2]),
([1, 1, 3], [1]),
])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think (see my previous comment) I would rather write this as:

@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]])
@pytest.mark.parametrize('drop_labels', [[], [1]])

Do you have any reason to explictly parametrize the combinations?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with you, your approach seems more compact. Let me update

def test_drop_empty_list(self, index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
frame = pd.DataFrame(index=index).drop(drop_labels)
tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index))

@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], [1, 4]),
([1, 2, 2], [1, 4]),
])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly as above, I don't think you need to parametrize drop_labels.

@jreback , what do you think? Do we have a general rule concerning making parameter combinations more explicit/more compact?

def test_drop_non_empty_list(self, index, drop_labels):
# GH 21494
with tm.assert_raises_regex(KeyError, 'not found in axis'):
pd.DataFrame(index=index).drop(drop_labels)
73 changes: 73 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,6 +1565,79 @@ def test_drop_tuple(self, values, to_drop):
for drop_me in to_drop[1], [to_drop[1]]:
pytest.raises(KeyError, removed.drop, drop_me)

def test_drop_unique_and_non_unique_index(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any way to parameterize some of this (e.g. you can break into 2 tests if that makes it easier)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I told you to move them here, but on 2nd thought, there they belong in test_alter_index (where the tests were originally). sorry about that.

test_base is for generically testing Index/Series types and not doing that here so kind of superfluous to put the tests here.

# unique
s = Series([1, 2], index=['one', 'two'])
expected = Series([1], index=['one'])
result = s.drop(['two'])
tm.assert_series_equal(result, expected)
result = s.drop('two', axis='rows')
tm.assert_series_equal(result, expected)

# non-unique
# GH 5248
s = Series([1, 1, 2], index=['one', 'two', 'one'])
expected = Series([1, 2], index=['one', 'one'])
result = s.drop(['two'], axis=0)
tm.assert_series_equal(result, expected)
result = s.drop('two')
tm.assert_series_equal(result, expected)

expected = Series([1], index=['two'])
result = s.drop(['one'])
tm.assert_series_equal(result, expected)
result = s.drop('one')
tm.assert_series_equal(result, expected)

# single string/tuple-like
s = Series(range(3), index=list('abc'))
pytest.raises(KeyError, s.drop, 'bc')
pytest.raises(KeyError, s.drop, ('a',))

# errors='ignore'
s = Series(range(3), index=list('abc'))
result = s.drop('bc', errors='ignore')
tm.assert_series_equal(result, s)
result = s.drop(['a', 'd'], errors='ignore')
expected = s.iloc[1:]
tm.assert_series_equal(result, expected)

# bad axis
pytest.raises(ValueError, s.drop, 'one', axis='columns')

# GH 8522
s = Series([2, 3], index=[True, False])
assert s.index.is_object()
result = s.drop(True)
expected = Series([3], index=[False])
tm.assert_series_equal(result, expected)

# GH 16877
s = Series([2, 3], index=[0, 1])
with tm.assert_raises_regex(KeyError, 'not found in axis'):
s.drop([False, True])

@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], []),
([1, 1, 2], []),
([1, 2, 3], [2]),
([1, 1, 3], [1]),
])
def test_drop_empty_list(self, index, drop_labels):
# GH 21494
expected_index = [i for i in index if i not in drop_labels]
series = pd.Series(index=index).drop(drop_labels)
tm.assert_series_equal(series, pd.Series(index=expected_index))

@pytest.mark.parametrize('index, drop_labels', [
([1, 2, 3], [1, 4]),
([1, 2, 2], [1, 4]),
])
def test_drop_non_empty_list(self, index, drop_labels):
# GH 21494
with tm.assert_raises_regex(KeyError, 'not found in axis'):
pd.Series(index=index).drop(drop_labels)

@pytest.mark.parametrize("method,expected", [
('intersection', np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')],
dtype=[('num', int), ('let', 'a1')])),
Expand Down
53 changes: 0 additions & 53 deletions pandas/tests/series/indexing/test_alter_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,56 +461,3 @@ def test_rename():
assert_series_equal(result, expected)

assert result.name == expected.name


def test_drop():
# unique
s = Series([1, 2], index=['one', 'two'])
expected = Series([1], index=['one'])
result = s.drop(['two'])
assert_series_equal(result, expected)
result = s.drop('two', axis='rows')
assert_series_equal(result, expected)

# non-unique
# GH 5248
s = Series([1, 1, 2], index=['one', 'two', 'one'])
expected = Series([1, 2], index=['one', 'one'])
result = s.drop(['two'], axis=0)
assert_series_equal(result, expected)
result = s.drop('two')
assert_series_equal(result, expected)

expected = Series([1], index=['two'])
result = s.drop(['one'])
assert_series_equal(result, expected)
result = s.drop('one')
assert_series_equal(result, expected)

# single string/tuple-like
s = Series(range(3), index=list('abc'))
pytest.raises(KeyError, s.drop, 'bc')
pytest.raises(KeyError, s.drop, ('a',))

# errors='ignore'
s = Series(range(3), index=list('abc'))
result = s.drop('bc', errors='ignore')
assert_series_equal(result, s)
result = s.drop(['a', 'd'], errors='ignore')
expected = s.iloc[1:]
assert_series_equal(result, expected)

# bad axis
pytest.raises(ValueError, s.drop, 'one', axis='columns')

# GH 8522
s = Series([2, 3], index=[True, False])
assert s.index.is_object()
result = s.drop(True)
expected = Series([3], index=[False])
assert_series_equal(result, expected)

# GH 16877
s = Series([2, 3], index=[0, 1])
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
s.drop([False, True])