-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Fix passing empty label to df drop #21515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
35742fc
832a50b
bb80ded
a81c74a
394b384
93bbf05
1b832d0
8119f72
9d5a42f
01f6a9c
13b36c2
d5f67e1
1ffc0d4
454cb7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): | |
""" | ||
axis = self._get_axis_number(axis) | ||
axis_name = self._get_axis_name(axis) | ||
axis, axis_ = self._get_axis(axis), axis | ||
axis = self._get_axis(axis) | ||
|
||
if axis.is_unique: | ||
if level is not None: | ||
|
@@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): | |
new_axis = axis.drop(labels, level=level, errors=errors) | ||
else: | ||
new_axis = axis.drop(labels, errors=errors) | ||
dropped = self.reindex(**{axis_name: new_axis}) | ||
try: | ||
dropped.axes[axis_].set_names(axis.names, inplace=True) | ||
except AttributeError: | ||
pass | ||
result = dropped | ||
result = self.reindex(**{axis_name: new_axis}) | ||
|
||
# Case for non-unique axis | ||
else: | ||
labels = _ensure_object(com._index_labels_to_array(labels)) | ||
if level is not None: | ||
if not isinstance(axis, MultiIndex): | ||
raise AssertionError('axis must be a MultiIndex') | ||
indexer = ~axis.get_level_values(level).isin(labels) | ||
|
||
# GH 18561 MultiIndex.drop should raise if label is absent | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. blank line before the comment |
||
if errors == 'raise' and indexer.all(): | ||
raise KeyError('{} not found in axis'.format(labels)) | ||
else: | ||
indexer = ~axis.isin(labels) | ||
|
||
if errors == 'raise' and indexer.all(): | ||
raise KeyError('{} not found in axis'.format(labels)) | ||
# Check if label doesn't exist along axis | ||
labels_missing = (axis.get_indexer_for(labels) == -1).any() | ||
if errors == 'raise' and labels_missing: | ||
raise KeyError('{} not found in axis'.format(labels)) | ||
|
||
slicer = [slice(None)] * self.ndim | ||
slicer[self._get_axis_number(axis_name)] = indexer | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual): | |
expected_no_err = actual.T.drop('c', axis=1, level=level, | ||
errors='ignore') | ||
assert_frame_equal(expected_no_err.T, actual) | ||
|
||
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]]) | ||
@pytest.mark.parametrize('drop_labels', [[], [1], [2]]) | ||
def test_drop_empty_list(self, index, drop_labels): | ||
# GH 21494 | ||
expected_index = [i for i in index if i not in drop_labels] | ||
frame = pd.DataFrame(index=index).drop(drop_labels) | ||
tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) | ||
|
||
@pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]]) | ||
@pytest.mark.parametrize('drop_labels', [[1, 4]]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please also include an entirely missing list (e.g. [4, 5]). So that we at least give this decorator a reason to exist ;-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @toobaz thanks updated :) |
||
def test_drop_non_empty_list(self, index, drop_labels): | ||
# GH 21494 | ||
with tm.assert_raises_regex(KeyError, 'not found in axis'): | ||
pd.DataFrame(index=index).drop(drop_labels) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -472,54 +472,90 @@ def test_rename(): | |
assert result.name == expected.name | ||
|
||
|
||
def test_drop(): | ||
# unique | ||
s = Series([1, 2], index=['one', 'two']) | ||
expected = Series([1], index=['one']) | ||
result = s.drop(['two']) | ||
assert_series_equal(result, expected) | ||
result = s.drop('two', axis='rows') | ||
assert_series_equal(result, expected) | ||
|
||
# non-unique | ||
# GH 5248 | ||
s = Series([1, 1, 2], index=['one', 'two', 'one']) | ||
expected = Series([1, 2], index=['one', 'one']) | ||
result = s.drop(['two'], axis=0) | ||
assert_series_equal(result, expected) | ||
result = s.drop('two') | ||
assert_series_equal(result, expected) | ||
|
||
expected = Series([1], index=['two']) | ||
result = s.drop(['one']) | ||
assert_series_equal(result, expected) | ||
result = s.drop('one') | ||
assert_series_equal(result, expected) | ||
@pytest.mark.parametrize( | ||
'data, index, drop_labels,' | ||
' axis, expected_data, expected_index', | ||
[ | ||
# Unique Index | ||
([1, 2], ['one', 'two'], ['two'], | ||
0, [1], ['one']), | ||
([1, 2], ['one', 'two'], ['two'], | ||
'rows', [1], ['one']), | ||
([1, 1, 2], ['one', 'two', 'one'], ['two'], | ||
0, [1, 2], ['one', 'one']), | ||
|
||
# GH 5248 Non-Unique Index | ||
([1, 1, 2], ['one', 'two', 'one'], 'two', | ||
0, [1, 2], ['one', 'one']), | ||
([1, 1, 2], ['one', 'two', 'one'], ['one'], | ||
0, [1], ['two']), | ||
([1, 1, 2], ['one', 'two', 'one'], 'one', | ||
0, [1], ['two'])]) | ||
def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, | ||
expected_data, expected_index): | ||
|
||
s = Series(data=data, index=index) | ||
result = s.drop(drop_labels, axis=axis) | ||
expected = Series(data=expected_data, index=expected_index) | ||
tm.assert_series_equal(result, expected) | ||
|
||
# single string/tuple-like | ||
s = Series(range(3), index=list('abc')) | ||
pytest.raises(KeyError, s.drop, 'bc') | ||
pytest.raises(KeyError, s.drop, ('a',)) | ||
|
||
@pytest.mark.parametrize( | ||
'data, index, drop_labels,' | ||
' axis, error_type, error_desc', | ||
[ | ||
# single string/tuple-like | ||
(range(3), list('abc'), 'bc', | ||
0, KeyError, 'not found in axis'), | ||
|
||
# bad axis | ||
(range(3), list('abc'), ('a',), | ||
0, KeyError, 'not found in axis'), | ||
(range(3), list('abc'), 'one', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @jreback that't much neater than my attempt! |
||
'columns', ValueError, 'No axis named columns')]) | ||
def test_drop_exception_raised(data, index, drop_labels, | ||
axis, error_type, error_desc): | ||
|
||
with tm.assert_raises_regex(error_type, error_desc): | ||
Series(data, index=index).drop(drop_labels, axis=axis) | ||
|
||
|
||
def test_drop_with_ignore_errors(): | ||
# errors='ignore' | ||
s = Series(range(3), index=list('abc')) | ||
result = s.drop('bc', errors='ignore') | ||
assert_series_equal(result, s) | ||
tm.assert_series_equal(result, s) | ||
result = s.drop(['a', 'd'], errors='ignore') | ||
expected = s.iloc[1:] | ||
assert_series_equal(result, expected) | ||
|
||
# bad axis | ||
pytest.raises(ValueError, s.drop, 'one', axis='columns') | ||
tm.assert_series_equal(result, expected) | ||
|
||
# GH 8522 | ||
s = Series([2, 3], index=[True, False]) | ||
assert s.index.is_object() | ||
result = s.drop(True) | ||
expected = Series([3], index=[False]) | ||
assert_series_equal(result, expected) | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
||
# GH 16877 | ||
s = Series([2, 3], index=[0, 1]) | ||
with tm.assert_raises_regex(KeyError, 'not contained in axis'): | ||
s.drop([False, True]) | ||
@pytest.mark.parametrize('index, drop_labels', [ | ||
([1, 2, 3], []), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move to test_base with other test_drop tests |
||
([1, 1, 2], []), | ||
([1, 2, 3], [2]), | ||
([1, 1, 3], [1]), | ||
]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason not to parametrize separately There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree will do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is already better, but what I meant was something like @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2], [1, 2, 2]])
@pytest.mark.parametrize('drop_labels', [[], [1]]) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
OK, then here the same applies ;-) |
||
def test_drop_empty_list(index, drop_labels): | ||
# GH 21494 | ||
expected_index = [i for i in index if i not in drop_labels] | ||
series = pd.Series(index=index).drop(drop_labels) | ||
tm.assert_series_equal(series, pd.Series(index=expected_index)) | ||
|
||
|
||
@pytest.mark.parametrize('data, index, drop_labels', [ | ||
(None, [1, 2, 3], [1, 4]), | ||
(None, [1, 2, 2], [1, 4]), | ||
([2, 3], [0, 1], [False, True]) | ||
]) | ||
def test_drop_non_empty_list(data, index, drop_labels): | ||
# GH 21494 and GH 16877 | ||
with tm.assert_raises_regex(KeyError, 'not found in axis'): | ||
pd.Series(data=data, index=index).drop(drop_labels) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
comment to indicate this is the non-unique case