From 35742fc730ced0f556f25b8959e303ba7e2c52df Mon Sep 17 00:00:00 2001 From: alistair Date: Sun, 17 Jun 2018 20:02:26 +0100 Subject: [PATCH 01/11] Fix passing empty label to df drop --- pandas/core/generic.py | 14 ++++++------- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/multi.py | 1 - .../tests/series/indexing/test_alter_index.py | 21 +++++++++++++++++++ 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 32f64b1d3e05c..c8ded4e06572e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3138,12 +3138,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): new_axis = axis.drop(labels, level=level, errors=errors) else: new_axis = axis.drop(labels, errors=errors) - dropped = self.reindex(**{axis_name: new_axis}) - try: - dropped.axes[axis_].set_names(axis.names, inplace=True) - except AttributeError: - pass - result = dropped + result = self.reindex(**{axis_name: new_axis}) else: labels = _ensure_object(com._index_labels_to_array(labels)) @@ -3154,8 +3149,11 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): else: indexer = ~axis.isin(labels) - if errors == 'raise' and indexer.all(): - raise KeyError('{} not found in axis'.format(labels)) + # Check if label doesn't exist along axis + if len(labels): + labels_missing = (~np.array([label in axis for label in labels])).any() + if errors == 'raise' and labels_missing: + raise KeyError('{} not found in axis'.format(labels)) slicer = [slice(None)] * self.ndim slicer[self._get_axis_number(axis_name)] = indexer diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a56278b0da49..7d2ce24bb8dc2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4335,13 +4335,13 @@ def drop(self, labels, errors='raise'): Raises ------ KeyError - If none of the labels are found in the selected axis + If not all of the labels are found in the selected axis """ arr_dtype = 'object' if self.dtype == 'object' else None labels = com._index_labels_to_array(labels, dtype=arr_dtype) indexer = self.get_indexer(labels) mask = indexer == -1 - if mask.any(): + if mask.any() and len(mask): if errors != 'ignore': raise KeyError( 'labels %s not contained in axis' % labels[mask]) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 75b6be96feb78..b83e5679d8cf2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1719,7 +1719,6 @@ def drop(self, labels, level=None, errors='raise'): if errors != 'ignore': raise ValueError('labels %s not contained in axis' % labels[mask]) - indexer = indexer[~mask] except Exception: pass diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 999ed5f26daee..589c4a29d2cf3 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -514,3 +514,24 @@ def test_drop(): s = Series([2, 3], index=[0, 1]) with tm.assert_raises_regex(KeyError, 'not contained in axis'): s.drop([False, True]) + + +@pytest.mark.parametrize('index, drop_labels, expected_index', [ + ([1, 2, 3], [], [1, 2, 3]), + ([1, 1, 2], [], [1, 1, 2]), + ([1, 2, 3], [2], [1, 3]), + ([1, 1, 3], [1], [3]), + ]) +def test_drop_empty_list(index, drop_labels, expected_index): + # GH 21494 + df = pd.DataFrame(index=index).drop(drop_labels) + assert (df.index.values == expected_index).all() + +@pytest.mark.parametrize('index, drop_labels, error_key', [ + ([1, 2, 3], [1, 4], 'not contained in axis'), + ([1, 2, 2], [1, 4], 'not found in axis'), + ]) +def test_drop_non_empty_list(index, drop_labels, error_key): + # GH 21494 + with tm.assert_raises_regex(KeyError, error_key): + pd.DataFrame(index=index).drop(drop_labels) From 832a50ba5e25c0df775ead29a3a09a97ddd52c33 Mon Sep 17 00:00:00 2001 From: alistair Date: Sun, 17 Jun 2018 20:23:59 +0100 Subject: [PATCH 02/11] Pep8 --- pandas/core/generic.py | 3 ++- pandas/tests/series/indexing/test_alter_index.py | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c8ded4e06572e..da81d42d0e1cc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3151,7 +3151,8 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): # Check if label doesn't exist along axis if len(labels): - labels_missing = (~np.array([label in axis for label in labels])).any() + labels_missing = (~np.array([label in axis + for label in labels])).any() if errors == 'raise' and labels_missing: raise KeyError('{} not found in axis'.format(labels)) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 589c4a29d2cf3..cf332d377ffa5 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -517,19 +517,20 @@ def test_drop(): @pytest.mark.parametrize('index, drop_labels, expected_index', [ - ([1, 2, 3], [], [1, 2, 3]), - ([1, 1, 2], [], [1, 1, 2]), - ([1, 2, 3], [2], [1, 3]), - ([1, 1, 3], [1], [3]), + ([1, 2, 3], [], [1, 2, 3]), + ([1, 1, 2], [], [1, 1, 2]), + ([1, 2, 3], [2], [1, 3]), + ([1, 1, 3], [1], [3]), ]) def test_drop_empty_list(index, drop_labels, expected_index): # GH 21494 df = pd.DataFrame(index=index).drop(drop_labels) assert (df.index.values == expected_index).all() + @pytest.mark.parametrize('index, drop_labels, error_key', [ - ([1, 2, 3], [1, 4], 'not contained in axis'), - ([1, 2, 2], [1, 4], 'not found in axis'), + ([1, 2, 3], [1, 4], 'not contained in axis'), + ([1, 2, 2], [1, 4], 'not found in axis'), ]) def test_drop_non_empty_list(index, drop_labels, error_key): # GH 21494 From bb80dedd9a8003a2b7c44d3d9c69b8bf662b7a24 Mon Sep 17 00:00:00 2001 From: alistair Date: Sun, 17 Jun 2018 20:26:37 +0100 Subject: [PATCH 03/11] Pep8 --- pandas/tests/series/indexing/test_alter_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index cf332d377ffa5..94285aeb12ed4 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -521,7 +521,7 @@ def test_drop(): ([1, 1, 2], [], [1, 1, 2]), ([1, 2, 3], [2], [1, 3]), ([1, 1, 3], [1], [3]), - ]) +]) def test_drop_empty_list(index, drop_labels, expected_index): # GH 21494 df = pd.DataFrame(index=index).drop(drop_labels) @@ -531,7 +531,7 @@ def test_drop_empty_list(index, drop_labels, expected_index): @pytest.mark.parametrize('index, drop_labels, error_key', [ ([1, 2, 3], [1, 4], 'not contained in axis'), ([1, 2, 2], [1, 4], 'not found in axis'), - ]) +]) def test_drop_non_empty_list(index, drop_labels, error_key): # GH 21494 with tm.assert_raises_regex(KeyError, error_key): From a81c74ad49c8c51d6be7f370f6ec0ad79bdd726f Mon Sep 17 00:00:00 2001 From: alistair Date: Mon, 18 Jun 2018 01:08:38 +0100 Subject: [PATCH 04/11] Update per toobaz comments --- pandas/core/generic.py | 9 ++++----- pandas/core/indexes/base.py | 4 ++-- .../tests/series/indexing/test_alter_index.py | 17 +++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index da81d42d0e1cc..2fad6f5acf36c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3146,13 +3146,12 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') indexer = ~axis.get_level_values(level).isin(labels) + if errors == 'raise' and indexer.all(): + raise KeyError('{} not found in axis'.format(labels)) else: indexer = ~axis.isin(labels) - - # Check if label doesn't exist along axis - if len(labels): - labels_missing = (~np.array([label in axis - for label in labels])).any() + # Check if label doesn't exist along axis + labels_missing = (axis.get_indexer_for(labels) == -1).any() if errors == 'raise' and labels_missing: raise KeyError('{} not found in axis'.format(labels)) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7d2ce24bb8dc2..f1cb34251f866 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4341,10 +4341,10 @@ def drop(self, labels, errors='raise'): labels = com._index_labels_to_array(labels, dtype=arr_dtype) indexer = self.get_indexer(labels) mask = indexer == -1 - if mask.any() and len(mask): + if mask.any(): if errors != 'ignore': raise KeyError( - 'labels %s not contained in axis' % labels[mask]) + '{} not found in axis'.format(labels[mask])) indexer = indexer[~mask] return self.delete(indexer) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 94285aeb12ed4..0fe88e87128e3 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -512,24 +512,25 @@ def test_drop(): # GH 16877 s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(KeyError, 'not contained in axis'): + with tm.assert_raises_regex(KeyError, 'not found in axis'): s.drop([False, True]) -@pytest.mark.parametrize('index, drop_labels, expected_index', [ - ([1, 2, 3], [], [1, 2, 3]), - ([1, 1, 2], [], [1, 1, 2]), - ([1, 2, 3], [2], [1, 3]), - ([1, 1, 3], [1], [3]), +@pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], []), + ([1, 1, 2], []), + ([1, 2, 3], [2]), + ([1, 1, 3], [1]), ]) -def test_drop_empty_list(index, drop_labels, expected_index): +def test_drop_empty_list(index, drop_labels): # GH 21494 + expected_index = [i for i in index if i not in drop_labels] df = pd.DataFrame(index=index).drop(drop_labels) assert (df.index.values == expected_index).all() @pytest.mark.parametrize('index, drop_labels, error_key', [ - ([1, 2, 3], [1, 4], 'not contained in axis'), + ([1, 2, 3], [1, 4], 'not found in axis'), ([1, 2, 2], [1, 4], 'not found in axis'), ]) def test_drop_non_empty_list(index, drop_labels, error_key): From 394b384563ce85a5a41c724032e49b105dfe28e9 Mon Sep 17 00:00:00 2001 From: alistair Date: Tue, 19 Jun 2018 00:01:30 +0100 Subject: [PATCH 05/11] Update per comment jreback --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/core/generic.py | 3 ++- pandas/tests/frame/test_indexing.py | 21 +++++++++++++++++++ .../tests/series/indexing/test_alter_index.py | 16 +++++++------- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index b8d865195cddd..c2511766b1b1b 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -55,6 +55,7 @@ Bug Fixes - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) +- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`) - **I/O** diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2fad6f5acf36c..fb77cfd5d5b83 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): """ axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) - axis, axis_ = self._get_axis(axis), axis + axis = self._get_axis(axis) if axis.is_unique: if level is not None: @@ -3146,6 +3146,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') indexer = ~axis.get_level_values(level).isin(labels) + #GH 18561 MultiIndex.drop should raise if label is absent if errors == 'raise' and indexer.all(): raise KeyError('{} not found in axis'.format(labels)) else: diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index be37e696ea0a3..0d70392e5bf2b 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -3515,3 +3515,24 @@ def test_functions_no_warnings(self): with tm.assert_produces_warning(False): df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels) + + @pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], []), + ([1, 1, 2], []), + ([1, 2, 3], [2]), + ([1, 1, 3], [1]), + ]) + def test_drop_empty_list(self, index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + frame = pd.DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) + + @pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], [1, 4]), + ([1, 2, 2], [1, 4]), + ]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH 21494 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 0fe88e87128e3..0b91b3f87a815 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -525,15 +525,15 @@ def test_drop(): def test_drop_empty_list(index, drop_labels): # GH 21494 expected_index = [i for i in index if i not in drop_labels] - df = pd.DataFrame(index=index).drop(drop_labels) - assert (df.index.values == expected_index).all() + series = pd.Series(index=index).drop(drop_labels) + tm.assert_series_equal(series, pd.Series(index=expected_index)) -@pytest.mark.parametrize('index, drop_labels, error_key', [ - ([1, 2, 3], [1, 4], 'not found in axis'), - ([1, 2, 2], [1, 4], 'not found in axis'), +@pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], [1, 4]), + ([1, 2, 2], [1, 4]), ]) -def test_drop_non_empty_list(index, drop_labels, error_key): +def test_drop_non_empty_list(index, drop_labels): # GH 21494 - with tm.assert_raises_regex(KeyError, error_key): - pd.DataFrame(index=index).drop(drop_labels) + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.Series(index=index).drop(drop_labels) From 93bbf0541fbb4709496a09dc70cd23f9cec2f3bc Mon Sep 17 00:00:00 2001 From: alistair Date: Tue, 19 Jun 2018 06:46:22 +0100 Subject: [PATCH 06/11] Pep8 --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fb77cfd5d5b83..de5d0b008e8ea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3146,7 +3146,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') indexer = ~axis.get_level_values(level).isin(labels) - #GH 18561 MultiIndex.drop should raise if label is absent + # GH 18561 MultiIndex.drop should raise if label is absent if errors == 'raise' and indexer.all(): raise KeyError('{} not found in axis'.format(labels)) else: From 1b832d0db13766f7e44573709a8d5066299170a3 Mon Sep 17 00:00:00 2001 From: alistair Date: Tue, 19 Jun 2018 20:24:27 +0100 Subject: [PATCH 07/11] Update per comment jreback --- pandas/core/generic.py | 2 + .../tests/frame/test_axis_select_reindex.py | 21 +++++ pandas/tests/frame/test_indexing.py | 21 ----- pandas/tests/indexes/test_base.py | 73 ++++++++++++++++++ .../tests/series/indexing/test_alter_index.py | 76 ------------------- 5 files changed, 96 insertions(+), 97 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index de5d0b008e8ea..ad9b53c8ec316 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3140,12 +3140,14 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): new_axis = axis.drop(labels, errors=errors) result = self.reindex(**{axis_name: new_axis}) + # Case for non-unique axis else: labels = _ensure_object(com._index_labels_to_array(labels)) if level is not None: if not isinstance(axis, MultiIndex): raise AssertionError('axis must be a MultiIndex') indexer = ~axis.get_level_values(level).isin(labels) + # GH 18561 MultiIndex.drop should raise if label is absent if errors == 'raise' and indexer.all(): raise KeyError('{} not found in axis'.format(labels)) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 28e82f7585850..2f55d66ce2911 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1151,3 +1151,24 @@ def test_raise_on_drop_duplicate_index(self, actual): expected_no_err = actual.T.drop('c', axis=1, level=level, errors='ignore') assert_frame_equal(expected_no_err.T, actual) + + @pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], []), + ([1, 1, 2], []), + ([1, 2, 3], [2]), + ([1, 1, 3], [1]), + ]) + def test_drop_empty_list(self, index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + frame = pd.DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) + + @pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], [1, 4]), + ([1, 2, 2], [1, 4]), + ]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH 21494 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 0d70392e5bf2b..be37e696ea0a3 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -3515,24 +3515,3 @@ def test_functions_no_warnings(self): with tm.assert_produces_warning(False): df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels) - - @pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], []), - ([1, 1, 2], []), - ([1, 2, 3], [2]), - ([1, 1, 3], [1]), - ]) - def test_drop_empty_list(self, index, drop_labels): - # GH 21494 - expected_index = [i for i in index if i not in drop_labels] - frame = pd.DataFrame(index=index).drop(drop_labels) - tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) - - @pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], [1, 4]), - ([1, 2, 2], [1, 4]), - ]) - def test_drop_non_empty_list(self, index, drop_labels): - # GH 21494 - with tm.assert_raises_regex(KeyError, 'not found in axis'): - pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b8bd218ec25ab..b7b120dab7326 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1565,6 +1565,79 @@ def test_drop_tuple(self, values, to_drop): for drop_me in to_drop[1], [to_drop[1]]: pytest.raises(KeyError, removed.drop, drop_me) + def test_drop_unique_and_non_unique_index(self): + # unique + s = Series([1, 2], index=['one', 'two']) + expected = Series([1], index=['one']) + result = s.drop(['two']) + tm.assert_series_equal(result, expected) + result = s.drop('two', axis='rows') + tm.assert_series_equal(result, expected) + + # non-unique + # GH 5248 + s = Series([1, 1, 2], index=['one', 'two', 'one']) + expected = Series([1, 2], index=['one', 'one']) + result = s.drop(['two'], axis=0) + tm.assert_series_equal(result, expected) + result = s.drop('two') + tm.assert_series_equal(result, expected) + + expected = Series([1], index=['two']) + result = s.drop(['one']) + tm.assert_series_equal(result, expected) + result = s.drop('one') + tm.assert_series_equal(result, expected) + + # single string/tuple-like + s = Series(range(3), index=list('abc')) + pytest.raises(KeyError, s.drop, 'bc') + pytest.raises(KeyError, s.drop, ('a',)) + + # errors='ignore' + s = Series(range(3), index=list('abc')) + result = s.drop('bc', errors='ignore') + tm.assert_series_equal(result, s) + result = s.drop(['a', 'd'], errors='ignore') + expected = s.iloc[1:] + tm.assert_series_equal(result, expected) + + # bad axis + pytest.raises(ValueError, s.drop, 'one', axis='columns') + + # GH 8522 + s = Series([2, 3], index=[True, False]) + assert s.index.is_object() + result = s.drop(True) + expected = Series([3], index=[False]) + tm.assert_series_equal(result, expected) + + # GH 16877 + s = Series([2, 3], index=[0, 1]) + with tm.assert_raises_regex(KeyError, 'not found in axis'): + s.drop([False, True]) + + @pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], []), + ([1, 1, 2], []), + ([1, 2, 3], [2]), + ([1, 1, 3], [1]), + ]) + def test_drop_empty_list(self, index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + series = pd.Series(index=index).drop(drop_labels) + tm.assert_series_equal(series, pd.Series(index=expected_index)) + + @pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], [1, 4]), + ([1, 2, 2], [1, 4]), + ]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH 21494 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.Series(index=index).drop(drop_labels) + @pytest.mark.parametrize("method,expected", [ ('intersection', np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], dtype=[('num', int), ('let', 'a1')])), diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 0b91b3f87a815..199918e5088ef 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -461,79 +461,3 @@ def test_rename(): assert_series_equal(result, expected) assert result.name == expected.name - - -def test_drop(): - # unique - s = Series([1, 2], index=['one', 'two']) - expected = Series([1], index=['one']) - result = s.drop(['two']) - assert_series_equal(result, expected) - result = s.drop('two', axis='rows') - assert_series_equal(result, expected) - - # non-unique - # GH 5248 - s = Series([1, 1, 2], index=['one', 'two', 'one']) - expected = Series([1, 2], index=['one', 'one']) - result = s.drop(['two'], axis=0) - assert_series_equal(result, expected) - result = s.drop('two') - assert_series_equal(result, expected) - - expected = Series([1], index=['two']) - result = s.drop(['one']) - assert_series_equal(result, expected) - result = s.drop('one') - assert_series_equal(result, expected) - - # single string/tuple-like - s = Series(range(3), index=list('abc')) - pytest.raises(KeyError, s.drop, 'bc') - pytest.raises(KeyError, s.drop, ('a',)) - - # errors='ignore' - s = Series(range(3), index=list('abc')) - result = s.drop('bc', errors='ignore') - assert_series_equal(result, s) - result = s.drop(['a', 'd'], errors='ignore') - expected = s.iloc[1:] - assert_series_equal(result, expected) - - # bad axis - pytest.raises(ValueError, s.drop, 'one', axis='columns') - - # GH 8522 - s = Series([2, 3], index=[True, False]) - assert s.index.is_object() - result = s.drop(True) - expected = Series([3], index=[False]) - assert_series_equal(result, expected) - - # GH 16877 - s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(KeyError, 'not found in axis'): - s.drop([False, True]) - - -@pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], []), - ([1, 1, 2], []), - ([1, 2, 3], [2]), - ([1, 1, 3], [1]), -]) -def test_drop_empty_list(index, drop_labels): - # GH 21494 - expected_index = [i for i in index if i not in drop_labels] - series = pd.Series(index=index).drop(drop_labels) - tm.assert_series_equal(series, pd.Series(index=expected_index)) - - -@pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], [1, 4]), - ([1, 2, 2], [1, 4]), -]) -def test_drop_non_empty_list(index, drop_labels): - # GH 21494 - with tm.assert_raises_regex(KeyError, 'not found in axis'): - pd.Series(index=index).drop(drop_labels) From 8119f7204a366cdc38da5907de4f7ebaa43e2793 Mon Sep 17 00:00:00 2001 From: alistair Date: Tue, 19 Jun 2018 23:34:01 +0100 Subject: [PATCH 08/11] Update per comment jreback --- pandas/tests/indexes/test_base.py | 73 ---------------- .../tests/series/indexing/test_alter_index.py | 84 +++++++++++++++++++ 2 files changed, 84 insertions(+), 73 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b7b120dab7326..b8bd218ec25ab 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1565,79 +1565,6 @@ def test_drop_tuple(self, values, to_drop): for drop_me in to_drop[1], [to_drop[1]]: pytest.raises(KeyError, removed.drop, drop_me) - def test_drop_unique_and_non_unique_index(self): - # unique - s = Series([1, 2], index=['one', 'two']) - expected = Series([1], index=['one']) - result = s.drop(['two']) - tm.assert_series_equal(result, expected) - result = s.drop('two', axis='rows') - tm.assert_series_equal(result, expected) - - # non-unique - # GH 5248 - s = Series([1, 1, 2], index=['one', 'two', 'one']) - expected = Series([1, 2], index=['one', 'one']) - result = s.drop(['two'], axis=0) - tm.assert_series_equal(result, expected) - result = s.drop('two') - tm.assert_series_equal(result, expected) - - expected = Series([1], index=['two']) - result = s.drop(['one']) - tm.assert_series_equal(result, expected) - result = s.drop('one') - tm.assert_series_equal(result, expected) - - # single string/tuple-like - s = Series(range(3), index=list('abc')) - pytest.raises(KeyError, s.drop, 'bc') - pytest.raises(KeyError, s.drop, ('a',)) - - # errors='ignore' - s = Series(range(3), index=list('abc')) - result = s.drop('bc', errors='ignore') - tm.assert_series_equal(result, s) - result = s.drop(['a', 'd'], errors='ignore') - expected = s.iloc[1:] - tm.assert_series_equal(result, expected) - - # bad axis - pytest.raises(ValueError, s.drop, 'one', axis='columns') - - # GH 8522 - s = Series([2, 3], index=[True, False]) - assert s.index.is_object() - result = s.drop(True) - expected = Series([3], index=[False]) - tm.assert_series_equal(result, expected) - - # GH 16877 - s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(KeyError, 'not found in axis'): - s.drop([False, True]) - - @pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], []), - ([1, 1, 2], []), - ([1, 2, 3], [2]), - ([1, 1, 3], [1]), - ]) - def test_drop_empty_list(self, index, drop_labels): - # GH 21494 - expected_index = [i for i in index if i not in drop_labels] - series = pd.Series(index=index).drop(drop_labels) - tm.assert_series_equal(series, pd.Series(index=expected_index)) - - @pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], [1, 4]), - ([1, 2, 2], [1, 4]), - ]) - def test_drop_non_empty_list(self, index, drop_labels): - # GH 21494 - with tm.assert_raises_regex(KeyError, 'not found in axis'): - pd.Series(index=index).drop(drop_labels) - @pytest.mark.parametrize("method,expected", [ ('intersection', np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], dtype=[('num', int), ('let', 'a1')])), diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 199918e5088ef..6ecb695af1327 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -461,3 +461,87 @@ def test_rename(): assert_series_equal(result, expected) assert result.name == expected.name + + +@pytest.mark.parametrize('data, index, drop_labels,' + ' axis, expected_data, expected_index', + [([1, 2], ['one', 'two'], ['two'], + 0, [1], ['one']), + # Unique Index + ([1, 2], ['one', 'two'], ['two'], + 'rows', [1], ['one']), + ([1, 1, 2], ['one', 'two', 'one'], ['two'], + 0, [1, 2], ['one', 'one']), + # GH 5248 Non-Unique Index + ([1, 1, 2], ['one', 'two', 'one'], 'two', + 0, [1, 2], ['one', 'one']), + ([1, 1, 2], ['one', 'two', 'one'], ['one'], + 0, [1], ['two']), + ([1, 1, 2], ['one', 'two', 'one'], 'one', + 0, [1], ['two'])]) +def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, + expected_data, expected_index): + + s = Series(data=data, index=index) + result = s.drop(drop_labels, axis=axis) + expected = Series(data=expected_data, index=expected_index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('data, index, drop_labels,' + ' axis, error_type, error_desc', + [(range(3), list('abc'), 'bc', + 0, KeyError, 'not found in axis'), + # single string/tuple-like + (range(3), list('abc'), ('a',), + 0, KeyError, 'not found in axis'), + (range(3), list('abc'), 'one', + 'columns', ValueError, 'No axis named columns') + # bad axis + ]) +def test_drop_exception_raised(data, index, drop_labels, + axis, error_type, error_desc): + + with tm.assert_raises_regex(error_type, error_desc): + Series(data, index=index).drop(drop_labels, axis=axis) + + +def test_drop_with_ignore_errors(): + # errors='ignore' + s = Series(range(3), index=list('abc')) + result = s.drop('bc', errors='ignore') + tm.assert_series_equal(result, s) + result = s.drop(['a', 'd'], errors='ignore') + expected = s.iloc[1:] + tm.assert_series_equal(result, expected) + + # GH 8522 + s = Series([2, 3], index=[True, False]) + assert s.index.is_object() + result = s.drop(True) + expected = Series([3], index=[False]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('index, drop_labels', [ + ([1, 2, 3], []), + ([1, 1, 2], []), + ([1, 2, 3], [2]), + ([1, 1, 3], [1]), +]) +def test_drop_empty_list(index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + series = pd.Series(index=index).drop(drop_labels) + tm.assert_series_equal(series, pd.Series(index=expected_index)) + + +@pytest.mark.parametrize('data, index, drop_labels', [ + (None, [1, 2, 3], [1, 4]), + (None, [1, 2, 2], [1, 4]), + ([2, 3], [0, 1], [False, True]) +]) +def test_drop_non_empty_list(data, index, drop_labels): + # GH 21494 and GH 16877 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.Series(data=data, index=index).drop(drop_labels) From 01f6a9c9bbb7bb6db75ca11d4b99fb6aa7ee081b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 20 Jun 2018 06:19:35 -0400 Subject: [PATCH 09/11] clean tests --- .../tests/series/indexing/test_alter_index.py | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 19c3853fe4a46..52ad85aa7cc0b 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -472,22 +472,25 @@ def test_rename(): assert result.name == expected.name -@pytest.mark.parametrize('data, index, drop_labels,' - ' axis, expected_data, expected_index', - [([1, 2], ['one', 'two'], ['two'], - 0, [1], ['one']), - # Unique Index - ([1, 2], ['one', 'two'], ['two'], - 'rows', [1], ['one']), - ([1, 1, 2], ['one', 'two', 'one'], ['two'], - 0, [1, 2], ['one', 'one']), - # GH 5248 Non-Unique Index - ([1, 1, 2], ['one', 'two', 'one'], 'two', - 0, [1, 2], ['one', 'one']), - ([1, 1, 2], ['one', 'two', 'one'], ['one'], - 0, [1], ['two']), - ([1, 1, 2], ['one', 'two', 'one'], 'one', - 0, [1], ['two'])]) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, expected_data, expected_index', + [ + # Unique Index + ([1, 2], ['one', 'two'], ['two'], + 0, [1], ['one']), + ([1, 2], ['one', 'two'], ['two'], + 'rows', [1], ['one']), + ([1, 1, 2], ['one', 'two', 'one'], ['two'], + 0, [1, 2], ['one', 'one']), + + # GH 5248 Non-Unique Index + ([1, 1, 2], ['one', 'two', 'one'], 'two', + 0, [1, 2], ['one', 'one']), + ([1, 1, 2], ['one', 'two', 'one'], ['one'], + 0, [1], ['two']), + ([1, 1, 2], ['one', 'two', 'one'], 'one', + 0, [1], ['two'])]) def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, expected_data, expected_index): @@ -497,17 +500,19 @@ def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, tm.assert_series_equal(result, expected) -@pytest.mark.parametrize('data, index, drop_labels,' - ' axis, error_type, error_desc', - [(range(3), list('abc'), 'bc', - 0, KeyError, 'not found in axis'), - # single string/tuple-like - (range(3), list('abc'), ('a',), - 0, KeyError, 'not found in axis'), - (range(3), list('abc'), 'one', - 'columns', ValueError, 'No axis named columns') - # bad axis - ]) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, error_type, error_desc', + [ + # single string/tuple-like + (range(3), list('abc'), 'bc', + 0, KeyError, 'not found in axis'), + + # bad axis + (range(3), list('abc'), ('a',), + 0, KeyError, 'not found in axis'), + (range(3), list('abc'), 'one', + 'columns', ValueError, 'No axis named columns')]) def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc): From 13b36c2b639c1abaf36e047109656e0a20a6b0f2 Mon Sep 17 00:00:00 2001 From: alistair Date: Wed, 20 Jun 2018 23:03:54 +0100 Subject: [PATCH 10/11] Parameterize test cases --- pandas/tests/frame/test_axis_select_reindex.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 2f55d66ce2911..b759ce1158285 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1152,22 +1152,16 @@ def test_raise_on_drop_duplicate_index(self, actual): errors='ignore') assert_frame_equal(expected_no_err.T, actual) - @pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], []), - ([1, 1, 2], []), - ([1, 2, 3], [2]), - ([1, 1, 3], [1]), - ]) + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]]) + @pytest.mark.parametrize('drop_labels', [[], [1], [2]]) def test_drop_empty_list(self, index, drop_labels): # GH 21494 expected_index = [i for i in index if i not in drop_labels] frame = pd.DataFrame(index=index).drop(drop_labels) tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) - @pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], [1, 4]), - ([1, 2, 2], [1, 4]), - ]) + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]]) + @pytest.mark.parametrize('drop_labels', [[1, 4]]) def test_drop_non_empty_list(self, index, drop_labels): # GH 21494 with tm.assert_raises_regex(KeyError, 'not found in axis'): From 1ffc0d48e17c4ddfb5818494d07db46a50809880 Mon Sep 17 00:00:00 2001 From: alistair Date: Thu, 21 Jun 2018 01:37:17 +0100 Subject: [PATCH 11/11] Parameterize test cases --- pandas/tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/series/indexing/test_alter_index.py | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index b759ce1158285..0e0d6598f5101 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1161,7 +1161,7 @@ def test_drop_empty_list(self, index, drop_labels): tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) @pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]]) - @pytest.mark.parametrize('drop_labels', [[1, 4]]) + @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]]) def test_drop_non_empty_list(self, index, drop_labels): # GH 21494 with tm.assert_raises_regex(KeyError, 'not found in axis'): diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 52ad85aa7cc0b..561d6a9b42508 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -537,12 +537,8 @@ def test_drop_with_ignore_errors(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize('index, drop_labels', [ - ([1, 2, 3], []), - ([1, 1, 2], []), - ([1, 2, 3], [2]), - ([1, 1, 3], [1]), -]) +@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]]) +@pytest.mark.parametrize('drop_labels', [[], [1], [3]]) def test_drop_empty_list(index, drop_labels): # GH 21494 expected_index = [i for i in index if i not in drop_labels]