pandas-dev · jreback · Mar 15, 2020 · Mar 6, 2020 · Mar 12, 2020 · Mar 12, 2020
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -425,6 +425,15 @@ def nselect_method(request):
     return request.param
 
 
+@pytest.fixture(params=["first", "last", False])
+def keep(request):
+    """
+    Valid values for the 'keep' parameter used in
+    .duplicated or .drop_duplicates
+    """
+    return request.param
+
+
 @pytest.fixture(params=["left", "right", "both", "neither"])
 def closed(request):
     """

diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
@@ -594,108 +594,6 @@ def test_factorize_repeated(self):
                 expected = o[5:10].append(o[:5])
                 tm.assert_index_equal(uniques, expected, check_names=False)
 
-    def test_duplicated_drop_duplicates_index(self):
-        # GH 4060
-        for original in self.objs:
-            if isinstance(original, Index):
-
-                # special case
-                if original.is_boolean():
-                    result = original.drop_duplicates()
-                    expected = Index([False, True], name="a")
-                    tm.assert_index_equal(result, expected)
-                    continue
-
-                # original doesn't have duplicates
-                expected = np.array([False] * len(original), dtype=bool)
-                duplicated = original.duplicated()
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                result = original.drop_duplicates()
-                tm.assert_index_equal(result, original)
-                assert result is not original
-
-                # has_duplicates
-                assert not original.has_duplicates
-
-                # create repeated values, 3rd and 5th values are duplicated
-                idx = original[list(range(len(original))) + [5, 3]]
-                expected = np.array([False] * len(original) + [True, True], dtype=bool)
-                duplicated = idx.duplicated()
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                tm.assert_index_equal(idx.drop_duplicates(), original)
-
-                base = [False] * len(idx)
-                base[3] = True
-                base[5] = True
-                expected = np.array(base)
-
-                duplicated = idx.duplicated(keep="last")
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                result = idx.drop_duplicates(keep="last")
-                tm.assert_index_equal(result, idx[~expected])
-
-                base = [False] * len(original) + [True, True]
-                base[3] = True
-                base[5] = True
-                expected = np.array(base)
-
-                duplicated = idx.duplicated(keep=False)
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                result = idx.drop_duplicates(keep=False)
-                tm.assert_index_equal(result, idx[~expected])
-
-                with pytest.raises(
-                    TypeError,
-                    match=r"drop_duplicates\(\) got an unexpected keyword argument",
-                ):
-                    idx.drop_duplicates(inplace=True)
-
-            else:
-                expected = Series(
-                    [False] * len(original), index=original.index, name="a"
-                )
-                tm.assert_series_equal(original.duplicated(), expected)
-                result = original.drop_duplicates()
-                tm.assert_series_equal(result, original)
-                assert result is not original
-
-                idx = original.index[list(range(len(original))) + [5, 3]]
-                values = original._values[list(range(len(original))) + [5, 3]]
-                s = Series(values, index=idx, name="a")
-
-                expected = Series(
-                    [False] * len(original) + [True, True], index=idx, name="a"
-                )
-                tm.assert_series_equal(s.duplicated(), expected)
-                tm.assert_series_equal(s.drop_duplicates(), original)
-
-                base = [False] * len(idx)
-                base[3] = True
-                base[5] = True
-                expected = Series(base, index=idx, name="a")
-
-                tm.assert_series_equal(s.duplicated(keep="last"), expected)
-                tm.assert_series_equal(
-                    s.drop_duplicates(keep="last"), s[~np.array(base)]
-                )
-
-                base = [False] * len(original) + [True, True]
-                base[3] = True
-                base[5] = True
-                expected = Series(base, index=idx, name="a")
-
-                tm.assert_series_equal(s.duplicated(keep=False), expected)
-                tm.assert_series_equal(
-                    s.drop_duplicates(keep=False), s[~np.array(base)]
-                )
-
-                s.drop_duplicates(inplace=True)
-                tm.assert_series_equal(s, original)
-
     def test_drop_duplicates_series_vs_dataframe(self):
         # GH 14192
         df = pd.DataFrame(

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
@@ -302,32 +302,65 @@ def test_pickle(self, indices):
         assert indices.equals(unpickled)
         indices.name = original_name
 
-    @pytest.mark.parametrize("keep", ["first", "last", False])
-    def test_duplicated(self, indices, keep):
-        if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
-            # MultiIndex tested separately in:
-            # tests/indexes/multi/test_unique_and_duplicates
-            pytest.skip("Skip check for empty Index, MultiIndex, RangeIndex")
-
+    def test_drop_duplicates(self, indices, keep):
+        if isinstance(indices, MultiIndex):
+            pytest.skip("MultiIndex is tested separately")
+        if isinstance(indices, RangeIndex):
+            pytest.skip(
+                "RangeIndex is tested in test_drop_duplicates_no_duplicates"
+                " as it cannot hold duplicates"
+            )
+        if len(indices) == 0:
+            pytest.skip(
+                "empty index is tested in test_drop_duplicates_no_duplicates"
+                " as it cannot hold duplicates"
+            )
+
+        # make unique index
         holder = type(indices)
+        unique_values = list(set(indices))
+        unique_idx = holder(unique_values)
+
+        # make duplicated index
+        n = len(unique_idx)
+        duplicated_selection = np.random.choice(n, int(n * 1.5))
+        idx = holder(unique_idx.values[duplicated_selection])
+
+        # Series.duplicated is tested separately
+        expected_duplicated = (
+            pd.Series(duplicated_selection).duplicated(keep=keep).values
+        )
+        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)
+
+        # Series.drop_duplicates is tested separately
+        expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
+        tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)
+
+    def test_drop_duplicates_no_duplicates(self, indices):
+        if isinstance(indices, MultiIndex):
+            pytest.skip("MultiIndex is tested separately")
 
-        idx = holder(indices)
-        if idx.has_duplicates:
-            # We are testing the duplicated-method here, so we need to know
-            # exactly which indices are duplicate and how (for the result).
-            # This is not possible if "idx" has duplicates already, which we
-            # therefore remove. This is seemingly circular, as drop_duplicates
-            # invokes duplicated, but in the end, it all works out because we
-            # cross-check with Series.duplicated, which is tested separately.
-            idx = idx.drop_duplicates()
-
-        n, k = len(idx), 10
-        duplicated_selection = np.random.choice(n, k * n)
-        expected = pd.Series(duplicated_selection).duplicated(keep=keep).values
-        idx = holder(idx.values[duplicated_selection])
-
-        result = idx.duplicated(keep=keep)
-        tm.assert_numpy_array_equal(result, expected)
+        # make unique index
+        if isinstance(indices, RangeIndex):
+            # RangeIndex cannot have duplicates
+            unique_idx = indices
+        else:
+            holder = type(indices)
+            unique_values = list(set(indices))
+            unique_idx = holder(unique_values)
+
+        # check on unique index
+        expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
+        tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
+        result_dropped = unique_idx.drop_duplicates()
+        tm.assert_index_equal(result_dropped, unique_idx)
+        # validate shallow copy
+        assert result_dropped is not unique_idx
+
+    def test_drop_duplicates_inplace(self, indices):
+        msg = r"drop_duplicates\(\) got an unexpected keyword argument"
+        with pytest.raises(TypeError, match=msg):
+            indices.drop_duplicates(inplace=True)
 
     def test_has_duplicates(self, indices):
         holder = type(indices)

diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py
@@ -44,6 +44,26 @@ def test_drop_duplicates_bool(keep, expected):
     tm.assert_series_equal(sc, tc[~expected])
 
 
+@pytest.mark.parametrize("values", [[], list(range(5))])
+def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
+    tc = Series(values, dtype=np.dtype(any_numpy_dtype))
+    expected = Series([False] * len(tc), dtype="bool")
+
+    if tc.dtype == "bool":
+        # 0 -> False and 1-> True
+        # any other value would be duplicated
+        tc = tc[:2]
+        expected = expected[:2]
+
+    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
+
+    result_dropped = tc.drop_duplicates(keep=keep)
+    tm.assert_series_equal(result_dropped, tc)
+
+    # validate shallow copy
+    assert result_dropped is not tc
+
+
 class TestSeriesDropDuplicates:
     @pytest.mark.parametrize(
         "dtype",