From ca11b9fab740ce558b2a91b864cc2fa9e16a107b Mon Sep 17 00:00:00 2001
From: Martin Winkel <martin.winkel.pps@gmail.com>
Date: Fri, 6 Mar 2020 11:13:49 +0100
Subject: [PATCH 1/7] remove redundant duplicated test from
 tests/base/test_ops.py

---
 pandas/tests/base/test_ops.py | 102 ----------------------------------
 1 file changed, 102 deletions(-)

diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
index 8f48d0a3e8378..dc7f85de15de3 100644
--- a/pandas/tests/base/test_ops.py
+++ b/pandas/tests/base/test_ops.py
@@ -594,108 +594,6 @@ def test_factorize_repeated(self):
                 expected = o[5:10].append(o[:5])
                 tm.assert_index_equal(uniques, expected, check_names=False)
 
-    def test_duplicated_drop_duplicates_index(self):
-        # GH 4060
-        for original in self.objs:
-            if isinstance(original, Index):
-
-                # special case
-                if original.is_boolean():
-                    result = original.drop_duplicates()
-                    expected = Index([False, True], name="a")
-                    tm.assert_index_equal(result, expected)
-                    continue
-
-                # original doesn't have duplicates
-                expected = np.array([False] * len(original), dtype=bool)
-                duplicated = original.duplicated()
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                result = original.drop_duplicates()
-                tm.assert_index_equal(result, original)
-                assert result is not original
-
-                # has_duplicates
-                assert not original.has_duplicates
-
-                # create repeated values, 3rd and 5th values are duplicated
-                idx = original[list(range(len(original))) + [5, 3]]
-                expected = np.array([False] * len(original) + [True, True], dtype=bool)
-                duplicated = idx.duplicated()
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                tm.assert_index_equal(idx.drop_duplicates(), original)
-
-                base = [False] * len(idx)
-                base[3] = True
-                base[5] = True
-                expected = np.array(base)
-
-                duplicated = idx.duplicated(keep="last")
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                result = idx.drop_duplicates(keep="last")
-                tm.assert_index_equal(result, idx[~expected])
-
-                base = [False] * len(original) + [True, True]
-                base[3] = True
-                base[5] = True
-                expected = np.array(base)
-
-                duplicated = idx.duplicated(keep=False)
-                tm.assert_numpy_array_equal(duplicated, expected)
-                assert duplicated.dtype == bool
-                result = idx.drop_duplicates(keep=False)
-                tm.assert_index_equal(result, idx[~expected])
-
-                with pytest.raises(
-                    TypeError,
-                    match=r"drop_duplicates\(\) got an unexpected keyword argument",
-                ):
-                    idx.drop_duplicates(inplace=True)
-
-            else:
-                expected = Series(
-                    [False] * len(original), index=original.index, name="a"
-                )
-                tm.assert_series_equal(original.duplicated(), expected)
-                result = original.drop_duplicates()
-                tm.assert_series_equal(result, original)
-                assert result is not original
-
-                idx = original.index[list(range(len(original))) + [5, 3]]
-                values = original._values[list(range(len(original))) + [5, 3]]
-                s = Series(values, index=idx, name="a")
-
-                expected = Series(
-                    [False] * len(original) + [True, True], index=idx, name="a"
-                )
-                tm.assert_series_equal(s.duplicated(), expected)
-                tm.assert_series_equal(s.drop_duplicates(), original)
-
-                base = [False] * len(idx)
-                base[3] = True
-                base[5] = True
-                expected = Series(base, index=idx, name="a")
-
-                tm.assert_series_equal(s.duplicated(keep="last"), expected)
-                tm.assert_series_equal(
-                    s.drop_duplicates(keep="last"), s[~np.array(base)]
-                )
-
-                base = [False] * len(original) + [True, True]
-                base[3] = True
-                base[5] = True
-                expected = Series(base, index=idx, name="a")
-
-                tm.assert_series_equal(s.duplicated(keep=False), expected)
-                tm.assert_series_equal(
-                    s.drop_duplicates(keep=False), s[~np.array(base)]
-                )
-
-                s.drop_duplicates(inplace=True)
-                tm.assert_series_equal(s, original)
-
     def test_drop_duplicates_series_vs_dataframe(self):
         # GH 14192
         df = pd.DataFrame(

From 0dd7da21af73c7ffd4adf0edad80c1a8b538ba62 Mon Sep 17 00:00:00 2001
From: Martin Winkel <martin.winkel.pps@gmail.com>
Date: Thu, 12 Mar 2020 17:31:07 +0100
Subject: [PATCH 2/7] added new test case for Series.duplicated and
 Series.drop_duplicates on a Series without duplicated values

---
 .../series/methods/test_drop_duplicates.py    | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py
index 2d052505d5ecc..f1fdb2abdd578 100644
--- a/pandas/tests/series/methods/test_drop_duplicates.py
+++ b/pandas/tests/series/methods/test_drop_duplicates.py
@@ -44,6 +44,27 @@ def test_drop_duplicates_bool(keep, expected):
     tm.assert_series_equal(sc, tc[~expected])
 
 
+@pytest.mark.parametrize("keep", ["first", "last", False])
+@pytest.mark.parametrize("values", [[], list(range(5))])
+def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
+    tc = Series(values, dtype=np.dtype(any_numpy_dtype))
+    expected = Series([False] * len(tc), dtype="bool")
+
+    if tc.dtype == "bool":
+        # for bool dtype, 0 -> False and 1 -> True; every other value casts to True
+        # and would be a duplicate, so only the first two values are kept
+        tc = tc[:2]
+        expected = expected[:2]
+
+    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
+
+    result_dropped = tc.drop_duplicates(keep=keep)
+    tm.assert_series_equal(result_dropped, tc)
+
+    # validate shallow copy: result must be a new object, not the original
+    assert result_dropped is not tc
+
+
 class TestSeriesDropDuplicates:
     @pytest.mark.parametrize(
         "dtype",

From f286bc75314b06c5b4f0d61c800f2d7f673cab19 Mon Sep 17 00:00:00 2001
From: Martin Winkel <martin.winkel.pps@gmail.com>
Date: Thu, 12 Mar 2020 18:08:05 +0100
Subject: [PATCH 3/7] testing duplicated for empty indices as well

---
 pandas/tests/indexes/test_common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index c6ba5c9d61e9e..be071c55af828 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -304,10 +304,10 @@ def test_pickle(self, indices):
 
     @pytest.mark.parametrize("keep", ["first", "last", False])
     def test_duplicated(self, indices, keep):
-        if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):
+        if isinstance(indices, (MultiIndex, RangeIndex)):
             # MultiIndex tested separately in:
             # tests/indexes/multi/test_unique_and_duplicates
-            pytest.skip("Skip check for empty Index, MultiIndex, RangeIndex")
+            pytest.skip("Skip check for MultiIndex, RangeIndex")
 
         holder = type(indices)
 

From b5e599cac5974dc76b929123197195bfd3e59fcb Mon Sep 17 00:00:00 2001
From: Martin Winkel <martin.winkel.pps@gmail.com>
Date: Thu, 12 Mar 2020 18:55:03 +0100
Subject: [PATCH 4/7] extended/enhanced tests for Index.duplicated and
 Index.drop_duplicates in tests/indexes/test_common.py

---
 pandas/tests/indexes/test_common.py | 46 ++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index be071c55af828..867a7c905e325 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -303,31 +303,41 @@ def test_pickle(self, indices):
         indices.name = original_name
 
     @pytest.mark.parametrize("keep", ["first", "last", False])
-    def test_duplicated(self, indices, keep):
+    def test_duplicated_and_drop_duplicates(self, indices, keep):
         if isinstance(indices, (MultiIndex, RangeIndex)):
             # MultiIndex tested separately in:
             # tests/indexes/multi/test_unique_and_duplicates
             pytest.skip("Skip check for MultiIndex, RangeIndex")
 
+        # make unique index
         holder = type(indices)
+        unique_values = list(set(indices))
+        unique_idx = holder(unique_values)
 
-        idx = holder(indices)
-        if idx.has_duplicates:
-            # We are testing the duplicated-method here, so we need to know
-            # exactly which indices are duplicate and how (for the result).
-            # This is not possible if "idx" has duplicates already, which we
-            # therefore remove. This is seemingly circular, as drop_duplicates
-            # invokes duplicated, but in the end, it all works out because we
-            # cross-check with Series.duplicated, which is tested separately.
-            idx = idx.drop_duplicates()
-
-        n, k = len(idx), 10
-        duplicated_selection = np.random.choice(n, k * n)
-        expected = pd.Series(duplicated_selection).duplicated(keep=keep).values
-        idx = holder(idx.values[duplicated_selection])
-
-        result = idx.duplicated(keep=keep)
-        tm.assert_numpy_array_equal(result, expected)
+        # check on unique index
+        expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
+        tm.assert_numpy_array_equal(
+            unique_idx.duplicated(keep=keep), expected_duplicated
+        )
+        result_dropped = unique_idx.drop_duplicates(keep=keep)
+        tm.assert_index_equal(result_dropped, unique_idx)
+        # validate shallow copy
+        assert result_dropped is not unique_idx
+
+        # make duplicated index
+        n = len(unique_idx)
+        duplicated_selection = np.random.choice(n, int(n * 1.5))
+        idx = holder(unique_idx.values[duplicated_selection])
+
+        # Series.duplicated is tested separately
+        expected_duplicated = (
+            pd.Series(duplicated_selection).duplicated(keep=keep).values
+        )
+        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)
+
+        # Series.drop_duplicates is tested separately
+        expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
+        tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)
 
     def test_has_duplicates(self, indices):
         holder = type(indices)

From 0329b57cafc5cfbe5381912923b37819c71dba8f Mon Sep 17 00:00:00 2001
From: Martin Winkel <martin.winkel.pps@gmail.com>
Date: Thu, 12 Mar 2020 19:07:54 +0100
Subject: [PATCH 5/7] split Index drop_duplicates test into separate cases

---
 pandas/tests/indexes/test_common.py | 49 ++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index 867a7c905e325..bb7363dfa50b3 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -303,27 +303,20 @@ def test_pickle(self, indices):
         indices.name = original_name
 
     @pytest.mark.parametrize("keep", ["first", "last", False])
-    def test_duplicated_and_drop_duplicates(self, indices, keep):
-        if isinstance(indices, (MultiIndex, RangeIndex)):
-            # MultiIndex tested separately in:
-            # tests/indexes/multi/test_unique_and_duplicates
-            pytest.skip("Skip check for MultiIndex, RangeIndex")
+    def test_drop_duplicates(self, indices, keep):
+        if isinstance(indices, MultiIndex):
+            pytest.skip("MultiIndex is tested separately")
+        if isinstance(indices, RangeIndex):
+            pytest.skip(
+                "RangeIndex is tested in test_drop_duplicates_no_duplicates"
+                " as it cannot hold duplicates"
+            )
 
         # make unique index
         holder = type(indices)
         unique_values = list(set(indices))
         unique_idx = holder(unique_values)
 
-        # check on unique index
-        expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
-        tm.assert_numpy_array_equal(
-            unique_idx.duplicated(keep=keep), expected_duplicated
-        )
-        result_dropped = unique_idx.drop_duplicates(keep=keep)
-        tm.assert_index_equal(result_dropped, unique_idx)
-        # validate shallow copy
-        assert result_dropped is not unique_idx
-
         # make duplicated index
         n = len(unique_idx)
         duplicated_selection = np.random.choice(n, int(n * 1.5))
@@ -339,6 +332,32 @@ def test_duplicated_and_drop_duplicates(self, indices, keep):
         expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
         tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)
 
+    def test_drop_duplicates_no_duplicates(self, indices):
+        if isinstance(indices, MultiIndex):
+            pytest.skip("MultiIndex is tested separately")
+
+        # make unique index
+        if isinstance(indices, RangeIndex):
+            # RangeIndex cannot have duplicates
+            unique_idx = indices
+        else:
+            holder = type(indices)
+            unique_values = list(set(indices))
+            unique_idx = holder(unique_values)
+
+        # check on unique index
+        expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
+        tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
+        result_dropped = unique_idx.drop_duplicates()
+        tm.assert_index_equal(result_dropped, unique_idx)
+        # validate shallow copy: result must be a new object, not the original
+        assert result_dropped is not unique_idx
+
+    def test_drop_duplicates_inplace(self, indices):
+        msg = r"drop_duplicates\(\) got an unexpected keyword argument"
+        with pytest.raises(TypeError, match=msg):
+            indices.drop_duplicates(inplace=True)
+
     def test_has_duplicates(self, indices):
         holder = type(indices)
         if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)):

From 29c02fcbc78d9c9eb7366e3de9b0b33d67f9cc0b Mon Sep 17 00:00:00 2001
From: Martin Winkel <martin.winkel.pps@gmail.com>
Date: Thu, 12 Mar 2020 19:12:51 +0100
Subject: [PATCH 6/7] added keep fixture

---
 pandas/conftest.py                                  | 9 +++++++++
 pandas/tests/indexes/test_common.py                 | 1 -
 pandas/tests/series/methods/test_drop_duplicates.py | 1 -
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index dcfc523315c8b..d8f96021cdb15 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -425,6 +425,15 @@ def nselect_method(request):
     return request.param
 
 
+@pytest.fixture(params=["first", "last", False])
+def keep(request):
+    """
+    Valid values for the 'keep' parameter used in
+    .duplicated or .drop_duplicates
+    """
+    return request.param
+
+
 @pytest.fixture(params=["left", "right", "both", "neither"])
 def closed(request):
     """
diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index bb7363dfa50b3..422558199ae60 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -302,7 +302,6 @@ def test_pickle(self, indices):
         assert indices.equals(unpickled)
         indices.name = original_name
 
-    @pytest.mark.parametrize("keep", ["first", "last", False])
     def test_drop_duplicates(self, indices, keep):
         if isinstance(indices, MultiIndex):
             pytest.skip("MultiIndex is tested separately")
diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py
index f1fdb2abdd578..54f32f979232d 100644
--- a/pandas/tests/series/methods/test_drop_duplicates.py
+++ b/pandas/tests/series/methods/test_drop_duplicates.py
@@ -44,7 +44,6 @@ def test_drop_duplicates_bool(keep, expected):
     tm.assert_series_equal(sc, tc[~expected])
 
 
-@pytest.mark.parametrize("keep", ["first", "last", False])
 @pytest.mark.parametrize("values", [[], list(range(5))])
 def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
     tc = Series(values, dtype=np.dtype(any_numpy_dtype))

From 57afc6bba902ff8c04b6fe9018dbc03be413cafc Mon Sep 17 00:00:00 2001
From: Martin Winkel <martin.winkel.pps@gmail.com>
Date: Fri, 13 Mar 2020 16:55:31 +0100
Subject: [PATCH 7/7] fix broken tests by skipping empty indices

---
 pandas/tests/indexes/test_common.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index 422558199ae60..6f0920c11a6e6 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -310,6 +310,11 @@ def test_drop_duplicates(self, indices, keep):
                 "RangeIndex is tested in test_drop_duplicates_no_duplicates"
                 " as it cannot hold duplicates"
             )
+        if len(indices) == 0:
+            pytest.skip(
+                "empty index is tested in test_drop_duplicates_no_duplicates"
+                " as it cannot hold duplicates"
+            )
 
         # make unique index
         holder = type(indices)