pandas-dev · jreback · Apr 6, 2020 · Mar 9, 2020 · Mar 10, 2020 · Mar 10, 2020
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
@@ -8,7 +8,7 @@
 from pandas.core.dtypes.dtypes import CategoricalDtype
 
 import pandas as pd
-from pandas import Categorical, IntervalIndex
+from pandas import Categorical, IntervalIndex, Series
 import pandas._testing as tm
 from pandas.core.indexes.api import CategoricalIndex, Index
 
@@ -353,16 +353,78 @@ def test_is_monotonic(self, data, non_lexsorted_data):
         assert c.is_monotonic_decreasing is False
 
     def test_has_duplicates(self):
-
         idx = CategoricalIndex([0, 0, 0], name="foo")
         assert idx.is_unique is False
         assert idx.has_duplicates is True
 
+        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
+        assert idx.is_unique is False
+        assert idx.has_duplicates is True
+
+        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
+        assert idx.is_unique is True
+        assert idx.has_duplicates is False
+
+    def _test_drop_duplicates(self, idx, keep, expected, index):
+        for k, e, i in zip(keep, expected, index):
+            tm.assert_numpy_array_equal(idx.duplicated(keep=k), e)
+            e = idx[~e]
+
+            result = idx.drop_duplicates(keep=k)
+            tm.assert_index_equal(result, e)
+
+            result = Series(idx).drop_duplicates(keep=k)
+            tm.assert_series_equal(result, Series(e, i))
+
     def test_drop_duplicates(self):
+        keep = ["first", "last", False]
+
+        categories = [[1, 2, 3], list("abc")]
+        expected = [
+            np.array([False, True, True]),
+            np.array([True, True, False]),
+            np.array([True, True, True]),
+        ]
+        index = [[0], [2], np.empty(shape=(0), dtype=int)]
+        for c in categories:
+            idx = pd.CategoricalIndex([1, 1, 1], categories=c, name="foo")
+            self._test_drop_duplicates(idx, keep, expected, index)
+
+        categories = ["a", "b", "c"]
+        idx = CategoricalIndex([2, "a", "b"], categories=categories, name="foo")
+        expected = np.zeros(shape=(3, 3), dtype=np.bool)
+        index = [[0, 1, 2], [0, 1, 2], [0, 1, 2]]
+        self._test_drop_duplicates(idx, keep, expected, index)
+
+        idx = CategoricalIndex(list("abb"), categories=categories, name="foo")
+        expected = [
+            np.array([False, False, True]),
+            np.array([False, True, False]),
+            np.array([False, True, True]),
+        ]
+        index = [[0, 1], [0, 2], [0]]
+        self._test_drop_duplicates(idx, keep, expected, index)
+
+    def test_unique(self):
+
+        categories = [1, 2, 3]
+        idx = CategoricalIndex([1, 1, 1], categories=categories)
+        expected = CategoricalIndex([1], categories=[1])
+        tm.assert_index_equal(idx.unique(), expected)
 
-        idx = CategoricalIndex([0, 0, 0], name="foo")
-        expected = CategoricalIndex([0], name="foo")
-        tm.assert_index_equal(idx.drop_duplicates(), expected)
+        categories = list("abc")
+        idx = CategoricalIndex([1, 1, 1], categories=categories)
+        expected = CategoricalIndex([np.nan], categories=[])
+        tm.assert_index_equal(idx.unique(), expected)
+
+        categories = [1, 2, 3]
+        idx = CategoricalIndex([1, 2, "a"], categories=categories)
+        expected = CategoricalIndex([1, 2, np.nan], categories=[1, 2])
+        tm.assert_index_equal(idx.unique(), expected)
+
+        categories = list("abc")
+        idx = CategoricalIndex([2, "a", "b"], categories=categories)
+        expected = CategoricalIndex([np.nan, "a", "b"], categories=["a", "b"])
         tm.assert_index_equal(idx.unique(), expected)
 
     def test_repr_roundtrip(self):

diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
@@ -259,9 +259,12 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture)
         tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
         assert ordered.freq is None
 
-    def test_drop_duplicates_metadata(self):
+    @pytest.mark.parametrize(
+        "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
+    )
+    def test_drop_duplicates_metadata(self, freq):
         # GH 10115
-        idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
+        idx = pd.date_range("2011-01-01", freq=freq, periods=10, name="idx")
         result = idx.drop_duplicates()
         tm.assert_index_equal(idx, result)
         assert idx.freq == result.freq
@@ -272,26 +275,34 @@ def test_drop_duplicates_metadata(self):
         tm.assert_index_equal(idx, result)
         assert result.freq is None
 
-    def test_drop_duplicates(self):
+    @pytest.mark.parametrize(
+        "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
+    )
+    @pytest.mark.parametrize(
+        "keep, expected, index",
+        [
+            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
+            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
+            (
+                False,
+                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
+                np.arange(5, 10),
+            ),
+        ],
+    )
+    def test_drop_duplicates(self, freq, keep, expected, index):
         # to check Index/Series compat
-        base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
-        idx = base.append(base[:5])
-
-        res = idx.drop_duplicates()
-        tm.assert_index_equal(res, base)
-        res = Series(idx).drop_duplicates()
-        tm.assert_series_equal(res, Series(base))
-
-        res = idx.drop_duplicates(keep="last")
-        exp = base[5:].append(base[:5])
-        tm.assert_index_equal(res, exp)
-        res = Series(idx).drop_duplicates(keep="last")
-        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
-
-        res = idx.drop_duplicates(keep=False)
-        tm.assert_index_equal(res, base[5:])
-        res = Series(idx).drop_duplicates(keep=False)
-        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
+        idx = pd.date_range("2011-01-01", freq=freq, periods=10, name="idx")
+        idx = idx.append(idx[:5])
+
+        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
+        expected = idx[~expected]
+
+        result = idx.drop_duplicates(keep=keep)
+        tm.assert_index_equal(result, expected)
+
+        result = Series(idx).drop_duplicates(keep=keep)
+        tm.assert_series_equal(result, Series(expected, index=index))
 
     @pytest.mark.parametrize(
         "freq",

diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py
@@ -81,9 +81,10 @@ def test_value_counts_unique(self):
 
         tm.assert_index_equal(idx.unique(), exp_idx)
 
-    def test_drop_duplicates_metadata(self):
+    @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
+    def test_drop_duplicates_metadata(self, freq):
         # GH 10115
-        idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
+        idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
         result = idx.drop_duplicates()
         tm.assert_index_equal(idx, result)
         assert idx.freq == result.freq
@@ -93,26 +94,32 @@ def test_drop_duplicates_metadata(self):
         tm.assert_index_equal(idx, result)
         assert idx.freq == result.freq
 
-    def test_drop_duplicates(self):
+    @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
+    @pytest.mark.parametrize(
+        "keep, expected, index",
+        [
+            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
+            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
+            (
+                False,
+                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
+                np.arange(5, 10),
+            ),
+        ],
+    )
+    def test_drop_duplicates(self, freq, keep, expected, index):
         # to check Index/Series compat
-        base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx")
-        idx = base.append(base[:5])
-
-        res = idx.drop_duplicates()
-        tm.assert_index_equal(res, base)
-        res = Series(idx).drop_duplicates()
-        tm.assert_series_equal(res, Series(base))
-
-        res = idx.drop_duplicates(keep="last")
-        exp = base[5:].append(base[:5])
-        tm.assert_index_equal(res, exp)
-        res = Series(idx).drop_duplicates(keep="last")
-        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
-
-        res = idx.drop_duplicates(keep=False)
-        tm.assert_index_equal(res, base[5:])
-        res = Series(idx).drop_duplicates(keep=False)
-        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
+        idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
+        idx = idx.append(idx[:5])
+
+        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
+        expected = idx[~expected]
+
+        result = idx.drop_duplicates(keep=keep)
+        tm.assert_index_equal(result, expected)
+
+        result = Series(idx).drop_duplicates(keep=keep)
+        tm.assert_series_equal(result, Series(expected, index=index))
 
     def test_order_compat(self):
         def _check_freq(index, expected_index):

diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py
@@ -136,9 +136,12 @@ def test_order(self):
             tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
             assert ordered.freq is None
 
-    def test_drop_duplicates_metadata(self):
+    @pytest.mark.parametrize(
+        "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
+    )
+    def test_drop_duplicates_metadata(self, freq):
         # GH 10115
-        idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
+        idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx")
         result = idx.drop_duplicates()
         tm.assert_index_equal(idx, result)
         assert idx.freq == result.freq
@@ -149,26 +152,34 @@ def test_drop_duplicates_metadata(self):
         tm.assert_index_equal(idx, result)
         assert result.freq is None
 
-    def test_drop_duplicates(self):
+    @pytest.mark.parametrize(
+        "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]
+    )
+    @pytest.mark.parametrize(
+        "keep, expected, index",
+        [
+            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
+            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
+            (
+                False,
+                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
+                np.arange(5, 10),
+            ),
+        ],
+    )
+    def test_drop_duplicates(self, freq, keep, expected, index):
         # to check Index/Series compat
-        base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx")
-        idx = base.append(base[:5])
-
-        res = idx.drop_duplicates()
-        tm.assert_index_equal(res, base)
-        res = Series(idx).drop_duplicates()
-        tm.assert_series_equal(res, Series(base))
-
-        res = idx.drop_duplicates(keep="last")
-        exp = base[5:].append(base[:5])
-        tm.assert_index_equal(res, exp)
-        res = Series(idx).drop_duplicates(keep="last")
-        tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
-
-        res = idx.drop_duplicates(keep=False)
-        tm.assert_index_equal(res, base[5:])
-        res = Series(idx).drop_duplicates(keep=False)
-        tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
+        idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx")
+        idx = idx.append(idx[:5])
+
+        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
+        expected = idx[~expected]
+
+        result = idx.drop_duplicates(keep=keep)
+        tm.assert_index_equal(result, expected)
+
+        result = Series(idx).drop_duplicates(keep=keep)
+        tm.assert_series_equal(result, Series(expected, index=index))
 
     @pytest.mark.parametrize(
         "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]