pandas-dev · jreback · Mar 4, 2020 · Feb 26, 2020 · Feb 26, 2020 · Feb 26, 2020
diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
@@ -1,3 +1,4 @@
+import collections
 from datetime import datetime, timedelta
 from io import StringIO
 import sys
@@ -28,7 +29,6 @@
     Series,
     Timedelta,
     TimedeltaIndex,
-    Timestamp,
 )
 import pandas._testing as tm
 
@@ -39,6 +39,23 @@ def allow_na_ops(obj: Any) -> bool:
     return not is_bool_index and obj._can_hold_na
 
 
+def repeat_values(obj):
+    """
+    Repeat the i-th element of an Index or Series i + 1 times.
+
+    The dtype is kept; a Series is rebuilt without its name. Other types raise.
+    """
+    counts = range(1, len(obj) + 1)
+    if isinstance(obj, pd.Index):
+        return obj.repeat(counts)
+    if isinstance(obj, pd.Series):
+        # take on ``_values`` (not ``.values``) so extension dtypes such as
+        # tz-aware datetimes are not densified to a plain ndarray
+        rep = obj._values.take(np.repeat(np.arange(len(obj)), counts))
+        return type(obj)(rep, index=obj.index.repeat(counts))
+    raise TypeError(f"Unexpected type: {type(obj)}")
+
+
 class Ops:
     def setup_method(self, method):
         self.bool_index = tm.makeBoolIndex(10, name="a")
@@ -205,63 +222,44 @@ def test_ndarray_compat_properties(self, index_or_series_obj):
         assert Index([1]).item() == 1
         assert Series([1]).item() == 1
 
-    def test_value_counts_unique_nunique(self, index_or_series_obj):
-        orig = index_or_series_obj
-        obj = orig.copy()
-        klass = type(obj)
-        values = obj._values
-
-        if orig.duplicated().any():
-            pytest.xfail(
-                "The test implementation isn't flexible enough to deal"
-                " with duplicated values. This isn't a bug in the"
-                " application code, but in the test code."
-            )
+    def test_unique(self, index_or_series_obj):
+        obj = repeat_values(index_or_series_obj)
+        result = obj.unique()
 
-        # create repeated values, 'n'th element is repeated by n+1 times
-        if isinstance(obj, Index):
-            expected_index = Index(obj[::-1])
-            expected_index.name = None
-            obj = obj.repeat(range(1, len(obj) + 1))
+        # dict.fromkeys drops duplicates while keeping first-seen order
+        unique_values = list(dict.fromkeys(obj.values))
+        if isinstance(obj, pd.MultiIndex):
+            expected = pd.MultiIndex.from_tuples(unique_values)
+            expected.names = obj.names
+            tm.assert_index_equal(result, expected)
+        elif isinstance(obj, pd.Index):
+            expected = pd.Index(unique_values, dtype=obj.dtype)
+            if is_datetime64tz_dtype(obj):
+                expected = expected.normalize()
+            tm.assert_index_equal(result, expected)
         else:
-            expected_index = Index(values[::-1])
-            idx = obj.index.repeat(range(1, len(obj) + 1))
-            # take-based repeat
-            indices = np.repeat(np.arange(len(obj)), range(1, len(obj) + 1))
-            rep = values.take(indices)
-            obj = klass(rep, index=idx)
-
-        # check values has the same dtype as the original
-        assert obj.dtype == orig.dtype
+            expected = np.array(unique_values)  # Series case: compared as ndarray
+            tm.assert_numpy_array_equal(result, expected)
 
-        expected_s = Series(
-            range(len(orig), 0, -1), index=expected_index, dtype="int64"
-        )
+    def test_nunique(self, index_or_series_obj):
+        obj = repeat_values(index_or_series_obj)
+        result = obj.nunique(dropna=False)  # dropna=True would break for MultiIndex
+        assert result == len(obj.unique())
 
+    def test_value_counts(self, index_or_series_obj):
+        obj = repeat_values(index_or_series_obj)
         result = obj.value_counts()
-        tm.assert_series_equal(result, expected_s)
-        assert result.index.name is None
 
-        result = obj.unique()
-        if isinstance(obj, Index):
-            assert isinstance(result, type(obj))
-            tm.assert_index_equal(result, orig)
-            assert result.dtype == orig.dtype
-        elif is_datetime64tz_dtype(obj):
-            # datetimetz Series returns array of Timestamp
-            assert result[0] == orig[0]
-            for r in result:
-                assert isinstance(r, Timestamp)
-
-            tm.assert_numpy_array_equal(
-                result.astype(object), orig._values.astype(object)
-            )
-        else:
-            tm.assert_numpy_array_equal(result, orig.values)
-            assert result.dtype == orig.dtype
+        counter = collections.Counter(obj)  # value -> number of occurrences
+        expected = pd.Series(dict(counter.most_common()), dtype=np.int64)
+        expected.index = expected.index.astype(obj.dtype)
+        if isinstance(obj, pd.MultiIndex):
+            expected.index = pd.Index(expected.index)
 
-        # dropna=True would break for MultiIndex
-        assert obj.nunique(dropna=False) == len(np.unique(obj.values))
+        # sort both by index: entries with equal counts have no fixed order
+        result = result.sort_index()
+        expected = expected.sort_index()
+        tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("null_obj", [np.nan, None])
     def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj):