28
28
Series ,
29
29
Timedelta ,
30
30
TimedeltaIndex ,
31
- Timestamp ,
32
31
)
33
32
import pandas ._testing as tm
34
33
@@ -39,6 +38,23 @@ def allow_na_ops(obj: Any) -> bool:
39
38
return not is_bool_index and obj ._can_hold_na
40
39
41
40
41
def multiply_values(obj):
    """
    Repeat the values of ``obj`` with increasing multiplicity.

    The element at position ``i`` (0-based) is repeated ``i + 1`` times, so
    the values end up ordered (increasing) by their number of occurrences.

    Parameters
    ----------
    obj : pd.Index or pd.Series
        Object whose values are repeated.

    Returns
    -------
    pd.Index or pd.Series
        For an Index, the result of ``obj.repeat``. For a Series, a new
        object of the same class built from the repeated ``obj.values`` and
        the repeated index.

    Raises
    ------
    TypeError
        If ``obj`` is neither an Index nor a Series.
    """
    klass = type(obj)
    # Position i is repeated i + 1 times: 1, 2, ..., len(obj).
    counts = range(1, len(obj) + 1)

    if isinstance(obj, pd.Index):
        return obj.repeat(counts)

    if not isinstance(obj, pd.Series):
        raise TypeError(f"Unexpected type: { klass } ")

    # Expand positional indices first, then pull the matching raw values.
    positions = np.repeat(np.arange(len(obj)), counts)
    repeated_values = obj.values.take(positions)
    repeated_index = obj.index.repeat(counts)
    return klass(repeated_values, index=repeated_index)
56
+
57
+
42
58
class Ops :
43
59
def setup_method (self , method ):
44
60
self .bool_index = tm .makeBoolIndex (10 , name = "a" )
@@ -205,7 +221,31 @@ def test_ndarray_compat_properties(self, index_or_series_obj):
205
221
assert Index ([1 ]).item () == 1
206
222
assert Series ([1 ]).item () == 1
207
223
208
- def test_value_counts_unique_nunique (self , index_or_series_obj ):
224
def test_unique(self, index_or_series_obj):
    """
    Check that ``unique()`` returns the distinct values in first-seen order.

    The input is first expanded with ``multiply_values`` so that it contains
    duplicates; the expected result is rebuilt independently from
    ``obj.values``.
    """
    obj = multiply_values(index_or_series_obj)
    result = obj.unique()

    # dict keys keep insertion order, so this de-duplicates while
    # preserving the order of first appearance.
    unique_values = list(dict.fromkeys(obj.values))

    if not isinstance(obj, pd.Index):
        # Non-Index input: the result is compared as a plain ndarray.
        expected = np.array(unique_values)
        tm.assert_numpy_array_equal(result, expected)
        return

    if isinstance(obj, pd.MultiIndex):
        expected = pd.MultiIndex.from_tuples(unique_values)
        expected.names = obj.names
    else:
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if is_datetime64tz_dtype(obj):
            # NOTE(review): tz-aware expected values are normalized before
            # comparison; the reason is not visible here -- confirm.
            expected = expected.normalize()
    tm.assert_index_equal(result, expected)
242
+
243
def test_nunique(self, index_or_series_obj):
    """
    Check that ``nunique(dropna=False)`` matches the length of ``unique()``.

    The input is expanded with ``multiply_values`` first so that it
    contains repeated values.
    """
    repeated = multiply_values(index_or_series_obj)
    distinct_count = repeated.nunique(dropna=False)
    assert distinct_count == len(repeated.unique())
247
+
248
+ def test_value_counts (self , index_or_series_obj ):
209
249
orig = index_or_series_obj
210
250
obj = orig .copy ()
211
251
klass = type (obj )
@@ -242,27 +282,6 @@ def test_value_counts_unique_nunique(self, index_or_series_obj):
242
282
tm .assert_series_equal (result , expected_s )
243
283
assert result .index .name is None
244
284
245
- result = obj .unique ()
246
- if isinstance (obj , Index ):
247
- assert isinstance (result , type (obj ))
248
- tm .assert_index_equal (result , orig )
249
- assert result .dtype == orig .dtype
250
- elif is_datetime64tz_dtype (obj ):
251
- # datetimetz Series returns array of Timestamp
252
- assert result [0 ] == orig [0 ]
253
- for r in result :
254
- assert isinstance (r , Timestamp )
255
-
256
- tm .assert_numpy_array_equal (
257
- result .astype (object ), orig ._values .astype (object )
258
- )
259
- else :
260
- tm .assert_numpy_array_equal (result , orig .values )
261
- assert result .dtype == orig .dtype
262
-
263
- # dropna=True would break for MultiIndex
264
- assert obj .nunique (dropna = False ) == len (np .unique (obj .values ))
265
-
266
285
@pytest .mark .parametrize ("null_obj" , [np .nan , None ])
267
286
def test_value_counts_unique_nunique_null (self , null_obj , index_or_series_obj ):
268
287
orig = index_or_series_obj
0 commit comments