@@ -292,16 +292,81 @@ def test_is_monotonic(self, data, non_lexsorted_data):
292
292
assert c .is_monotonic_decreasing is False
293
293
294
294
def test_has_duplicates (self ):
295
-
296
295
idx = CategoricalIndex ([0 , 0 , 0 ], name = "foo" )
297
296
assert idx .is_unique is False
298
297
assert idx .has_duplicates is True
299
298
300
- def test_drop_duplicates (self ):
299
+ idx = CategoricalIndex ([0 , 1 ], categories = [2 , 3 ], name = "foo" )
300
+ assert idx .is_unique is False
301
+ assert idx .has_duplicates is True
301
302
302
- idx = CategoricalIndex ([0 , 0 , 0 ], name = "foo" )
303
- expected = CategoricalIndex ([0 ], name = "foo" )
304
- tm .assert_index_equal (idx .drop_duplicates (), expected )
303
+ idx = CategoricalIndex ([0 , 1 , 2 , 3 ], categories = [1 , 2 , 3 ], name = "foo" )
304
+ assert idx .is_unique is True
305
+ assert idx .has_duplicates is False
306
+
307
@pytest.mark.parametrize(
    "data, categories, expected",
    [
        (
            [1, 1, 1],
            [1, 2, 3],
            {
                "first": np.array([False, True, True]),
                "last": np.array([True, True, False]),
                False: np.array([True, True, True]),
            },
        ),
        (
            [1, 1, 1],
            list("abc"),
            {
                "first": np.array([False, True, True]),
                "last": np.array([True, True, False]),
                False: np.array([True, True, True]),
            },
        ),
        (
            [2, "a", "b"],
            list("abc"),
            {
                # Builtin ``bool`` is used as the dtype: the ``np.bool``
                # alias is not available on every NumPy release.
                "first": np.zeros(3, dtype=bool),
                "last": np.zeros(3, dtype=bool),
                False: np.zeros(3, dtype=bool),
            },
        ),
        (
            list("abb"),
            list("abc"),
            {
                "first": np.array([False, False, True]),
                "last": np.array([False, True, False]),
                False: np.array([False, True, True]),
            },
        ),
    ],
)
def test_drop_duplicates(self, data, categories, expected):
    """Check ``duplicated`` and ``drop_duplicates`` for each ``keep`` mode.

    Parameters
    ----------
    data : list
        Values used to build the CategoricalIndex.
    categories : list
        Categories passed to the CategoricalIndex constructor.
    expected : dict
        Maps each ``keep`` argument (``"first"``, ``"last"``, ``False``)
        to the boolean mask ``duplicated`` is expected to return.
    """
    idx = CategoricalIndex(data, categories=categories, name="foo")
    for keep, mask in expected.items():
        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), mask)
        # ``drop_duplicates`` must retain exactly the entries that the
        # expected mask does not flag as duplicates.
        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, idx[~mask])
356
+
357
@pytest.mark.parametrize(
    "data, categories, expected_data, expected_categories",
    [
        ([1, 1, 1], [1, 2, 3], [1], [1]),
        ([1, 1, 1], list("abc"), [np.nan], []),
        ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan], [1, 2]),
        ([2, "a", "b"], list("abc"), [np.nan, "a", "b"], ["a", "b"]),
    ],
)
def test_unique(self, data, categories, expected_data, expected_categories):
    """Check ``unique`` on a CategoricalIndex against an expected index.

    Parameters
    ----------
    data : list
        Values used to build the CategoricalIndex under test.
    categories : list
        Categories passed to the CategoricalIndex under test.
    expected_data : list
        Values of the expected result index.
    expected_categories : list
        Categories of the expected result index.
    """
    # NOTE(review): the expected categories are a subset of the input
    # categories, so these cases assume ``unique`` discards categories
    # that do not occur in the data -- confirm for the pandas version
    # under test.
    idx = CategoricalIndex(data, categories=categories)
    expected = CategoricalIndex(expected_data, categories=expected_categories)
    tm.assert_index_equal(idx.unique(), expected)
306
371
307
372
def test_repr_roundtrip (self ):
0 commit comments