@@ -547,66 +547,27 @@ def test_value_counts_datetime64(self, index_or_series):
547
547
result2 = td2 .value_counts ()
548
548
tm .assert_series_equal (result2 , expected_s )
549
549
550
- def test_factorize (self ):
551
- for orig in self .objs :
552
- o = orig .copy ()
553
-
554
- if isinstance (o , Index ) and o .is_boolean ():
555
- exp_arr = np .array ([0 , 1 ] + [0 ] * 8 , dtype = np .intp )
556
- exp_uniques = o
557
- exp_uniques = Index ([False , True ])
558
- else :
559
- exp_arr = np .array (range (len (o )), dtype = np .intp )
560
- exp_uniques = o
561
- codes , uniques = o .factorize ()
562
-
563
- tm .assert_numpy_array_equal (codes , exp_arr )
564
- if isinstance (o , Series ):
565
- tm .assert_index_equal (uniques , Index (orig ), check_names = False )
566
- else :
567
- # factorize explicitly resets name
568
- tm .assert_index_equal (uniques , exp_uniques , check_names = False )
569
-
570
- def test_factorize_repeated (self ):
571
- for orig in self .objs :
572
- o = orig .copy ()
550
+ @pytest .mark .parametrize ("sort" , [True , False ])
551
+ def test_factorize (self , index_or_series_obj , sort ):
552
+ obj = index_or_series_obj
553
+ result_codes , result_uniques = obj .factorize (sort = sort )
573
554
574
- # don't test boolean
575
- if isinstance (o , Index ) and o .is_boolean ():
576
- continue
555
+ constructor = pd .Index
556
+ if isinstance (obj , pd .MultiIndex ):
557
+ constructor = pd .MultiIndex .from_tuples
558
+ expected_uniques = constructor (obj .unique ())
577
559
578
- # sort by value, and create duplicates
579
- if isinstance (o , Series ):
580
- o = o .sort_values ()
581
- n = o .iloc [5 :].append (o )
582
- else :
583
- indexer = o .argsort ()
584
- o = o .take (indexer )
585
- n = o [5 :].append (o )
586
-
587
- exp_arr = np .array (
588
- [5 , 6 , 7 , 8 , 9 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ], dtype = np .intp
589
- )
590
- codes , uniques = n .factorize (sort = True )
591
-
592
- tm .assert_numpy_array_equal (codes , exp_arr )
593
- if isinstance (o , Series ):
594
- tm .assert_index_equal (
595
- uniques , Index (orig ).sort_values (), check_names = False
596
- )
597
- else :
598
- tm .assert_index_equal (uniques , o , check_names = False )
560
+ if sort :
561
+ expected_uniques = expected_uniques .sort_values ()
599
562
600
- exp_arr = np .array ([0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 0 , 1 , 2 , 3 , 4 ], np .intp )
601
- codes , uniques = n .factorize (sort = False )
602
- tm .assert_numpy_array_equal (codes , exp_arr )
563
+ # construct an integer ndarray so that
564
+ # `expected_uniques.take(expected_codes)` is equal to `obj`
565
+ expected_uniques_list = list (expected_uniques )
566
+ expected_codes = [expected_uniques_list .index (val ) for val in obj ]
567
+ expected_codes = np .asarray (expected_codes , dtype = np .intp )
603
568
604
- if isinstance (o , Series ):
605
- expected = Index (o .iloc [5 :10 ].append (o .iloc [:5 ]))
606
- tm .assert_index_equal (uniques , expected , check_names = False )
607
- else :
608
- expected = o [5 :10 ].append (o [:5 ])
609
- tm .assert_index_equal (uniques , expected , check_names = False )
569
+ tm .assert_numpy_array_equal (result_codes , expected_codes )
570
+ tm .assert_index_equal (result_uniques , expected_uniques )
610
571
611
572
def test_duplicated_drop_duplicates_index (self ):
612
573
# GH 4060
0 commit comments