@@ -2750,18 +2750,12 @@ def duplicated(self, subset=None, take_last=False):
2750
2750
duplicated : Series
2751
2751
"""
2752
2752
from pandas .core .groupby import get_group_index
2753
+ from pandas .core .algorithms import factorize
2753
2754
from pandas .hashtable import duplicated_int64 , _SIZE_HINT_LIMIT
2754
2755
2755
- size_hint = min (len (self ), _SIZE_HINT_LIMIT )
2756
-
2757
- def factorize (vals ):
2758
- (hash_klass , vec_klass ), vals = \
2759
- algos ._get_data_algo (vals , algos ._hashtables )
2760
-
2761
- uniques , table = vec_klass (), hash_klass (size_hint )
2762
- labels = table .get_labels (vals , uniques , 0 , - 1 )
2763
-
2764
- return labels .astype ('i8' , copy = False ), len (uniques )
2756
+ def f (vals ):
2757
+ labels , shape = factorize (vals , size_hint = min (len (self ), _SIZE_HINT_LIMIT ))
2758
+ return labels .astype ('i8' ,copy = False ), len (shape )
2765
2759
2766
2760
if subset is None :
2767
2761
subset = self .columns
@@ -2771,7 +2765,7 @@ def factorize(vals):
2771
2765
subset = subset ,
2772
2766
2773
2767
vals = (self [col ].values for col in subset )
2774
- labels , shape = map (list , zip ( * map (factorize , vals )))
2768
+ labels , shape = map (list , zip ( * map (f , vals )))
2775
2769
2776
2770
ids = get_group_index (labels , shape , sort = False , xnull = False )
2777
2771
return Series (duplicated_int64 (ids , take_last ), index = self .index )
0 commit comments