@@ -2832,18 +2832,12 @@ def duplicated(self, subset=None, take_last=False):
2832
2832
duplicated : Series
2833
2833
"""
2834
2834
from pandas .core .groupby import get_group_index
2835
+ from pandas .core .algorithms import factorize
2835
2836
from pandas .hashtable import duplicated_int64 , _SIZE_HINT_LIMIT
2836
2837
2837
- size_hint = min (len (self ), _SIZE_HINT_LIMIT )
2838
-
2839
- def factorize (vals ):
2840
- (hash_klass , vec_klass ), vals = \
2841
- algos ._get_data_algo (vals , algos ._hashtables )
2842
-
2843
- uniques , table = vec_klass (), hash_klass (size_hint )
2844
- labels = table .get_labels (vals , uniques , 0 , - 1 )
2845
-
2846
- return labels .astype ('i8' , copy = False ), len (uniques )
2838
def f(vals):
    """Factorize one column's values into int64 labels plus a cardinality.

    Returns a ``(labels, n_uniques)`` pair: the labels produced by
    ``factorize`` cast to ``'i8'`` (without copying when possible), and the
    length of the second value ``factorize`` returns.
    """
    # Cap the hash-table pre-allocation at _SIZE_HINT_LIMIT, however many
    # rows the frame has.
    hint = min(len(self), _SIZE_HINT_LIMIT)
    codes, uniques = factorize(vals, size_hint=hint)
    return codes.astype('i8', copy=False), len(uniques)
2847
2841
2848
2842
if subset is None :
2849
2843
subset = self .columns
@@ -2853,7 +2847,7 @@ def factorize(vals):
2853
2847
subset = subset ,
2854
2848
2855
2849
vals = (self [col ].values for col in subset )
2856
- labels , shape = map (list , zip ( * map (factorize , vals )))
2850
+ labels , shape = map (list , zip ( * map (f , vals )))
2857
2851
2858
2852
ids = get_group_index (labels , shape , sort = False , xnull = False )
2859
2853
return Series (duplicated_int64 (ids , take_last ), index = self .index )
0 commit comments