@@ -304,7 +304,7 @@ def _get_group_keys(self):
304
304
group_sizes = []
305
305
306
306
for lk , rk in zip (left_keys , right_keys ):
307
- llab , rlab , count = _factorize_objects (lk , rk , sort = self .sort )
307
+ llab , rlab , count = _factorize_keys (lk , rk , sort = self .sort )
308
308
309
309
left_labels .append (llab )
310
310
right_labels .append (rlab )
@@ -321,24 +321,24 @@ def _get_group_keys(self):
321
321
raise Exception ('Combinatorial explosion! (boom)' )
322
322
323
323
left_group_key , right_group_key , max_groups = \
324
- _factorize_int64 (left_group_key , right_group_key ,
324
+ _factorize_keys (left_group_key , right_group_key ,
325
325
sort = self .sort )
326
326
return left_group_key , right_group_key , max_groups
327
327
328
328
def _get_multiindex_indexer (join_keys , index , sort = False ):
329
329
shape = []
330
330
labels = []
331
331
for level , key in zip (index .levels , join_keys ):
332
- llab , rlab , count = _factorize_objects (level , key , sort = False )
332
+ llab , rlab , count = _factorize_keys (level , key , sort = False )
333
333
labels .append (rlab )
334
334
shape .append (count )
335
335
336
336
left_group_key = get_group_index (labels , shape )
337
337
right_group_key = get_group_index (index .labels , shape )
338
338
339
339
left_group_key , right_group_key , max_groups = \
340
- _factorize_int64 (left_group_key , right_group_key ,
341
- sort = False )
340
+ _factorize_keys (left_group_key , right_group_key ,
341
+ sort = False )
342
342
343
343
left_indexer , right_indexer = \
344
344
lib .left_outer_join (com ._ensure_int64 (left_group_key ),
@@ -348,7 +348,7 @@ def _get_multiindex_indexer(join_keys, index, sort=False):
348
348
return left_indexer , right_indexer
349
349
350
350
def _get_single_indexer (join_key , index , sort = False ):
351
- left_key , right_key , count = _factorize_objects (join_key , index , sort = sort )
351
+ left_key , right_key , count = _factorize_keys (join_key , index , sort = sort )
352
352
353
353
left_indexer , right_indexer = \
354
354
lib .left_outer_join (com ._ensure_int64 (left_key ),
@@ -394,26 +394,21 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False):
394
394
'outer' : lib .full_outer_join ,
395
395
}
396
396
397
- def _factorize_int64 (left_index , right_index , sort = True ):
398
- rizer = lib .Int64Factorizer (max (len (left_index ), len (right_index )))
399
397
400
- # 32-bit compatibility
401
- left_index = com ._ensure_int64 (left_index )
402
- right_index = com ._ensure_int64 (right_index )
403
-
404
- llab , _ = rizer .factorize (left_index )
405
- rlab , _ = rizer .factorize (right_index )
406
-
407
- if sort :
408
- llab , rlab = _sort_labels (np .array (rizer .uniques ), llab , rlab )
409
-
410
- return llab , rlab , rizer .get_count ()
398
+ def _factorize_keys (lk , rk , sort = True ):
399
+ if com .is_integer_dtype (lk ) and com .is_integer_dtype (rk ):
400
+ klass = lib .Int64Factorizer
401
+ lk = com ._ensure_int64 (lk )
402
+ rk = com ._ensure_int64 (rk )
403
+ else :
404
+ klass = lib .Factorizer
405
+ lk = com ._ensure_object (lk )
406
+ rk = com ._ensure_object (rk )
411
407
412
- def _factorize_objects (left_index , right_index , sort = True ):
413
- rizer = lib .Factorizer (max (len (left_index ), len (right_index )))
408
+ rizer = klass (max (len (lk ), len (rk )))
414
409
415
- llab , _ = rizer .factorize (left_index . astype ( 'O' ) )
416
- rlab , _ = rizer .factorize (right_index . astype ( 'O' ) )
410
+ llab , _ = rizer .factorize (lk )
411
+ rlab , _ = rizer .factorize (rk )
417
412
418
413
count = rizer .get_count ()
419
414
0 commit comments