@@ -2364,35 +2364,7 @@ def _factorize_keys(
2364
2364
# "_values_for_factorize"
2365
2365
rk , _ = rk ._values_for_factorize () # type: ignore[union-attr]
2366
2366
2367
- klass : type [libhashtable .Factorizer ]
2368
- if is_numeric_dtype (lk .dtype ):
2369
- if not is_dtype_equal (lk , rk ):
2370
- dtype = find_common_type ([lk .dtype , rk .dtype ])
2371
- if isinstance (dtype , ExtensionDtype ):
2372
- cls = dtype .construct_array_type ()
2373
- if not isinstance (lk , ExtensionArray ):
2374
- lk = cls ._from_sequence (lk , dtype = dtype , copy = False )
2375
- else :
2376
- lk = lk .astype (dtype )
2377
-
2378
- if not isinstance (rk , ExtensionArray ):
2379
- rk = cls ._from_sequence (rk , dtype = dtype , copy = False )
2380
- else :
2381
- rk = rk .astype (dtype )
2382
- else :
2383
- lk = lk .astype (dtype )
2384
- rk = rk .astype (dtype )
2385
- if isinstance (lk , BaseMaskedArray ):
2386
- # Invalid index type "type" for "Dict[Type[object], Type[Factorizer]]";
2387
- # expected type "Type[object]"
2388
- klass = _factorizers [lk .dtype .type ] # type: ignore[index]
2389
- else :
2390
- klass = _factorizers [lk .dtype .type ]
2391
-
2392
- else :
2393
- klass = libhashtable .ObjectFactorizer
2394
- lk = ensure_object (lk )
2395
- rk = ensure_object (rk )
2367
+ klass , lk , rk = _convert_arrays_and_get_rizer_klass (lk , rk )
2396
2368
2397
2369
rizer = klass (max (len (lk ), len (rk )))
2398
2370
@@ -2433,6 +2405,41 @@ def _factorize_keys(
2433
2405
return llab , rlab , count
2434
2406
2435
2407
2408
def _convert_arrays_and_get_rizer_klass(
    lk: ArrayLike, rk: ArrayLike
) -> tuple[type[libhashtable.Factorizer], ArrayLike, ArrayLike]:
    """
    Select a factorizer class for two key arrays and coerce the arrays to match.

    Parameters
    ----------
    lk : ArrayLike
        Left key values. Its dtype decides which branch is taken.
    rk : ArrayLike
        Right key values.

    Returns
    -------
    tuple
        ``(klass, lk, rk)``: the factorizer class to instantiate, followed by
        the (possibly converted) left and right arrays.

    Notes
    -----
    When ``lk`` is not numeric, both arrays are passed through
    ``ensure_object`` and ``ObjectFactorizer`` is returned.  When ``lk`` is
    numeric and the two dtypes differ, both arrays are converted to the dtype
    given by ``find_common_type`` before the class is looked up in
    ``_factorizers`` using the scalar type of ``lk``'s dtype.
    """
    if not is_numeric_dtype(lk.dtype):
        # Non-numeric keys are always hashed as Python objects.
        return libhashtable.ObjectFactorizer, ensure_object(lk), ensure_object(rk)

    if not is_dtype_equal(lk, rk):
        common = find_common_type([lk.dtype, rk.dtype])
        if isinstance(common, ExtensionDtype):
            # Existing extension arrays are cast; anything else is wrapped in
            # the array type belonging to the common dtype.
            array_type = common.construct_array_type()
            lk = (
                lk.astype(common)
                if isinstance(lk, ExtensionArray)
                else array_type._from_sequence(lk, dtype=common, copy=False)
            )
            rk = (
                rk.astype(common)
                if isinstance(rk, ExtensionArray)
                else array_type._from_sequence(rk, dtype=common, copy=False)
            )
        else:
            lk = lk.astype(common)
            rk = rk.astype(common)

    rizer_klass: type[libhashtable.Factorizer]
    # Both branches perform the same lookup; they are kept apart only because
    # the masked-array case needs a type-checker suppression.
    if isinstance(lk, BaseMaskedArray):
        # Invalid index type "type" for "Dict[Type[object], Type[Factorizer]]";
        # expected type "Type[object]"
        rizer_klass = _factorizers[lk.dtype.type]  # type: ignore[index]
    else:
        rizer_klass = _factorizers[lk.dtype.type]

    return rizer_klass, lk, rk
2441
+
2442
+
2436
2443
def _sort_labels (
2437
2444
uniques : np .ndarray , left : npt .NDArray [np .intp ], right : npt .NDArray [np .intp ]
2438
2445
) -> tuple [npt .NDArray [np .intp ], npt .NDArray [np .intp ]]:
0 commit comments