90
90
ExtensionArray ,
91
91
)
92
92
from pandas .core .arrays ._mixins import NDArrayBackedExtensionArray
93
+ from pandas .core .arrays .string_ import StringDtype
93
94
import pandas .core .common as com
94
95
from pandas .core .construction import (
95
96
ensure_wrapped_if_datetimelike ,
@@ -2399,21 +2400,9 @@ def _factorize_keys(
2399
2400
rk = ensure_int64 (rk .codes )
2400
2401
2401
2402
elif isinstance (lk , ExtensionArray ) and lk .dtype == rk .dtype :
2402
- if not isinstance (lk , BaseMaskedArray ) and not (
2403
- # exclude arrow dtypes that would get cast to object
2404
- isinstance (lk .dtype , ArrowDtype )
2405
- and (
2406
- is_numeric_dtype (lk .dtype .numpy_dtype )
2407
- or is_string_dtype (lk .dtype )
2408
- and not sort
2409
- )
2403
+ if (isinstance (lk .dtype , ArrowDtype ) and is_string_dtype (lk .dtype )) or (
2404
+ isinstance (lk .dtype , StringDtype ) and lk .dtype .storage == "pyarrow"
2410
2405
):
2411
- lk , _ = lk ._values_for_factorize ()
2412
-
2413
- # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
2414
- # "_values_for_factorize"
2415
- rk , _ = rk ._values_for_factorize () # type: ignore[union-attr]
2416
- elif isinstance (lk .dtype , ArrowDtype ) and is_string_dtype (lk .dtype ):
2417
2406
import pyarrow as pa
2418
2407
import pyarrow .compute as pc
2419
2408
@@ -2436,6 +2425,21 @@ def _factorize_keys(
2436
2425
return rlab , llab , count
2437
2426
return llab , rlab , count
2438
2427
2428
+ if not isinstance (lk , BaseMaskedArray ) and not (
2429
+ # exclude arrow dtypes that would get cast to object
2430
+ isinstance (lk .dtype , ArrowDtype )
2431
+ and (
2432
+ is_numeric_dtype (lk .dtype .numpy_dtype )
2433
+ or is_string_dtype (lk .dtype )
2434
+ and not sort
2435
+ )
2436
+ ):
2437
+ lk , _ = lk ._values_for_factorize ()
2438
+
2439
+ # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
2440
+ # "_values_for_factorize"
2441
+ rk , _ = rk ._values_for_factorize () # type: ignore[union-attr]
2442
+
2439
2443
if needs_i8_conversion (lk .dtype ) and lk .dtype == rk .dtype :
2440
2444
# GH#23917 TODO: Needs tests for non-matching dtypes
2441
2445
# GH#23917 TODO: needs tests for case where lk is integer-dtype
0 commit comments