Skip to content

Commit 81d9fb5

Browse files
committed
Speed up StringDtype arrow implementation
1 parent 1aa9150 commit 81d9fb5

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

pandas/core/reshape/merge.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@
7676
na_value_for_dtype,
7777
)
7878

79+
import pandas as pd
7980
from pandas import (
8081
ArrowDtype,
8182
Categorical,
@@ -2407,13 +2408,20 @@ def _factorize_keys(
24072408
or is_string_dtype(lk.dtype)
24082409
and not sort
24092410
)
2411+
or is_string_dtype(lk.dtype)
2412+
and lk.dtype.storage == "pyarrow"
24102413
):
24112414
lk, _ = lk._values_for_factorize()
24122415

24132416
# error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
24142417
# "_values_for_factorize"
24152418
rk, _ = rk._values_for_factorize() # type: ignore[union-attr]
2416-
elif isinstance(lk.dtype, ArrowDtype) and is_string_dtype(lk.dtype):
2419+
elif (
2420+
isinstance(lk.dtype, ArrowDtype)
2421+
and is_string_dtype(lk.dtype)
2422+
or isinstance(lk.dtype, pd.StringDtype)
2423+
and lk.dtype.storage == "pyarrow"
2424+
):
24172425
import pyarrow as pa
24182426
import pyarrow.compute as pc
24192427

0 commit comments

Comments
 (0)