From 1aa9150a48fa370f878d6242e9b00b7488fefc29 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 11 Aug 2023 15:08:11 +0200
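Subject: [PATCH 1/3] Speed up string inference in maybe_convert_objects

In the seen.str_ branch, evaluate using_pyarrow_string_dtype() before
is_string_array(objects, skipna=True), so the is_string_array() check on
`objects` is skipped when using_pyarrow_string_dtype() is false. The
result is unchanged: when no ArrowDtype array is returned, the branch
still falls through to `seen.object_ = True`.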
---
 pandas/_libs/lib.pyx | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 38695fbb8222b..0df739462b99d 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2680,14 +2680,13 @@ def maybe_convert_objects(ndarray[object] objects,
         seen.object_ = True
 
     elif seen.str_:
-        if is_string_array(objects, skipna=True):
-            if using_pyarrow_string_dtype():
-                import pyarrow as pa
+        if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
+            import pyarrow as pa
 
-                from pandas.core.dtypes.dtypes import ArrowDtype
+            from pandas.core.dtypes.dtypes import ArrowDtype
 
-                dtype = ArrowDtype(pa.string())
-                return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
+            dtype = ArrowDtype(pa.string())
+            return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)
 
         seen.object_ = True
     elif seen.interval_:

From 81d9fb5777bdc8d403f3c97309c27dc67eaae3ff Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 12 Aug 2023 09:32:34 +0200
Subject: [PATCH 2/3] Speed up StringDtype arrow implementation

---
 pandas/core/reshape/merge.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 6987a0ac7bf6b..3015b3ab448a7 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -76,6 +76,7 @@
     na_value_for_dtype,
 )
 
+import pandas as pd
 from pandas import (
     ArrowDtype,
     Categorical,
@@ -2407,13 +2408,20 @@ def _factorize_keys(
                 or is_string_dtype(lk.dtype)
                 and not sort
             )
+            or is_string_dtype(lk.dtype)
+            and lk.dtype.storage == "pyarrow"
         ):
             lk, _ = lk._values_for_factorize()
 
             # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
             # "_values_for_factorize"
             rk, _ = rk._values_for_factorize()  # type: ignore[union-attr]
-        elif isinstance(lk.dtype, ArrowDtype) and is_string_dtype(lk.dtype):
+        elif (
+            isinstance(lk.dtype, ArrowDtype)
+            and is_string_dtype(lk.dtype)
+            or isinstance(lk.dtype, pd.StringDtype)
+            and lk.dtype.storage == "pyarrow"
+        ):
             import pyarrow as pa
             import pyarrow.compute as pc
 

From 4115867aca66312fa6a32edf8581ad1279e170d9 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Sat, 12 Aug 2023 09:34:52 +0200
Subject: [PATCH 3/3] Revert "Speed up StringDtype arrow implementation"

This reverts commit 81d9fb5777bdc8d403f3c97309c27dc67eaae3ff.
---
 pandas/core/reshape/merge.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 3015b3ab448a7..6987a0ac7bf6b 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -76,7 +76,6 @@
     na_value_for_dtype,
 )
 
-import pandas as pd
 from pandas import (
     ArrowDtype,
     Categorical,
@@ -2408,20 +2407,13 @@ def _factorize_keys(
                 or is_string_dtype(lk.dtype)
                 and not sort
             )
-            or is_string_dtype(lk.dtype)
-            and lk.dtype.storage == "pyarrow"
         ):
             lk, _ = lk._values_for_factorize()
 
             # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute
             # "_values_for_factorize"
             rk, _ = rk._values_for_factorize()  # type: ignore[union-attr]
-        elif (
-            isinstance(lk.dtype, ArrowDtype)
-            and is_string_dtype(lk.dtype)
-            or isinstance(lk.dtype, pd.StringDtype)
-            and lk.dtype.storage == "pyarrow"
-        ):
+        elif isinstance(lk.dtype, ArrowDtype) and is_string_dtype(lk.dtype):
             import pyarrow as pa
             import pyarrow.compute as pc