From 1cd53fa2b0dc8033ea5ca1ef68bd81047d81340d Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sun, 25 Oct 2020 11:22:41 +0000
Subject: [PATCH 1/6] REGR/PERF: Index.is_: inline None checks instead of com.any_none

---
 pandas/core/indexes/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 006469f79780d..24caf6ee49b4a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -545,7 +545,7 @@ def is_(self, other) -> bool:
             return True
         elif not hasattr(other, "_id"):
             return False
-        elif com.any_none(self._id, other._id):
+        elif self._id is None or other._id is None:
             return False
         else:
             return self._id is other._id

From a0c1ec0d277b091e093bbf1b0eff9b2d4f942b8b Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Fri, 23 Oct 2020 22:05:22 +0100
Subject: [PATCH 2/6] PERF: ensure_string_array with non-numpy input array

---
 pandas/_libs/lib.pyx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 001fbae120ae8..0250e5454eeab 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -651,6 +651,7 @@ cpdef ndarray[object] ensure_string_array(
     cdef:
         Py_ssize_t i = 0, n = len(arr)
 
+    arr = np.asarray(arr)  # PERF: need a numpy array to ensure fast access
     result = np.asarray(arr, dtype="object")
 
     if copy and result is arr:

From 7b4928c82f8a310615ff1147fd99a1186aa8802a Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 24 Oct 2020 00:53:16 +0100
Subject: [PATCH 3/6] fix conversion of nan to string

---
 pandas/_libs/lib.pyx | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 0250e5454eeab..6fcf2bc92511d 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -651,27 +651,29 @@ cpdef ndarray[object] ensure_string_array(
     cdef:
         Py_ssize_t i = 0, n = len(arr)
 
-    arr = np.asarray(arr)  # PERF: need a numpy array to ensure fast access
     result = np.asarray(arr, dtype="object")
 
     if copy and result is arr:
         result = result.copy()
 
+    arr = np.asarray(arr)  # PERF: need a numpy array to ensure fast access
+
     for i in range(n):
-        val = arr[i]
+        arr_val = arr[i]
+        res_val = result[i]
 
-        if isinstance(val, str):
+        if not checknull(res_val) and isinstance(arr_val, str):
             continue
 
-        if not checknull(val):
-            result[i] = str(val)
+        if not checknull(res_val):
+            result[i] = str(arr_val)
         else:
             if convert_na_value:
-                val = na_value
+                arr_val = na_value
             if skipna:
-                result[i] = val
+                result[i] = arr_val
             else:
-                result[i] = str(val)
+                result[i] = str(arr_val)
 
     return result
 

From f28792d0fb4e9f8ee5bfff44b273c9711a22b04c Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 24 Oct 2020 08:41:07 +0100
Subject: [PATCH 4/6] fix conversion of extension-array and non-ndarray input in ensure_string_array

---
 pandas/_libs/lib.pyx | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 6fcf2bc92511d..597cad6046a0b 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -651,29 +651,33 @@ cpdef ndarray[object] ensure_string_array(
     cdef:
         Py_ssize_t i = 0, n = len(arr)
 
+    from pandas.core.dtypes.common import is_extension_array_dtype
+
+    if is_extension_array_dtype(arr):
+        arr = arr.to_numpy()
+    elif not isinstance(arr, np.ndarray):
+        arr = np.array(arr, dtype=object)
+
     result = np.asarray(arr, dtype="object")
 
     if copy and result is arr:
         result = result.copy()
 
-    arr = np.asarray(arr)  # PERF: need a numpy array to ensure fast access
-
     for i in range(n):
-        arr_val = arr[i]
-        res_val = result[i]
+        val = arr[i]
 
-        if not checknull(res_val) and isinstance(arr_val, str):
+        if isinstance(val, str):
             continue
 
-        if not checknull(res_val):
-            result[i] = str(arr_val)
+        if not checknull(val):
+            result[i] = str(val)
         else:
             if convert_na_value:
-                arr_val = na_value
+                val = na_value
             if skipna:
-                result[i] = arr_val
+                result[i] = val
             else:
-                result[i] = str(arr_val)
+                result[i] = str(val)
 
     return result
 

From 3262d8fe878394ae4fee70f65bfe0b14681c3d7c Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 24 Oct 2020 15:23:16 +0100
Subject: [PATCH 5/6] add whatsnew, ASVs

---
 asv_bench/benchmarks/strings.py | 18 +++++++++++++++++-
 doc/source/whatsnew/v1.2.0.rst  |  2 +-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index d8b35abb94b9d..7c75ad031e7cd 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from pandas import DataFrame, Series
+from pandas import Categorical, DataFrame, Series
 
 from .pandas_vb_common import tm
 
@@ -16,6 +16,10 @@ def setup(self, dtype):
         self.series_arr = tm.rands_array(nchars=10, size=10 ** 5)
         self.frame_arr = self.series_arr.reshape((50_000, 2)).copy()
 
+        # GH37371. Testing construction of string series/frames from ExtensionArrays
+        self.series_cat_arr = Categorical(self.series_arr)
+        self.frame_cat_arr = Categorical(self.frame_arr)
+
     def time_series_construction(self, dtype):
         Series(self.series_arr, dtype=dtype)
 
@@ -28,6 +32,18 @@ def time_frame_construction(self, dtype):
     def peakmem_frame_construction(self, dtype):
         DataFrame(self.frame_arr, dtype=dtype)
 
+    def time_cat_series_construction(self, dtype):
+        Series(self.series_cat_arr, dtype=dtype)
+
+    def peakmem_cat_series_construction(self, dtype):
+        Series(self.series_cat_arr, dtype=dtype)
+
+    def time_cat_frame_construction(self, dtype):
+        DataFrame(self.frame_cat_arr, dtype=dtype)
+
+    def peakmem_cat_frame_construction(self, dtype):
+        DataFrame(self.frame_cat_arr, dtype=dtype)
+
 
 class Methods:
     def setup(self):
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 05996efb6d332..9b320182d7968 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -334,7 +334,7 @@ Deprecations
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-- Performance improvements when creating DataFrame or Series with dtype ``str`` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`, :issue:`36325`, :issue:`36432`)
+- Performance improvements when creating DataFrame or Series with dtype ``str`` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`, :issue:`36325`, :issue:`36432`, :issue:`37371`)
 - Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`)
 - Performance improvements when creating :meth:`pd.Series.map` from a huge dictionary (:issue:`34717`)
 - Performance improvement in :meth:`GroupBy.transform` with the ``numba`` engine (:issue:`36240`)

From d9f8e6e044c4dca4019bca5b287262c96f71e901 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Mon, 26 Oct 2020 12:57:52 +0000
Subject: [PATCH 6/6] is_extension_array_dtype -> hasattr(arr, "to_numpy")

---
 pandas/_libs/lib.pyx | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 597cad6046a0b..2cb4df7e054fe 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -651,12 +651,10 @@ cpdef ndarray[object] ensure_string_array(
     cdef:
         Py_ssize_t i = 0, n = len(arr)
 
-    from pandas.core.dtypes.common import is_extension_array_dtype
-
-    if is_extension_array_dtype(arr):
+    if hasattr(arr, "to_numpy"):
         arr = arr.to_numpy()
     elif not isinstance(arr, np.ndarray):
-        arr = np.array(arr, dtype=object)
+        arr = np.array(arr, dtype="object")
 
     result = np.asarray(arr, dtype="object")