From d9629d6945f4d624290510bdcad76812d262590f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 29 May 2021 13:44:11 -0700
Subject: [PATCH 1/4] REF: more explicit dtypes in strings.accessor

---
 pandas/core/series.py           |  2 +-
 pandas/core/strings/accessor.py | 38 ++++++++++++++++++++++-----------
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 2f45a2adbdec7..96bf1858de39c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3980,7 +3980,7 @@ def explode(self, ignore_index: bool = False) -> Series:
         else:
             index = self.index.repeat(counts)
 
-        return self._constructor(values, index=index, name=self.name)
+        return self._constructor(values, index=index, name=self.name, dtype=object)
 
     def unstack(self, level=-1, fill_value=None) -> DataFrame:
         """
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 7643019ff8c55..d6a70e7a4fcde 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -13,7 +13,10 @@
 import numpy as np
 
 import pandas._libs.lib as lib
-from pandas._typing import FrameOrSeriesUnion
+from pandas._typing import (
+    DtypeObj,
+    FrameOrSeriesUnion,
+)
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import (
@@ -34,6 +37,7 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.base import NoNewAttributesMixin
+from pandas.core.construction import extract_array
 
 if TYPE_CHECKING:
     from pandas import Index
@@ -122,7 +126,7 @@ def _map_and_wrap(name, docstring):
     @forbid_nonstring_types(["bytes"], name=name)
     def wrapper(self):
         result = getattr(self._data.array, f"_str_{name}")()
-        return self._wrap_result(result)
+        return self._wrap_result(result, returns_bool=True)
 
     wrapper.__doc__ = docstring
     return wrapper
@@ -209,8 +213,8 @@ def _validate(data):
         # see _libs/lib.pyx for list of inferred types
         allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"]
 
-        values = getattr(data, "values", data)  # Series / Index
-        values = getattr(values, "categories", values)  # categorical / normal
+        data = extract_array(data, extract_numpy=True)
+        values = getattr(data, "categories", data)  # categorical / normal
 
         inferred_dtype = lib.infer_dtype(values, skipna=True)
 
@@ -242,6 +246,7 @@ def _wrap_result(
         expand: bool | None = None,
         fill_value=np.nan,
         returns_string=True,
+        returns_bool: bool = False,
     ):
         from pandas import (
             Index,
@@ -319,11 +324,18 @@ def cons_row(x):
         else:
             index = self._orig.index
             # This is a mess.
-            dtype: str | None
-            if self._is_string and returns_string:
-                dtype = self._orig.dtype
+            dtype: DtypeObj | str | None
+            if self._is_string:
+                if returns_bool:
+                    dtype = "boolean"
+                elif returns_string:
+                    dtype = self._orig.dtype
+                else:
+                    dtype = result.dtype
+            elif returns_bool:
+                dtype = result.dtype  # i.e. bool
             else:
-                dtype = None
+                dtype = getattr(result, "dtype", None)
 
             if expand:
                 cons = self._orig._constructor_expanddim
@@ -331,7 +343,7 @@ def cons_row(x):
             else:
                 # Must be a Series
                 cons = self._orig._constructor
-                result = cons(result, name=name, index=index)
+                result = cons(result, name=name, index=index, dtype=dtype)
             result = result.__finalize__(self._orig, method="str")
             if name is not None and result.ndim == 1:
                 # __finalize__ might copy over the original name, but we may
@@ -369,7 +381,7 @@ def _get_series_list(self, others):
         if isinstance(others, ABCSeries):
             return [others]
         elif isinstance(others, ABCIndex):
-            return [Series(others._values, index=idx)]
+            return [Series(others._values, index=idx, dtype=others.dtype)]
         elif isinstance(others, ABCDataFrame):
             return [others[x] for x in others]
         elif isinstance(others, np.ndarray) and others.ndim == 2:
@@ -547,7 +559,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
             sep = ""
 
         if isinstance(self._orig, ABCIndex):
-            data = Series(self._orig, index=self._orig)
+            data = Series(self._orig, index=self._orig, dtype=self._orig.dtype)
         else:  # Series
             data = self._orig
 
@@ -2145,7 +2157,7 @@ def startswith(self, pat, na=None):
         dtype: bool
         """
         result = self._data.array._str_startswith(pat, na=na)
-        return self._wrap_result(result, returns_string=False)
+        return self._wrap_result(result, returns_string=False, returns_bool=True)
 
     @forbid_nonstring_types(["bytes"])
     def endswith(self, pat, na=None):
@@ -2202,7 +2214,7 @@ def endswith(self, pat, na=None):
         dtype: bool
         """
         result = self._data.array._str_endswith(pat, na=na)
-        return self._wrap_result(result, returns_string=False)
+        return self._wrap_result(result, returns_string=False, returns_bool=True)
 
     @forbid_nonstring_types(["bytes"])
     def findall(self, pat, flags=0):

From 4204274322602e0ec8909c76a6a288370e1f8497 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 29 May 2021 19:01:51 -0700
Subject: [PATCH 2/4] revert explode edit

---
 pandas/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 96bf1858de39c..2f45a2adbdec7 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3980,7 +3980,7 @@ def explode(self, ignore_index: bool = False) -> Series:
         else:
             index = self.index.repeat(counts)
 
-        return self._constructor(values, index=index, name=self.name, dtype=object)
+        return self._constructor(values, index=index, name=self.name)
 
     def unstack(self, level=-1, fill_value=None) -> DataFrame:
         """

From 234697013018cef6c27491d6a508e3af728114e0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 1 Jun 2021 14:00:53 -0700
Subject: [PATCH 3/4] fix test_numpy tests

---
 pandas/core/strings/accessor.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index d6a70e7a4fcde..69fb4f6b61224 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -37,7 +37,6 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.base import NoNewAttributesMixin
-from pandas.core.construction import extract_array
 
 if TYPE_CHECKING:
     from pandas import Index
@@ -213,7 +212,11 @@ def _validate(data):
         # see _libs/lib.pyx for list of inferred types
         allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"]
 
-        data = extract_array(data, extract_numpy=True)
+        # TODO: avoid kludge for tests.extension.test_numpy
+        from pandas.core.internals.managers import _extract_array
+
+        data = _extract_array(data)
+
         values = getattr(data, "categories", data)  # categorical / normal
 
         inferred_dtype = lib.infer_dtype(values, skipna=True)

From 8ed20e6dafce124ae63307a0c45b1962051c5f21 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 7 Jun 2021 14:20:54 -0700
Subject: [PATCH 4/4] remove returns_bool

---
 pandas/core/strings/accessor.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 69fb4f6b61224..29d37599b0785 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -125,7 +125,7 @@ def _map_and_wrap(name, docstring):
     @forbid_nonstring_types(["bytes"], name=name)
     def wrapper(self):
         result = getattr(self._data.array, f"_str_{name}")()
-        return self._wrap_result(result, returns_bool=True)
+        return self._wrap_result(result)
 
     wrapper.__doc__ = docstring
     return wrapper
@@ -328,17 +328,16 @@ def cons_row(x):
             index = self._orig.index
             # This is a mess.
             dtype: DtypeObj | str | None
+            vdtype = getattr(result, "dtype", None)
             if self._is_string:
-                if returns_bool:
-                    dtype = "boolean"
+                if is_bool_dtype(vdtype):
+                    dtype = result.dtype
                 elif returns_string:
                     dtype = self._orig.dtype
                 else:
-                    dtype = result.dtype
-            elif returns_bool:
-                dtype = result.dtype  # i.e. bool
+                    dtype = vdtype
             else:
-                dtype = getattr(result, "dtype", None)
+                dtype = vdtype
 
             if expand:
                 cons = self._orig._constructor_expanddim
@@ -2160,7 +2159,7 @@ def startswith(self, pat, na=None):
         dtype: bool
         """
         result = self._data.array._str_startswith(pat, na=na)
-        return self._wrap_result(result, returns_string=False, returns_bool=True)
+        return self._wrap_result(result, returns_string=False)
 
     @forbid_nonstring_types(["bytes"])
     def endswith(self, pat, na=None):
@@ -2217,7 +2216,7 @@ def endswith(self, pat, na=None):
         dtype: bool
         """
         result = self._data.array._str_endswith(pat, na=na)
-        return self._wrap_result(result, returns_string=False, returns_bool=True)
+        return self._wrap_result(result, returns_string=False)
 
     @forbid_nonstring_types(["bytes"])
     def findall(self, pat, flags=0):