From 9e90d4e7b07ea4a6975af4153f2d595bd18919e2 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Fri, 10 Jul 2020 20:19:15 +0200 Subject: [PATCH 01/24] Implement BaseDtypeTests for ArrowStringDtype --- pandas/core/arrays/base.py | 6 +- pandas/core/arrays/string_arrow.py | 484 ++++++++++++++++++++ pandas/tests/extension/test_string_arrow.py | 125 +++++ setup.py | 2 +- 4 files changed, 615 insertions(+), 2 deletions(-) create mode 100644 pandas/core/arrays/string_arrow.py create mode 100644 pandas/tests/extension/test_string_arrow.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 0c8efda5fc588..e8d00807ad70f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -457,9 +457,13 @@ def astype(self, dtype, copy=True): NumPy ndarray with 'dtype' for its dtype. """ from pandas.core.arrays.string_ import StringDtype + from pandas.core.arrays.string_arrow import ArrowStringDtype dtype = pandas_dtype(dtype) - if isinstance(dtype, StringDtype): # allow conversion to StringArrays + # FIXME: Really hard-code here? + if isinstance( + dtype, (ArrowStringDtype, StringDtype) + ): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) return np.array(self, dtype=dtype, copy=copy) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py new file mode 100644 index 0000000000000..8248a3e91c0fe --- /dev/null +++ b/pandas/core/arrays/string_arrow.py @@ -0,0 +1,484 @@ +from collections.abc import Iterable +from typing import Any, Optional, Sequence, Tuple, Type, Union + +import numpy as np +import pyarrow as pa +import pyarrow.compute as pc + +from pandas._libs import missing as libmissing +from pandas._typing import ArrayLike + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.dtypes import register_extension_dtype + +import pandas as pd +from pandas.api.types import ( + is_array_like, + is_bool_dtype, + is_integer, + is_integer_dtype, + is_scalar, +) +from pandas.core.arrays.base import ExtensionArray +from pandas.core.indexers import check_array_indexer + + +def _as_pandas_scalar(arrow_scalar: pa.Scalar) -> Optional[str]: + scalar = arrow_scalar.as_py() + if scalar is None: + return libmissing.NA + else: + return scalar + + +@register_extension_dtype +class ArrowStringDtype(ExtensionDtype): + """ + Extension dtype for string data in a ``pyarrow.ChunkedArray``. + + .. versionadded:: 1.1.0 + + .. warning:: + + ArrowStringDtype is considered experimental. The implementation and + parts of the API may change without warning. + + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> pd.ArrowStringDtype() + ArrowStringDtype + """ + + name = "arrow_string" + + #: StringDtype.na_value uses pandas.NA + na_value = libmissing.NA + + @property + def type(self) -> Type[str]: + return str + + @classmethod + def construct_array_type(cls) -> Type["ArrowStringArray"]: + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ArrowStringArray + + def __hash__(self) -> int: + return hash("ArrowStringDtype") + + def __repr__(self) -> str: + return "ArrowStringDtype" + + def __from_arrow__( + self, array: Union["pa.Array", "pa.ChunkedArray"] + ) -> "ArrowStringArray": + """ + Construct StringArray from pyarrow Array/ChunkedArray. + """ + return ArrowStringArray(array) + + def __eq__(self, other) -> bool: + """Check whether 'other' is equal to self. 
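+
+        For example (illustrative only): ``ArrowStringDtype() == "arrow_string"``
+        and equality with another ``ArrowStringDtype`` instance are both True,
+        while ``ArrowStringDtype() == "string"`` is False.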
+ + By default, 'other' is considered equal if + * it's a string matching 'self.name'. + * it's an instance of this type. + + Parameters + ---------- + other : Any + + Returns + ------- + bool + """ + if isinstance(other, ArrowStringDtype): + return True + elif isinstance(other, str) and other == "arrow_string": + return True + else: + return False + + +class ArrowStringArray(ExtensionArray): + """ + Extension array for string data in a ``pyarrow.ChunkedArray``. + + .. versionadded:: 1.1.0 + + .. warning:: + + ArrowStringArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : pyarrow.Array or pyarrow.ChunkedArray + The array of data. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + array + The recommended function for creating a ArrowStringArray. + Series.str + The string methods are available on Series backed by + a ArrowStringArray. + + Notes + ----- + ArrowStringArray returns a BooleanArray for comparison methods. + + Examples + -------- + >>> pd.array(['This is', 'some text', None, 'data.'], dtype="arrow_string") + + ['This is', 'some text', , 'data.'] + Length: 4, dtype: arrow_string + """ + + def __init__(self, values): + if isinstance(values, pa.Array): + self.data = pa.chunked_array([values]) + elif isinstance(values, pa.ChunkedArray): + self.data = values + else: + raise ValueError(f"Unsupported type '{type(values)}' for ArrowStringArray") + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + # TODO(ARROW-9407): Accept pd.NA in Arrow + scalars_corrected = [None if pd.isna(x) else x for x in scalars] + return cls(pa.array(scalars_corrected, type=pa.string())) + + @property + def dtype(self) -> ArrowStringDtype: + """ + An instance of 'ArrowStringDtype'. + """ + return ArrowStringDtype() + + def __array__(self, *args, **kwargs) -> "np.ndarray": + """Correctly construct numpy arrays when passed to `np.asarray()`.""" + return self.data.__array__(*args, **kwargs) + + def __arrow_array__(self, type=None): + """Convert myself to a pyarrow Array or ChunkedArray.""" + return self.data + + @property + def size(self) -> int: + """ + Return the number of elements in this array. + + Returns + ------- + size : int + """ + return len(self.data) + + @property + def shape(self) -> Tuple[int]: + """Return the shape of the data.""" + # This may be patched by pandas to support pseudo-2D operations. + return (len(self.data),) + + @property + def ndim(self) -> int: + """Return the number of dimensions of the underlying data.""" + return 1 + + def __len__(self) -> int: + """ + Length of this array. + + Returns + ------- + length : int + """ + return len(self.data) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + def __getitem__(self, item): + # type (Any) -> Any + """Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + For slice ``key``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. 
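+        (e.g. ``arr[1:2]`` is a length-1 ``ArrowStringArray``, not a scalar).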
+ For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + item = check_array_indexer(self, item) + + if isinstance(item, Iterable): + if not is_array_like(item): + item = np.array(item) + if len(item) == 0: + return type(self)(pa.chunked_array([], type=pa.string())) + elif is_integer_dtype(item): + return self.take(item) + elif is_bool_dtype(item): + return type(self)(self.data.filter(item)) + else: + raise IndexError( + "Only integers, slices and integer or " + "boolean arrays are valid indices." + ) + elif is_integer(item): + if item < 0: + item += len(self) + if item >= len(self): + raise IndexError("index out of bounds") + + value = self.data[item] + if isinstance(value, pa.ChunkedArray): + return type(self)(value) + else: + return _as_pandas_scalar(value) + + def fillna(self, value=None, method=None, limit=None): + raise NotImplementedError("fillna") + + def _reduce(self, name, skipna=True, **kwargs): + if name in ["min", "max"]: + return getattr(self, name)(skipna=skipna) + + raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. + """ + return self.data.nbytes + + def isna(self) -> np.ndarray: + """ + Boolean NumPy array indicating if each value is missing. + + This should return a 1-D array the same length as 'self'. + """ + # TODO: Implement .to_numpy for ChunkedArray + return self.data.is_null().to_pandas().values + + def copy(self) -> ExtensionArray: + """ + Return a copy of the array. + + Parameters + ---------- + deep : bool, default False + Also copy the underlying data backing this array. + + Returns + ------- + ExtensionArray + """ + return type(self)(self.data) + + def __eq__(self, other: Any) -> ArrayLike: + """ + Return for `self == other` (element-wise equality). + """ + if isinstance(other, (pd.Series, pd.DataFrame, pd.Index)): + return NotImplemented + if isinstance(other, ArrowStringArray): + result = pc.equal(self.data, other.data) + elif is_scalar(other): + result = pc.equal(self.data, pa.scalar(other)) + else: + raise NotImplementedError("Neither scalar nor ArrowStringArray") + + # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray + return pd.array(result.to_pandas().values) + + def __setitem__(self, key, value): + # type: (Union[int, np.ndarray], Any) -> None + """Set one or more values inplace. + + Parameters + ---------- + key : int, ndarray, or slice + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. + * boolean ndarray + * slice object + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Returns + ------- + None + """ + key = check_array_indexer(self, key) + + if is_integer(key): + if not pd.api.types.is_scalar(value): + raise ValueError("Must pass scalars with scalar indexer") + elif pd.isna(value): + value = None + elif not isinstance(value, str): + raise ValueError("Scalar must be NA or str") + + # Slice data and insert inbetween + new_data = [ + *self.data[0:key].chunks, + pa.array([value], type=pa.string()), + *self.data[(key + 1) :].chunks, + ] + self.data = pa.chunked_array(new_data) + else: + # Convert to integer indices and iteratively assign. + # TODO: Make a faster variant of this in Arrow upstream. + # This is probably extremely slow. 
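+            # Rough illustration of the cost (hypothetical values):
+            #
+            #   arr = pd.array(["a", "b", "c"], dtype="arrow_string")
+            #   arr[np.array([True, False, True])] = "x"
+            #
+            # decomposes into one scalar __setitem__ per True position, and
+            # each scalar assignment below rebuilds the ChunkedArray around
+            # the replaced element.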
+ + # Convert all possible input key types to an array of integers + if is_bool_dtype(key): + # TODO(ARROW-9430): Directly support setitem(booleans) + key_array = np.argwhere(key).flatten() + elif isinstance(key, slice): + key_array = np.array(range(len(self))[key]) + else: + # TODO(ARROW-9431): Directly support setitem(integers) + key_array = np.asanyarray(key) + + if pd.api.types.is_scalar(value): + value = np.broadcast_to(value, len(key_array)) + else: + value = np.asarray(value) + + if len(key_array) != len(value): + raise ValueError("Length of indexer and values mismatch") + + for k, v in zip(key_array, value): + self[k] = v + + def take( + self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None + ) -> "ExtensionArray": + """ + Take elements from an array. + + Parameters + ---------- + indices : sequence of int + Indices to be taken. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + Returns + ------- + ExtensionArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + + See Also + -------- + numpy.take + api.extensions.take + + Notes + ----- + ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, + ``iloc``, when `indices` is a sequence of values. Additionally, + it's called by :meth:`Series.reindex`, or any other method + that causes realignment, with a `fill_value`. + """ + # TODO: Remove once we got rid of the (indices < 0) check + if not is_array_like(indices): + indices_array = np.asanyarray(indices) + else: + indices_array = indices + + if len(self.data) == 0 and (indices_array >= 0).any(): + raise IndexError("cannot do a non-empty take") + if indices_array.max() >= len(self.data): + raise IndexError("out of bounds value in 'indices'.") + + if allow_fill: + if (indices_array < 0).any(): + # TODO(ARROW-9433): Treat negative indices as NULL + indices_array = pa.array(indices_array, mask=indices_array < 0) + result = self.data.take(indices_array) + if pd.isna(fill_value): + return type(self)(result) + return type(self)(pc.fill_null(result, pa.scalar(fill_value))) + else: + # Nothing to fill + return type(self)(self.data.take(indices)) + else: # allow_fill=False + # TODO(ARROW-9432): Treat negative indices as indices from the right. 
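+            # For example, with len(self) == 4, indices [-1, 1] become [3, 1]
+            # below, mirroring NumPy's wraparound rule for negative positions.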
+ if (indices_array < 0).any(): + # Don't modify in-place + indices_array = np.copy(indices_array) + indices_array[indices_array < 0] += len(self.data) + return type(self)(self.data.take(indices_array)) diff --git a/pandas/tests/extension/test_string_arrow.py b/pandas/tests/extension/test_string_arrow.py new file mode 100644 index 0000000000000..437d51060fb7f --- /dev/null +++ b/pandas/tests/extension/test_string_arrow.py @@ -0,0 +1,125 @@ +import string + +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.string_arrow import ArrowStringArray, ArrowStringDtype +from pandas.tests.extension import base + + +@pytest.fixture +def dtype(): + return ArrowStringDtype() + + +@pytest.fixture +def data(): + strings = np.random.choice(list(string.ascii_letters), size=100) + while strings[0] == strings[1]: + strings = np.random.choice(list(string.ascii_letters), size=100) + + return ArrowStringArray._from_sequence(strings) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return ArrowStringArray._from_sequence([pd.NA, "A"]) + + +@pytest.fixture +def data_for_sorting(): + return ArrowStringArray._from_sequence(["B", "C", "A"]) + + +@pytest.fixture +def data_missing_for_sorting(): + return ArrowStringArray._from_sequence(["B", pd.NA, "A"]) + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(): + return ArrowStringArray._from_sequence(["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + @pytest.mark.xfail(reason="Fails until implement, remove before merge") + def test_view(self, data): + base.BaseInterfaceTests.test_view(self, data) + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +# class TestReshaping(base.BaseReshapingTests): +# pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +# class TestMissing(base.BaseMissingTests): +# pass + + +# class TestNoReduce(base.BaseNoReduceTests): +# @pytest.mark.parametrize("skipna", [True, False]) +# def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): +# op_name = all_numeric_reductions +# +# if op_name in ["min", "max"]: +# return None +# +# s = pd.Series(data) +# with pytest.raises(TypeError): +# getattr(s, op_name)(skipna=skipna) + + +# class TestMethods(base.BaseMethodsTests): +# @pytest.mark.skip(reason="returns nullable") +# def test_value_counts(self, all_data, dropna): +# return super().test_value_counts(all_data, dropna) + + +# class TestCasting(base.BaseCastingTests): +# pass + + +# class TestComparisonOps(base.BaseComparisonOpsTests): +# def _compare_other(self, s, data, op_name, other): +# result = getattr(s, op_name)(other) +# expected = getattr(s.astype(object), op_name)(other).astype("boolean") +# self.assert_series_equal(result, expected) + +# def test_compare_scalar(self, data, all_compare_operators): +# op_name = all_compare_operators +# s = pd.Series(data) +# self._compare_other(s, data, op_name, "abc") + + +# class TestParsing(base.BaseParsingTests): +# pass + + +# class TestPrinting(base.BasePrintingTests): +# pass + + +# class TestGroupBy(base.BaseGroupbyTests): +# pass diff --git a/setup.py b/setup.py index f6f0cd9aabc0e..4033ea2935de5 100755 --- a/setup.py +++ b/setup.py @@ -432,7 +432,7 @@ def run(self): extra_compile_args.append("/Z7") extra_link_args.append("/DEBUG") else: - extra_compile_args = ["-Werror"] + extra_compile_args = 
[] extra_link_args = [] if debugging_symbols_requested: extra_compile_args.append("-g") From 92f1d2635ea6fc34da38383de852d194b864bec0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Sep 2020 15:32:45 -0500 Subject: [PATCH 02/24] Refactor to use parametrized StringDtype --- pandas/core/arrays/base.py | 13 +- pandas/core/arrays/string_.py | 90 +++++++++- pandas/core/arrays/string_arrow.py | 166 +++++++----------- pandas/core/config_init.py | 13 ++ pandas/core/strings.py | 10 +- .../tests/arrays/string_/test_string_arrow.py | 26 +++ pandas/tests/extension/arrow/test_string.py | 7 +- pandas/tests/extension/test_string_arrow.py | 103 +++++++---- setup.py | 2 +- 9 files changed, 261 insertions(+), 169 deletions(-) create mode 100644 pandas/tests/arrays/string_/test_string_arrow.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e8d00807ad70f..4258bcc956cdd 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -356,6 +356,8 @@ def __ne__(self, other: Any) -> ArrayLike: """ Return for `self != other` (element-wise in-equality). """ + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented return ~(self == other) def to_numpy( @@ -457,13 +459,10 @@ def astype(self, dtype, copy=True): NumPy ndarray with 'dtype' for its dtype. """ from pandas.core.arrays.string_ import StringDtype - from pandas.core.arrays.string_arrow import ArrowStringDtype dtype = pandas_dtype(dtype) # FIXME: Really hard-code here? - if isinstance( - dtype, (ArrowStringDtype, StringDtype) - ): # allow conversion to StringArrays + if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) return np.array(self, dtype=dtype, copy=copy) @@ -928,9 +927,9 @@ def take( from the right (the default). This is similar to :func:`numpy.take`. - * True: negative values in `indices` indicate - missing values. These values are set to `fill_value`. Any other - other negative values raise a ``ValueError``. + * True: ``-1`` in `indices` indicate missing values. + These values are set to `fill_value`. Any other other negative + value raise a ``ValueError``. fill_value : any, optional Fill value to use for NA-indices when `allow_fill` is True. diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 381968f9724b6..0e7c5a8036bcf 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1,8 +1,10 @@ import operator -from typing import TYPE_CHECKING, Type, Union +from typing import TYPE_CHECKING, Any, Type, Union import numpy as np +from pandas._config import get_option + from pandas._libs import lib, missing as libmissing from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype @@ -50,17 +52,83 @@ class StringDtype(ExtensionDtype): StringDtype """ - name = "string" - #: StringDtype.na_value uses pandas.NA na_value = libmissing.NA + _metadata = ("storage",) + + def __init__(self, storage=None): + if storage is None: + storage = get_option("mode.string_storage") + if storage not in {"python", "pyarrow"}: + raise ValueError( + f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." + ) + self.storage = storage + + @property + def name(self): + return f"StringDtype[{self.storage}]" @property def type(self) -> Type[str]: return str @classmethod - def construct_array_type(cls) -> Type["StringArray"]: + def construct_from_string(cls, string): + """ + Construct a StringDtype from a string. 
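+
+        For example (a sketch; valid options are listed in the table below)::
+
+            >>> pd.StringDtype.construct_from_string("string[pyarrow]").storage
+            'pyarrow'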
+
+        Parameters
+        ----------
+        string : str
+            The type of the name. The storage type will be taken from `string`.
+            Valid options and their storage types are
+
+            ========================== ==============
+            string                     result storage
+            ========================== ==============
+            ``'string'``               global default
+            ``'string[python]'``       python
+            ``'StringDtype[python]'``  python
+            ``'string[pyarrow]'``      pyarrow
+            ``'StringDtype[pyarrow]'`` pyarrow
+            ========================== ==============
+
+        Returns
+        -------
+        StringDtype
+
+        Raises
+        ------
+        TypeError
+            If the string is not a valid option.
+
+        """
+        if not isinstance(string, str):
+            raise TypeError(
+                f"'construct_from_string' expects a string, got {type(string)}"
+            )
+        if string == "string":
+            # TODO: use global default
+            return cls()
+        elif string in {"string[python]", "StringDtype[python]"}:
+            return cls(storage="python")
+        elif string in {"string[pyarrow]", "StringDtype[pyarrow]"}:
+            return cls(storage="pyarrow")
+        else:
+            raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, str) and other == "string":
+            return True
+        return super().__eq__(other)
+
+    def __hash__(self) -> int:
+        # custom __eq__ so have to override __hash__
+        return super().__hash__()
+
+    # XXX: this is a classmethod, but we need to know the storage type.
+    def construct_array_type(self) -> Type["StringArray"]:
         """
         Return the array type associated with this dtype.
@@ -68,10 +136,15 @@ def construct_array_type(cls) -> Type["StringArray"]:
         -------
         type
         """
-        return StringArray
+        from .string_arrow import ArrowStringArray
+
+        if self.storage == "python":
+            return StringArray
+        else:
+            return ArrowStringArray
 
-    def __repr__(self) -> str:
-        return "StringDtype"
+    def __repr__(self):
+        return self.name
 
     def __from_arrow__(
         self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
@@ -80,6 +153,7 @@ def __from_arrow__(
         Construct StringArray from pyarrow Array/ChunkedArray. 
""" import pyarrow # noqa: F811 + from .string_arrow import ArrowStringArray if isinstance(array, pyarrow.Array): chunks = [array] @@ -93,7 +167,7 @@ def __from_arrow__( str_arr = StringArray._from_sequence(np.array(arr)) results.append(str_arr) - return StringArray._concat_same_type(results) + return ArrowStringArray._concat_same_type(results) class StringArray(PandasArray): diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 8248a3e91c0fe..c0831a65b3644 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -1,5 +1,5 @@ from collections.abc import Iterable -from typing import Any, Optional, Sequence, Tuple, Type, Union +from typing import Any, Optional, Sequence, Tuple, Union import numpy as np import pyarrow as pa @@ -8,18 +8,19 @@ from pandas._libs import missing as libmissing from pandas._typing import ArrayLike -from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.missing import isna -import pandas as pd from pandas.api.types import ( is_array_like, is_bool_dtype, + is_int64_dtype, is_integer, is_integer_dtype, is_scalar, ) +from pandas.core.algorithms import factorize from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.string_ import StringDtype from pandas.core.indexers import check_array_indexer @@ -31,89 +32,6 @@ def _as_pandas_scalar(arrow_scalar: pa.Scalar) -> Optional[str]: return scalar -@register_extension_dtype -class ArrowStringDtype(ExtensionDtype): - """ - Extension dtype for string data in a ``pyarrow.ChunkedArray``. - - .. versionadded:: 1.1.0 - - .. warning:: - - ArrowStringDtype is considered experimental. The implementation and - parts of the API may change without warning. - - Attributes - ---------- - None - - Methods - ------- - None - - Examples - -------- - >>> pd.ArrowStringDtype() - ArrowStringDtype - """ - - name = "arrow_string" - - #: StringDtype.na_value uses pandas.NA - na_value = libmissing.NA - - @property - def type(self) -> Type[str]: - return str - - @classmethod - def construct_array_type(cls) -> Type["ArrowStringArray"]: - """ - Return the array type associated with this dtype. - - Returns - ------- - type - """ - return ArrowStringArray - - def __hash__(self) -> int: - return hash("ArrowStringDtype") - - def __repr__(self) -> str: - return "ArrowStringDtype" - - def __from_arrow__( - self, array: Union["pa.Array", "pa.ChunkedArray"] - ) -> "ArrowStringArray": - """ - Construct StringArray from pyarrow Array/ChunkedArray. - """ - return ArrowStringArray(array) - - def __eq__(self, other) -> bool: - """Check whether 'other' is equal to self. - - By default, 'other' is considered equal if - * it's a string matching 'self.name'. - * it's an instance of this type. - - Parameters - ---------- - other : Any - - Returns - ------- - bool - """ - if isinstance(other, ArrowStringDtype): - return True - elif isinstance(other, str) and other == "arrow_string": - return True - else: - return False - - class ArrowStringArray(ExtensionArray): """ Extension array for string data in a ``pyarrow.ChunkedArray``. 
@@ -165,19 +83,20 @@ def __init__(self, values):
             self.data = values
         else:
             raise ValueError(f"Unsupported type '{type(values)}' for ArrowStringArray")
+        self._dtype = StringDtype(storage="pyarrow")
 
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
         # TODO(ARROW-9407): Accept pd.NA in Arrow
-        scalars_corrected = [None if pd.isna(x) else x for x in scalars]
+        scalars_corrected = [None if isna(x) else x for x in scalars]
         return cls(pa.array(scalars_corrected, type=pa.string()))
 
     @property
-    def dtype(self) -> ArrowStringDtype:
+    def dtype(self) -> StringDtype:
         """
-        An instance of 'ArrowStringDtype'.
+        An instance of 'StringDtype'.
         """
-        return ArrowStringDtype()
+        return self._dtype
 
     def __array__(self, *args, **kwargs) -> "np.ndarray":
         """Correctly construct numpy arrays when passed to `np.asarray()`."""
@@ -276,15 +195,6 @@ def __getitem__(self, item):
         else:
             return _as_pandas_scalar(value)
 
-    def fillna(self, value=None, method=None, limit=None):
-        raise NotImplementedError("fillna")
-
-    def _reduce(self, name, skipna=True, **kwargs):
-        if name in ["min", "max"]:
-            return getattr(self, name)(skipna=skipna)
-
-        raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
-
     @property
     def nbytes(self) -> int:
         """
@@ -320,7 +230,9 @@ def __eq__(self, other: Any) -> ArrayLike:
         """
         Return for `self == other` (element-wise equality).
         """
-        if isinstance(other, (pd.Series, pd.DataFrame, pd.Index)):
+        from pandas import array, Series, DataFrame, Index
+
+        if isinstance(other, (Series, DataFrame, Index)):
             return NotImplemented
         if isinstance(other, ArrowStringArray):
             result = pc.equal(self.data, other.data)
@@ -330,7 +242,7 @@ def __eq__(self, other: Any) -> ArrayLike:
             raise NotImplementedError("Neither scalar nor ArrowStringArray")
 
         # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
-        return pd.array(result.to_pandas().values)
+        return array(result.to_pandas().values, dtype="boolean")
 
     def __setitem__(self, key, value):
         # type: (Union[int, np.ndarray], Any) -> None
@@ -357,9 +269,9 @@ def __setitem__(self, key, value):
         key = check_array_indexer(self, key)
 
         if is_integer(key):
-            if not pd.api.types.is_scalar(value):
+            if not is_scalar(value):
                 raise ValueError("Must pass scalars with scalar indexer")
-            elif pd.isna(value):
+            elif isna(value):
                 value = None
             elif not isinstance(value, str):
                 raise ValueError("Scalar must be NA or str")
@@ -386,7 +298,7 @@ def __setitem__(self, key, value):
             # TODO(ARROW-9431): Directly support setitem(integers)
             key_array = np.asanyarray(key)
 
-        if pd.api.types.is_scalar(value):
+        if is_scalar(value):
             value = np.broadcast_to(value, len(key_array))
         else:
             value = np.asarray(value)
@@ -461,15 +373,20 @@ def take(
 
         if len(self.data) == 0 and (indices_array >= 0).any():
             raise IndexError("cannot do a non-empty take")
-        if indices_array.max() >= len(self.data):
+        if len(indices_array) > 0 and indices_array.max() >= len(self.data):
             raise IndexError("out of bounds value in 'indices'.")
 
         if allow_fill:
             if (indices_array < 0).any():
+                if indices_array.min() < -1:
+                    raise ValueError(
+                        "'indices' contains negative values other than "
+                        "-1 with 'allow_fill=True'." 
+ ) # TODO(ARROW-9433): Treat negative indices as NULL indices_array = pa.array(indices_array, mask=indices_array < 0) result = self.data.take(indices_array) - if pd.isna(fill_value): + if isna(fill_value): return type(self)(result) return type(self)(pc.fill_null(result, pa.scalar(fill_value))) else: @@ -482,3 +399,38 @@ def take( indices_array = np.copy(indices_array) indices_array[indices_array < 0] += len(self.data) return type(self)(self.data.take(indices_array)) + + def value_counts(self, dropna=True): + from pandas import Series + + if dropna: + na = self.isna() + self = self[~na] + counts = self.data.value_counts() + return Series(counts.field(1), counts.field(0)) + + def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray"]: + # see https://github.com/xhochy/fletcher/blob/master/fletcher/base.py + # doesn't handle dictionary types. + if self.data.num_chunks == 1: + encoded = self.data.chunk(0).dictionary_encode() + indices = encoded.indices.to_pandas() + if indices.dtype.kind == "f": + indices[np.isnan(indices)] = na_sentinel + indices = indices.astype(int) + if not is_int64_dtype(indices): + indices = indices.astype(np.int64) + return indices.values, type(self)(encoded.dictionary) + else: + np_array = self.data.to_pandas().values + return factorize(np_array, na_sentinel=na_sentinel) + + @classmethod + def _concat_same_type( + cls, to_concat: Sequence["ArrowStringArray"] + ) -> "ArrowStringArray": + return cls( + pa.chunked_array( + [array for ea in to_concat for array in ea.data.iterchunks()] + ) + ) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index bfe20551cbcfc..c7e0e7ef19010 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -504,6 +504,19 @@ def use_inf_as_na_cb(key): ) +string_storage_doc = """ +: string + The default storage for StringDtype. +""" + +with cf.config_prefix("mode"): + cf.register_option( + "string_storage", + "python", + string_storage_doc, + validator=is_one_of_factory(["python", "pyarrow"]), + ) + # Set up the io.excel specific reader configuration. reader_engine_doc = """ : string diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6702bf519c52e..59aa8fc5cfa0e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -901,8 +901,10 @@ def _result_dtype(arr): # workaround #27953 # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails # when the list of values is empty. 
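+    # With the parametrized StringDtype, the str accessor keeps the storage
+    # of the input: e.g. a Series backed by "string[pyarrow]" data gets
+    # "string[pyarrow]" results back (illustrative; see the change below).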
-    if arr.dtype.name == "string":
-        return "string"
+    from pandas.core.arrays.string_ import StringDtype
+
+    if isinstance(arr.dtype, StringDtype):
+        return arr.dtype
     else:
         return object
 
@@ -2097,9 +2099,11 @@ class StringMethods(NoNewAttributesMixin):
     """
 
     def __init__(self, data):
+        from pandas.core.arrays.string_ import StringDtype
+
         self._inferred_dtype = self._validate(data)
         self._is_categorical = is_categorical_dtype(data.dtype)
-        self._is_string = data.dtype.name == "string"
+        self._is_string = isinstance(data.dtype, StringDtype)
 
         # ._values.categories works for both Series/Index
         self._parent = data._values.categories if self._is_categorical else data
diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py
new file mode 100644
index 0000000000000..40e3f21670ea0
--- /dev/null
+++ b/pandas/tests/arrays/string_/test_string_arrow.py
@@ -0,0 +1,26 @@
+import pytest
+
+import pandas as pd
+import pandas.testing as tm
+
+
+def test_eq_all_na():
+    a = pd.array([pd.NA, pd.NA], dtype=pd.StringDtype("pyarrow"))
+    result = a == a
+    expected = pd.array([pd.NA, pd.NA], dtype="boolean")
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_config():
+    # python by default
+    assert pd.StringDtype().storage == "python"
+    arr = pd.array(["a", "b"])
+    assert arr.dtype.storage == "python"
+
+    with pd.option_context("mode.string_storage", "pyarrow"):
+        assert pd.StringDtype().storage == "pyarrow"
+        arr = pd.array(["a", "b"])
+        assert arr.dtype.storage == "pyarrow"
+
+    with pytest.raises(ValueError):
+        pd.options.mode.string_storage = "foo"
diff --git a/pandas/tests/extension/arrow/test_string.py b/pandas/tests/extension/arrow/test_string.py
index abd5c1f386dc5..f32f1e415ddc7 100644
--- a/pandas/tests/extension/arrow/test_string.py
+++ b/pandas/tests/extension/arrow/test_string.py
@@ -4,10 +4,9 @@
 
 pytest.importorskip("pyarrow", minversion="0.13.0")
 
-from .arrays import ArrowStringDtype  # isort:skip
-
 
 def test_constructor_from_list():
     # GH 27673
-    result = pd.Series(["E"], dtype=ArrowStringDtype())
-    assert isinstance(result.dtype, ArrowStringDtype)
+    result = pd.Series(["E"], dtype=pd.StringDtype(storage="pyarrow"))
+    assert isinstance(result.dtype, pd.StringDtype)
+    assert result.dtype.storage == "pyarrow"
diff --git a/pandas/tests/extension/test_string_arrow.py b/pandas/tests/extension/test_string_arrow.py
index 437d51060fb7f..848e8a435b530 100644
--- a/pandas/tests/extension/test_string_arrow.py
+++ b/pandas/tests/extension/test_string_arrow.py
@@ -4,13 +4,13 @@
 import pytest
 
 import pandas as pd
-from pandas.core.arrays.string_arrow import ArrowStringArray, ArrowStringDtype
+from pandas.core.arrays.string_arrow import ArrowStringArray
 from pandas.tests.extension import base
 
 
 @pytest.fixture
 def dtype():
-    return ArrowStringDtype()
+    return pd.StringDtype(storage="pyarrow")
 
 
 @pytest.fixture
@@ -62,64 +62,89 @@ class TestConstructors(base.BaseConstructorsTests):
     pass
 
 
-# class TestReshaping(base.BaseReshapingTests):
-#     pass
+class TestReshaping(base.BaseReshapingTests):
+    pass
 
 
 class TestGetitem(base.BaseGetitemTests):
-    pass
+    @pytest.mark.xfail(
+        reason="pyarrow.lib.ArrowNotImplementedError: Function "
+        "fill_null has no kernel matching input types "
+        "(array[string], scalar[string])"
+    )
+    def test_take_non_na_fill_value(self, data_missing):
+        super().test_take_non_na_fill_value(data_missing)
+
+    @pytest.mark.xfail(
+        reason="pyarrow.lib.ArrowNotImplementedError: Function fill_null has no "
+        "kernel matching input types 
(array[string], scalar[string])"
+    )
+    def test_reindex_non_na_fill_value(self, data_missing):
+        super().test_reindex_non_na_fill_value(data_missing)
 
 
 class TestSetitem(base.BaseSetitemTests):
+    @pytest.mark.xfail(reason="TODO")
+    def test_setitem_preserves_views(self, data):
+        # Unclear where the issue is (pyarrow getitem, our getitem, our slice)
+        # and what to do here.
+        super().test_setitem_preserves_views(data)
+
+
+class TestMissing(base.BaseMissingTests):
     pass
 
 
-# class TestMissing(base.BaseMissingTests):
-#     pass
+class TestNoReduce(base.BaseNoReduceTests):
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
+        op_name = all_numeric_reductions
 
+        if op_name in ["min", "max"]:
+            return None
 
-# class TestNoReduce(base.BaseNoReduceTests):
-#     @pytest.mark.parametrize("skipna", [True, False])
-#     def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
-#         op_name = all_numeric_reductions
-#
-#         if op_name in ["min", "max"]:
-#             return None
-#
-#         s = pd.Series(data)
-#         with pytest.raises(TypeError):
-#             getattr(s, op_name)(skipna=skipna)
+        s = pd.Series(data)
+        with pytest.raises(TypeError):
+            getattr(s, op_name)(skipna=skipna)
 
 
-# class TestMethods(base.BaseMethodsTests):
-#     @pytest.mark.skip(reason="returns nullable")
-#     def test_value_counts(self, all_data, dropna):
-#         return super().test_value_counts(all_data, dropna)
+class TestMethods(base.BaseMethodsTests):
+    @pytest.mark.skip(reason="returns nullable")
+    def test_value_counts(self, all_data, dropna):
+        return super().test_value_counts(all_data, dropna)
 
 
-# class TestCasting(base.BaseCastingTests):
-#     pass
+class TestCasting(base.BaseCastingTests):
+    pass
 
 
-# class TestComparisonOps(base.BaseComparisonOpsTests):
-#     def _compare_other(self, s, data, op_name, other):
-#         result = getattr(s, op_name)(other)
-#         expected = getattr(s.astype(object), op_name)(other).astype("boolean")
-#         self.assert_series_equal(result, expected)
+class TestComparisonOps(base.BaseComparisonOpsTests):
+    def _compare_other(self, s, data, op_name, other):
+        if op_name not in {"__eq__", "__ne__"}:
+            pytest.skip(f"{op_name} is not implemented.")
+        result = getattr(s, op_name)(other)
+        expected = getattr(s.astype(object), op_name)(other).astype("boolean")
+        self.assert_series_equal(result, expected)
 
-#     def test_compare_scalar(self, data, all_compare_operators):
-#         op_name = all_compare_operators
-#         s = pd.Series(data)
-#         self._compare_other(s, data, op_name, "abc")
+    def test_compare_scalar(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+        self._compare_other(s, data, op_name, "abc")
 
+    def test_compare_array(self, data, all_compare_operators):
+        op_name = all_compare_operators
+        s = pd.Series(data)
+        other = pd.Series([data[0]] * len(data), dtype=data.dtype)
+        self._compare_other(s, data, op_name, other)
 
-# class TestParsing(base.BaseParsingTests):
-#     pass
+
+class TestParsing(base.BaseParsingTests):
+    pass
 
 
-# class TestPrinting(base.BasePrintingTests):
-#     pass
+class TestPrinting(base.BasePrintingTests):
+    pass
 
 
-# class TestGroupBy(base.BaseGroupbyTests):
-#     pass
+class TestGroupBy(base.BaseGroupbyTests):
+    pass
diff --git a/setup.py b/setup.py
index 4033ea2935de5..f6f0cd9aabc0e 100755
--- a/setup.py
+++ b/setup.py
@@ -432,7 +432,7 @@ def run(self):
         extra_compile_args.append("/Z7")
         extra_link_args.append("/DEBUG")
     else:
-        extra_compile_args = []
+        extra_compile_args = ["-Werror"]
         extra_link_args = []
         if 
debugging_symbols_requested: extra_compile_args.append("-g") From 00096f099a9b26795c709684b4d241483a77e08d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 8 Sep 2020 11:44:42 -0500 Subject: [PATCH 03/24] wip --- pandas/core/arrays/string_.py | 97 +- pandas/core/arrays/string_arrow.py | 14 + pandas/core/strings.py | 3657 --------------------------- pandas/core/strings/__init__.py | 1 + pandas/core/strings/accessor.py | 2752 ++++++++++++++++++++ pandas/core/strings/base.py | 33 + pandas/core/strings/object_array.py | 145 ++ pandas/core/strings_.py | 775 ++++++ 8 files changed, 3814 insertions(+), 3660 deletions(-) delete mode 100644 pandas/core/strings.py create mode 100644 pandas/core/strings/__init__.py create mode 100644 pandas/core/strings/accessor.py create mode 100644 pandas/core/strings/base.py create mode 100644 pandas/core/strings/object_array.py create mode 100644 pandas/core/strings_.py diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 0e7c5a8036bcf..d8c76d0615d45 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1,23 +1,33 @@ import operator -from typing import TYPE_CHECKING, Any, Type, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Type, Union import numpy as np from pandas._config import get_option from pandas._libs import lib, missing as libmissing +from pandas._typing import ArrayLike, Dtype from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype -from pandas.core.dtypes.common import pandas_dtype -from pandas.core.dtypes.inference import is_array_like +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_integer_dtype, + is_object_dtype, + is_string_dtype, + pandas_dtype, +) from pandas import compat from pandas.core import ops +from pandas.core.accessor import CachedAccessor from pandas.core.arrays import IntegerArray, PandasArray +from pandas.core.arrays.boolean import BooleanDtype from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna +from pandas.core.strings.base import BaseStringArrayMethods if TYPE_CHECKING: import pyarrow # noqa: F401 @@ -170,6 +180,86 @@ def __from_arrow__( return ArrowStringArray._concat_same_type(results) +def _map_stringarray( + func: Callable[[str], Any], + arr: "StringArray", + na_value: Any = libmissing.NA, + dtype: Dtype = StringDtype(), +) -> ArrayLike: + """ + Map a callable over valid elements of a StringArray. + + Parameters + ---------- + func : Callable[[str], Any] + Apply to each valid element. + arr : StringArray + na_value : Any + The value to use for missing values. By default, this is + the original value (NA). + dtype : Dtype + The result dtype to use. Specifying this avoids an intermediate + object-dtype allocation. + + Returns + ------- + ArrayLike + An ExtensionArray for integer or string dtypes, otherwise + an ndarray. 
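+
+    Examples
+    --------
+    A sketch of the intended behaviour (the exact repr depends on the
+    configured string storage, hence the doctest is skipped):
+
+    >>> arr = pd.array(["a", None, "c"], dtype="string")
+    >>> _map_stringarray(str.upper, arr)  # doctest: +SKIP
+    <StringArray>
+    ['A', <NA>, 'C']
+    Length: 3, dtype: string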
+ + """ + from pandas.arrays import BooleanArray, IntegerArray, StringArray + + mask = isna(arr) + + assert isinstance(arr, StringArray) + arr = np.asarray(arr) + if na_value is None: + na_value = libmissing.NA + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: Union[Type[IntegerArray], Type[BooleanArray]] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + func, + mask.view("uint8"), + convert=False, + na_value=na_value, + dtype=np.dtype(dtype), + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. StringDtype + result = lib.map_infer_mask( + arr, func, mask.view("uint8"), convert=False, na_value=na_value + ) + return StringArray(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. + return lib.map_infer_mask(arr, func, mask.view("uint8")) + + +class StringArrayMethods(BaseStringArrayMethods): + def _map(self, f, na_result=libmissing.NA, dtype=StringDtype()): + return _map_stringarray(f, self._array, na_result, dtype) + + class StringArray(PandasArray): """ Extension array for string data. @@ -417,6 +507,7 @@ def _add_arithmetic_ops(cls): cls.__rmul__ = cls._create_arithmetic_method(ops.rmul) _create_comparison_method = _create_arithmetic_method + _str = CachedAccessor("str", StringArrayMethods) StringArray._add_arithmetic_ops() diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index c0831a65b3644..65aa38db4f6f6 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -18,6 +18,7 @@ is_integer_dtype, is_scalar, ) +from pandas.core.accessor import CachedAccessor from pandas.core.algorithms import factorize from pandas.core.arrays.base import ExtensionArray from pandas.core.arrays.string_ import StringDtype @@ -32,6 +33,17 @@ def _as_pandas_scalar(arrow_scalar: pa.Scalar) -> Optional[str]: return scalar +class ArrowStringMethods: + def __init__(self, arr): + self._data = arr + + def upper(self): + import pyarrow.compute as pc + + result = pc.utf8_upper(self._data.data) + return ArrowStringArray(result) + + class ArrowStringArray(ExtensionArray): """ Extension array for string data in a ``pyarrow.ChunkedArray``. 
@@ -434,3 +446,5 @@ def _concat_same_type( [array for ea in to_concat for array in ea.data.iterchunks()] ) ) + + str = CachedAccessor("str", ArrowStringMethods) diff --git a/pandas/core/strings.py b/pandas/core/strings.py deleted file mode 100644 index 59aa8fc5cfa0e..0000000000000 --- a/pandas/core/strings.py +++ /dev/null @@ -1,3657 +0,0 @@ -import codecs -from functools import wraps -import re -import textwrap -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Pattern, Type, Union -import warnings - -import numpy as np - -import pandas._libs.lib as lib -import pandas._libs.missing as libmissing -import pandas._libs.ops as libops -from pandas._typing import ArrayLike, Dtype, Scalar -from pandas.util._decorators import Appender - -from pandas.core.dtypes.common import ( - ensure_object, - is_bool_dtype, - is_categorical_dtype, - is_extension_array_dtype, - is_integer, - is_integer_dtype, - is_list_like, - is_object_dtype, - is_re, - is_scalar, - is_string_dtype, -) -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCIndexClass, - ABCMultiIndex, - ABCSeries, -) -from pandas.core.dtypes.missing import isna - -from pandas.core.algorithms import take_1d -from pandas.core.base import NoNewAttributesMixin -from pandas.core.construction import extract_array - -if TYPE_CHECKING: - from pandas.arrays import StringArray - -_cpython_optimized_encoders = ( - "utf-8", - "utf8", - "latin-1", - "latin1", - "iso-8859-1", - "mbcs", - "ascii", -) -_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") - -_shared_docs: Dict[str, str] = dict() - - -def cat_core(list_of_columns: List, sep: str): - """ - Auxiliary function for :meth:`str.cat` - - Parameters - ---------- - list_of_columns : list of numpy arrays - List of arrays to be concatenated with sep; - these arrays may not contain NaNs! - sep : string - The separator string for concatenating the columns. - - Returns - ------- - nd.array - The concatenation of list_of_columns with sep. - """ - if sep == "": - # no need to interleave sep if it is empty - arr_of_cols = np.asarray(list_of_columns, dtype=object) - return np.sum(arr_of_cols, axis=0) - list_with_sep = [sep] * (2 * len(list_of_columns) - 1) - list_with_sep[::2] = list_of_columns - arr_with_sep = np.asarray(list_with_sep, dtype=object) - return np.sum(arr_with_sep, axis=0) - - -def cat_safe(list_of_columns: List, sep: str): - """ - Auxiliary function for :meth:`str.cat`. - - Same signature as cat_core, but handles TypeErrors in concatenation, which - happen if the arrays in list_of columns have the wrong dtypes or content. - - Parameters - ---------- - list_of_columns : list of numpy arrays - List of arrays to be concatenated with sep; - these arrays may not contain NaNs! - sep : string - The separator string for concatenating the columns. - - Returns - ------- - nd.array - The concatenation of list_of_columns with sep. - """ - try: - result = cat_core(list_of_columns, sep) - except TypeError: - # if there are any non-string values (wrong dtype or hidden behind - # object dtype), np.sum will fail; catch and return with better message - for column in list_of_columns: - dtype = lib.infer_dtype(column, skipna=True) - if dtype not in ["string", "empty"]: - raise TypeError( - "Concatenation requires list-likes containing only " - "strings (or missing values). 
Offending values found in " - f"column {dtype}" - ) from None - return result - - -def _na_map(f, arr, na_result=None, dtype=np.dtype(object)): - if is_extension_array_dtype(arr.dtype): - if na_result is None: - na_result = libmissing.NA - # just StringDtype - arr = extract_array(arr) - return _map_stringarray(f, arr, na_value=na_result, dtype=dtype) - if na_result is None: - na_result = np.nan - return _map_object(f, arr, na_mask=True, na_value=na_result, dtype=dtype) - - -def _map_stringarray( - func: Callable[[str], Any], arr: "StringArray", na_value: Any, dtype: Dtype -) -> ArrayLike: - """ - Map a callable over valid elements of a StringArray. - - Parameters - ---------- - func : Callable[[str], Any] - Apply to each valid element. - arr : StringArray - na_value : Any - The value to use for missing values. By default, this is - the original value (NA). - dtype : Dtype - The result dtype to use. Specifying this avoids an intermediate - object-dtype allocation. - - Returns - ------- - ArrayLike - An ExtensionArray for integer or string dtypes, otherwise - an ndarray. - - """ - from pandas.arrays import BooleanArray, IntegerArray, StringArray - - mask = isna(arr) - - assert isinstance(arr, StringArray) - arr = np.asarray(arr) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - result = lib.map_infer_mask( - arr, - func, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=np.dtype(dtype), - ) - - if not na_value_is_na: - mask[:] = False - - return constructor(result, mask) - - elif is_string_dtype(dtype) and not is_object_dtype(dtype): - # i.e. StringDtype - result = lib.map_infer_mask( - arr, func, mask.view("uint8"), convert=False, na_value=na_value - ) - return StringArray(result) - else: - # This is when the result type is object. We reach this when - # -> We know the result type is truly object (e.g. .encode returns bytes - # or .findall returns a list). - # -> We don't know the result type. E.g. `.get` can return anything. - return lib.map_infer_mask(arr, func, mask.view("uint8")) - - -def _map_object(f, arr, na_mask=False, na_value=np.nan, dtype=np.dtype(object)): - if not len(arr): - return np.ndarray(0, dtype=dtype) - - if isinstance(arr, ABCSeries): - arr = arr._values # TODO: extract_array? - if not isinstance(arr, np.ndarray): - arr = np.asarray(arr, dtype=object) - if na_mask: - mask = isna(arr) - convert = not np.all(mask) - try: - result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) - except (TypeError, AttributeError) as e: - # Reraise the exception if callable `f` got wrong number of args. - # The user may want to be warned by this, instead of getting NaN - p_err = ( - r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " - r"(?(3)required )positional arguments?" - ) - - if len(e.args) >= 1 and re.search(p_err, e.args[0]): - # FIXME: this should be totally avoidable - raise e - - def g(x): - try: - return f(x) - except (TypeError, AttributeError): - return na_value - - return _map_object(g, arr, dtype=dtype) - if na_value is not np.nan: - np.putmask(result, mask, na_value) - if result.dtype == object: - result = lib.maybe_convert_objects(result) - return result - else: - return lib.map_infer(arr, f) - - -def str_count(arr, pat, flags=0): - """ - Count occurrences of pattern in each string of the Series/Index. 
- - This function is used to count the number of times a particular regex - pattern is repeated in each of the string elements of the - :class:`~pandas.Series`. - - Parameters - ---------- - pat : str - Valid regular expression. - flags : int, default 0, meaning no flags - Flags for the `re` module. For a complete list, `see here - `_. - **kwargs - For compatibility with other string methods. Not used. - - Returns - ------- - Series or Index - Same type as the calling object containing the integer counts. - - See Also - -------- - re : Standard library module for regular expressions. - str.count : Standard library version, without regular expression support. - - Notes - ----- - Some characters need to be escaped when passing in `pat`. - eg. ``'$'`` has a special meaning in regex and must be escaped when - finding this literal character. - - Examples - -------- - >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) - >>> s.str.count('a') - 0 0.0 - 1 0.0 - 2 2.0 - 3 2.0 - 4 NaN - 5 0.0 - 6 1.0 - dtype: float64 - - Escape ``'$'`` to find the literal dollar sign. - - >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) - >>> s.str.count('\\$') - 0 1 - 1 0 - 2 1 - 3 2 - 4 2 - 5 0 - dtype: int64 - - This is also available on Index - - >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') - Int64Index([0, 0, 2, 1], dtype='int64') - """ - regex = re.compile(pat, flags=flags) - f = lambda x: len(regex.findall(x)) - return _na_map(f, arr, dtype="int64") - - -def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): - """ - Test if pattern or regex is contained within a string of a Series or Index. - - Return boolean Series or Index based on whether a given pattern or regex is - contained within a string of a Series or Index. - - Parameters - ---------- - pat : str - Character sequence or regular expression. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Flags to pass through to the re module, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. - regex : bool, default True - If True, assumes the pat is a regular expression. - - If False, treats the pat as a literal string. - - Returns - ------- - Series or Index of boolean values - A Series or Index of boolean values indicating whether the - given pattern is contained within the string of each element - of the Series or Index. - - See Also - -------- - match : Analogous, but stricter, relying on re.match instead of re.search. - Series.str.startswith : Test if the start of each string element matches a - pattern. - Series.str.endswith : Same as startswith, but tests the end of string. - - Examples - -------- - Returning a Series of booleans using only a literal pattern. - - >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) - >>> s1.str.contains('og', regex=False) - 0 False - 1 True - 2 False - 3 False - 4 NaN - dtype: object - - Returning an Index of booleans using only a literal pattern. - - >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) - >>> ind.str.contains('23', regex=False) - Index([False, False, False, True, nan], dtype='object') - - Specifying case sensitivity using `case`. - - >>> s1.str.contains('oG', case=True, regex=True) - 0 False - 1 False - 2 False - 3 False - 4 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN` replaces NaN values - with `False`. If Series or Index does not contain NaN values - the resultant dtype will be `bool`, otherwise, an `object` dtype. 
- - >>> s1.str.contains('og', na=False, regex=True) - 0 False - 1 True - 2 False - 3 False - 4 False - dtype: bool - - Returning 'house' or 'dog' when either expression occurs in a string. - - >>> s1.str.contains('house|dog', regex=True) - 0 False - 1 True - 2 True - 3 False - 4 NaN - dtype: object - - Ignoring case sensitivity using `flags` with regex. - - >>> import re - >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True) - 0 False - 1 False - 2 True - 3 False - 4 NaN - dtype: object - - Returning any digit using regular expression. - - >>> s1.str.contains('\\d', regex=True) - 0 False - 1 False - 2 False - 3 True - 4 NaN - dtype: object - - Ensure `pat` is a not a literal pattern when `regex` is set to True. - Note in the following example one might expect only `s2[1]` and `s2[3]` to - return `True`. However, '.0' as a regex matches any character - followed by a 0. - - >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) - >>> s2.str.contains('.0', regex=True) - 0 True - 1 True - 2 False - 3 True - 4 False - dtype: bool - """ - if regex: - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - if regex.groups > 0: - warnings.warn( - "This pattern has match groups. To actually get the " - "groups, use str.extract.", - UserWarning, - stacklevel=3, - ) - - f = lambda x: regex.search(x) is not None - else: - if case: - f = lambda x: pat in x - else: - upper_pat = pat.upper() - f = lambda x: upper_pat in x - uppered = _na_map(lambda x: x.upper(), arr) - return _na_map(f, uppered, na, dtype=np.dtype(bool)) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_startswith(arr, pat, na=np.nan): - """ - Test if the start of each string element matches a pattern. - - Equivalent to :meth:`str.startswith`. - - Parameters - ---------- - pat : str - Character sequence. Regular expressions are not accepted. - na : object, default NaN - Object shown if element tested is not a string. - - Returns - ------- - Series or Index of bool - A Series of booleans indicating whether the given pattern matches - the start of each string element. - - See Also - -------- - str.startswith : Python standard library string method. - Series.str.endswith : Same as startswith, but tests the end of string. - Series.str.contains : Tests if string element contains a pattern. - - Examples - -------- - >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) - >>> s - 0 bat - 1 Bear - 2 cat - 3 NaN - dtype: object - - >>> s.str.startswith('b') - 0 True - 1 False - 2 False - 3 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN`. - - >>> s.str.startswith('b', na=False) - 0 True - 1 False - 2 False - 3 False - dtype: bool - """ - f = lambda x: x.startswith(pat) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_endswith(arr, pat, na=np.nan): - """ - Test if the end of each string element matches a pattern. - - Equivalent to :meth:`str.endswith`. - - Parameters - ---------- - pat : str - Character sequence. Regular expressions are not accepted. - na : object, default NaN - Object shown if element tested is not a string. - - Returns - ------- - Series or Index of bool - A Series of booleans indicating whether the given pattern matches - the end of each string element. - - See Also - -------- - str.endswith : Python standard library string method. - Series.str.startswith : Same as endswith, but tests the start of string. - Series.str.contains : Tests if string element contains a pattern. 
-
-    Examples
-    --------
-    >>> s = pd.Series(['bat', 'bear', 'caT', np.nan])
-    >>> s
-    0     bat
-    1    bear
-    2     caT
-    3     NaN
-    dtype: object
-
-    >>> s.str.endswith('t')
-    0     True
-    1    False
-    2    False
-    3      NaN
-    dtype: object
-
-    Specifying `na` to be `False` instead of `NaN`.
-
-    >>> s.str.endswith('t', na=False)
-    0     True
-    1    False
-    2    False
-    3    False
-    dtype: bool
-    """
-    f = lambda x: x.endswith(pat)
-    return _na_map(f, arr, na, dtype=np.dtype(bool))
-
-
-def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
-    r"""
-    Replace each occurrence of pattern/regex in the Series/Index.
-
-    Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on the regex value.
-
-    Parameters
-    ----------
-    pat : str or compiled regex
-        String can be a character sequence or regular expression.
-    repl : str or callable
-        Replacement string or a callable. The callable is passed the regex
-        match object and must return a replacement string to be used.
-        See :func:`re.sub`.
-    n : int, default -1 (all)
-        Number of replacements to make from start.
-    case : bool, default None
-        Determines if replace is case sensitive:
-
-        - If True, case sensitive (the default if `pat` is a string)
-        - Set to False for case insensitive
-        - Cannot be set if `pat` is a compiled regex.
-
-    flags : int, default 0 (no flags)
-        Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled
-        regex.
-    regex : bool, default True
-        Determines if the passed-in pattern is a regular expression:
-
-        - If True, assumes the passed-in pattern is a regular expression.
-        - If False, treats the pattern as a literal string
-        - Cannot be set to False if `pat` is a compiled regex or `repl` is
-          a callable.
-
-        .. versionadded:: 0.23.0
-
-    Returns
-    -------
-    Series or Index of object
-        A copy of the object with all matching occurrences of `pat` replaced by
-        `repl`.
-
-    Raises
-    ------
-    ValueError
-        * if `regex` is False and `repl` is a callable or `pat` is a compiled
-          regex
-        * if `pat` is a compiled regex and `case` or `flags` is set
-
-    Notes
-    -----
-    When `pat` is a compiled regex, all flags should be included in the
-    compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled
-    regex will raise an error.
-
-    Examples
-    --------
-    When `pat` is a string and `regex` is True (the default), the given `pat`
-    is compiled as a regex. When `repl` is a string, it replaces matching
-    regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are
-    left as is:
-
-    >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
-    0    bao
-    1    baz
-    2    NaN
-    dtype: object
-
-    When `pat` is a string and `regex` is False, every `pat` is replaced with
-    `repl` as with :meth:`str.replace`:
-
-    >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
-    0    bao
-    1    fuz
-    2    NaN
-    dtype: object
-
-    When `repl` is a callable, it is called on every `pat` using
-    :func:`re.sub`. The callable should expect one positional argument
-    (a regex match object) and return a string.
-
-    To get the idea:
-
-    >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)
-    0    <re.Match object; span=(0, 1), match='f'>oo
-    1    <re.Match object; span=(0, 1), match='f'>uz
-    2                                            NaN
-    dtype: object
-
-    Reverse every lowercase alphabetic word:
-
-    >>> repl = lambda m: m.group(0)[::-1]
-    >>> pd.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl)
-    0    oof 123
-    1    rab zab
-    2        NaN
-    dtype: object
-
-    Using regex groups (extract second group and swap case):
-
-    >>> pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
-    >>> repl = lambda m: m.group('two').swapcase()
-    >>> pd.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl)
-    0    tWO
-    1    bAR
-    dtype: object
-
-    Using a compiled regex with flags
-
-    >>> import re
-    >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)
-    >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar')
-    0    foo
-    1    bar
-    2    NaN
-    dtype: object
-    """
-    # Check whether repl is valid (GH 13438, GH 15055)
-    if not (isinstance(repl, str) or callable(repl)):
-        raise TypeError("repl must be a string or callable")
-
-    is_compiled_re = is_re(pat)
-    if regex:
-        if is_compiled_re:
-            if (case is not None) or (flags != 0):
-                raise ValueError(
-                    "case and flags cannot be set when pat is a compiled regex"
-                )
-        else:
-            # not a compiled regex
-            # set default case
-            if case is None:
-                case = True
-
-            # add case flag, if provided
-            if case is False:
-                flags |= re.IGNORECASE
-        if is_compiled_re or len(pat) > 1 or flags or callable(repl):
-            n = n if n >= 0 else 0
-            compiled = re.compile(pat, flags=flags)
-            f = lambda x: compiled.sub(repl=repl, string=x, count=n)
-        else:
-            f = lambda x: x.replace(pat, repl, n)
-    else:
-        if is_compiled_re:
-            raise ValueError(
-                "Cannot use a compiled regex as replacement pattern with regex=False"
-            )
-        if callable(repl):
-            raise ValueError("Cannot use a callable replacement when regex=False")
-        f = lambda x: x.replace(pat, repl, n)
-
-    return _na_map(f, arr, dtype=str)
-
-
-def str_repeat(arr, repeats):
-    """
-    Duplicate each string in the Series or Index.
-
-    Parameters
-    ----------
-    repeats : int or sequence of int
-        Same value for all (int) or different value per (sequence).
-
-    Returns
-    -------
-    Series or Index of object
-        Series or Index of repeated string objects specified by
-        input parameter repeats.
-
-    Examples
-    --------
-    >>> s = pd.Series(['a', 'b', 'c'])
-    >>> s
-    0    a
-    1    b
-    2    c
-    dtype: object
-
-    Single int repeats string in Series
-
-    >>> s.str.repeat(repeats=2)
-    0    aa
-    1    bb
-    2    cc
-    dtype: object
-
-    Sequence of int repeats corresponding string in Series
-
-    >>> s.str.repeat(repeats=[1, 2, 3])
-    0      a
-    1     bb
-    2    ccc
-    dtype: object
-    """
-    if is_scalar(repeats):
-
-        def scalar_rep(x):
-            try:
-                return bytes.__mul__(x, repeats)
-            except TypeError:
-                return str.__mul__(x, repeats)
-
-        return _na_map(scalar_rep, arr, dtype=str)
-    else:
-
-        def rep(x, r):
-            if x is libmissing.NA:
-                return x
-            try:
-                return bytes.__mul__(x, r)
-            except TypeError:
-                return str.__mul__(x, r)
-
-        repeats = np.asarray(repeats, dtype=object)
-        result = libops.vec_binop(np.asarray(arr), repeats, rep)
-        return result
-
-
-def str_match(
-    arr: ArrayLike,
-    pat: Union[str, Pattern],
-    case: bool = True,
-    flags: int = 0,
-    na: Scalar = np.nan,
-):
-    """
-    Determine if each string starts with a match of a regular expression.
-
-    Parameters
-    ----------
-    pat : str
-        Character sequence or regular expression.
-    case : bool, default True
-        If True, case sensitive.
-    flags : int, default 0 (no flags)
-        Regex module flags, e.g. re.IGNORECASE.
-    na : default NaN
-        Fill value for missing values.
-
-    Returns
-    -------
-    Series/array of boolean values
-
-    See Also
-    --------
-    fullmatch : Stricter matching that requires the entire string to match.
-    contains : Analogous, but less strict, relying on re.search instead of
-        re.match.
-    extract : Extract matched groups.
-    """
-    if not case:
-        flags |= re.IGNORECASE
-
-    regex = re.compile(pat, flags=flags)
-
-    f = lambda x: regex.match(x) is not None
-
-    return _na_map(f, arr, na, dtype=np.dtype(bool))
-
-
-def str_fullmatch(
-    arr: ArrayLike,
-    pat: Union[str, Pattern],
-    case: bool = True,
-    flags: int = 0,
-    na: Scalar = np.nan,
-):
-    """
-    Determine if each string entirely matches a regular expression.
-
-    .. versionadded:: 1.1.0
-
-    Parameters
-    ----------
-    pat : str
-        Character sequence or regular expression.
-    case : bool, default True
-        If True, case sensitive.
-    flags : int, default 0 (no flags)
-        Regex module flags, e.g. re.IGNORECASE.
-    na : default NaN
-        Fill value for missing values.
-
-    Returns
-    -------
-    Series/array of boolean values
-
-    See Also
-    --------
-    match : Similar, but also returns `True` when only a *prefix* of the string
-        matches the regular expression.
-    extract : Extract matched groups.
-    """
-    if not case:
-        flags |= re.IGNORECASE
-
-    regex = re.compile(pat, flags=flags)
-
-    f = lambda x: regex.fullmatch(x) is not None
-
-    return _na_map(f, arr, na, dtype=np.dtype(bool))
-
-
-def _get_single_group_name(rx):
-    try:
-        return list(rx.groupindex.keys()).pop()
-    except IndexError:
-        return None
-
-
-def _groups_or_na_fun(regex):
-    """Used in both extract_noexpand and extract_frame"""
-    if regex.groups == 0:
-        raise ValueError("pattern contains no capture groups")
-    empty_row = [np.nan] * regex.groups
-
-    def f(x):
-        if not isinstance(x, str):
-            return empty_row
-        m = regex.search(x)
-        if m:
-            return [np.nan if item is None else item for item in m.groups()]
-        else:
-            return empty_row
-
-    return f
-
-
-def _result_dtype(arr):
-    # workaround #27953
-    # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails
-    # when the list of values is empty.
-    from pandas.core.arrays.string_ import StringDtype
-
-    if isinstance(arr.dtype, StringDtype):
-        return arr.dtype.name
-    else:
-        return object
-
-
-def _str_extract_noexpand(arr, pat, flags=0):
-    """
-    Find groups in each string in the Series using passed regular
-    expression. This function is called from
-    str_extract(expand=False), and can return Series, DataFrame, or
-    Index.
-
-    """
-    from pandas import DataFrame
-
-    regex = re.compile(pat, flags=flags)
-    groups_or_na = _groups_or_na_fun(regex)
-
-    if regex.groups == 1:
-        result = np.array([groups_or_na(val)[0] for val in arr], dtype=object)
-        name = _get_single_group_name(regex)
-    else:
-        if isinstance(arr, ABCIndexClass):
-            raise ValueError("only one regex group is supported with Index")
-        name = None
-        names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
-        columns = [names.get(1 + i, i) for i in range(regex.groups)]
-        if arr.empty:
-            result = DataFrame(columns=columns, dtype=object)
-        else:
-            dtype = _result_dtype(arr)
-            result = DataFrame(
-                [groups_or_na(val) for val in arr],
-                columns=columns,
-                index=arr.index,
-                dtype=dtype,
-            )
-    return result, name
-
-
-def _str_extract_frame(arr, pat, flags=0):
-    """
-    For each subject string in the Series, extract groups from the
-    first match of regular expression pat. This function is called from
-    str_extract(expand=True), and always returns a DataFrame.
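The row-building step shared by both extract paths is `_groups_or_na_fun` above; a minimal standalone sketch of what it returns (pure `re`, with `groups_or_na` as the invented name for the closure it produces):

    import re
    import numpy as np

    regex = re.compile(r"([ab])(\d)")
    empty_row = [np.nan] * regex.groups

    def groups_or_na(x):
        # Non-strings and non-matches yield a full row of NaNs; an
        # unmatched optional group becomes NaN within a matched row.
        if not isinstance(x, str):
            return empty_row
        m = regex.search(x)
        return [np.nan if g is None else g for g in m.groups()] if m else empty_row

    [groups_or_na(v) for v in ["a1", "b2", "c3", np.nan]]
    # [['a', '1'], ['b', '2'], [nan, nan], [nan, nan]]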
-
-    """
-    from pandas import DataFrame
-
-    regex = re.compile(pat, flags=flags)
-    groups_or_na = _groups_or_na_fun(regex)
-    names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
-    columns = [names.get(1 + i, i) for i in range(regex.groups)]
-
-    if len(arr) == 0:
-        return DataFrame(columns=columns, dtype=object)
-    try:
-        result_index = arr.index
-    except AttributeError:
-        result_index = None
-    dtype = _result_dtype(arr)
-    return DataFrame(
-        [groups_or_na(val) for val in arr],
-        columns=columns,
-        index=result_index,
-        dtype=dtype,
-    )
-
-
-def str_extract(arr, pat, flags=0, expand=True):
-    r"""
-    Extract capture groups in the regex `pat` as columns in a DataFrame.
-
-    For each subject string in the Series, extract groups from the
-    first match of regular expression `pat`.
-
-    Parameters
-    ----------
-    pat : str
-        Regular expression pattern with capturing groups.
-    flags : int, default 0 (no flags)
-        Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that
-        modify regular expression matching for things like case,
-        spaces, etc. For more details, see :mod:`re`.
-    expand : bool, default True
-        If True, return DataFrame with one column per capture group.
-        If False, return a Series/Index if there is one capture group
-        or DataFrame if there are multiple capture groups.
-
-    Returns
-    -------
-    DataFrame or Series or Index
-        A DataFrame with one row for each subject string, and one
-        column for each group. Any capture group names in regular
-        expression pat will be used for column names; otherwise
-        capture group numbers will be used. The dtype of each result
-        column is always object, even when no match is found. If
-        ``expand=False`` and pat has only one capture group, then
-        return a Series (if subject is a Series) or Index (if subject
-        is an Index).
-
-    See Also
-    --------
-    extractall : Returns all matches (not just the first match).
-
-    Examples
-    --------
-    A pattern with two groups will return a DataFrame with two columns.
-    Non-matches will be NaN.
-
-    >>> s = pd.Series(['a1', 'b2', 'c3'])
-    >>> s.str.extract(r'([ab])(\d)')
-         0    1
-    0    a    1
-    1    b    2
-    2  NaN  NaN
-
-    A pattern may contain optional groups.
-
-    >>> s.str.extract(r'([ab])?(\d)')
-         0  1
-    0    a  1
-    1    b  2
-    2  NaN  3
-
-    Named groups will become column names in the result.
-
-    >>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
-      letter digit
-    0      a     1
-    1      b     2
-    2    NaN   NaN
-
-    A pattern with one group will return a DataFrame with one column
-    if expand=True.
-
-    >>> s.str.extract(r'[ab](\d)', expand=True)
-         0
-    0    1
-    1    2
-    2  NaN
-
-    A pattern with one group will return a Series if expand=False.
-
-    >>> s.str.extract(r'[ab](\d)', expand=False)
-    0      1
-    1      2
-    2    NaN
-    dtype: object
-    """
-    if not isinstance(expand, bool):
-        raise ValueError("expand must be True or False")
-    if expand:
-        return _str_extract_frame(arr._orig, pat, flags=flags)
-    else:
-        result, name = _str_extract_noexpand(arr._parent, pat, flags=flags)
-        return arr._wrap_result(result, name=name, expand=expand)
-
-
-def str_extractall(arr, pat, flags=0):
-    r"""
-    Extract capture groups in the regex `pat` as columns in DataFrame.
-
-    For each subject string in the Series, extract groups from all
-    matches of regular expression pat. When each subject string in the
-    Series has exactly one match, extractall(pat).xs(0, level='match')
-    is the same as extract(pat).
-
-    Parameters
-    ----------
-    pat : str
-        Regular expression pattern with capturing groups.
-    flags : int, default 0 (no flags)
-        A ``re`` module flag, for example ``re.IGNORECASE``. These allow
-        you to modify regular expression matching for things like case, spaces,
-        etc. Multiple flags can be combined with the bitwise OR operator,
-        for example ``re.IGNORECASE | re.MULTILINE``.
-
-    Returns
-    -------
-    DataFrame
-        A ``DataFrame`` with one row for each match, and one column for each
-        group. Its rows have a ``MultiIndex`` with first levels that come from
-        the subject ``Series``. The last level is named 'match' and indexes the
-        matches in each item of the ``Series``. Any capture group names in
-        regular expression pat will be used for column names; otherwise capture
-        group numbers will be used.
-
-    See Also
-    --------
-    extract : Returns first match only (not all matches).
-
-    Examples
-    --------
-    A pattern with one group will return a DataFrame with one column.
-    Indices with no matches will not appear in the result.
-
-    >>> s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
-    >>> s.str.extractall(r"[ab](\d)")
-             0
-      match
-    A 0      1
-      1      2
-    B 0      1
-
-    Capture group names are used for column names of the result.
-
-    >>> s.str.extractall(r"[ab](?P<digit>\d)")
-            digit
-      match
-    A 0         1
-      1         2
-    B 0         1
-
-    A pattern with two groups will return a DataFrame with two columns.
-
-    >>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
-            letter digit
-      match
-    A 0          a     1
-      1          a     2
-    B 0          b     1
-
-    Optional groups that do not match are NaN in the result.
-
-    >>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
-            letter digit
-      match
-    A 0          a     1
-      1          a     2
-    B 0          b     1
-    C 0        NaN     1
-    """
-    regex = re.compile(pat, flags=flags)
-    # the regex must contain capture groups.
-    if regex.groups == 0:
-        raise ValueError("pattern contains no capture groups")
-
-    if isinstance(arr, ABCIndexClass):
-        arr = arr.to_series().reset_index(drop=True)
-
-    names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
-    columns = [names.get(1 + i, i) for i in range(regex.groups)]
-    match_list = []
-    index_list = []
-    is_mi = arr.index.nlevels > 1
-
-    for subject_key, subject in arr.items():
-        if isinstance(subject, str):
-
-            if not is_mi:
-                subject_key = (subject_key,)
-
-            for match_i, match_tuple in enumerate(regex.findall(subject)):
-                if isinstance(match_tuple, str):
-                    match_tuple = (match_tuple,)
-                na_tuple = [np.NaN if group == "" else group for group in match_tuple]
-                match_list.append(na_tuple)
-                result_key = tuple(subject_key + (match_i,))
-                index_list.append(result_key)
-
-    from pandas import MultiIndex
-
-    index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"])
-    dtype = _result_dtype(arr)
-
-    result = arr._constructor_expanddim(
-        match_list, index=index, columns=columns, dtype=dtype
-    )
-    return result
-
-
-def str_get_dummies(arr, sep="|"):
-    """
-    Return DataFrame of dummy/indicator variables for Series.
-
-    Each string in Series is split by sep and returned as a DataFrame
-    of dummy/indicator variables.
-
-    Parameters
-    ----------
-    sep : str, default "|"
-        String to split on.
-
-    Returns
-    -------
-    DataFrame
-        Dummy variables corresponding to values of the Series.
-
-    See Also
-    --------
-    get_dummies : Convert categorical variable into dummy/indicator
-        variables.
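The implementation that follows the examples wraps each row in `sep` so that every tag is delimited on both sides before the membership test. A self-contained sketch of that indicator construction (`get_dummies_sketch` is an invented name; the real function works on a Series and returns the tags for column labels):

    import numpy as np

    def get_dummies_sketch(values, sep="|"):
        # Missing rows become all-zero rows, mirroring the fillna("") step.
        filled = [v if isinstance(v, str) else "" for v in values]
        wrapped = [sep + v + sep for v in filled]
        tags = sorted({t for v in filled for t in v.split(sep)} - {""})
        dummies = np.empty((len(values), len(tags)), dtype=np.int64)
        for i, t in enumerate(tags):
            dummies[:, i] = [sep + t + sep in row for row in wrapped]
        return dummies, tags

    get_dummies_sketch(["a|b", np.nan, "a|c"])
    # (array([[1, 1, 0],
    #         [0, 0, 0],
    #         [1, 0, 1]]), ['a', 'b', 'c'])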
- - Examples - -------- - >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies() - a b c - 0 1 1 0 - 1 1 0 0 - 2 1 0 1 - - >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies() - a b c - 0 1 1 0 - 1 0 0 0 - 2 1 0 1 - """ - arr = arr.fillna("") - try: - arr = sep + arr + sep - except TypeError: - arr = sep + arr.astype(str) + sep - - tags = set() - for ts in arr.str.split(sep): - tags.update(ts) - tags = sorted(tags - {""}) - - dummies = np.empty((len(arr), len(tags)), dtype=np.int64) - - for i, t in enumerate(tags): - pat = sep + t + sep - dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) - return dummies, tags - - -def str_join(arr, sep): - """ - Join lists contained as elements in the Series/Index with passed delimiter. - - If the elements of a Series are lists themselves, join the content of these - lists using the delimiter passed to the function. - This function is an equivalent to :meth:`str.join`. - - Parameters - ---------- - sep : str - Delimiter to use between list entries. - - Returns - ------- - Series/Index: object - The list entries concatenated by intervening occurrences of the - delimiter. - - Raises - ------ - AttributeError - If the supplied Series contains neither strings nor lists. - - See Also - -------- - str.join : Standard library version of this method. - Series.str.split : Split strings around given separator/delimiter. - - Notes - ----- - If any of the list items is not a string object, the result of the join - will be `NaN`. - - Examples - -------- - Example with a list that contains non-string elements. - - >>> s = pd.Series([['lion', 'elephant', 'zebra'], - ... [1.1, 2.2, 3.3], - ... ['cat', np.nan, 'dog'], - ... ['cow', 4.5, 'goat'], - ... ['duck', ['swan', 'fish'], 'guppy']]) - >>> s - 0 [lion, elephant, zebra] - 1 [1.1, 2.2, 3.3] - 2 [cat, nan, dog] - 3 [cow, 4.5, goat] - 4 [duck, [swan, fish], guppy] - dtype: object - - Join all lists using a '-'. The lists containing object(s) of types other - than str will produce a NaN. - - >>> s.str.join('-') - 0 lion-elephant-zebra - 1 NaN - 2 NaN - 3 NaN - 4 NaN - dtype: object - """ - return _na_map(sep.join, arr, dtype=str) - - -def str_findall(arr, pat, flags=0): - """ - Find all occurrences of pattern or regular expression in the Series/Index. - - Equivalent to applying :func:`re.findall` to all the elements in the - Series/Index. - - Parameters - ---------- - pat : str - Pattern or regular expression. - flags : int, default 0 - Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which - means no flags). - - Returns - ------- - Series/Index of lists of strings - All non-overlapping matches of pattern or regular expression in each - string of this Series/Index. - - See Also - -------- - count : Count occurrences of pattern or regular expression in each string - of the Series/Index. - extractall : For each string in the Series, extract groups from all matches - of regular expression and return a DataFrame with one row for each - match and one column for each group. - re.findall : The equivalent ``re`` function to all non-overlapping matches - of pattern or regular expression in string, as a list of strings. 
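Element-wise this reduces to :func:`re.findall` with NA passthrough, as the implementation after the examples shows; a quick sketch of that per-element step (plain Python in place of `_na_map`):

    import re
    import numpy as np

    regex = re.compile("on")
    # Mirrors _na_map(regex.findall, arr): map findall, skip missing values.
    [regex.findall(x) if isinstance(x, str) else np.nan
     for x in ["Lion", "Monkey", np.nan]]
    # [['on'], ['on'], nan]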
- - Examples - -------- - >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) - - The search for the pattern 'Monkey' returns one match: - - >>> s.str.findall('Monkey') - 0 [] - 1 [Monkey] - 2 [] - dtype: object - - On the other hand, the search for the pattern 'MONKEY' doesn't return any - match: - - >>> s.str.findall('MONKEY') - 0 [] - 1 [] - 2 [] - dtype: object - - Flags can be added to the pattern or regular expression. For instance, - to find the pattern 'MONKEY' ignoring the case: - - >>> import re - >>> s.str.findall('MONKEY', flags=re.IGNORECASE) - 0 [] - 1 [Monkey] - 2 [] - dtype: object - - When the pattern matches more than one string in the Series, all matches - are returned: - - >>> s.str.findall('on') - 0 [on] - 1 [on] - 2 [] - dtype: object - - Regular expressions are supported too. For instance, the search for all the - strings ending with the word 'on' is shown next: - - >>> s.str.findall('on$') - 0 [on] - 1 [] - 2 [] - dtype: object - - If the pattern is found more than once in the same string, then a list of - multiple strings is returned: - - >>> s.str.findall('b') - 0 [] - 1 [] - 2 [b, b] - dtype: object - """ - regex = re.compile(pat, flags=flags) - return _na_map(regex.findall, arr) - - -def str_find(arr, sub, start=0, end=None, side="left"): - """ - Return indexes in each strings in the Series/Index where the - substring is fully contained between [start:end]. Return -1 on failure. - - Parameters - ---------- - sub : str - Substring being searched. - start : int - Left edge index. - end : int - Right edge index. - side : {'left', 'right'}, default 'left' - Specifies a starting side, equivalent to ``find`` or ``rfind``. - - Returns - ------- - Series or Index - Indexes where substring is found. - """ - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "find" - elif side == "right": - method = "rfind" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_index(arr, sub, start=0, end=None, side="left"): - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "index" - elif side == "right": - method = "rindex" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_pad(arr, width, side="left", fillchar=" "): - """ - Pad strings in the Series/Index up to width. - - Parameters - ---------- - width : int - Minimum width of resulting string; additional characters will be filled - with character defined in `fillchar`. - side : {'left', 'right', 'both'}, default 'left' - Side from which to fill resulting string. - fillchar : str, default ' ' - Additional character for filling, default is whitespace. - - Returns - ------- - Series or Index of object - Returns Series or Index with minimum number of char in object. - - See Also - -------- - Series.str.rjust : Fills the left side of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='left')``. - Series.str.ljust : Fills the right side of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='right')``. 
- Series.str.center : Fills boths sides of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='both')``. - Series.str.zfill : Pad strings in the Series/Index by prepending '0' - character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``. - - Examples - -------- - >>> s = pd.Series(["caribou", "tiger"]) - >>> s - 0 caribou - 1 tiger - dtype: object - - >>> s.str.pad(width=10) - 0 caribou - 1 tiger - dtype: object - - >>> s.str.pad(width=10, side='right', fillchar='-') - 0 caribou--- - 1 tiger----- - dtype: object - - >>> s.str.pad(width=10, side='both', fillchar='-') - 0 -caribou-- - 1 --tiger--- - dtype: object - """ - if not isinstance(fillchar, str): - msg = f"fillchar must be a character, not {type(fillchar).__name__}" - raise TypeError(msg) - - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - - if not is_integer(width): - msg = f"width must be of integer type, not {type(width).__name__}" - raise TypeError(msg) - - if side == "left": - f = lambda x: x.rjust(width, fillchar) - elif side == "right": - f = lambda x: x.ljust(width, fillchar) - elif side == "both": - f = lambda x: x.center(width, fillchar) - else: # pragma: no cover - raise ValueError("Invalid side") - - return _na_map(f, arr, dtype=str) - - -def str_split(arr, pat=None, n=None): - - if pat is None: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - if len(pat) == 1: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - if n is None or n == -1: - n = 0 - regex = re.compile(pat) - f = lambda x: regex.split(x, maxsplit=n) - res = _na_map(f, arr) - return res - - -def str_rsplit(arr, pat=None, n=None): - - if n is None or n == 0: - n = -1 - f = lambda x: x.rsplit(pat, n) - res = _na_map(f, arr) - return res - - -def str_slice(arr, start=None, stop=None, step=None): - """ - Slice substrings from each element in the Series or Index. - - Parameters - ---------- - start : int, optional - Start position for slice operation. - stop : int, optional - Stop position for slice operation. - step : int, optional - Step size for slice operation. - - Returns - ------- - Series or Index of object - Series or Index from sliced substring from original string object. - - See Also - -------- - Series.str.slice_replace : Replace a slice with a string. - Series.str.get : Return element at position. - Equivalent to `Series.str.slice(start=i, stop=i+1)` with `i` - being the position. - - Examples - -------- - >>> s = pd.Series(["koala", "fox", "chameleon"]) - >>> s - 0 koala - 1 fox - 2 chameleon - dtype: object - - >>> s.str.slice(start=1) - 0 oala - 1 ox - 2 hameleon - dtype: object - - >>> s.str.slice(start=-1) - 0 a - 1 x - 2 n - dtype: object - - >>> s.str.slice(stop=2) - 0 ko - 1 fo - 2 ch - dtype: object - - >>> s.str.slice(step=2) - 0 kaa - 1 fx - 2 caeen - dtype: object - - >>> s.str.slice(start=0, stop=5, step=3) - 0 kl - 1 f - 2 cm - dtype: object - - Equivalent behaviour to: - - >>> s.str[0:5:3] - 0 kl - 1 f - 2 cm - dtype: object - """ - obj = slice(start, stop, step) - f = lambda x: x[obj] - return _na_map(f, arr, dtype=str) - - -def str_slice_replace(arr, start=None, stop=None, repl=None): - """ - Replace a positional slice of a string with another value. - - Parameters - ---------- - start : int, optional - Left index position to use for the slice. If not specified (None), - the slice is unbounded on the left, i.e. slice from the start - of the string. 
- stop : int, optional - Right index position to use for the slice. If not specified (None), - the slice is unbounded on the right, i.e. slice until the - end of the string. - repl : str, optional - String for replacement. If not specified (None), the sliced region - is replaced with an empty string. - - Returns - ------- - Series or Index - Same type as the original object. - - See Also - -------- - Series.str.slice : Just slicing without replacement. - - Examples - -------- - >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde']) - >>> s - 0 a - 1 ab - 2 abc - 3 abdc - 4 abcde - dtype: object - - Specify just `start`, meaning replace `start` until the end of the - string with `repl`. - - >>> s.str.slice_replace(1, repl='X') - 0 aX - 1 aX - 2 aX - 3 aX - 4 aX - dtype: object - - Specify just `stop`, meaning the start of the string to `stop` is replaced - with `repl`, and the rest of the string is included. - - >>> s.str.slice_replace(stop=2, repl='X') - 0 X - 1 X - 2 Xc - 3 Xdc - 4 Xcde - dtype: object - - Specify `start` and `stop`, meaning the slice from `start` to `stop` is - replaced with `repl`. Everything before or after `start` and `stop` is - included as is. - - >>> s.str.slice_replace(start=1, stop=3, repl='X') - 0 aX - 1 aX - 2 aX - 3 aXc - 4 aXde - dtype: object - """ - if repl is None: - repl = "" - - def f(x): - if x[start:stop] == "": - local_stop = start - else: - local_stop = stop - y = "" - if start is not None: - y += x[:start] - y += repl - if stop is not None: - y += x[local_stop:] - return y - - return _na_map(f, arr, dtype=str) - - -def str_strip(arr, to_strip=None, side="both"): - """ - Strip whitespace (including newlines) from each string in the - Series/Index. - - Parameters - ---------- - to_strip : str or unicode - side : {'left', 'right', 'both'}, default 'both' - - Returns - ------- - Series or Index - """ - if side == "both": - f = lambda x: x.strip(to_strip) - elif side == "left": - f = lambda x: x.lstrip(to_strip) - elif side == "right": - f = lambda x: x.rstrip(to_strip) - else: # pragma: no cover - raise ValueError("Invalid side") - return _na_map(f, arr, dtype=str) - - -def str_wrap(arr, width, **kwargs): - r""" - Wrap strings in Series/Index at specified line width. - - This method has the same keyword parameters and defaults as - :class:`textwrap.TextWrapper`. - - Parameters - ---------- - width : int - Maximum line width. - expand_tabs : bool, optional - If True, tab characters will be expanded to spaces (default: True). - replace_whitespace : bool, optional - If True, each whitespace character (as defined by string.whitespace) - remaining after tab expansion will be replaced by a single space - (default: True). - drop_whitespace : bool, optional - If True, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True). - break_long_words : bool, optional - If True, then words longer than width will be broken in order to ensure - that no lines are longer than width. If it is false, long words will - not be broken, and some lines may be longer than width (default: True). - break_on_hyphens : bool, optional - If True, wrapping will occur preferably on whitespace and right after - hyphens in compound words, as it is customary in English. If false, - only whitespaces will be considered as potentially good places for line - breaks, but you need to set break_long_words to false if you want truly - insecable words (default: True). 
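All of these keywords are forwarded verbatim to :class:`textwrap.TextWrapper`, which does the actual wrapping (see the implementation after this docstring); a minimal sketch of the per-element step:

    import textwrap

    tw = textwrap.TextWrapper(width=12)
    # Each element is wrapped to a list of lines, then re-joined with "\n".
    ["\n".join(tw.wrap(s)) for s in ["line to be wrapped", "another line to be wrapped"]]
    # ['line to be\nwrapped', 'another line\nto be\nwrapped']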
- - Returns - ------- - Series or Index - - Notes - ----- - Internally, this method uses a :class:`textwrap.TextWrapper` instance with - default settings. To achieve behavior matching R's stringr library str_wrap - function, use the arguments: - - - expand_tabs = False - - replace_whitespace = True - - drop_whitespace = True - - break_long_words = False - - break_on_hyphens = False - - Examples - -------- - >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) - >>> s.str.wrap(12) - 0 line to be\nwrapped - 1 another line\nto be\nwrapped - dtype: object - """ - kwargs["width"] = width - - tw = textwrap.TextWrapper(**kwargs) - - return _na_map(lambda s: "\n".join(tw.wrap(s)), arr, dtype=str) - - -def str_translate(arr, table): - """ - Map all characters in the string through the given mapping table. - - Equivalent to standard :meth:`str.translate`. - - Parameters - ---------- - table : dict - Table is a mapping of Unicode ordinals to Unicode ordinals, strings, or - None. Unmapped characters are left untouched. - Characters mapped to None are deleted. :meth:`str.maketrans` is a - helper function for making translation tables. - - Returns - ------- - Series or Index - """ - return _na_map(lambda x: x.translate(table), arr, dtype=str) - - -def str_get(arr, i): - """ - Extract element from each component at specified position. - - Extract element from lists, tuples, or strings in each element in the - Series/Index. - - Parameters - ---------- - i : int - Position of element to extract. - - Returns - ------- - Series or Index - - Examples - -------- - >>> s = pd.Series(["String", - ... (1, 2, 3), - ... ["a", "b", "c"], - ... 123, - ... -456, - ... {1: "Hello", "2": "World"}]) - >>> s - 0 String - 1 (1, 2, 3) - 2 [a, b, c] - 3 123 - 4 -456 - 5 {1: 'Hello', '2': 'World'} - dtype: object - - >>> s.str.get(1) - 0 t - 1 2 - 2 b - 3 NaN - 4 NaN - 5 Hello - dtype: object - - >>> s.str.get(-1) - 0 g - 1 3 - 2 c - 3 NaN - 4 NaN - 5 None - dtype: object - """ - - def f(x): - if isinstance(x, dict): - return x.get(i) - elif len(x) > i >= -len(x): - return x[i] - return np.nan - - return _na_map(f, arr) - - -def str_decode(arr, encoding, errors="strict"): - """ - Decode character string in the Series/Index using indicated encoding. - - Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in - python3. - - Parameters - ---------- - encoding : str - errors : str, optional - - Returns - ------- - Series or Index - """ - if encoding in _cpython_optimized_decoders: - # CPython optimized implementation - f = lambda x: x.decode(encoding, errors) - else: - decoder = codecs.getdecoder(encoding) - f = lambda x: decoder(x, errors)[0] - return _na_map(f, arr) - - -def str_encode(arr, encoding, errors="strict"): - """ - Encode character string in the Series/Index using indicated encoding. - - Equivalent to :meth:`str.encode`. - - Parameters - ---------- - encoding : str - errors : str, optional - - Returns - ------- - encoded : Series/Index of objects - """ - if encoding in _cpython_optimized_encoders: - # CPython optimized implementation - f = lambda x: x.encode(encoding, errors) - else: - encoder = codecs.getencoder(encoding) - f = lambda x: encoder(x, errors)[0] - return _na_map(f, arr) - - -def forbid_nonstring_types(forbidden, name=None): - """ - Decorator to forbid specific types for a method of StringMethods. - - For calling `.str.{method}` on a Series or Index, it is necessary to first - initialize the :class:`StringMethods` object, and then call the method. 
- However, different methods allow different input types, and so this can not - be checked during :meth:`StringMethods.__init__`, but must be done on a - per-method basis. This decorator exists to facilitate this process, and - make it explicit which (inferred) types are disallowed by the method. - - :meth:`StringMethods.__init__` allows the *union* of types its different - methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), - namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. - - The default string types ['string', 'empty'] are allowed for all methods. - For the additional types ['bytes', 'mixed', 'mixed-integer'], each method - then needs to forbid the types it is not intended for. - - Parameters - ---------- - forbidden : list-of-str or None - List of forbidden non-string types, may be one or more of - `['bytes', 'mixed', 'mixed-integer']`. - name : str, default None - Name of the method to use in the error message. By default, this is - None, in which case the name from the method being wrapped will be - copied. However, for working with further wrappers (like _pat_wrapper - and _noarg_wrapper), it is necessary to specify the name. - - Returns - ------- - func : wrapper - The method to which the decorator is applied, with an added check that - enforces the inferred type to not be in the list of forbidden types. - - Raises - ------ - TypeError - If the inferred type of the underlying data is in `forbidden`. - """ - # deal with None - forbidden = [] if forbidden is None else forbidden - - allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( - forbidden - ) - - def _forbid_nonstring_types(func): - func_name = func.__name__ if name is None else name - - @wraps(func) - def wrapper(self, *args, **kwargs): - if self._inferred_dtype not in allowed_types: - msg = ( - f"Cannot use .str.{func_name} with values of " - f"inferred dtype '{self._inferred_dtype}'." 
- ) - raise TypeError(msg) - return func(self, *args, **kwargs) - - wrapper.__name__ = func_name - return wrapper - - return _forbid_nonstring_types - - -def _noarg_wrapper( - f, - name=None, - docstring=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper(self): - result = _na_map(f, self._parent, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - wrapper.__name__ = f.__name__ if name is None else name - if docstring is not None: - wrapper.__doc__ = docstring - else: - raise ValueError("Provide docstring") - - return wrapper - - -def _pat_wrapper( - f, - flags=False, - na=False, - name=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper1(self, pat): - result = f(self._parent, pat) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper2(self, pat, flags=0, **kwargs): - result = f(self._parent, pat, flags=flags, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper3(self, pat, na=np.nan): - result = f(self._parent, pat, na=na) - return self._wrap_result(result, returns_string=returns_string) - - wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 - - wrapper.__name__ = f.__name__ if name is None else name - if f.__doc__: - wrapper.__doc__ = f.__doc__ - - return wrapper - - -def copy(source): - """Copy a docstring from another source function (if present)""" - - def do_copy(target): - if source.__doc__: - target.__doc__ = source.__doc__ - return target - - return do_copy - - -class StringMethods(NoNewAttributesMixin): - """ - Vectorized string functions for Series and Index. - - NAs stay NA unless handled otherwise by a particular method. - Patterned after Python's string methods, with some inspiration from - R's stringr package. - - Examples - -------- - >>> s = pd.Series(["A_Str_Series"]) - >>> s - 0 A_Str_Series - dtype: object - - >>> s.str.split("_") - 0 [A, Str, Series] - dtype: object - - >>> s.str.replace("_", "") - 0 AStrSeries - dtype: object - """ - - def __init__(self, data): - from pandas.core.arrays.string_ import StringDtype - - self._inferred_dtype = self._validate(data) - self._is_categorical = is_categorical_dtype(data.dtype) - self._is_string = isinstance(data.dtype, StringDtype) - - # ._values.categories works for both Series/Index - self._parent = data._values.categories if self._is_categorical else data - # save orig to blow up categoricals to the right type - self._orig = data - self._freeze() - - @staticmethod - def _validate(data): - """ - Auxiliary function for StringMethods, infers and checks dtype of data. - - This is a "first line of defence" at the creation of the StringMethods- - object (see _make_accessor), and just checks that the dtype is in the - *union* of the allowed types over all string methods below; this - restriction is then refined on a per-method basis using the decorator - @forbid_nonstring_types (more info in the corresponding docstring). 
- - This really should exclude all series/index with any non-string values, - but that isn't practical for performance reasons until we have a str - dtype (GH 9343 / 13877) - - Parameters - ---------- - data : The content of the Series - - Returns - ------- - dtype : inferred dtype of data - """ - from pandas import StringDtype - - if isinstance(data, ABCMultiIndex): - raise AttributeError( - "Can only use .str accessor with Index, not MultiIndex" - ) - - # see _libs/lib.pyx for list of inferred types - allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - - values = getattr(data, "values", data) # Series / Index - values = getattr(values, "categories", values) # categorical / normal - - # explicitly allow StringDtype - if isinstance(values.dtype, StringDtype): - return "string" - - try: - inferred_dtype = lib.infer_dtype(values, skipna=True) - except ValueError: - # GH#27571 mostly occurs with ExtensionArray - inferred_dtype = None - - if inferred_dtype not in allowed_types: - raise AttributeError("Can only use .str accessor with string values!") - return inferred_dtype - - def __getitem__(self, key): - if isinstance(key, slice): - return self.slice(start=key.start, stop=key.stop, step=key.step) - else: - return self.get(key) - - def __iter__(self): - warnings.warn( - "Columnar iteration over characters will be deprecated in future releases.", - FutureWarning, - stacklevel=2, - ) - i = 0 - g = self.get(i) - while g.notna().any(): - yield g - i += 1 - g = self.get(i) - - def _wrap_result( - self, - result, - use_codes=True, - name=None, - expand=None, - fill_value=np.nan, - returns_string=True, - ): - - from pandas import Index, MultiIndex, Series - - # for category, we do the stuff on the categories, so blow it up - # to the full series again - # But for some operations, we have to do the stuff on the full values, - # so make it possible to skip this step as the method already did this - # before the transformation... - if use_codes and self._is_categorical: - # if self._orig is a CategoricalIndex, there is no .cat-accessor - result = take_1d( - result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value - ) - - if not hasattr(result, "ndim") or not hasattr(result, "dtype"): - return result - assert result.ndim < 3 - - # We can be wrapping a string / object / categorical result, in which - # case we'll want to return the same dtype as the input. - # Or we can be wrapping a numeric output, in which case we don't want - # to return a StringArray. 
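-        # For example, .str.upper() on a StringDtype input round-trips as
-        # "string", while .str.len() yields numeric data and is wrapped
-        # with returns_string=False so it is not cast back to "string".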
- if self._is_string and returns_string: - dtype = "string" - else: - dtype = None - - if expand is None: - # infer from ndim if expand is not specified - expand = result.ndim != 1 - - elif expand is True and not isinstance(self._orig, ABCIndexClass): - # required when expand=True is explicitly specified - # not needed when inferred - - def cons_row(x): - if is_list_like(x): - return x - else: - return [x] - - result = [cons_row(x) for x in result] - if result: - # propagate nan values to match longest sequence (GH 18450) - max_len = max(len(x) for x in result) - result = [ - x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result - ] - - if not isinstance(expand, bool): - raise ValueError("expand must be True or False") - - if expand is False: - # if expand is False, result should have the same name - # as the original otherwise specified - if name is None: - name = getattr(result, "name", None) - if name is None: - # do not use logical or, _orig may be a DataFrame - # which has "name" column - name = self._orig.name - - # Wait until we are sure result is a Series or Index before - # checking attributes (GH 12180) - if isinstance(self._orig, ABCIndexClass): - # if result is a boolean np.array, return the np.array - # instead of wrapping it into a boolean Index (GH 8875) - if is_bool_dtype(result): - return result - - if expand: - result = list(result) - out = MultiIndex.from_tuples(result, names=name) - if out.nlevels == 1: - # We had all tuples of length-one, which are - # better represented as a regular Index. - out = out.get_level_values(0) - return out - else: - return Index(result, name=name) - else: - index = self._orig.index - if expand: - cons = self._orig._constructor_expanddim - result = cons(result, columns=name, index=index, dtype=dtype) - else: - # Must be a Series - cons = self._orig._constructor - result = cons(result, name=name, index=index, dtype=dtype) - return result - - def _get_series_list(self, others): - """ - Auxiliary function for :meth:`str.cat`. Turn potentially mixed input - into a list of Series (elements without an index must match the length - of the calling Series/Index). - - Parameters - ---------- - others : Series, DataFrame, np.ndarray, list-like or list-like of - Objects that are either Series, Index or np.ndarray (1-dim). - - Returns - ------- - list of Series - Others transformed into list of Series. - """ - from pandas import DataFrame, Series - - # self._orig is either Series or Index - idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index - - # Generally speaking, all objects without an index inherit the index - # `idx` of the calling Series/Index - i.e. must have matching length. - # Objects with an index (i.e. Series/Index/DataFrame) keep their own. - if isinstance(others, ABCSeries): - return [others] - elif isinstance(others, ABCIndexClass): - return [Series(others._values, index=idx)] - elif isinstance(others, ABCDataFrame): - return [others[x] for x in others] - elif isinstance(others, np.ndarray) and others.ndim == 2: - others = DataFrame(others, index=idx) - return [others[x] for x in others] - elif is_list_like(others, allow_sets=False): - others = list(others) # ensure iterators do not get read twice etc - - # in case of list-like `others`, all elements must be - # either Series/Index/np.ndarray (1-dim)... 
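-            # (a nested list-like is unpacked by recursing into
-            # _get_series_list one element at a time in the loop below)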
- if all( - isinstance(x, (ABCSeries, ABCIndexClass)) - or (isinstance(x, np.ndarray) and x.ndim == 1) - for x in others - ): - los = [] - while others: # iterate through list and append each element - los = los + self._get_series_list(others.pop(0)) - return los - # ... or just strings - elif all(not is_list_like(x) for x in others): - return [Series(others, index=idx)] - raise TypeError( - "others must be Series, Index, DataFrame, np.ndarray " - "or list-like (either containing only strings or " - "containing only objects of type Series/Index/" - "np.ndarray[1-dim])" - ) - - @forbid_nonstring_types(["bytes", "mixed", "mixed-integer"]) - def cat(self, others=None, sep=None, na_rep=None, join="left"): - """ - Concatenate strings in the Series/Index with given separator. - - If `others` is specified, this function concatenates the Series/Index - and elements of `others` element-wise. - If `others` is not passed, then all values in the Series/Index are - concatenated into a single string with a given `sep`. - - Parameters - ---------- - others : Series, Index, DataFrame, np.ndarray or list-like - Series, Index, DataFrame, np.ndarray (one- or two-dimensional) and - other list-likes of strings must have the same length as the - calling Series/Index, with the exception of indexed objects (i.e. - Series/Index/DataFrame) if `join` is not None. - - If others is a list-like that contains a combination of Series, - Index or np.ndarray (1-dim), then all elements will be unpacked and - must satisfy the above criteria individually. - - If others is None, the method returns the concatenation of all - strings in the calling Series/Index. - sep : str, default '' - The separator between the different elements/columns. By default - the empty string `''` is used. - na_rep : str or None, default None - Representation that is inserted for all missing values: - - - If `na_rep` is None, and `others` is None, missing values in the - Series/Index are omitted from the result. - - If `na_rep` is None, and `others` is not None, a row containing a - missing value in any of the columns (before concatenation) will - have a missing value in the result. - join : {'left', 'right', 'outer', 'inner'}, default 'left' - Determines the join-style between the calling Series/Index and any - Series/Index/DataFrame in `others` (objects without an index need - to match the length of the calling Series/Index). To disable - alignment, use `.values` on any Series/Index/DataFrame in `others`. - - .. versionadded:: 0.23.0 - .. versionchanged:: 1.0.0 - Changed default of `join` from None to `'left'`. - - Returns - ------- - str, Series or Index - If `others` is None, `str` is returned, otherwise a `Series/Index` - (same type as caller) of objects is returned. - - See Also - -------- - split : Split each string in the Series/Index. - join : Join lists contained as elements in the Series/Index. - - Examples - -------- - When not passing `others`, all values are concatenated into a single - string: - - >>> s = pd.Series(['a', 'b', np.nan, 'd']) - >>> s.str.cat(sep=' ') - 'a b d' - - By default, NA values in the Series are ignored. Using `na_rep`, they - can be given a representation: - - >>> s.str.cat(sep=' ', na_rep='?') - 'a b ? d' - - If `others` is specified, corresponding values are concatenated with - the separator. Result will be a Series of strings. 
- - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',') - 0 a,A - 1 b,B - 2 NaN - 3 d,D - dtype: object - - Missing values will remain missing in the result, but can again be - represented using `na_rep` - - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-') - 0 a,A - 1 b,B - 2 -,C - 3 d,D - dtype: object - - If `sep` is not specified, the values are concatenated without - separation. - - >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-') - 0 aA - 1 bB - 2 -C - 3 dD - dtype: object - - Series with different indexes can be aligned before concatenation. The - `join`-keyword works as in other methods. - - >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) - >>> s.str.cat(t, join='left', na_rep='-') - 0 aa - 1 b- - 2 -c - 3 dd - dtype: object - >>> - >>> s.str.cat(t, join='outer', na_rep='-') - 0 aa - 1 b- - 2 -c - 3 dd - 4 -e - dtype: object - >>> - >>> s.str.cat(t, join='inner', na_rep='-') - 0 aa - 2 -c - 3 dd - dtype: object - >>> - >>> s.str.cat(t, join='right', na_rep='-') - 3 dd - 0 aa - 4 -e - 2 -c - dtype: object - - For more examples, see :ref:`here `. - """ - from pandas import Index, Series, concat - - if isinstance(others, str): - raise ValueError("Did you mean to supply a `sep` keyword?") - if sep is None: - sep = "" - - if isinstance(self._orig, ABCIndexClass): - data = Series(self._orig, index=self._orig) - else: # Series - data = self._orig - - # concatenate Series/Index with itself if no "others" - if others is None: - data = ensure_object(data) - na_mask = isna(data) - if na_rep is None and na_mask.any(): - data = data[~na_mask] - elif na_rep is not None and na_mask.any(): - data = np.where(na_mask, na_rep, data) - return sep.join(data) - - try: - # turn anything in "others" into lists of Series - others = self._get_series_list(others) - except ValueError as err: # do not catch TypeError raised by _get_series_list - raise ValueError( - "If `others` contains arrays or lists (or other " - "list-likes without an index), these must all be " - "of the same length as the calling Series/Index." - ) from err - - # align if required - if any(not data.index.equals(x.index) for x in others): - # Need to add keys for uniqueness in case of duplicate columns - others = concat( - others, - axis=1, - join=(join if join == "inner" else "outer"), - keys=range(len(others)), - sort=False, - copy=False, - ) - data, others = data.align(others, join=join) - others = [others[x] for x in others] # again list of Series - - all_cols = [ensure_object(x) for x in [data] + others] - na_masks = np.array([isna(x) for x in all_cols]) - union_mask = np.logical_or.reduce(na_masks, axis=0) - - if na_rep is None and union_mask.any(): - # no na_rep means NaNs for all rows where any column has a NaN - # only necessary if there are actually any NaNs - result = np.empty(len(data), dtype=object) - np.putmask(result, union_mask, np.nan) - - not_masked = ~union_mask - result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) - elif na_rep is not None and union_mask.any(): - # fill NaNs with na_rep in case there are actually any NaNs - all_cols = [ - np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) - ] - result = cat_safe(all_cols, sep) - else: - # no NaNs - can just concatenate - result = cat_safe(all_cols, sep) - - if isinstance(self._orig, ABCIndexClass): - # add dtype for case that result is all-NA - result = Index(result, dtype=object, name=self._orig.name) - else: # Series - if is_categorical_dtype(self._orig.dtype): - # We need to infer the new categories. 
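-                # (the concatenated strings need not all be existing
-                # categories, so let the constructor infer the dtype)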
- dtype = None - else: - dtype = self._orig.dtype - result = Series(result, dtype=dtype, index=data.index, name=self._orig.name) - return result - - _shared_docs[ - "str_split" - ] = r""" - Split strings around given separator/delimiter. - - Splits the string in the Series/Index from the %(side)s, - at the specified delimiter string. Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - pat : str, optional - String or regular expression to split on. - If not specified, split on whitespace. - n : int, default -1 (all) - Limit number of splits in output. - ``None``, 0 and -1 will be interpreted as return all splits. - expand : bool, default False - Expand the split strings into separate columns. - - * If ``True``, return DataFrame/MultiIndex expanding dimensionality. - * If ``False``, return Series/Index, containing lists of strings. - - Returns - ------- - Series, Index, DataFrame or MultiIndex - Type matches caller unless ``expand=True`` (see Notes). - - See Also - -------- - Series.str.split : Split strings around given separator/delimiter. - Series.str.rsplit : Splits string around given separator/delimiter, - starting from the right. - Series.str.join : Join lists contained as elements in the Series/Index - with passed delimiter. - str.split : Standard library version for split. - str.rsplit : Standard library version for rsplit. - - Notes - ----- - The handling of the `n` keyword depends on the number of found splits: - - - If found splits > `n`, make first `n` splits only - - If found splits <= `n`, make all splits - - If for a certain row the number of found splits < `n`, - append `None` for padding up to `n` if ``expand=True`` - - If using ``expand=True``, Series and Index callers return DataFrame and - MultiIndex objects, respectively. - - Examples - -------- - >>> s = pd.Series( - ... [ - ... "this is a regular sentence", - ... "https://docs.python.org/3/tutorial/index.html", - ... np.nan - ... ] - ... ) - >>> s - 0 this is a regular sentence - 1 https://docs.python.org/3/tutorial/index.html - 2 NaN - dtype: object - - In the default setting, the string is split by whitespace. - - >>> s.str.split() - 0 [this, is, a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - Without the `n` parameter, the outputs of `rsplit` and `split` - are identical. - - >>> s.str.rsplit() - 0 [this, is, a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - The `n` parameter can be used to limit the number of splits on the - delimiter. The outputs of `split` and `rsplit` are different. - - >>> s.str.split(n=2) - 0 [this, is, a regular sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - >>> s.str.rsplit(n=2) - 0 [this is a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - The `pat` parameter can be used to split by other characters. - - >>> s.str.split(pat="/") - 0 [this is a regular sentence] - 1 [https:, , docs.python.org, 3, tutorial, index... - 2 NaN - dtype: object - - When using ``expand=True``, the split elements will expand out into - separate columns. If NaN is present, it is propagated throughout - the columns during the split. 
- - >>> s.str.split(expand=True) - 0 1 2 3 4 - 0 this is a regular sentence - 1 https://docs.python.org/3/tutorial/index.html None None None None - 2 NaN NaN NaN NaN NaN - - For slightly more complex use cases like splitting the html document name - from a url, a combination of parameter settings can be used. - - >>> s.str.rsplit("/", n=1, expand=True) - 0 1 - 0 this is a regular sentence None - 1 https://docs.python.org/3/tutorial index.html - 2 NaN NaN - - Remember to escape special characters when explicitly using regular - expressions. - - >>> s = pd.Series(["1+1=2"]) - >>> s - 0 1+1=2 - dtype: object - >>> s.str.split(r"\+|=", expand=True) - 0 1 2 - 0 1 1 2 - """ - - @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"}) - @forbid_nonstring_types(["bytes"]) - def split(self, pat=None, n=-1, expand=False): - result = str_split(self._parent, pat, n=n) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"}) - @forbid_nonstring_types(["bytes"]) - def rsplit(self, pat=None, n=-1, expand=False): - result = str_rsplit(self._parent, pat, n=n) - return self._wrap_result(result, expand=expand, returns_string=expand) - - _shared_docs[ - "str_partition" - ] = """ - Split the string at the %(side)s occurrence of `sep`. - - This method splits the string at the %(side)s occurrence of `sep`, - and returns 3 elements containing the part before the separator, - the separator itself, and the part after the separator. - If the separator is not found, return %(return)s. - - Parameters - ---------- - sep : str, default whitespace - String to split on. - expand : bool, default True - If True, return DataFrame/MultiIndex expanding dimensionality. - If False, return Series/Index. - - Returns - ------- - DataFrame/MultiIndex or Series/Index of objects - - See Also - -------- - %(also)s - Series.str.split : Split strings around given separators. - str.partition : Standard library version. 
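Element-wise this is :meth:`str.partition` / :meth:`str.rpartition`, which always return a 3-tuple; with ``expand=True`` those tuples become columns. A quick sketch of the per-element step:

    ["Linda van der Berg".partition(" "), "George Pitt-Rivers".partition("-")]
    # [('Linda', ' ', 'van der Berg'), ('George Pitt', '-', 'Rivers')]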
- - Examples - -------- - - >>> s = pd.Series(['Linda van der Berg', 'George Pitt-Rivers']) - >>> s - 0 Linda van der Berg - 1 George Pitt-Rivers - dtype: object - - >>> s.str.partition() - 0 1 2 - 0 Linda van der Berg - 1 George Pitt-Rivers - - To partition by the last space instead of the first one: - - >>> s.str.rpartition() - 0 1 2 - 0 Linda van der Berg - 1 George Pitt-Rivers - - To partition by something different than a space: - - >>> s.str.partition('-') - 0 1 2 - 0 Linda van der Berg - 1 George Pitt - Rivers - - To return a Series containing tuples instead of a DataFrame: - - >>> s.str.partition('-', expand=False) - 0 (Linda van der Berg, , ) - 1 (George Pitt, -, Rivers) - dtype: object - - Also available on indices: - - >>> idx = pd.Index(['X 123', 'Y 999']) - >>> idx - Index(['X 123', 'Y 999'], dtype='object') - - Which will create a MultiIndex: - - >>> idx.str.partition() - MultiIndex([('X', ' ', '123'), - ('Y', ' ', '999')], - ) - - Or an index with tuples with ``expand=False``: - - >>> idx.str.partition(expand=False) - Index([('X', ' ', '123'), ('Y', ' ', '999')], dtype='object') - """ - - @Appender( - _shared_docs["str_partition"] - % { - "side": "first", - "return": "3 elements containing the string itself, followed by two " - "empty strings", - "also": "rpartition : Split the string at the last occurrence of `sep`.", - } - ) - @forbid_nonstring_types(["bytes"]) - def partition(self, sep=" ", expand=True): - f = lambda x: x.partition(sep) - result = _na_map(f, self._parent) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @Appender( - _shared_docs["str_partition"] - % { - "side": "last", - "return": "3 elements containing two empty strings, followed by the " - "string itself", - "also": "partition : Split the string at the first occurrence of `sep`.", - } - ) - @forbid_nonstring_types(["bytes"]) - def rpartition(self, sep=" ", expand=True): - f = lambda x: x.rpartition(sep) - result = _na_map(f, self._parent) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @copy(str_get) - def get(self, i): - result = str_get(self._parent, i) - return self._wrap_result(result) - - @copy(str_join) - @forbid_nonstring_types(["bytes"]) - def join(self, sep): - result = str_join(self._parent, sep) - return self._wrap_result(result) - - @copy(str_contains) - @forbid_nonstring_types(["bytes"]) - def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): - result = str_contains( - self._parent, pat, case=case, flags=flags, na=na, regex=regex - ) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_match) - @forbid_nonstring_types(["bytes"]) - def match(self, pat, case=True, flags=0, na=np.nan): - result = str_match(self._parent, pat, case=case, flags=flags, na=na) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_fullmatch) - @forbid_nonstring_types(["bytes"]) - def fullmatch(self, pat, case=True, flags=0, na=np.nan): - result = str_fullmatch(self._parent, pat, case=case, flags=flags, na=na) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_replace) - @forbid_nonstring_types(["bytes"]) - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): - result = str_replace( - self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex - ) - return self._wrap_result(result) - - @copy(str_repeat) - @forbid_nonstring_types(["bytes"]) - def repeat(self, repeats): - result = str_repeat(self._parent, repeats) - return 
self._wrap_result(result) - - @copy(str_pad) - @forbid_nonstring_types(["bytes"]) - def pad(self, width, side="left", fillchar=" "): - result = str_pad(self._parent, width, side=side, fillchar=fillchar) - return self._wrap_result(result) - - _shared_docs[ - "str_pad" - ] = """ - Pad %(side)s side of strings in the Series/Index. - - Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - width : int - Minimum width of resulting string; additional characters will be filled - with ``fillchar``. - fillchar : str - Additional character for filling, default is whitespace. - - Returns - ------- - filled : Series/Index of objects. - """ - - @Appender(_shared_docs["str_pad"] % dict(side="left and right", method="center")) - @forbid_nonstring_types(["bytes"]) - def center(self, width, fillchar=" "): - return self.pad(width, side="both", fillchar=fillchar) - - @Appender(_shared_docs["str_pad"] % dict(side="right", method="ljust")) - @forbid_nonstring_types(["bytes"]) - def ljust(self, width, fillchar=" "): - return self.pad(width, side="right", fillchar=fillchar) - - @Appender(_shared_docs["str_pad"] % dict(side="left", method="rjust")) - @forbid_nonstring_types(["bytes"]) - def rjust(self, width, fillchar=" "): - return self.pad(width, side="left", fillchar=fillchar) - - @forbid_nonstring_types(["bytes"]) - def zfill(self, width): - """ - Pad strings in the Series/Index by prepending '0' characters. - - Strings in the Series/Index are padded with '0' characters on the - left of the string to reach a total string length `width`. Strings - in the Series/Index with length greater or equal to `width` are - unchanged. - - Parameters - ---------- - width : int - Minimum length of resulting string; strings with length less - than `width` be prepended with '0' characters. - - Returns - ------- - Series/Index of objects. - - See Also - -------- - Series.str.rjust : Fills the left side of strings with an arbitrary - character. - Series.str.ljust : Fills the right side of strings with an arbitrary - character. - Series.str.pad : Fills the specified sides of strings with an arbitrary - character. - Series.str.center : Fills boths sides of strings with an arbitrary - character. - - Notes - ----- - Differs from :meth:`str.zfill` which has special handling - for '+'/'-' in the string. - - Examples - -------- - >>> s = pd.Series(['-1', '1', '1000', 10, np.nan]) - >>> s - 0 -1 - 1 1 - 2 1000 - 3 10 - 4 NaN - dtype: object - - Note that ``10`` and ``NaN`` are not strings, therefore they are - converted to ``NaN``. The minus sign in ``'-1'`` is treated as a - regular character and the zero is added to the left of it - (:meth:`str.zfill` would have moved it to the left). ``1000`` - remains unchanged as it is longer than `width`. - - >>> s.str.zfill(3) - 0 0-1 - 1 001 - 2 1000 - 3 NaN - 4 NaN - dtype: object - """ - result = str_pad(self._parent, width, side="left", fillchar="0") - return self._wrap_result(result) - - @copy(str_slice) - def slice(self, start=None, stop=None, step=None): - result = str_slice(self._parent, start, stop, step) - return self._wrap_result(result) - - @copy(str_slice_replace) - @forbid_nonstring_types(["bytes"]) - def slice_replace(self, start=None, stop=None, repl=None): - result = str_slice_replace(self._parent, start, stop, repl) - return self._wrap_result(result) - - @copy(str_decode) - def decode(self, encoding, errors="strict"): - # need to allow bytes here - result = str_decode(self._parent, encoding, errors) - # TODO: Not sure how to handle this. 
- return self._wrap_result(result, returns_string=False) - - @copy(str_encode) - @forbid_nonstring_types(["bytes"]) - def encode(self, encoding, errors="strict"): - result = str_encode(self._parent, encoding, errors) - return self._wrap_result(result, returns_string=False) - - _shared_docs[ - "str_strip" - ] = r""" - Remove %(position)s characters. - - Strip whitespaces (including newlines) or a set of specified characters - from each string in the Series/Index from %(side)s. - Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - to_strip : str or None, default None - Specifying the set of characters to be removed. - All combinations of this set of characters will be stripped. - If None then whitespaces are removed. - - Returns - ------- - Series or Index of object - - See Also - -------- - Series.str.strip : Remove leading and trailing characters in Series/Index. - Series.str.lstrip : Remove leading characters in Series/Index. - Series.str.rstrip : Remove trailing characters in Series/Index. - - Examples - -------- - >>> s = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', np.nan]) - >>> s - 0 1. Ant. - 1 2. Bee!\n - 2 3. Cat?\t - 3 NaN - dtype: object - - >>> s.str.strip() - 0 1. Ant. - 1 2. Bee! - 2 3. Cat? - 3 NaN - dtype: object - - >>> s.str.lstrip('123.') - 0 Ant. - 1 Bee!\n - 2 Cat?\t - 3 NaN - dtype: object - - >>> s.str.rstrip('.!? \n\t') - 0 1. Ant - 1 2. Bee - 2 3. Cat - 3 NaN - dtype: object - - >>> s.str.strip('123.!? \n\t') - 0 Ant - 1 Bee - 2 Cat - 3 NaN - dtype: object - """ - - @Appender( - _shared_docs["str_strip"] - % dict( - side="left and right sides", method="strip", position="leading and trailing" - ) - ) - @forbid_nonstring_types(["bytes"]) - def strip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="both") - return self._wrap_result(result) - - @Appender( - _shared_docs["str_strip"] - % dict(side="left side", method="lstrip", position="leading") - ) - @forbid_nonstring_types(["bytes"]) - def lstrip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="left") - return self._wrap_result(result) - - @Appender( - _shared_docs["str_strip"] - % dict(side="right side", method="rstrip", position="trailing") - ) - @forbid_nonstring_types(["bytes"]) - def rstrip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="right") - return self._wrap_result(result) - - @copy(str_wrap) - @forbid_nonstring_types(["bytes"]) - def wrap(self, width, **kwargs): - result = str_wrap(self._parent, width, **kwargs) - return self._wrap_result(result) - - @copy(str_get_dummies) - @forbid_nonstring_types(["bytes"]) - def get_dummies(self, sep="|"): - # we need to cast to Series of strings as only that has all - # methods available for making the dummies... 
- data = self._orig.astype(str) if self._is_categorical else self._parent - result, name = str_get_dummies(data, sep) - return self._wrap_result( - result, - use_codes=(not self._is_categorical), - name=name, - expand=True, - returns_string=False, - ) - - @copy(str_translate) - @forbid_nonstring_types(["bytes"]) - def translate(self, table): - result = str_translate(self._parent, table) - return self._wrap_result(result) - - count = _pat_wrapper(str_count, flags=True, name="count", returns_string=False) - startswith = _pat_wrapper( - str_startswith, na=True, name="startswith", returns_string=False - ) - endswith = _pat_wrapper( - str_endswith, na=True, name="endswith", returns_string=False - ) - findall = _pat_wrapper( - str_findall, flags=True, name="findall", returns_string=False - ) - - @copy(str_extract) - @forbid_nonstring_types(["bytes"]) - def extract(self, pat, flags=0, expand=True): - return str_extract(self, pat, flags=flags, expand=expand) - - @copy(str_extractall) - @forbid_nonstring_types(["bytes"]) - def extractall(self, pat, flags=0): - return str_extractall(self._orig, pat, flags=flags) - - _shared_docs[ - "find" - ] = """ - Return %(side)s indexes in each strings in the Series/Index. - - Each of returned indexes corresponds to the position where the - substring is fully contained between [start:end]. Return -1 on - failure. Equivalent to standard :meth:`str.%(method)s`. - - Parameters - ---------- - sub : str - Substring being searched. - start : int - Left edge index. - end : int - Right edge index. - - Returns - ------- - Series or Index of int. - - See Also - -------- - %(also)s - """ - - @Appender( - _shared_docs["find"] - % dict( - side="lowest", - method="find", - also="rfind : Return highest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def find(self, sub, start=0, end=None): - result = str_find(self._parent, sub, start=start, end=end, side="left") - return self._wrap_result(result, returns_string=False) - - @Appender( - _shared_docs["find"] - % dict( - side="highest", - method="rfind", - also="find : Return lowest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def rfind(self, sub, start=0, end=None): - result = str_find(self._parent, sub, start=start, end=end, side="right") - return self._wrap_result(result, returns_string=False) - - @forbid_nonstring_types(["bytes"]) - def normalize(self, form): - """ - Return the Unicode normal form for the strings in the Series/Index. - - For more information on the forms, see the - :func:`unicodedata.normalize`. - - Parameters - ---------- - form : {'NFC', 'NFKC', 'NFD', 'NFKD'} - Unicode form. - - Returns - ------- - normalized : Series/Index of objects - """ - import unicodedata - - f = lambda x: unicodedata.normalize(form, x) - result = _na_map(f, self._parent, dtype=str) - return self._wrap_result(result) - - _shared_docs[ - "index" - ] = """ - Return %(side)s indexes in each string in Series/Index. - - Each of the returned indexes corresponds to the position where the - substring is fully contained between [start:end]. This is the same - as ``str.%(similar)s`` except instead of returning -1, it raises a - ValueError when the substring is not found. Equivalent to standard - ``str.%(method)s``. - - Parameters - ---------- - sub : str - Substring being searched. - start : int - Left edge index. - end : int - Right edge index. 
- - Returns - ------- - Series or Index of object - - See Also - -------- - %(also)s - """ - - @Appender( - _shared_docs["index"] - % dict( - side="lowest", - similar="find", - method="index", - also="rindex : Return highest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def index(self, sub, start=0, end=None): - result = str_index(self._parent, sub, start=start, end=end, side="left") - return self._wrap_result(result, returns_string=False) - - @Appender( - _shared_docs["index"] - % dict( - side="highest", - similar="rfind", - method="rindex", - also="index : Return lowest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def rindex(self, sub, start=0, end=None): - result = str_index(self._parent, sub, start=start, end=end, side="right") - return self._wrap_result(result, returns_string=False) - - _shared_docs[ - "len" - ] = """ - Compute the length of each element in the Series/Index. - - The element may be a sequence (such as a string, tuple or list) or a collection - (such as a dictionary). - - Returns - ------- - Series or Index of int - A Series or Index of integer values indicating the length of each - element in the Series or Index. - - See Also - -------- - str.len : Python built-in function returning the length of an object. - Series.size : Returns the length of the Series. - - Examples - -------- - Returns the length (number of characters) in a string. Returns the - number of entries for dictionaries, lists or tuples. - - >>> s = pd.Series(['dog', - ... '', - ... 5, - ... {'foo' : 'bar'}, - ... [2, 3, 5, 7], - ... ('one', 'two', 'three')]) - >>> s - 0 dog - 1 - 2 5 - 3 {'foo': 'bar'} - 4 [2, 3, 5, 7] - 5 (one, two, three) - dtype: object - >>> s.str.len() - 0 3.0 - 1 0.0 - 2 NaN - 3 1.0 - 4 4.0 - 5 3.0 - dtype: float64 - """ - len = _noarg_wrapper( - len, - docstring=_shared_docs["len"], - forbidden_types=None, - dtype=np.dtype("int64"), - returns_string=False, - ) - - _shared_docs[ - "casemethods" - ] = """ - Convert strings in the Series/Index to %(type)s. - %(version)s - Equivalent to :meth:`str.%(method)s`. - - Returns - ------- - Series or Index of object - - See Also - -------- - Series.str.lower : Converts all characters to lowercase. - Series.str.upper : Converts all characters to uppercase. - Series.str.title : Converts first character of each word to uppercase and - remaining to lowercase. - Series.str.capitalize : Converts first character to uppercase and - remaining to lowercase. - Series.str.swapcase : Converts uppercase to lowercase and lowercase to - uppercase. - Series.str.casefold: Removes all case distinctions in the string. 
- - Examples - -------- - >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe']) - >>> s - 0 lower - 1 CAPITALS - 2 this is a sentence - 3 SwApCaSe - dtype: object - - >>> s.str.lower() - 0 lower - 1 capitals - 2 this is a sentence - 3 swapcase - dtype: object - - >>> s.str.upper() - 0 LOWER - 1 CAPITALS - 2 THIS IS A SENTENCE - 3 SWAPCASE - dtype: object - - >>> s.str.title() - 0 Lower - 1 Capitals - 2 This Is A Sentence - 3 Swapcase - dtype: object - - >>> s.str.capitalize() - 0 Lower - 1 Capitals - 2 This is a sentence - 3 Swapcase - dtype: object - - >>> s.str.swapcase() - 0 LOWER - 1 capitals - 2 THIS IS A SENTENCE - 3 sWaPcAsE - dtype: object - """ - - # _doc_args holds dict of strings to use in substituting casemethod docs - _doc_args: Dict[str, Dict[str, str]] = {} - _doc_args["lower"] = dict(type="lowercase", method="lower", version="") - _doc_args["upper"] = dict(type="uppercase", method="upper", version="") - _doc_args["title"] = dict(type="titlecase", method="title", version="") - _doc_args["capitalize"] = dict( - type="be capitalized", method="capitalize", version="" - ) - _doc_args["swapcase"] = dict(type="be swapcased", method="swapcase", version="") - _doc_args["casefold"] = dict( - type="be casefolded", - method="casefold", - version="\n .. versionadded:: 0.25.0\n", - ) - lower = _noarg_wrapper( - lambda x: x.lower(), - name="lower", - docstring=_shared_docs["casemethods"] % _doc_args["lower"], - dtype=str, - ) - upper = _noarg_wrapper( - lambda x: x.upper(), - name="upper", - docstring=_shared_docs["casemethods"] % _doc_args["upper"], - dtype=str, - ) - title = _noarg_wrapper( - lambda x: x.title(), - name="title", - docstring=_shared_docs["casemethods"] % _doc_args["title"], - dtype=str, - ) - capitalize = _noarg_wrapper( - lambda x: x.capitalize(), - name="capitalize", - docstring=_shared_docs["casemethods"] % _doc_args["capitalize"], - dtype=str, - ) - swapcase = _noarg_wrapper( - lambda x: x.swapcase(), - name="swapcase", - docstring=_shared_docs["casemethods"] % _doc_args["swapcase"], - dtype=str, - ) - casefold = _noarg_wrapper( - lambda x: x.casefold(), - name="casefold", - docstring=_shared_docs["casemethods"] % _doc_args["casefold"], - dtype=str, - ) - - _shared_docs[ - "ismethods" - ] = """ - Check whether all characters in each string are %(type)s. - - This is equivalent to running the Python string method - :meth:`str.%(method)s` for each element of the Series/Index. If a string - has zero characters, ``False`` is returned for that check. - - Returns - ------- - Series or Index of bool - Series or Index of boolean values with the same length as the original - Series/Index. - - See Also - -------- - Series.str.isalpha : Check whether all characters are alphabetic. - Series.str.isnumeric : Check whether all characters are numeric. - Series.str.isalnum : Check whether all characters are alphanumeric. - Series.str.isdigit : Check whether all characters are digits. - Series.str.isdecimal : Check whether all characters are decimal. - Series.str.isspace : Check whether all characters are whitespace. - Series.str.islower : Check whether all characters are lowercase. - Series.str.isupper : Check whether all characters are uppercase. - Series.str.istitle : Check whether all characters are titlecase. 
- - Examples - -------- - **Checks for Alphabetic and Numeric Characters** - - >>> s1 = pd.Series(['one', 'one1', '1', '']) - - >>> s1.str.isalpha() - 0 True - 1 False - 2 False - 3 False - dtype: bool - - >>> s1.str.isnumeric() - 0 False - 1 False - 2 True - 3 False - dtype: bool - - >>> s1.str.isalnum() - 0 True - 1 True - 2 True - 3 False - dtype: bool - - Note that checks against characters mixed with any additional punctuation - or whitespace will evaluate to false for an alphanumeric check. - - >>> s2 = pd.Series(['A B', '1.5', '3,000']) - >>> s2.str.isalnum() - 0 False - 1 False - 2 False - dtype: bool - - **More Detailed Checks for Numeric Characters** - - There are several different but overlapping sets of numeric characters that - can be checked for. - - >>> s3 = pd.Series(['23', '³', '⅕', '']) - - The ``s3.str.isdecimal`` method checks for characters used to form numbers - in base 10. - - >>> s3.str.isdecimal() - 0 True - 1 False - 2 False - 3 False - dtype: bool - - The ``s.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also - includes special digits, like superscripted and subscripted digits in - unicode. - - >>> s3.str.isdigit() - 0 True - 1 True - 2 False - 3 False - dtype: bool - - The ``s.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also - includes other characters that can represent quantities such as unicode - fractions. - - >>> s3.str.isnumeric() - 0 True - 1 True - 2 True - 3 False - dtype: bool - - **Checks for Whitespace** - - >>> s4 = pd.Series([' ', '\\t\\r\\n ', '']) - >>> s4.str.isspace() - 0 True - 1 True - 2 False - dtype: bool - - **Checks for Character Case** - - >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) - - >>> s5.str.islower() - 0 True - 1 False - 2 False - 3 False - dtype: bool - - >>> s5.str.isupper() - 0 False - 1 False - 2 True - 3 False - dtype: bool - - The ``s5.str.istitle`` method checks for whether all words are in title - case (whether only the first letter of each word is capitalized). Words are - assumed to be as any sequence of non-numeric characters separated by - whitespace characters. 
- - >>> s5.str.istitle() - 0 False - 1 True - 2 False - 3 False - dtype: bool - """ - _doc_args["isalnum"] = dict(type="alphanumeric", method="isalnum") - _doc_args["isalpha"] = dict(type="alphabetic", method="isalpha") - _doc_args["isdigit"] = dict(type="digits", method="isdigit") - _doc_args["isspace"] = dict(type="whitespace", method="isspace") - _doc_args["islower"] = dict(type="lowercase", method="islower") - _doc_args["isupper"] = dict(type="uppercase", method="isupper") - _doc_args["istitle"] = dict(type="titlecase", method="istitle") - _doc_args["isnumeric"] = dict(type="numeric", method="isnumeric") - _doc_args["isdecimal"] = dict(type="decimal", method="isdecimal") - # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624) - isalnum = _noarg_wrapper( - lambda x: x.isalnum(), - name="isalnum", - docstring=_shared_docs["ismethods"] % _doc_args["isalnum"], - returns_string=False, - dtype=np.dtype(bool), - ) - isalpha = _noarg_wrapper( - lambda x: x.isalpha(), - name="isalpha", - docstring=_shared_docs["ismethods"] % _doc_args["isalpha"], - returns_string=False, - dtype=np.dtype(bool), - ) - isdigit = _noarg_wrapper( - lambda x: x.isdigit(), - name="isdigit", - docstring=_shared_docs["ismethods"] % _doc_args["isdigit"], - returns_string=False, - dtype=np.dtype(bool), - ) - isspace = _noarg_wrapper( - lambda x: x.isspace(), - name="isspace", - docstring=_shared_docs["ismethods"] % _doc_args["isspace"], - returns_string=False, - dtype=np.dtype(bool), - ) - islower = _noarg_wrapper( - lambda x: x.islower(), - name="islower", - docstring=_shared_docs["ismethods"] % _doc_args["islower"], - returns_string=False, - dtype=np.dtype(bool), - ) - isupper = _noarg_wrapper( - lambda x: x.isupper(), - name="isupper", - docstring=_shared_docs["ismethods"] % _doc_args["isupper"], - returns_string=False, - dtype=np.dtype(bool), - ) - istitle = _noarg_wrapper( - lambda x: x.istitle(), - name="istitle", - docstring=_shared_docs["ismethods"] % _doc_args["istitle"], - returns_string=False, - dtype=np.dtype(bool), - ) - isnumeric = _noarg_wrapper( - lambda x: x.isnumeric(), - name="isnumeric", - docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"], - returns_string=False, - dtype=np.dtype(bool), - ) - isdecimal = _noarg_wrapper( - lambda x: x.isdecimal(), - name="isdecimal", - docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"], - returns_string=False, - dtype=np.dtype(bool), - ) - - @classmethod - def _make_accessor(cls, data): - cls._validate(data) - return cls(data) diff --git a/pandas/core/strings/__init__.py b/pandas/core/strings/__init__.py new file mode 100644 index 0000000000000..c36a96edf7125 --- /dev/null +++ b/pandas/core/strings/__init__.py @@ -0,0 +1 @@ +from .accessor import StringMethods diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py new file mode 100644 index 0000000000000..84630066ef871 --- /dev/null +++ b/pandas/core/strings/accessor.py @@ -0,0 +1,2752 @@ +import codecs +from functools import wraps +import operator +from typing import Dict +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import is_bool_dtype, is_categorical_dtype, is_list_like +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndexClass, + ABCMultiIndex, + ABCSeries, +) + +from pandas.core.algorithms import take_1d +from pandas.core.arrays.numpy_ import PandasArray +from pandas.core.base import NoNewAttributesMixin +from 
pandas.core.strings.object_array import ObjectProxy + +_shared_docs: Dict[str, str] = dict() +_cpython_optimized_encoders = ( + "utf-8", + "utf8", + "latin-1", + "latin1", + "iso-8859-1", + "mbcs", + "ascii", +) +_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") + + +def forbid_nonstring_types(forbidden, name=None): + """ + Decorator to forbid specific types for a method of StringMethods. + + For calling `.str.{method}` on a Series or Index, it is necessary to first + initialize the :class:`StringMethods` object, and then call the method. + However, different methods allow different input types, and so this can not + be checked during :meth:`StringMethods.__init__`, but must be done on a + per-method basis. This decorator exists to facilitate this process, and + make it explicit which (inferred) types are disallowed by the method. + + :meth:`StringMethods.__init__` allows the *union* of types its different + methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), + namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. + + The default string types ['string', 'empty'] are allowed for all methods. + For the additional types ['bytes', 'mixed', 'mixed-integer'], each method + then needs to forbid the types it is not intended for. + + Parameters + ---------- + forbidden : list-of-str or None + List of forbidden non-string types, may be one or more of + `['bytes', 'mixed', 'mixed-integer']`. + name : str, default None + Name of the method to use in the error message. By default, this is + None, in which case the name from the method being wrapped will be + copied. However, for working with further wrappers (like _pat_wrapper + and _noarg_wrapper), it is necessary to specify the name. + + Returns + ------- + func : wrapper + The method to which the decorator is applied, with an added check that + enforces the inferred type to not be in the list of forbidden types. + + Raises + ------ + TypeError + If the inferred type of the underlying data is in `forbidden`. + """ + # deal with None + forbidden = [] if forbidden is None else forbidden + + allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( + forbidden + ) + + def _forbid_nonstring_types(func): + func_name = func.__name__ if name is None else name + + @wraps(func) + def wrapper(self, *args, **kwargs): + if self._inferred_dtype not in allowed_types: + msg = ( + f"Cannot use .str.{func_name} with values of " + f"inferred dtype '{self._inferred_dtype}'." + ) + raise TypeError(msg) + return func(self, *args, **kwargs) + + wrapper.__name__ = func_name + return wrapper + + return _forbid_nonstring_types + + +def _map_and_wrap(name, docstring): + def wrapper(self): + result = operator.methodcaller(name)(self._array._str) + return self._wrap_result(result) + + wrapper.__doc__ = docstring + return wrapper + + +class StringMethods(NoNewAttributesMixin): + """ + Vectorized string functions for Series and Index. + + NAs stay NA unless handled otherwise by a particular method. + Patterned after Python's string methods, with some inspiration from + R's stringr package. 
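+
+    All string methods dispatch to the ``_str`` namespace of the backing
+    array and wrap the result back into a Series or Index via
+    ``_wrap_result``.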
+ + Examples + -------- + >>> s = pd.Series(["A_Str_Series"]) + >>> s + 0 A_Str_Series + dtype: object + + >>> s.str.split("_") + 0 [A, Str, Series] + dtype: object + + >>> s.str.replace("_", "") + 0 AStrSeries + dtype: object + """ + + def __init__(self, data): + from pandas.core.arrays.string_ import StringDtype + + self._inferred_dtype = self._validate(data) + self._is_categorical = is_categorical_dtype(data.dtype) + self._is_string = isinstance(data.dtype, StringDtype) + array = data.array + + if isinstance(array, PandasArray): + # wrap in an object proxy to get the str methods. + # Alternatively, just add _str to PandasArray. + array = ObjectProxy(array._ndarray) + self._array = array + + if isinstance(data, ABCSeries): + self._index = data.index + self._name = data.name + else: + self._index = self._name = None + + # ._values.categories works for both Series/Index + self._parent = data._values.categories if self._is_categorical else data + # save orig to blow up categoricals to the right type + self._orig = data + self._freeze() + + @staticmethod + def _validate(data): + """ + Auxiliary function for StringMethods, infers and checks dtype of data. + + This is a "first line of defence" at the creation of the StringMethods- + object (see _make_accessor), and just checks that the dtype is in the + *union* of the allowed types over all string methods below; this + restriction is then refined on a per-method basis using the decorator + @forbid_nonstring_types (more info in the corresponding docstring). + + This really should exclude all series/index with any non-string values, + but that isn't practical for performance reasons until we have a str + dtype (GH 9343 / 13877) + + Parameters + ---------- + data : The content of the Series + + Returns + ------- + dtype : inferred dtype of data + """ + from pandas import StringDtype + + if isinstance(data, ABCMultiIndex): + raise AttributeError( + "Can only use .str accessor with Index, not MultiIndex" + ) + + # see _libs/lib.pyx for list of inferred types + allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] + + values = getattr(data, "values", data) # Series / Index + values = getattr(values, "categories", values) # categorical / normal + + # explicitly allow StringDtype + if isinstance(values.dtype, StringDtype): + return "string" + + try: + inferred_dtype = lib.infer_dtype(values, skipna=True) + except ValueError: + # GH#27571 mostly occurs with ExtensionArray + inferred_dtype = None + + if inferred_dtype not in allowed_types: + raise AttributeError("Can only use .str accessor with string values!") + return inferred_dtype + + def __getitem__(self, key): + return self._array._str[key] + # if isinstance(key, slice): + # return self.slice(start=key.start, stop=key.stop, step=key.step) + # else: + # return self.get(key) + + def __iter__(self): + warnings.warn( + "Columnar iteration over characters will be deprecated in future releases.", + FutureWarning, + stacklevel=2, + ) + i = 0 + g = self.get(i) + while g.notna().any(): + yield g + i += 1 + g = self.get(i) + + def _wrap_result( + self, + result, + use_codes=True, + name=None, + expand=None, + fill_value=np.nan, + returns_string=True, + ): + from pandas import Index, MultiIndex, Series + + # for category, we do the stuff on the categories, so blow it up + # to the full series again + # But for some operations, we have to do the stuff on the full values, + # so make it possible to skip this step as the method already did this + # before the transformation... 
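+        #
+        # A small illustration (comment only): for a categorical
+        # Series(["a", "b", "a"], dtype="category"), a string method runs on
+        # the two categories ["a", "b"] only; take_1d below then uses the
+        # codes [0, 1, 0] to re-expand the two results to all three rows.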
+ if use_codes and self._is_categorical: + # if self._orig is a CategoricalIndex, there is no .cat-accessor + result = take_1d( + result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value + ) + + if not hasattr(result, "ndim") or not hasattr(result, "dtype"): + return result + assert result.ndim < 3 + + # We can be wrapping a string / object / categorical result, in which + # case we'll want to return the same dtype as the input. + # Or we can be wrapping a numeric output, in which case we don't want + # to return a StringArray. + if self._is_string and returns_string: + dtype = "string" + else: + dtype = None + + if expand is None: + # infer from ndim if expand is not specified + expand = result.ndim != 1 + + elif expand is True and not isinstance(self._orig, ABCIndexClass): + # required when expand=True is explicitly specified + # not needed when inferred + + def cons_row(x): + if is_list_like(x): + return x + else: + return [x] + + result = [cons_row(x) for x in result] + if result: + # propagate nan values to match longest sequence (GH 18450) + max_len = max(len(x) for x in result) + result = [ + x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result + ] + + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + + if expand is False: + # if expand is False, result should have the same name + # as the original otherwise specified + if name is None: + name = getattr(result, "name", None) + if name is None: + # do not use logical or, _orig may be a DataFrame + # which has "name" column + name = self._orig.name + + # Wait until we are sure result is a Series or Index before + # checking attributes (GH 12180) + if isinstance(self._orig, ABCIndexClass): + # if result is a boolean np.array, return the np.array + # instead of wrapping it into a boolean Index (GH 8875) + if is_bool_dtype(result): + return result + + if expand: + result = list(result) + out = MultiIndex.from_tuples(result, names=name) + if out.nlevels == 1: + # We had all tuples of length-one, which are + # better represented as a regular Index. + out = out.get_level_values(0) + return out + else: + return Index(result, name=name) + else: + index = self._orig.index + if expand: + cons = self._orig._constructor_expanddim + result = cons(result, columns=name, index=index, dtype=dtype) + else: + # Must be a Series + cons = self._orig._constructor + result = cons(result, name=name, index=index, dtype=dtype) + return result + + def _get_series_list(self, others): + """ + Auxiliary function for :meth:`str.cat`. Turn potentially mixed input + into a list of Series (elements without an index must match the length + of the calling Series/Index). + + Parameters + ---------- + others : Series, DataFrame, np.ndarray, list-like or list-like of + Objects that are either Series, Index or np.ndarray (1-dim). + + Returns + ------- + list of Series + Others transformed into list of Series. + """ + from pandas import DataFrame, Series + + # self._orig is either Series or Index + idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index + + # Generally speaking, all objects without an index inherit the index + # `idx` of the calling Series/Index - i.e. must have matching length. + # Objects with an index (i.e. Series/Index/DataFrame) keep their own. 
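+        #
+        # For example, with a caller of length 3 a bare list ["x", "y", "z"]
+        # becomes [Series(["x", "y", "z"], index=idx)], while a DataFrame is
+        # unpacked into one Series per column, each keeping its own index.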
+ if isinstance(others, ABCSeries): + return [others] + elif isinstance(others, ABCIndexClass): + return [Series(others._values, index=idx)] + elif isinstance(others, ABCDataFrame): + return [others[x] for x in others] + elif isinstance(others, np.ndarray) and others.ndim == 2: + others = DataFrame(others, index=idx) + return [others[x] for x in others] + elif is_list_like(others, allow_sets=False): + others = list(others) # ensure iterators do not get read twice etc + + # in case of list-like `others`, all elements must be + # either Series/Index/np.ndarray (1-dim)... + if all( + isinstance(x, (ABCSeries, ABCIndexClass)) + or (isinstance(x, np.ndarray) and x.ndim == 1) + for x in others + ): + los = [] + while others: # iterate through list and append each element + los = los + self._get_series_list(others.pop(0)) + return los + # ... or just strings + elif all(not is_list_like(x) for x in others): + return [Series(others, index=idx)] + raise TypeError( + "others must be Series, Index, DataFrame, np.ndarray " + "or list-like (either containing only strings or " + "containing only objects of type Series/Index/" + "np.ndarray[1-dim])" + ) + + @forbid_nonstring_types(["bytes", "mixed", "mixed-integer"]) + def cat(self, others=None, sep=None, na_rep=None, join="left"): + """ + Concatenate strings in the Series/Index with given separator. + + If `others` is specified, this function concatenates the Series/Index + and elements of `others` element-wise. + If `others` is not passed, then all values in the Series/Index are + concatenated into a single string with a given `sep`. + + Parameters + ---------- + others : Series, Index, DataFrame, np.ndarray or list-like + Series, Index, DataFrame, np.ndarray (one- or two-dimensional) and + other list-likes of strings must have the same length as the + calling Series/Index, with the exception of indexed objects (i.e. + Series/Index/DataFrame) if `join` is not None. + + If others is a list-like that contains a combination of Series, + Index or np.ndarray (1-dim), then all elements will be unpacked and + must satisfy the above criteria individually. + + If others is None, the method returns the concatenation of all + strings in the calling Series/Index. + sep : str, default '' + The separator between the different elements/columns. By default + the empty string `''` is used. + na_rep : str or None, default None + Representation that is inserted for all missing values: + + - If `na_rep` is None, and `others` is None, missing values in the + Series/Index are omitted from the result. + - If `na_rep` is None, and `others` is not None, a row containing a + missing value in any of the columns (before concatenation) will + have a missing value in the result. + join : {'left', 'right', 'outer', 'inner'}, default 'left' + Determines the join-style between the calling Series/Index and any + Series/Index/DataFrame in `others` (objects without an index need + to match the length of the calling Series/Index). To disable + alignment, use `.values` on any Series/Index/DataFrame in `others`. + + .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + Changed default of `join` from None to `'left'`. + + Returns + ------- + str, Series or Index + If `others` is None, `str` is returned, otherwise a `Series/Index` + (same type as caller) of objects is returned. + + See Also + -------- + split : Split each string in the Series/Index. + join : Join lists contained as elements in the Series/Index. 
+
+        Examples
+        --------
+        When not passing `others`, all values are concatenated into a single
+        string:
+
+        >>> s = pd.Series(['a', 'b', np.nan, 'd'])
+        >>> s.str.cat(sep=' ')
+        'a b d'
+
+        By default, NA values in the Series are ignored. Using `na_rep`, they
+        can be given a representation:
+
+        >>> s.str.cat(sep=' ', na_rep='?')
+        'a b ? d'
+
+        If `others` is specified, corresponding values are concatenated with
+        the separator. Result will be a Series of strings.
+
+        >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',')
+        0    a,A
+        1    b,B
+        2    NaN
+        3    d,D
+        dtype: object
+
+        Missing values will remain missing in the result, but can again be
+        represented using `na_rep`
+
+        >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')
+        0    a,A
+        1    b,B
+        2    -,C
+        3    d,D
+        dtype: object
+
+        If `sep` is not specified, the values are concatenated without
+        separation.
+
+        >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')
+        0    aA
+        1    bB
+        2    -C
+        3    dD
+        dtype: object
+
+        Series with different indexes can be aligned before concatenation. The
+        `join`-keyword works as in other methods.
+
+        >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2])
+        >>> s.str.cat(t, join='left', na_rep='-')
+        0    aa
+        1    b-
+        2    -c
+        3    dd
+        dtype: object
+        >>>
+        >>> s.str.cat(t, join='outer', na_rep='-')
+        0    aa
+        1    b-
+        2    -c
+        3    dd
+        4    -e
+        dtype: object
+        >>>
+        >>> s.str.cat(t, join='inner', na_rep='-')
+        0    aa
+        2    -c
+        3    dd
+        dtype: object
+        >>>
+        >>> s.str.cat(t, join='right', na_rep='-')
+        3    dd
+        0    aa
+        4    -e
+        2    -c
+        dtype: object
+
+        For more examples, see :ref:`here <text.concat>`.
+        """
+        return self._array._str.cat(others, sep, na_rep, join)
+
+    _shared_docs[
+        "str_split"
+    ] = r"""
+    Split strings around given separator/delimiter.
+
+    Splits the string in the Series/Index from the %(side)s,
+    at the specified delimiter string. Equivalent to :meth:`str.%(method)s`.
+
+    Parameters
+    ----------
+    pat : str, optional
+        String or regular expression to split on.
+        If not specified, split on whitespace.
+    n : int, default -1 (all)
+        Limit number of splits in output.
+        ``None``, 0 and -1 will be interpreted as return all splits.
+    expand : bool, default False
+        Expand the split strings into separate columns.
+
+        * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
+        * If ``False``, return Series/Index, containing lists of strings.
+
+    Returns
+    -------
+    Series, Index, DataFrame or MultiIndex
+        Type matches caller unless ``expand=True`` (see Notes).
+
+    See Also
+    --------
+    Series.str.split : Split strings around given separator/delimiter.
+    Series.str.rsplit : Splits string around given separator/delimiter,
+        starting from the right.
+    Series.str.join : Join lists contained as elements in the Series/Index
+        with passed delimiter.
+    str.split : Standard library version for split.
+    str.rsplit : Standard library version for rsplit.
+
+    Notes
+    -----
+    The handling of the `n` keyword depends on the number of found splits:
+
+    - If found splits > `n`, make first `n` splits only
+    - If found splits <= `n`, make all splits
+    - If for a certain row the number of found splits < `n`,
+      append `None` for padding up to `n` if ``expand=True``
+
+    If using ``expand=True``, Series and Index callers return DataFrame and
+    MultiIndex objects, respectively.
+
+    Examples
+    --------
+    >>> s = pd.Series(
+    ...     [
+    ...         "this is a regular sentence",
+    ...         "https://docs.python.org/3/tutorial/index.html",
+    ...         np.nan
+    ...     ]
+    ... )
+    >>> s
+    0                       this is a regular sentence
+    1    https://docs.python.org/3/tutorial/index.html
+    2                                              NaN
+    dtype: object
+
+    In the default setting, the string is split by whitespace.
+
+    >>> s.str.split()
+    0                   [this, is, a, regular, sentence]
+    1    [https://docs.python.org/3/tutorial/index.html]
+    2                                                NaN
+    dtype: object
+
+    Without the `n` parameter, the outputs of `rsplit` and `split`
+    are identical.
+
+    >>> s.str.rsplit()
+    0                   [this, is, a, regular, sentence]
+    1    [https://docs.python.org/3/tutorial/index.html]
+    2                                                NaN
+    dtype: object
+
+    The `n` parameter can be used to limit the number of splits on the
+    delimiter. The outputs of `split` and `rsplit` are different.
+
+    >>> s.str.split(n=2)
+    0                     [this, is, a regular sentence]
+    1    [https://docs.python.org/3/tutorial/index.html]
+    2                                                NaN
+    dtype: object
+
+    >>> s.str.rsplit(n=2)
+    0                     [this is a, regular, sentence]
+    1    [https://docs.python.org/3/tutorial/index.html]
+    2                                                NaN
+    dtype: object
+
+    The `pat` parameter can be used to split by other characters.
+
+    >>> s.str.split(pat="/")
+    0                         [this is a regular sentence]
+    1    [https:, , docs.python.org, 3, tutorial, index...
+    2                                                  NaN
+    dtype: object
+
+    When using ``expand=True``, the split elements will expand out into
+    separate columns. If NaN is present, it is propagated throughout
+    the columns during the split.
+
+    >>> s.str.split(expand=True)
+                                                   0     1     2        3         4
+    0                                           this    is     a  regular  sentence
+    1  https://docs.python.org/3/tutorial/index.html  None  None     None      None
+    2                                            NaN   NaN   NaN      NaN       NaN
+
+    For slightly more complex use cases like splitting the html document name
+    from a url, a combination of parameter settings can be used.
+
+    >>> s.str.rsplit("/", n=1, expand=True)
+                                        0           1
+    0          this is a regular sentence        None
+    1  https://docs.python.org/3/tutorial  index.html
+    2                                 NaN         NaN
+
+    Remember to escape special characters when explicitly using regular
+    expressions.
+
+    >>> s = pd.Series(["1+1=2"])
+    >>> s
+    0    1+1=2
+    dtype: object
+    >>> s.str.split(r"\+|=", expand=True)
+       0  1  2
+    0  1  1  2
+    """
+
+    @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
+    @forbid_nonstring_types(["bytes"])
+    def split(self, pat=None, n=-1, expand=False):
+        result = self._array._str.split(pat, n, expand)
+        return self._wrap_result(result, expand=expand, returns_string=expand)
+
+    @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"})
+    @forbid_nonstring_types(["bytes"])
+    def rsplit(self, pat=None, n=-1, expand=False):
+        result = self._array._str.rsplit(pat, n=n)
+        return self._wrap_result(result, expand=expand, returns_string=expand)
+
+    _shared_docs[
+        "str_partition"
+    ] = """
+    Split the string at the %(side)s occurrence of `sep`.
+
+    This method splits the string at the %(side)s occurrence of `sep`,
+    and returns 3 elements containing the part before the separator,
+    the separator itself, and the part after the separator.
+    If the separator is not found, return %(return)s.
+
+    Parameters
+    ----------
+    sep : str, default whitespace
+        String to split on.
+    expand : bool, default True
+        If True, return DataFrame/MultiIndex expanding dimensionality.
+        If False, return Series/Index.
+
+    Returns
+    -------
+    DataFrame/MultiIndex or Series/Index of objects
+
+    See Also
+    --------
+    %(also)s
+    Series.str.split : Split strings around given separators.
+    str.partition : Standard library version.
+
+    Examples
+    --------
+
+    >>> s = pd.Series(['Linda van der Berg', 'George Pitt-Rivers'])
+    >>> s
+    0    Linda van der Berg
+    1    George Pitt-Rivers
+    dtype: object
+
+    >>> s.str.partition()
+            0  1             2
+    0    Linda     van der Berg
+    1   George      Pitt-Rivers
+
+    To partition by the last space instead of the first one:
+
+    >>> s.str.rpartition()
+                   0  1            2
+    0  Linda van der            Berg
+    1         George     Pitt-Rivers
+
+    To partition by something different than a space:
+
+    >>> s.str.partition('-')
+                        0  1       2
+    0  Linda van der Berg
+    1         George Pitt  -  Rivers
+
+    To return a Series containing tuples instead of a DataFrame:
+
+    >>> s.str.partition('-', expand=False)
+    0    (Linda van der Berg, , )
+    1    (George Pitt, -, Rivers)
+    dtype: object
+
+    Also available on indices:
+
+    >>> idx = pd.Index(['X 123', 'Y 999'])
+    >>> idx
+    Index(['X 123', 'Y 999'], dtype='object')
+
+    Which will create a MultiIndex:
+
+    >>> idx.str.partition()
+    MultiIndex([('X', ' ', '123'),
+                ('Y', ' ', '999')],
+               )
+
+    Or an index with tuples with ``expand=False``:
+
+    >>> idx.str.partition(expand=False)
+    Index([('X', ' ', '123'), ('Y', ' ', '999')], dtype='object')
+    """
+
+    @Appender(
+        _shared_docs["str_partition"]
+        % {
+            "side": "first",
+            "return": "3 elements containing the string itself, followed by two "
+            "empty strings",
+            "also": "rpartition : Split the string at the last occurrence of `sep`.",
+        }
+    )
+    @forbid_nonstring_types(["bytes"])
+    def partition(self, sep=" ", expand=True):
+        result = self._array._str.partition(sep, expand)
+        return self._wrap_result(result, expand=expand, returns_string=expand)
+
+    @Appender(
+        _shared_docs["str_partition"]
+        % {
+            "side": "last",
+            "return": "3 elements containing two empty strings, followed by the "
+            "string itself",
+            "also": "partition : Split the string at the first occurrence of `sep`.",
+        }
+    )
+    @forbid_nonstring_types(["bytes"])
+    def rpartition(self, sep=" ", expand=True):
+        result = self._array._str.rpartition(sep, expand)
+        return self._wrap_result(result, expand=expand, returns_string=expand)
+
+    def get(self, i):
+        """
+        Extract element from each component at specified position.
+
+        Extract element from lists, tuples, or strings in each element in the
+        Series/Index.
+
+        Parameters
+        ----------
+        i : int
+            Position of element to extract.
+
+        Returns
+        -------
+        Series or Index
+
+        Examples
+        --------
+        >>> s = pd.Series(["String",
+        ...                (1, 2, 3),
+        ...                ["a", "b", "c"],
+        ...                123,
+        ...                -456,
+        ...                {1: "Hello", "2": "World"}])
+        >>> s
+        0                        String
+        1                     (1, 2, 3)
+        2                     [a, b, c]
+        3                           123
+        4                          -456
+        5    {1: 'Hello', '2': 'World'}
+        dtype: object
+
+        >>> s.str.get(1)
+        0        t
+        1        2
+        2        b
+        3      NaN
+        4      NaN
+        5    Hello
+        dtype: object
+
+        >>> s.str.get(-1)
+        0       g
+        1       3
+        2       c
+        3     NaN
+        4     NaN
+        5    None
+        dtype: object
+        """
+        result = self._array._str.get(i)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def join(self, sep):
+        """
+        Join lists contained as elements in the Series/Index with passed delimiter.
+
+        If the elements of a Series are lists themselves, join the content of these
+        lists using the delimiter passed to the function.
+        This function is an equivalent to :meth:`str.join`.
+
+        Parameters
+        ----------
+        sep : str
+            Delimiter to use between list entries.
+
+        Returns
+        -------
+        Series/Index: object
+            The list entries concatenated by intervening occurrences of the
+            delimiter.
+
+        Raises
+        ------
+        AttributeError
+            If the supplied Series contains neither strings nor lists.
+
+        See Also
+        --------
+        str.join : Standard library version of this method.
+ Series.str.split : Split strings around given separator/delimiter. + + Notes + ----- + If any of the list items is not a string object, the result of the join + will be `NaN`. + + Examples + -------- + Example with a list that contains non-string elements. + + >>> s = pd.Series([['lion', 'elephant', 'zebra'], + ... [1.1, 2.2, 3.3], + ... ['cat', np.nan, 'dog'], + ... ['cow', 4.5, 'goat'], + ... ['duck', ['swan', 'fish'], 'guppy']]) + >>> s + 0 [lion, elephant, zebra] + 1 [1.1, 2.2, 3.3] + 2 [cat, nan, dog] + 3 [cow, 4.5, goat] + 4 [duck, [swan, fish], guppy] + dtype: object + + Join all lists using a '-'. The lists containing object(s) of types other + than str will produce a NaN. + + >>> s.str.join('-') + 0 lion-elephant-zebra + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: object + """ + # XXX: Does this use the Series? If so then we can't dispatch. + result = self._array._str.join(sep) + return self._wrap_result(result) + + @forbid_nonstring_types(["bytes"]) + def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + """ + Test if pattern or regex is contained within a string of a Series or Index. + + Return boolean Series or Index based on whether a given pattern or regex is + contained within a string of a Series or Index. + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Flags to pass through to the re module, e.g. re.IGNORECASE. + na : default NaN + Fill value for missing values. + regex : bool, default True + If True, assumes the pat is a regular expression. + + If False, treats the pat as a literal string. + + Returns + ------- + Series or Index of boolean values + A Series or Index of boolean values indicating whether the + given pattern is contained within the string of each element + of the Series or Index. + + See Also + -------- + match : Analogous, but stricter, relying on re.match instead of re.search. + Series.str.startswith : Test if the start of each string element matches a + pattern. + Series.str.endswith : Same as startswith, but tests the end of string. + + Examples + -------- + Returning a Series of booleans using only a literal pattern. + + >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) + >>> s1.str.contains('og', regex=False) + 0 False + 1 True + 2 False + 3 False + 4 NaN + dtype: object + + Returning an Index of booleans using only a literal pattern. + + >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) + >>> ind.str.contains('23', regex=False) + Index([False, False, False, True, nan], dtype='object') + + Specifying case sensitivity using `case`. + + >>> s1.str.contains('oG', case=True, regex=True) + 0 False + 1 False + 2 False + 3 False + 4 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN` replaces NaN values + with `False`. If Series or Index does not contain NaN values + the resultant dtype will be `bool`, otherwise, an `object` dtype. + + >>> s1.str.contains('og', na=False, regex=True) + 0 False + 1 True + 2 False + 3 False + 4 False + dtype: bool + + Returning 'house' or 'dog' when either expression occurs in a string. + + >>> s1.str.contains('house|dog', regex=True) + 0 False + 1 True + 2 True + 3 False + 4 NaN + dtype: object + + Ignoring case sensitivity using `flags` with regex. 
+
+        >>> import re
+        >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True)
+        0    False
+        1    False
+        2     True
+        3    False
+        4      NaN
+        dtype: object
+
+        Returning any digit using regular expression.
+
+        >>> s1.str.contains('\\d', regex=True)
+        0    False
+        1    False
+        2    False
+        3     True
+        4      NaN
+        dtype: object
+
+        Ensure `pat` is not a literal pattern when `regex` is set to True.
+        Note in the following example one might expect only `s2[1]` and `s2[3]` to
+        return `True`. However, '.0' as a regex matches any character
+        followed by a 0.
+
+        >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35'])
+        >>> s2.str.contains('.0', regex=True)
+        0     True
+        1     True
+        2    False
+        3     True
+        4    False
+        dtype: bool
+        """
+        result = self._array._str.contains(pat, case, flags, na, regex)
+        return self._wrap_result(result, fill_value=na, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def match(self, pat, case=True, flags=0, na=np.nan):
+        """
+        Determine if each string starts with a match of a regular expression.
+
+        Parameters
+        ----------
+        pat : str
+            Character sequence or regular expression.
+        case : bool, default True
+            If True, case sensitive.
+        flags : int, default 0 (no flags)
+            Regex module flags, e.g. re.IGNORECASE.
+        na : default NaN
+            Fill value for missing values.
+
+        Returns
+        -------
+        Series/array of boolean values
+
+        See Also
+        --------
+        fullmatch : Stricter matching that requires the entire string to match.
+        contains : Analogous, but less strict, relying on re.search instead of
+            re.match.
+        extract : Extract matched groups.
+        """
+        result = self._array._str.match(pat, case=case, flags=flags, na=na)
+        return self._wrap_result(result, fill_value=na, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def fullmatch(self, pat, case=True, flags=0, na=np.nan):
+        """
+        Determine if each string entirely matches a regular expression.
+
+        .. versionadded:: 1.1.0
+
+        Parameters
+        ----------
+        pat : str
+            Character sequence or regular expression.
+        case : bool, default True
+            If True, case sensitive.
+        flags : int, default 0 (no flags)
+            Regex module flags, e.g. re.IGNORECASE.
+        na : default NaN
+            Fill value for missing values.
+
+        Returns
+        -------
+        Series/array of boolean values
+
+        See Also
+        --------
+        match : Similar, but also returns `True` when only a *prefix* of the string
+            matches the regular expression.
+        extract : Extract matched groups.
+        """
+        result = self._array._str.fullmatch(pat, case=case, flags=flags, na=na)
+        return self._wrap_result(result, fill_value=na, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
+        r"""
+        Replace each occurrence of pattern/regex in the Series/Index.
+
+        Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on
+        the regex value.
+
+        Parameters
+        ----------
+        pat : str or compiled regex
+            String can be a character sequence or regular expression.
+        repl : str or callable
+            Replacement string or a callable. The callable is passed the regex
+            match object and must return a replacement string to be used.
+            See :func:`re.sub`.
+        n : int, default -1 (all)
+            Number of replacements to make from start.
+        case : bool, default None
+            Determines if replace is case sensitive:
+
+            - If True, case sensitive (the default if `pat` is a string)
+            - Set to False for case insensitive
+            - Cannot be set if `pat` is a compiled regex.
+
+        flags : int, default 0 (no flags)
+            Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is
+            a compiled regex.
+        regex : bool, default True
+            Determines if the passed-in pattern is a regular expression:
+
+            - If True, assumes the passed-in pattern is a regular expression.
+            - If False, treats the pattern as a literal string
+            - Cannot be set to False if `pat` is a compiled regex or `repl` is
+              a callable.
+
+            .. versionadded:: 0.23.0
+
+        Returns
+        -------
+        Series or Index of object
+            A copy of the object with all matching occurrences of `pat` replaced by
+            `repl`.
+
+        Raises
+        ------
+        ValueError
+            * if `regex` is False and `repl` is a callable or `pat` is a compiled
+              regex
+            * if `pat` is a compiled regex and `case` or `flags` is set
+
+        Notes
+        -----
+        When `pat` is a compiled regex, all flags should be included in the
+        compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled
+        regex will raise an error.
+
+        Examples
+        --------
+        When `pat` is a string and `regex` is True (the default), the given `pat`
+        is compiled as a regex. When `repl` is a string, it replaces matching
+        regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are
+        left as is:
+
+        >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
+        0    bao
+        1    baz
+        2    NaN
+        dtype: object
+
+        When `pat` is a string and `regex` is False, every `pat` is replaced with
+        `repl` as with :meth:`str.replace`:
+
+        >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
+        0    bao
+        1    fuz
+        2    NaN
+        dtype: object
+
+        When `repl` is a callable, it is called on every `pat` using
+        :func:`re.sub`. The callable should expect one positional argument
+        (a regex object) and return a string.
+
+        To get the idea:
+
+        >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)
+        0    <re.Match object; span=(0, 1), match='f'>oo
+        1    <re.Match object; span=(0, 1), match='f'>uz
+        2                                             NaN
+        dtype: object
+
+        Reverse every lowercase alphabetic word:
+
+        >>> repl = lambda m: m.group(0)[::-1]
+        >>> pd.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl)
+        0    oof 123
+        1    rab zab
+        2        NaN
+        dtype: object
+
+        Using regex groups (extract second group and swap case):
+
+        >>> pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
+        >>> repl = lambda m: m.group('two').swapcase()
+        >>> pd.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl)
+        0    tWO
+        1    bAR
+        dtype: object
+
+        Using a compiled regex with flags
+
+        >>> import re
+        >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)
+        >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar')
+        0    foo
+        1    bar
+        2    NaN
+        dtype: object
+        """
+        result = self._array._str.replace(
+            pat, repl, n=n, case=case, flags=flags, regex=regex
+        )
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def repeat(self, repeats):
+        """
+        Duplicate each string in the Series or Index.
+
+        Parameters
+        ----------
+        repeats : int or sequence of int
+            Same value for all (int) or different value per (sequence).
+
+        Returns
+        -------
+        Series or Index of object
+            Series or Index of repeated string objects specified by
+            input parameter repeats.
+
+        Examples
+        --------
+        >>> s = pd.Series(['a', 'b', 'c'])
+        >>> s
+        0    a
+        1    b
+        2    c
+        dtype: object
+
+        Single int repeats string in Series
+
+        >>> s.str.repeat(repeats=2)
+        0    aa
+        1    bb
+        2    cc
+        dtype: object
+
+        Sequence of int repeats corresponding string in Series
+
+        >>> s.str.repeat(repeats=[1, 2, 3])
+        0      a
+        1     bb
+        2    ccc
+        dtype: object
+        """
+        result = self._array._str.repeat(repeats)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def pad(self, width, side="left", fillchar=" "):
+        """
+        Pad strings in the Series/Index up to width.
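+
+        Strings shorter than `width` are filled on the given `side` with
+        `fillchar` until they reach `width`; longer strings are returned
+        unchanged.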
+        """
+        result = self._array._str.repeat(repeats)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def pad(self, width, side="left", fillchar=" "):
+        """
+        Pad strings in the Series/Index up to width.
+
+        Parameters
+        ----------
+        width : int
+            Minimum width of resulting string; additional characters will be filled
+            with character defined in `fillchar`.
+        side : {'left', 'right', 'both'}, default 'left'
+            Side from which to fill resulting string.
+        fillchar : str, default ' '
+            Additional character for filling, default is whitespace.
+
+        Returns
+        -------
+        Series or Index of object
+            Returns Series or Index with strings padded to at least `width`
+            characters.
+
+        See Also
+        --------
+        Series.str.rjust : Fills the left side of strings with an arbitrary
+            character. Equivalent to ``Series.str.pad(side='left')``.
+        Series.str.ljust : Fills the right side of strings with an arbitrary
+            character. Equivalent to ``Series.str.pad(side='right')``.
+        Series.str.center : Fills both sides of strings with an arbitrary
+            character. Equivalent to ``Series.str.pad(side='both')``.
+        Series.str.zfill : Pad strings in the Series/Index by prepending '0'
+            character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``.
+
+        Examples
+        --------
+        >>> s = pd.Series(["caribou", "tiger"])
+        >>> s
+        0    caribou
+        1      tiger
+        dtype: object
+
+        >>> s.str.pad(width=10)
+        0       caribou
+        1         tiger
+        dtype: object
+
+        >>> s.str.pad(width=10, side='right', fillchar='-')
+        0    caribou---
+        1    tiger-----
+        dtype: object
+
+        >>> s.str.pad(width=10, side='both', fillchar='-')
+        0    -caribou--
+        1    --tiger---
+        dtype: object
+        """
+        result = self._array._str.pad(width, side=side, fillchar=fillchar)
+        return self._wrap_result(result)
+
+    _shared_docs[
+        "str_pad"
+    ] = """
+    Pad %(side)s side of strings in the Series/Index.
+
+    Equivalent to :meth:`str.%(method)s`.
+
+    Parameters
+    ----------
+    width : int
+        Minimum width of resulting string; additional characters will be filled
+        with ``fillchar``.
+    fillchar : str
+        Additional character for filling, default is whitespace.
+
+    Returns
+    -------
+    filled : Series/Index of objects.
+    """
+    # XXX: Do we need to dispatch to center, etc, or is it equivalent?
+
+    @Appender(_shared_docs["str_pad"] % dict(side="left and right", method="center"))
+    @forbid_nonstring_types(["bytes"])
+    def center(self, width, fillchar=" "):
+        return self.pad(width, side="both", fillchar=fillchar)
+
+    @Appender(_shared_docs["str_pad"] % dict(side="right", method="ljust"))
+    @forbid_nonstring_types(["bytes"])
+    def ljust(self, width, fillchar=" "):
+        return self.pad(width, side="right", fillchar=fillchar)
+
+    @Appender(_shared_docs["str_pad"] % dict(side="left", method="rjust"))
+    @forbid_nonstring_types(["bytes"])
+    def rjust(self, width, fillchar=" "):
+        return self.pad(width, side="left", fillchar=fillchar)
+
+    @forbid_nonstring_types(["bytes"])
+    def zfill(self, width):
+        """
+        Pad strings in the Series/Index by prepending '0' characters.
+
+        Strings in the Series/Index are padded with '0' characters on the
+        left of the string to reach a total string length `width`. Strings
+        in the Series/Index with length greater or equal to `width` are
+        unchanged.
+
+        Parameters
+        ----------
+        width : int
+            Minimum length of resulting string; strings with length less
+            than `width` will be prepended with '0' characters.
+
+        Returns
+        -------
+        Series/Index of objects.
+
+        See Also
+        --------
+        Series.str.rjust : Fills the left side of strings with an arbitrary
+            character.
+        Series.str.ljust : Fills the right side of strings with an arbitrary
+            character.
+        Series.str.pad : Fills the specified sides of strings with an arbitrary
+            character.
+        Series.str.center : Fills both sides of strings with an arbitrary
+            character.
+
+        Notes
+        -----
+        Differs from :meth:`str.zfill` which has special handling
+        for '+'/'-' in the string.
+
+        Examples
+        --------
+        >>> s = pd.Series(['-1', '1', '1000', 10, np.nan])
+        >>> s
+        0      -1
+        1       1
+        2    1000
+        3      10
+        4     NaN
+        dtype: object
+
+        Note that ``10`` and ``NaN`` are not strings, therefore they are
+        converted to ``NaN``. The minus sign in ``'-1'`` is treated as a
+        regular character and the zero is added to the left of it
+        (:meth:`str.zfill` would have moved it to the left). ``1000``
+        remains unchanged as it is longer than `width`.
+
+        >>> s.str.zfill(3)
+        0     0-1
+        1     001
+        2    1000
+        3     NaN
+        4     NaN
+        dtype: object
+        """
+        result = self._array._str.pad(width, side="left", fillchar="0")
+        return self._wrap_result(result)
+
+    def slice(self, start=None, stop=None, step=None):
+        """
+        Slice substrings from each element in the Series or Index.
+
+        Parameters
+        ----------
+        start : int, optional
+            Start position for slice operation.
+        stop : int, optional
+            Stop position for slice operation.
+        step : int, optional
+            Step size for slice operation.
+
+        Returns
+        -------
+        Series or Index of object
+            Series or Index from sliced substring from original string object.
+
+        See Also
+        --------
+        Series.str.slice_replace : Replace a slice with a string.
+        Series.str.get : Return element at position.
+            Equivalent to `Series.str.slice(start=i, stop=i+1)` with `i`
+            being the position.
+
+        Examples
+        --------
+        >>> s = pd.Series(["koala", "fox", "chameleon"])
+        >>> s
+        0        koala
+        1          fox
+        2    chameleon
+        dtype: object
+
+        >>> s.str.slice(start=1)
+        0        oala
+        1          ox
+        2    hameleon
+        dtype: object
+
+        >>> s.str.slice(start=-1)
+        0    a
+        1    x
+        2    n
+        dtype: object
+
+        >>> s.str.slice(stop=2)
+        0    ko
+        1    fo
+        2    ch
+        dtype: object
+
+        >>> s.str.slice(step=2)
+        0      kaa
+        1       fx
+        2    caeen
+        dtype: object
+
+        >>> s.str.slice(start=0, stop=5, step=3)
+        0    kl
+        1     f
+        2    cm
+        dtype: object
+
+        Equivalent behaviour to:
+
+        >>> s.str[0:5:3]
+        0    kl
+        1     f
+        2    cm
+        dtype: object
+        """
+        result = self._array._str.slice(start, stop, step)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def slice_replace(self, start=None, stop=None, repl=None):
+        """
+        Replace a positional slice of a string with another value.
+
+        Parameters
+        ----------
+        start : int, optional
+            Left index position to use for the slice. If not specified (None),
+            the slice is unbounded on the left, i.e. slice from the start
+            of the string.
+        stop : int, optional
+            Right index position to use for the slice. If not specified (None),
+            the slice is unbounded on the right, i.e. slice until the
+            end of the string.
+        repl : str, optional
+            String for replacement. If not specified (None), the sliced region
+            is replaced with an empty string.
+
+        Returns
+        -------
+        Series or Index
+            Same type as the original object.
+
+        See Also
+        --------
+        Series.str.slice : Just slicing without replacement.
+
+        Examples
+        --------
+        >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde'])
+        >>> s
+        0        a
+        1       ab
+        2      abc
+        3     abdc
+        4    abcde
+        dtype: object
+
+        Specify just `start`, meaning replace `start` until the end of the
+        string with `repl`.
+
+        >>> s.str.slice_replace(1, repl='X')
+        0    aX
+        1    aX
+        2    aX
+        3    aX
+        4    aX
+        dtype: object
+
+        Specify just `stop`, meaning the start of the string to `stop` is replaced
+        with `repl`, and the rest of the string is included.
+
+        >>> s.str.slice_replace(stop=2, repl='X')
+        0       X
+        1       X
+        2      Xc
+        3     Xdc
+        4    Xcde
+        dtype: object
+
+        Specify `start` and `stop`, meaning the slice from `start` to `stop` is
+        replaced with `repl`. Everything before or after `start` and `stop` is
+        included as is.
+
+        >>> s.str.slice_replace(start=1, stop=3, repl='X')
+        0      aX
+        1      aX
+        2      aX
+        3     aXc
+        4    aXde
+        dtype: object
+        """
+        result = self._array._str.slice_replace(start, stop, repl)
+        return self._wrap_result(result)
+
+    def decode(self, encoding, errors="strict"):
+        """
+        Decode character string in the Series/Index using indicated encoding.
+
+        Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in
+        python3.
+
+        Parameters
+        ----------
+        encoding : str
+        errors : str, optional
+
+        Returns
+        -------
+        Series or Index
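+
+        Examples
+        --------
+        An illustrative sketch with ASCII bytes:
+
+        >>> pd.Series([b'cow', b'123']).str.decode('ascii')
+        0    cow
+        1    123
+        dtype: object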
+        """
+        # TODO: Add a similar _bytes interface.
+        if encoding in _cpython_optimized_decoders:
+            # CPython optimized implementation
+            f = lambda x: x.decode(encoding, errors)
+        else:
+            decoder = codecs.getdecoder(encoding)
+            f = lambda x: decoder(x, errors)[0]
+        arr = self._array
+        # assert isinstance(arr, (StringArray,))
+        result = arr._str._map(f)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def encode(self, encoding, errors="strict"):
+        """
+        Encode character string in the Series/Index using indicated encoding.
+
+        Equivalent to :meth:`str.encode`.
+
+        Parameters
+        ----------
+        encoding : str
+        errors : str, optional
+
+        Returns
+        -------
+        encoded : Series/Index of objects
+        """
+        result = self._array._str.encode(encoding, errors)
+        return self._wrap_result(result, returns_string=False)
+
+    _shared_docs[
+        "str_strip"
+    ] = r"""
+    Remove %(position)s characters.
+
+    Strip whitespaces (including newlines) or a set of specified characters
+    from each string in the Series/Index from %(side)s.
+    Equivalent to :meth:`str.%(method)s`.
+
+    Parameters
+    ----------
+    to_strip : str or None, default None
+        Specifying the set of characters to be removed.
+        All combinations of this set of characters will be stripped.
+        If None then whitespaces are removed.
+
+    Returns
+    -------
+    Series or Index of object
+
+    See Also
+    --------
+    Series.str.strip : Remove leading and trailing characters in Series/Index.
+    Series.str.lstrip : Remove leading characters in Series/Index.
+    Series.str.rstrip : Remove trailing characters in Series/Index.
+
+    Examples
+    --------
+    >>> s = pd.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t', np.nan])
+    >>> s
+    0    1. Ant.
+    1    2. Bee!\n
+    2    3. Cat?\t
+    3          NaN
+    dtype: object
+
+    >>> s.str.strip()
+    0    1. Ant.
+    1    2. Bee!
+    2    3. Cat?
+    3        NaN
+    dtype: object
+
+    >>> s.str.lstrip('123.')
+    0    Ant.
+    1    Bee!\n
+    2    Cat?\t
+    3       NaN
+    dtype: object
+
+    >>> s.str.rstrip('.!? \n\t')
+    0    1. Ant
+    1    2. Bee
+    2    3. Cat
+    3       NaN
+    dtype: object
+
+    >>> s.str.strip('123.!? \n\t')
+    0    Ant
+    1    Bee
+    2    Cat
+    3    NaN
+    dtype: object
+    """
+
+    @Appender(
+        _shared_docs["str_strip"]
+        % dict(
+            side="left and right sides", method="strip", position="leading and trailing"
+        )
+    )
+    @forbid_nonstring_types(["bytes"])
+    def strip(self, to_strip=None):
+        result = self._array._str.strip(to_strip)
+        return self._wrap_result(result)
+
+    @Appender(
+        _shared_docs["str_strip"]
+        % dict(side="left side", method="lstrip", position="leading")
+    )
+    @forbid_nonstring_types(["bytes"])
+    def lstrip(self, to_strip=None):
+        result = self._array._str.lstrip(to_strip)
+        return self._wrap_result(result)
+
+    @Appender(
+        _shared_docs["str_strip"]
+        % dict(side="right side", method="rstrip", position="trailing")
+    )
+    @forbid_nonstring_types(["bytes"])
+    def rstrip(self, to_strip=None):
+        result = self._array._str.rstrip(to_strip)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def wrap(self, width, **kwargs):
+        r"""
+        Wrap strings in Series/Index at specified line width.
+
+        This method has the same keyword parameters and defaults as
+        :class:`textwrap.TextWrapper`.
+
+        Parameters
+        ----------
+        width : int
+            Maximum line width.
+        expand_tabs : bool, optional
+            If True, tab characters will be expanded to spaces (default: True).
+        replace_whitespace : bool, optional
+            If True, each whitespace character (as defined by string.whitespace)
+            remaining after tab expansion will be replaced by a single space
+            (default: True).
+        drop_whitespace : bool, optional
+            If True, whitespace that, after wrapping, happens to end up at the
+            beginning or end of a line is dropped (default: True).
+        break_long_words : bool, optional
+            If True, then words longer than width will be broken in order to ensure
+            that no lines are longer than width. If it is false, long words will
+            not be broken, and some lines may be longer than width (default: True).
+        break_on_hyphens : bool, optional
+            If True, wrapping will occur preferably on whitespace and right after
+            hyphens in compound words, as it is customary in English. If false,
+            only whitespaces will be considered as potentially good places for line
+            breaks, but you need to set break_long_words to false if you want truly
+            insecable words (default: True).
+
+        Returns
+        -------
+        Series or Index
+
+        Notes
+        -----
+        Internally, this method uses a :class:`textwrap.TextWrapper` instance with
+        default settings. To achieve behavior matching R's stringr library str_wrap
+        function, use the arguments:
+
+        - expand_tabs = False
+        - replace_whitespace = True
+        - drop_whitespace = True
+        - break_long_words = False
+        - break_on_hyphens = False
+
+        Examples
+        --------
+        >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped'])
+        >>> s.str.wrap(12)
+        0             line to be\nwrapped
+        1    another line\nto be\nwrapped
+        dtype: object
+        """
+        result = self._array._str.wrap(width, **kwargs)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def get_dummies(self, sep="|"):
+        """
+        Return DataFrame of dummy/indicator variables for Series.
+
+        Each string in Series is split by sep and returned as a DataFrame
+        of dummy/indicator variables.
+
+        Parameters
+        ----------
+        sep : str, default "|"
+            String to split on.
+
+        Returns
+        -------
+        DataFrame
+            Dummy variables corresponding to values of the Series.
+
+        See Also
+        --------
+        get_dummies : Convert categorical variable into dummy/indicator
+            variables.
+
+        Examples
+        --------
+        >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies()
+           a  b  c
+        0  1  1  0
+        1  1  0  0
+        2  1  0  1
+
+        >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
+           a  b  c
+        0  1  1  0
+        1  0  0  0
+        2  1  0  1
+        """
+        # we need to cast to Series of strings as only that has all
+        # methods available for making the dummies...
+        # XXX: data = self._orig.astype(str) if self._is_categorical else self._parent
+        result, name = self._array._str.get_dummies(sep)
+        # result, name = str_get_dummies(data, sep)
+        return self._wrap_result(
+            result,
+            use_codes=(not self._is_categorical),
+            name=name,
+            expand=True,
+            returns_string=False,
+        )
+
+    # @copy(str_translate)
+    @forbid_nonstring_types(["bytes"])
+    def translate(self, table):
+        """
+        Map all characters in the string through the given mapping table.
+
+        Equivalent to standard :meth:`str.translate`.
+
+        Parameters
+        ----------
+        table : dict
+            Table is a mapping of Unicode ordinals to Unicode ordinals, strings, or
+            None. Unmapped characters are left untouched.
+            Characters mapped to None are deleted. :meth:`str.maketrans` is a
+            helper function for making translation tables.
+
+        Returns
+        -------
+        Series or Index
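+
+        Examples
+        --------
+        A sketch using :meth:`str.maketrans` to build the table:
+
+        >>> ser = pd.Series(['El niño', 'Françoise'])
+        >>> mytable = str.maketrans({'ñ': 'n', 'ç': 'c'})
+        >>> ser.str.translate(mytable)
+        0      El nino
+        1    Francoise
+        dtype: object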
+        """
+        result = self._array._str.translate(table)
+        return self._wrap_result(result)
+
+    @forbid_nonstring_types(["bytes"])
+    def count(self, pat, flags=0):
+        """
+        Count occurrences of pattern in each string of the Series/Index.
+
+        This function is used to count the number of times a particular regex
+        pattern is repeated in each of the string elements of the
+        :class:`~pandas.Series`.
+
+        Parameters
+        ----------
+        pat : str
+            Valid regular expression.
+        flags : int, default 0, meaning no flags
+            Flags for the `re` module. For a complete list, `see here
+            <https://docs.python.org/3/library/re.html#module-contents>`_.
+
+        Returns
+        -------
+        Series or Index
+            Same type as the calling object containing the integer counts.
+
+        See Also
+        --------
+        re : Standard library module for regular expressions.
+        str.count : Standard library version, without regular expression support.
+
+        Notes
+        -----
+        Some characters need to be escaped when passing in `pat`.
+        eg. ``'$'`` has a special meaning in regex and must be escaped when
+        finding this literal character.
+
+        Examples
+        --------
+        >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat'])
+        >>> s.str.count('a')
+        0    0.0
+        1    0.0
+        2    2.0
+        3    2.0
+        4    NaN
+        5    0.0
+        6    1.0
+        dtype: float64
+
+        Escape ``'$'`` to find the literal dollar sign.
+
+        >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat'])
+        >>> s.str.count('\\$')
+        0    1
+        1    0
+        2    1
+        3    2
+        4    2
+        5    0
+        dtype: int64
+
+        This is also available on Index
+
+        >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a')
+        Int64Index([0, 0, 2, 1], dtype='int64')
+        """
+        result = self._array._str.count(pat, flags)
+        return self._wrap_result(result, returns_string=False)
+
+    def startswith(self, pat, na=None):
+        """
+        Test if the start of each string element matches a pattern.
+
+        Equivalent to :meth:`str.startswith`.
+
+        Parameters
+        ----------
+        pat : str
+            Character sequence. Regular expressions are not accepted.
+        na : object, default NaN
+            Object shown if element tested is not a string.
+
+        Returns
+        -------
+        Series or Index of bool
+            A Series of booleans indicating whether the given pattern matches
+            the start of each string element.
+
+        See Also
+        --------
+        str.startswith : Python standard library string method.
+        Series.str.endswith : Same as startswith, but tests the end of string.
+        Series.str.contains : Tests if string element contains a pattern.
+
+        Examples
+        --------
+        >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan])
+        >>> s
+        0     bat
+        1    Bear
+        2     cat
+        3     NaN
+        dtype: object
+
+        >>> s.str.startswith('b')
+        0     True
+        1    False
+        2    False
+        3      NaN
+        dtype: object
+
+        Specifying `na` to be `False` instead of `NaN`.
+
+        >>> s.str.startswith('b', na=False)
+        0     True
+        1    False
+        2    False
+        3    False
+        dtype: bool
+        """
+        # XXX: changed default na to None
+        result = self._array._str.startswith(pat, na=na)
+        return self._wrap_result(result, returns_string=False)
+
+    def endswith(self, pat, na=None):
+        """
+        Test if the end of each string element matches a pattern.
+
+        Equivalent to :meth:`str.endswith`.
+
+        Parameters
+        ----------
+        pat : str
+            Character sequence. Regular expressions are not accepted.
+        na : object, default NaN
+            Object shown if element tested is not a string.
+
+        Returns
+        -------
+        Series or Index of bool
+            A Series of booleans indicating whether the given pattern matches
+            the end of each string element.
+
+        See Also
+        --------
+        str.endswith : Python standard library string method.
+        Series.str.startswith : Same as endswith, but tests the start of string.
+        Series.str.contains : Tests if string element contains a pattern.
+
+        Examples
+        --------
+        >>> s = pd.Series(['bat', 'bear', 'caT', np.nan])
+        >>> s
+        0     bat
+        1    bear
+        2     caT
+        3     NaN
+        dtype: object
+
+        >>> s.str.endswith('t')
+        0     True
+        1    False
+        2    False
+        3      NaN
+        dtype: object
+
+        Specifying `na` to be `False` instead of `NaN`.
+
+        >>> s.str.endswith('t', na=False)
+        0     True
+        1    False
+        2    False
+        3    False
+        dtype: bool
+        """
+        # XXX: changed default na to None
+        result = self._array._str.endswith(pat, na=na)
+        return self._wrap_result(result, returns_string=False)
+
+    def findall(self, pat, flags=0):
+        """
+        Find all occurrences of pattern or regular expression in the Series/Index.
+
+        Equivalent to applying :func:`re.findall` to all the elements in the
+        Series/Index.
+
+        Parameters
+        ----------
+        pat : str
+            Pattern or regular expression.
+        flags : int, default 0
+            Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which
+            means no flags).
+
+        Returns
+        -------
+        Series/Index of lists of strings
+            All non-overlapping matches of pattern or regular expression in each
+            string of this Series/Index.
+
+        See Also
+        --------
+        count : Count occurrences of pattern or regular expression in each string
+            of the Series/Index.
+        extractall : For each string in the Series, extract groups from all matches
+            of regular expression and return a DataFrame with one row for each
+            match and one column for each group.
+        re.findall : The equivalent ``re`` function to all non-overlapping matches
+            of pattern or regular expression in string, as a list of strings.
+
+        Examples
+        --------
+        >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit'])
+
+        The search for the pattern 'Monkey' returns one match:
+
+        >>> s.str.findall('Monkey')
+        0          []
+        1    [Monkey]
+        2          []
+        dtype: object
+
+        On the other hand, the search for the pattern 'MONKEY' doesn't return any
+        match:
+
+        >>> s.str.findall('MONKEY')
+        0    []
+        1    []
+        2    []
+        dtype: object
+
+        Flags can be added to the pattern or regular expression. For instance,
+        to find the pattern 'MONKEY' ignoring the case:
+
+        >>> import re
+        >>> s.str.findall('MONKEY', flags=re.IGNORECASE)
+        0          []
+        1    [Monkey]
+        2          []
+        dtype: object
+
+        When the pattern matches more than one string in the Series, all matches
+        are returned:
+
+        >>> s.str.findall('on')
+        0    [on]
+        1    [on]
+        2      []
+        dtype: object
+
+        Regular expressions are supported too. For instance, the search for all the
+        strings ending with the word 'on' is shown next:
+
+        >>> s.str.findall('on$')
+        0    [on]
+        1      []
+        2      []
+        dtype: object
+
+        If the pattern is found more than once in the same string, then a list of
+        multiple strings is returned:
+
+        >>> s.str.findall('b')
+        0        []
+        1        []
+        2    [b, b]
+        dtype: object
+        """
+        result = self._array._str.findall(pat, flags)
+        return self._wrap_result(result, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def extract(self, pat, flags=0, expand=True):
+        r"""
+        Extract capture groups in the regex `pat` as columns in a DataFrame.
+
+        For each subject string in the Series, extract groups from the
+        first match of regular expression `pat`.
+
+        Parameters
+        ----------
+        pat : str
+            Regular expression pattern with capturing groups.
+        flags : int, default 0 (no flags)
+            Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that
+            modify regular expression matching for things like case,
+            spaces, etc. For more details, see :mod:`re`.
+        expand : bool, default True
+            If True, return DataFrame with one column per capture group.
+            If False, return a Series/Index if there is one capture group
+            or DataFrame if there are multiple capture groups.
+
+        Returns
+        -------
+        DataFrame or Series or Index
+            A DataFrame with one row for each subject string, and one
+            column for each group. Any capture group names in regular
+            expression pat will be used for column names; otherwise
+            capture group numbers will be used. The dtype of each result
+            column is always object, even when no match is found. If
+            ``expand=False`` and pat has only one capture group, then
+            return a Series (if subject is a Series) or Index (if subject
+            is an Index).
+
+        See Also
+        --------
+        extractall : Returns all matches (not just the first match).
+
+        Examples
+        --------
+        A pattern with two groups will return a DataFrame with two columns.
+        Non-matches will be NaN.
+
+        >>> s = pd.Series(['a1', 'b2', 'c3'])
+        >>> s.str.extract(r'([ab])(\d)')
+             0    1
+        0    a    1
+        1    b    2
+        2  NaN  NaN
+
+        A pattern may contain optional groups.
+
+        >>> s.str.extract(r'([ab])?(\d)')
+             0  1
+        0    a  1
+        1    b  2
+        2  NaN  3
+
+        Named groups will become column names in the result.
+
+        >>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
+          letter digit
+        0      a     1
+        1      b     2
+        2    NaN   NaN
+
+        A pattern with one group will return a DataFrame with one column
+        if expand=True.
+
+        >>> s.str.extract(r'[ab](\d)', expand=True)
+             0
+        0    1
+        1    2
+        2  NaN
+
+        A pattern with one group will return a Series if expand=False.
+
+        >>> s.str.extract(r'[ab](\d)', expand=False)
+        0      1
+        1      2
+        2    NaN
+        dtype: object
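+
+        The ``flags`` argument accepts :mod:`re` flags; here ``re.IGNORECASE``
+        lets an uppercase character class match the lowercase data
+        (illustrative):
+
+        >>> import re
+        >>> s.str.extract(r'([A-B])(\d)', flags=re.IGNORECASE)
+             0    1
+        0    a    1
+        1    b    2
+        2  NaN  NaN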
+        """
+        result = self._array._str.extract(pat, flags)
+        return self._wrap_result(result, expand=expand)
+
+    @forbid_nonstring_types(["bytes"])
+    def extractall(self, pat, flags=0):
+        r"""
+        Extract capture groups in the regex `pat` as columns in DataFrame.
+
+        For each subject string in the Series, extract groups from all
+        matches of regular expression pat. When each subject string in the
+        Series has exactly one match, extractall(pat).xs(0, level='match')
+        is the same as extract(pat).
+
+        Parameters
+        ----------
+        pat : str
+            Regular expression pattern with capturing groups.
+        flags : int, default 0 (no flags)
+            A ``re`` module flag, for example ``re.IGNORECASE``. These allow
+            to modify regular expression matching for things like case, spaces,
+            etc. Multiple flags can be combined with the bitwise OR operator,
+            for example ``re.IGNORECASE | re.MULTILINE``.
+
+        Returns
+        -------
+        DataFrame
+            A ``DataFrame`` with one row for each match, and one column for each
+            group. Its rows have a ``MultiIndex`` with first levels that come from
+            the subject ``Series``. The last level is named 'match' and indexes the
+            matches in each item of the ``Series``. Any capture group names in
+            regular expression pat will be used for column names; otherwise capture
+            group numbers will be used.
+
+        See Also
+        --------
+        extract : Returns first match only (not all matches).
+
+        Examples
+        --------
+        A pattern with one group will return a DataFrame with one column.
+        Indices with no matches will not appear in the result.
+
+        >>> s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
+        >>> s.str.extractall(r"[ab](\d)")
+                 0
+          match
+        A 0      1
+          1      2
+        B 0      1
+
+        Capture group names are used for column names of the result.
+
+        >>> s.str.extractall(r"[ab](?P<digit>\d)")
+                digit
+          match
+        A 0         1
+          1         2
+        B 0         1
+
+        A pattern with two groups will return a DataFrame with two columns.
+
+        >>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
+                letter digit
+          match
+        A 0          a     1
+          1          a     2
+        B 0          b     1
+
+        Optional groups that do not match are NaN in the result.
+
+        >>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
+                letter digit
+          match
+        A 0          a     1
+          1          a     2
+        B 0          b     1
+        C 0        NaN     1
+        """
+        result = self._array._str.extractall(pat, flags)
+        return self._wrap_result(result, expand=True)
+
+    _shared_docs[
+        "find"
+    ] = """
+    Return %(side)s indexes in each string in the Series/Index.
+
+    Each of the returned indexes corresponds to the position where the
+    substring is fully contained between [start:end]. Return -1 on
+    failure. Equivalent to standard :meth:`str.%(method)s`.
+
+    Parameters
+    ----------
+    sub : str
+        Substring being searched.
+    start : int
+        Left edge index.
+    end : int
+        Right edge index.
+
+    Returns
+    -------
+    Series or Index of int.
+
+    See Also
+    --------
+    %(also)s
+    """
+
+    @Appender(
+        _shared_docs["find"]
+        % dict(
+            side="lowest",
+            method="find",
+            also="rfind : Return highest indexes in each string.",
+        )
+    )
+    @forbid_nonstring_types(["bytes"])
+    def find(self, sub, start=0, end=None):
+        # result = str_find(self._parent, sub, start=start, end=end, side="left")
+        result = self._array._str.find(sub, start=start, end=end)
+        return self._wrap_result(result, returns_string=False)
+
+    @Appender(
+        _shared_docs["find"]
+        % dict(
+            side="highest",
+            method="rfind",
+            also="find : Return lowest indexes in each string.",
+        )
+    )
+    @forbid_nonstring_types(["bytes"])
+    def rfind(self, sub, start=0, end=None):
+        result = self._array._str.rfind(sub, start=start, end=end)
+        return self._wrap_result(result, returns_string=False)
+
+    @forbid_nonstring_types(["bytes"])
+    def normalize(self, form):
+        """
+        Return the Unicode normal form for the strings in the Series/Index.
+
+        For more information on the forms, see
+        :func:`unicodedata.normalize`.
+
+        Parameters
+        ----------
+        form : {'NFC', 'NFKC', 'NFD', 'NFKD'}
+            Unicode form.
+
+        Returns
+        -------
+        normalized : Series/Index of objects
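+
+        Examples
+        --------
+        An illustrative sketch: NFC composes 'e' plus a combining acute
+        accent into the single code point 'é'.
+
+        >>> s = pd.Series(['e\u0301'])
+        >>> s.str.len()
+        0    2
+        dtype: int64
+        >>> s.str.normalize('NFC').str.len()
+        0    1
+        dtype: int64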
+        """
+        result = self._array._str.normalize(form)
+        return self._wrap_result(result)
+
+    _shared_docs[
+        "index"
+    ] = """
+    Return %(side)s indexes in each string in Series/Index.
+
+    Each of the returned indexes corresponds to the position where the
+    substring is fully contained between [start:end]. This is the same
+    as ``str.%(similar)s`` except instead of returning -1, it raises a
+    ValueError when the substring is not found. Equivalent to standard
+    ``str.%(method)s``.
+
+    Parameters
+    ----------
+    sub : str
+        Substring being searched.
+    start : int
+        Left edge index.
+    end : int
+        Right edge index.
+
+    Returns
+    -------
+    Series or Index of object
+
+    See Also
+    --------
+    %(also)s
+    """
+
+    @Appender(
+        _shared_docs["index"]
+        % dict(
+            side="lowest",
+            similar="find",
+            method="index",
+            also="rindex : Return highest indexes in each string.",
+        )
+    )
+    @forbid_nonstring_types(["bytes"])
+    def index(self, sub, start=0, end=None):
+        result = self._array._str.index(sub, start=start, end=end)
+        return self._wrap_result(result, returns_string=False)
+
+    @Appender(
+        _shared_docs["index"]
+        % dict(
+            side="highest",
+            similar="rfind",
+            method="rindex",
+            also="index : Return lowest indexes in each string.",
+        )
+    )
+    @forbid_nonstring_types(["bytes"])
+    def rindex(self, sub, start=0, end=None):
+        result = self._array._str.rindex(sub, start=start, end=end)
+        return self._wrap_result(result, returns_string=False)
+
+    def len(self):
+        """
+        Compute the length of each element in the Series/Index.
+
+        The element may be a sequence (such as a string, tuple or list) or a collection
+        (such as a dictionary).
+
+        Returns
+        -------
+        Series or Index of int
+            A Series or Index of integer values indicating the length of each
+            element in the Series or Index.
+
+        See Also
+        --------
+        str.len : Python built-in function returning the length of an object.
+        Series.size : Returns the length of the Series.
+
+        Examples
+        --------
+        Returns the length (number of characters) in a string. Returns the
+        number of entries for dictionaries, lists or tuples.
+
+        >>> s = pd.Series(['dog',
+        ...                 '',
+        ...                 5,
+        ...                 {'foo' : 'bar'},
+        ...                 [2, 3, 5, 7],
+        ...                 ('one', 'two', 'three')])
+        >>> s
+        0                  dog
+        1
+        2                    5
+        3       {'foo': 'bar'}
+        4         [2, 3, 5, 7]
+        5    (one, two, three)
+        dtype: object
+        >>> s.str.len()
+        0    3.0
+        1    0.0
+        2    NaN
+        3    1.0
+        4    4.0
+        5    3.0
+        dtype: float64
+        """
+        result = self._array._str.len()
+        return self._wrap_result(result, returns_string=False)
+
+    _shared_docs[
+        "casemethods"
+    ] = """
+    Convert strings in the Series/Index to %(type)s.
+    %(version)s
+    Equivalent to :meth:`str.%(method)s`.
+
+    Returns
+    -------
+    Series or Index of object
+
+    See Also
+    --------
+    Series.str.lower : Converts all characters to lowercase.
+    Series.str.upper : Converts all characters to uppercase.
+    Series.str.title : Converts first character of each word to uppercase and
+        remaining to lowercase.
+    Series.str.capitalize : Converts first character to uppercase and
+        remaining to lowercase.
+    Series.str.swapcase : Converts uppercase to lowercase and lowercase to
+        uppercase.
+    Series.str.casefold : Removes all case distinctions in the string.
+
+    Examples
+    --------
+    >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
+    >>> s
+    0                 lower
+    1              CAPITALS
+    2    this is a sentence
+    3              SwApCaSe
+    dtype: object
+
+    >>> s.str.lower()
+    0                 lower
+    1              capitals
+    2    this is a sentence
+    3              swapcase
+    dtype: object
+
+    >>> s.str.upper()
+    0                 LOWER
+    1              CAPITALS
+    2    THIS IS A SENTENCE
+    3              SWAPCASE
+    dtype: object
+
+    >>> s.str.title()
+    0                 Lower
+    1              Capitals
+    2    This Is A Sentence
+    3              Swapcase
+    dtype: object
+
+    >>> s.str.capitalize()
+    0                 Lower
+    1              Capitals
+    2    This is a sentence
+    3              Swapcase
+    dtype: object
+
+    >>> s.str.swapcase()
+    0                 LOWER
+    1              capitals
+    2    THIS IS A SENTENCE
+    3              sWaPcAsE
+    dtype: object
+    """
+    # Types:
+    #     cases:
+    #         upper, lower, title, capitalize, swapcase, casefold
+    #     boolean:
+    #         isalpha, isnumeric, isalnum, isdigit, isdecimal, isspace,
+    #         islower, isupper, istitle
+    # _doc_args holds dict of strings to use in substituting casemethod docs
+    _doc_args: Dict[str, Dict[str, str]] = {}
+    _doc_args["lower"] = dict(type="lowercase", method="lower", version="")
+    _doc_args["upper"] = dict(type="uppercase", method="upper", version="")
+    _doc_args["title"] = dict(type="titlecase", method="title", version="")
+    _doc_args["capitalize"] = dict(
+        type="be capitalized", method="capitalize", version=""
+    )
+    _doc_args["swapcase"] = dict(type="be swapcased", method="swapcase", version="")
+    _doc_args["casefold"] = dict(
+        type="be casefolded",
+        method="casefold",
+        version="\n    .. versionadded:: 0.25.0\n",
+    )
+
+    @Appender(_shared_docs["casemethods"] % _doc_args["lower"])
+    def lower(self):
+        result = self._array._str.lower()
+        return self._wrap_result(result)
+
+    @Appender(_shared_docs["casemethods"] % _doc_args["upper"])
+    def upper(self):
+        result = self._array._str.upper()
+        return self._wrap_result(result)
+
+    @Appender(_shared_docs["casemethods"] % _doc_args["title"])
+    def title(self):
+        result = self._array._str.title()
+        return self._wrap_result(result)
+
+    @Appender(_shared_docs["casemethods"] % _doc_args["capitalize"])
+    def capitalize(self):
+        result = self._array._str.capitalize()
+        return self._wrap_result(result)
+
+    @Appender(_shared_docs["casemethods"] % _doc_args["swapcase"])
+    def swapcase(self):
+        result = self._array._str.swapcase()
+        return self._wrap_result(result)
+
+    @Appender(_shared_docs["casemethods"] % _doc_args["casefold"])
+    def casefold(self):
+        result = self._array._str.casefold()
+        return self._wrap_result(result)
+
+    _shared_docs[
+        "ismethods"
+    ] = """
+    Check whether all characters in each string are %(type)s.
+
+    This is equivalent to running the Python string method
+    :meth:`str.%(method)s` for each element of the Series/Index. If a string
+    has zero characters, ``False`` is returned for that check.
+
+    Returns
+    -------
+    Series or Index of bool
+        Series or Index of boolean values with the same length as the original
+        Series/Index.
+
+    See Also
+    --------
+    Series.str.isalpha : Check whether all characters are alphabetic.
+    Series.str.isnumeric : Check whether all characters are numeric.
+    Series.str.isalnum : Check whether all characters are alphanumeric.
+    Series.str.isdigit : Check whether all characters are digits.
+    Series.str.isdecimal : Check whether all characters are decimal.
+    Series.str.isspace : Check whether all characters are whitespace.
+    Series.str.islower : Check whether all characters are lowercase.
+    Series.str.isupper : Check whether all characters are uppercase.
+    Series.str.istitle : Check whether all characters are titlecase.
+
+    Examples
+    --------
+    **Checks for Alphabetic and Numeric Characters**
+
+    >>> s1 = pd.Series(['one', 'one1', '1', ''])
+
+    >>> s1.str.isalpha()
+    0     True
+    1    False
+    2    False
+    3    False
+    dtype: bool
+
+    >>> s1.str.isnumeric()
+    0    False
+    1    False
+    2     True
+    3    False
+    dtype: bool
+
+    >>> s1.str.isalnum()
+    0     True
+    1     True
+    2     True
+    3    False
+    dtype: bool
+
+    Note that checks against characters mixed with any additional punctuation
+    or whitespace will evaluate to false for an alphanumeric check.
+
+    >>> s2 = pd.Series(['A B', '1.5', '3,000'])
+    >>> s2.str.isalnum()
+    0    False
+    1    False
+    2    False
+    dtype: bool
+
+    **More Detailed Checks for Numeric Characters**
+
+    There are several different but overlapping sets of numeric characters that
+    can be checked for.
+
+    >>> s3 = pd.Series(['23', '³', '⅕', ''])
+
+    The ``s3.str.isdecimal`` method checks for characters used to form numbers
+    in base 10.
+
+    >>> s3.str.isdecimal()
+    0     True
+    1    False
+    2    False
+    3    False
+    dtype: bool
+
+    The ``s3.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also
+    includes special digits, like superscripted and subscripted digits in
+    unicode.
+
+    >>> s3.str.isdigit()
+    0     True
+    1     True
+    2    False
+    3    False
+    dtype: bool
+
+    The ``s3.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also
+    includes other characters that can represent quantities such as unicode
+    fractions.
+
+    >>> s3.str.isnumeric()
+    0     True
+    1     True
+    2     True
+    3    False
+    dtype: bool
+
+    **Checks for Whitespace**
+
+    >>> s4 = pd.Series([' ', '\\t\\r\\n ', ''])
+    >>> s4.str.isspace()
+    0     True
+    1     True
+    2    False
+    dtype: bool
+
+    **Checks for Character Case**
+
+    >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
+
+    >>> s5.str.islower()
+    0     True
+    1    False
+    2    False
+    3    False
+    dtype: bool
+
+    >>> s5.str.isupper()
+    0    False
+    1    False
+    2     True
+    3    False
+    dtype: bool
+
+    The ``s5.str.istitle`` method checks for whether all words are in title
+    case (whether only the first letter of each word is capitalized). Words are
+    assumed to be any sequence of non-numeric characters separated by
+    whitespace characters.
+
+    >>> s5.str.istitle()
+    0    False
+    1     True
+    2    False
+    3    False
+    dtype: bool
+    """
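+    # Each is* accessor below is generated by ``_map_and_wrap``; a sketch of
+    # its contract: map the named ``str`` predicate element-wise through the
+    # backing array's ``_str`` namespace, wrap the boolean result, and attach
+    # the rendered shared docstring.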
+    _doc_args["isalnum"] = dict(type="alphanumeric", method="isalnum")
+    _doc_args["isalpha"] = dict(type="alphabetic", method="isalpha")
+    _doc_args["isdigit"] = dict(type="digits", method="isdigit")
+    _doc_args["isspace"] = dict(type="whitespace", method="isspace")
+    _doc_args["islower"] = dict(type="lowercase", method="islower")
+    _doc_args["isupper"] = dict(type="uppercase", method="isupper")
+    _doc_args["istitle"] = dict(type="titlecase", method="istitle")
+    _doc_args["isnumeric"] = dict(type="numeric", method="isnumeric")
+    _doc_args["isdecimal"] = dict(type="decimal", method="isdecimal")
+    # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624)
+
+    isalnum = _map_and_wrap(
+        "isalnum", docstring=_shared_docs["ismethods"] % _doc_args["isalnum"]
+    )
+    isalpha = _map_and_wrap(
+        "isalpha", docstring=_shared_docs["ismethods"] % _doc_args["isalpha"]
+    )
+    isdigit = _map_and_wrap(
+        "isdigit", docstring=_shared_docs["ismethods"] % _doc_args["isdigit"]
+    )
+    isspace = _map_and_wrap(
+        "isspace", docstring=_shared_docs["ismethods"] % _doc_args["isspace"]
+    )
+    islower = _map_and_wrap(
+        "islower", docstring=_shared_docs["ismethods"] % _doc_args["islower"]
+    )
+    isupper = _map_and_wrap(
+        "isupper", docstring=_shared_docs["ismethods"] % _doc_args["isupper"]
+    )
+    istitle = _map_and_wrap(
+        "istitle", docstring=_shared_docs["ismethods"] % _doc_args["istitle"]
+    )
+    isnumeric = _map_and_wrap(
+        "isnumeric", docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"]
+    )
+    isdecimal = _map_and_wrap(
+        "isdecimal", docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"]
+    )
+
+    @classmethod
+    def _make_accessor(cls, data):
+        cls._validate(data)
+        return cls(data)
diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py
new file mode 100644
index 0000000000000..c32cfffa98126
--- /dev/null
+++ b/pandas/core/strings/base.py
@@ -0,0 +1,33 @@
+import unicodedata
+
+
+class BaseStringArrayMethods:
+    """
+    Base class for the array-backed string method implementations.
+
+    Subclasses are expected to provide ``_map``, which applies a callable
+    element-wise over the underlying array.
+    """
+
+    def __init__(self, array):
+        self._array = array
+
+    def upper(self):
+        return self._map(lambda x: x.upper())
+
+    def isalnum(self):
+        return self._map(str.isalnum, dtype="bool")
+
+    def capitalize(self):
+        return self._map(str.capitalize)
+
+    def casefold(self):
+        return self._map(str.casefold)
+
+    def title(self):
+        return self._map(str.title)
+
+    def swapcase(self):
+        return self._map(str.swapcase)
+
+    def lower(self):
+        return self._map(str.lower)
+
+    def normalize(self, form):
+        f = lambda x: unicodedata.normalize(form, x)
+        return self._map(f)
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
new file mode 100644
index 0000000000000..acf234b711062
--- /dev/null
+++ b/pandas/core/strings/object_array.py
@@ -0,0 +1,145 @@
+import re
+
+import numpy as np
+
+import pandas._libs.lib as lib
+
+from pandas.core.dtypes.common import ensure_object, is_categorical_dtype
+from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
+from pandas.core.dtypes.missing import isna
+
+from pandas.core.accessor import CachedAccessor
+from pandas.core.arrays.numpy_ import PandasArray
+from pandas.core.strings.base import BaseStringArrayMethods
+from pandas.core.strings_ import cat_safe  # used by cat() below
+
+
+class ObjectArrayMethods(BaseStringArrayMethods):
+    def _map(self, f, na_mask=True, na_value=np.nan, dtype=np.dtype(object)):
+        # n.b.: na_mask is the default.
+        # need to figure out when it was false, maybe split
+        arr = self._array  # object-dtype ndarray.
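+        # Sketch of the approach below: build an NA mask up front, apply ``f``
+        # only to the non-missing entries via ``lib.map_infer_mask``, then fill
+        # the masked positions with ``na_value`` afterwards, so ``f`` itself
+        # never has to handle missing values.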
+
+        if not len(arr):
+            return np.ndarray(0, dtype=dtype)
+        if na_value is None:
+            na_value = np.nan
+
+        if isinstance(arr, ABCSeries):
+            arr = arr._values  # TODO: extract_array?
+        if not isinstance(arr, np.ndarray):
+            arr = np.asarray(arr, dtype=object)
+        if na_mask:
+            mask = isna(arr)
+            convert = not np.all(mask)
+            try:
+                result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert)
+            except (TypeError, AttributeError) as e:
+                # Reraise the exception if callable `f` got wrong number of args.
+                # The user may want to be warned by this, instead of getting NaN
+                p_err = (
+                    r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
+                    r"(?(3)required )positional arguments?"
+                )
+
+                if len(e.args) >= 1 and re.search(p_err, e.args[0]):
+                    # FIXME: this should be totally avoidable
+                    raise e
+
+                def g(x):
+                    try:
+                        return f(x)
+                    except (TypeError, AttributeError):
+                        return na_value
+
+                return self._map(g, na_value=na_value, dtype=dtype)
+            if na_value is not np.nan:
+                np.putmask(result, mask, na_value)
+                if result.dtype == object:
+                    result = lib.maybe_convert_objects(result)
+            return result
+        else:
+            return lib.map_infer(arr, f)
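+
+    # Illustrative sketch of ``_map`` (hypothetical input):
+    #
+    #   ObjectArrayMethods(np.array(["a", np.nan], dtype=object))._map(str.upper)
+    #   -> array(['A', nan], dtype=object)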
+
+    def cat(self, others=None, sep=None, na_rep=None, join="left"):
+        from pandas import Index, Series, concat
+
+        if isinstance(others, str):
+            raise ValueError("Did you mean to supply a `sep` keyword?")
+        if sep is None:
+            sep = ""
+
+        if isinstance(self._orig, ABCIndexClass):
+            data = Series(self._orig, index=self._orig)
+        else:  # Series
+            data = self._orig
+
+        # concatenate Series/Index with itself if no "others"
+        if others is None:
+            data = ensure_object(data)
+            na_mask = isna(data)
+            if na_rep is None and na_mask.any():
+                data = data[~na_mask]
+            elif na_rep is not None and na_mask.any():
+                data = np.where(na_mask, na_rep, data)
+            return sep.join(data)
+
+        try:
+            # turn anything in "others" into lists of Series
+            others = self._get_series_list(others)
+        except ValueError as err:  # do not catch TypeError raised by _get_series_list
+            raise ValueError(
+                "If `others` contains arrays or lists (or other "
+                "list-likes without an index), these must all be "
+                "of the same length as the calling Series/Index."
+            ) from err
+
+        # align if required
+        if any(not data.index.equals(x.index) for x in others):
+            # Need to add keys for uniqueness in case of duplicate columns
+            others = concat(
+                others,
+                axis=1,
+                join=(join if join == "inner" else "outer"),
+                keys=range(len(others)),
+                sort=False,
+                copy=False,
+            )
+            data, others = data.align(others, join=join)
+            others = [others[x] for x in others]  # again list of Series
+
+        all_cols = [ensure_object(x) for x in [data] + others]
+        na_masks = np.array([isna(x) for x in all_cols])
+        union_mask = np.logical_or.reduce(na_masks, axis=0)
+
+        if na_rep is None and union_mask.any():
+            # no na_rep means NaNs for all rows where any column has a NaN
+            # only necessary if there are actually any NaNs
+            result = np.empty(len(data), dtype=object)
+            np.putmask(result, union_mask, np.nan)
+
+            not_masked = ~union_mask
+            result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep)
+        elif na_rep is not None and union_mask.any():
+            # fill NaNs with na_rep in case there are actually any NaNs
+            all_cols = [
+                np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols)
+            ]
+            result = cat_safe(all_cols, sep)
+        else:
+            # no NaNs - can just concatenate
+            result = cat_safe(all_cols, sep)
+
+        if isinstance(self._orig, ABCIndexClass):
+            # add dtype for case that result is all-NA
+            result = Index(result, dtype=object, name=self._orig.name)
+        else:  # Series
+            if is_categorical_dtype(self._orig.dtype):
+                # We need to infer the new categories.
+                dtype = None
+            else:
+                dtype = self._orig.dtype
+            result = Series(result, dtype=dtype, index=data.index, name=self._orig.name)
+        return result
+
+
+class ObjectProxy(PandasArray):
+    _str = CachedAccessor("_str", ObjectArrayMethods)
diff --git a/pandas/core/strings_.py b/pandas/core/strings_.py
new file mode 100644
index 0000000000000..09ed48e59a489
--- /dev/null
+++ b/pandas/core/strings_.py
@@ -0,0 +1,775 @@
+import codecs
+from functools import wraps
+import re
+import textwrap
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Pattern, Type, Union
+import unicodedata
+import warnings
+
+import numpy as np
+
+import pandas._libs.lib as lib
+import pandas._libs.missing as libmissing
+import pandas._libs.ops as libops
+from pandas._typing import ArrayLike, Dtype, Scalar
+from pandas.util._decorators import Appender
+
+from pandas.core.dtypes.common import (
+    ensure_object,
+    is_bool_dtype,
+    is_categorical_dtype,
+    is_extension_array_dtype,
+    is_integer,
+    is_integer_dtype,
+    is_list_like,
+    is_object_dtype,
+    is_re,
+    is_scalar,
+    is_string_dtype,
+)
+from pandas.core.dtypes.generic import (
+    ABCDataFrame,
+    ABCIndexClass,
+    ABCMultiIndex,
+    ABCSeries,
+)
+from pandas.core.dtypes.missing import isna
+
+from pandas.core.accessor import CachedAccessor
+from pandas.core.algorithms import take_1d
+from pandas.core.arrays.numpy_ import PandasArray
+
+if TYPE_CHECKING:
+    from pandas.arrays import StringArray
+
+_cpython_optimized_encoders = (
+    "utf-8",
+    "utf8",
+    "latin-1",
+    "latin1",
+    "iso-8859-1",
+    "mbcs",
+    "ascii",
+)
+_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32")
+
+_shared_docs: Dict[str, str] = dict()
+
+
+def cat_core(list_of_columns: List, sep: str):
+    """
+    Auxiliary function for :meth:`str.cat`
+
+    Parameters
+    ----------
+    list_of_columns : list of numpy arrays
+        List of arrays to be concatenated with sep;
+        these arrays may not contain NaNs!
+    sep : string
+        The separator string for concatenating the columns.
+
+    Returns
+    -------
+    np.ndarray
+        The concatenation of list_of_columns with sep.
+    """
+    if sep == "":
+        # no need to interleave sep if it is empty
+        arr_of_cols = np.asarray(list_of_columns, dtype=object)
+        return np.sum(arr_of_cols, axis=0)
+    list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
+    list_with_sep[::2] = list_of_columns
+    arr_with_sep = np.asarray(list_with_sep, dtype=object)
+    return np.sum(arr_with_sep, axis=0)
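+
+
+# Illustrative sketch of the interleaving performed by ``cat_core``:
+#
+#   cat_core([np.array(['a', 'b'], dtype=object),
+#             np.array(['1', '2'], dtype=object)], sep='-')
+#   -> array(['a-1', 'b-2'], dtype=object)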
+
+
+def cat_safe(list_of_columns: List, sep: str):
+    """
+    Auxiliary function for :meth:`str.cat`.
+
+    Same signature as cat_core, but handles TypeErrors in concatenation, which
+    happen if the arrays in list_of columns have the wrong dtypes or content.
+
+    Parameters
+    ----------
+    list_of_columns : list of numpy arrays
+        List of arrays to be concatenated with sep;
+        these arrays may not contain NaNs!
+    sep : string
+        The separator string for concatenating the columns.
+
+    Returns
+    -------
+    np.ndarray
+        The concatenation of list_of_columns with sep.
+    """
+    try:
+        result = cat_core(list_of_columns, sep)
+    except TypeError:
+        # if there are any non-string values (wrong dtype or hidden behind
+        # object dtype), np.sum will fail; catch and return with better message
+        for column in list_of_columns:
+            dtype = lib.infer_dtype(column, skipna=True)
+            if dtype not in ["string", "empty"]:
+                raise TypeError(
+                    "Concatenation requires list-likes containing only "
+                    "strings (or missing values). Offending values found in "
+                    f"column {dtype}"
+                ) from None
+    return result
+
+
+def str_count(arr, pat, flags=0):
+    regex = re.compile(pat, flags=flags)
+    f = lambda x: len(regex.findall(x))
+    return _na_map(f, arr, dtype="int64")
+
+
+def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
+    if regex:
+        if not case:
+            flags |= re.IGNORECASE
+
+        regex = re.compile(pat, flags=flags)
+
+        if regex.groups > 0:
+            warnings.warn(
+                "This pattern has match groups. 
To actually get the " + "groups, use str.extract.", + UserWarning, + stacklevel=3, + ) + + f = lambda x: regex.search(x) is not None + else: + if case: + f = lambda x: pat in x + else: + upper_pat = pat.upper() + f = lambda x: upper_pat in x + uppered = _na_map(lambda x: x.upper(), arr) + return _na_map(f, uppered, na, dtype=np.dtype(bool)) + return _na_map(f, arr, na, dtype=np.dtype(bool)) + + +def str_startswith(arr, pat, na=np.nan): + f = lambda x: x.startswith(pat) + return _na_map(f, arr, na, dtype=np.dtype(bool)) + + +def str_endswith(arr, pat, na=np.nan): + f = lambda x: x.endswith(pat) + return _na_map(f, arr, na, dtype=np.dtype(bool)) + + +def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): + # Check whether repl is valid (GH 13438, GH 15055) + if not (isinstance(repl, str) or callable(repl)): + raise TypeError("repl must be a string or callable") + + is_compiled_re = is_re(pat) + if regex: + if is_compiled_re: + if (case is not None) or (flags != 0): + raise ValueError( + "case and flags cannot be set when pat is a compiled regex" + ) + else: + # not a compiled regex + # set default case + if case is None: + case = True + + # add case flag, if provided + if case is False: + flags |= re.IGNORECASE + if is_compiled_re or len(pat) > 1 or flags or callable(repl): + n = n if n >= 0 else 0 + compiled = re.compile(pat, flags=flags) + f = lambda x: compiled.sub(repl=repl, string=x, count=n) + else: + f = lambda x: x.replace(pat, repl, n) + else: + if is_compiled_re: + raise ValueError( + "Cannot use a compiled regex as replacement pattern with regex=False" + ) + if callable(repl): + raise ValueError("Cannot use a callable replacement when regex=False") + f = lambda x: x.replace(pat, repl, n) + + return _na_map(f, arr, dtype=str) + + +def str_repeat(arr, repeats): + if is_scalar(repeats): + + def scalar_rep(x): + try: + return bytes.__mul__(x, repeats) + except TypeError: + return str.__mul__(x, repeats) + + return _na_map(scalar_rep, arr, dtype=str) + else: + + def rep(x, r): + if x is libmissing.NA: + return x + try: + return bytes.__mul__(x, r) + except TypeError: + return str.__mul__(x, r) + + repeats = np.asarray(repeats, dtype=object) + result = libops.vec_binop(np.asarray(arr), repeats, rep) + return result + + +def str_match( + arr: ArrayLike, + pat: Union[str, Pattern], + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, +): + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + f = lambda x: regex.match(x) is not None + + return _na_map(f, arr, na, dtype=np.dtype(bool)) + + +def str_fullmatch( + arr: ArrayLike, + pat: Union[str, Pattern], + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, +): + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + f = lambda x: regex.fullmatch(x) is not None + + return _na_map(f, arr, na, dtype=np.dtype(bool)) + + +def _get_single_group_name(rx): + try: + return list(rx.groupindex.keys()).pop() + except IndexError: + return None + + +def _groups_or_na_fun(regex): + """Used in both extract_noexpand and extract_frame""" + if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + empty_row = [np.nan] * regex.groups + + def f(x): + if not isinstance(x, str): + return empty_row + m = regex.search(x) + if m: + return [np.nan if item is None else item for item in m.groups()] + else: + return empty_row + + return f + + +def _result_dtype(arr): + # workaround #27953 + # ideally we just pass `dtype=arr.dtype` unconditionally, but this 
fails + # when the list of values is empty. + from pandas.core.arrays.string_ import StringDtype + + if isinstance(arr.dtype, StringDtype): + return arr.dtype.name + else: + return object + + +def _str_extract_noexpand(arr, pat, flags=0): + """ + Find groups in each string in the Series using passed regular + expression. This function is called from + str_extract(expand=False), and can return Series, DataFrame, or + Index. + + """ + from pandas import DataFrame + + regex = re.compile(pat, flags=flags) + groups_or_na = _groups_or_na_fun(regex) + + if regex.groups == 1: + result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) + name = _get_single_group_name(regex) + else: + if isinstance(arr, ABCIndexClass): + raise ValueError("only one regex group is supported with Index") + name = None + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + if arr.empty: + result = DataFrame(columns=columns, dtype=object) + else: + dtype = _result_dtype(arr) + result = DataFrame( + [groups_or_na(val) for val in arr], + columns=columns, + index=arr.index, + dtype=dtype, + ) + return result, name + + +def _str_extract_frame(arr, pat, flags=0): + """ + For each subject string in the Series, extract groups from the + first match of regular expression pat. This function is called from + str_extract(expand=True), and always returns a DataFrame. + + """ + from pandas import DataFrame + + regex = re.compile(pat, flags=flags) + groups_or_na = _groups_or_na_fun(regex) + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + + if len(arr) == 0: + return DataFrame(columns=columns, dtype=object) + try: + result_index = arr.index + except AttributeError: + result_index = None + dtype = _result_dtype(arr) + return DataFrame( + [groups_or_na(val) for val in arr], + columns=columns, + index=result_index, + dtype=dtype, + ) + + +def str_extract(arr, pat, flags=0, expand=True): + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + if expand: + return _str_extract_frame(arr._orig, pat, flags=flags) + else: + result, name = _str_extract_noexpand(arr._parent, pat, flags=flags) + return arr._wrap_result(result, name=name, expand=expand) + + +def str_extractall(arr, pat, flags=0): + regex = re.compile(pat, flags=flags) + # the regex must contain capture groups. 
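+    # (each match becomes one output row, indexed by the original label(s)
+    # plus a trailing "match" level, with one column per capture group)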
+ if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + + if isinstance(arr, ABCIndexClass): + arr = arr.to_series().reset_index(drop=True) + + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + match_list = [] + index_list = [] + is_mi = arr.index.nlevels > 1 + + for subject_key, subject in arr.items(): + if isinstance(subject, str): + + if not is_mi: + subject_key = (subject_key,) + + for match_i, match_tuple in enumerate(regex.findall(subject)): + if isinstance(match_tuple, str): + match_tuple = (match_tuple,) + na_tuple = [np.NaN if group == "" else group for group in match_tuple] + match_list.append(na_tuple) + result_key = tuple(subject_key + (match_i,)) + index_list.append(result_key) + + from pandas import MultiIndex + + index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) + dtype = _result_dtype(arr) + + result = arr._constructor_expanddim( + match_list, index=index, columns=columns, dtype=dtype + ) + return result + + +def str_get_dummies(arr, sep="|"): + arr = arr.fillna("") + try: + arr = sep + arr + sep + except TypeError: + arr = sep + arr.astype(str) + sep + + tags = set() + for ts in arr.str.split(sep): + tags.update(ts) + tags = sorted(tags - {""}) + + dummies = np.empty((len(arr), len(tags)), dtype=np.int64) + + for i, t in enumerate(tags): + pat = sep + t + sep + dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) + return dummies, tags + + +def str_join(arr, sep): + return _na_map(sep.join, arr, dtype=str) + + +def str_findall(arr, pat, flags=0): + regex = re.compile(pat, flags=flags) + return _na_map(regex.findall, arr) + + +def str_find(arr, sub, start=0, end=None, side="left"): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + if side == "left": + method = "find" + elif side == "right": + method = "rfind" + else: # pragma: no cover + raise ValueError("Invalid side") + + if end is None: + f = lambda x: getattr(x, method)(sub, start) + else: + f = lambda x: getattr(x, method)(sub, start, end) + + return _na_map(f, arr, dtype=np.dtype("int64")) + + +def str_index(arr, sub, start=0, end=None, side="left"): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + if side == "left": + method = "index" + elif side == "right": + method = "rindex" + else: # pragma: no cover + raise ValueError("Invalid side") + + if end is None: + f = lambda x: getattr(x, method)(sub, start) + else: + f = lambda x: getattr(x, method)(sub, start, end) + + return _na_map(f, arr, dtype=np.dtype("int64")) + + +def str_pad(arr, width, side="left", fillchar=" "): + if not isinstance(fillchar, str): + msg = f"fillchar must be a character, not {type(fillchar).__name__}" + raise TypeError(msg) + + if len(fillchar) != 1: + raise TypeError("fillchar must be a character, not str") + + if not is_integer(width): + msg = f"width must be of integer type, not {type(width).__name__}" + raise TypeError(msg) + + if side == "left": + f = lambda x: x.rjust(width, fillchar) + elif side == "right": + f = lambda x: x.ljust(width, fillchar) + elif side == "both": + f = lambda x: x.center(width, fillchar) + else: # pragma: no cover + raise ValueError("Invalid side") + + return _na_map(f, arr, dtype=str) + + +def str_split(arr, pat=None, n=None): + + if pat is None: + if n is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + else: + 
if len(pat) == 1: + if n is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + else: + if n is None or n == -1: + n = 0 + regex = re.compile(pat) + f = lambda x: regex.split(x, maxsplit=n) + res = _na_map(f, arr) + return res + + +def str_rsplit(arr, pat=None, n=None): + + if n is None or n == 0: + n = -1 + f = lambda x: x.rsplit(pat, n) + res = _na_map(f, arr) + return res + + +def str_slice(arr, start=None, stop=None, step=None): + obj = slice(start, stop, step) + f = lambda x: x[obj] + return _na_map(f, arr, dtype=str) + + +def str_slice_replace(arr, start=None, stop=None, repl=None): + if repl is None: + repl = "" + + def f(x): + if x[start:stop] == "": + local_stop = start + else: + local_stop = stop + y = "" + if start is not None: + y += x[:start] + y += repl + if stop is not None: + y += x[local_stop:] + return y + + return _na_map(f, arr, dtype=str) + + +def str_strip(arr, to_strip=None, side="both"): + """ + Strip whitespace (including newlines) from each string in the + Series/Index. + + Parameters + ---------- + to_strip : str or unicode + side : {'left', 'right', 'both'}, default 'both' + + Returns + ------- + Series or Index + """ + if side == "both": + f = lambda x: x.strip(to_strip) + elif side == "left": + f = lambda x: x.lstrip(to_strip) + elif side == "right": + f = lambda x: x.rstrip(to_strip) + else: # pragma: no cover + raise ValueError("Invalid side") + return _na_map(f, arr, dtype=str) + + +def str_wrap(arr, width, **kwargs): + kwargs["width"] = width + + tw = textwrap.TextWrapper(**kwargs) + + return _na_map(lambda s: "\n".join(tw.wrap(s)), arr, dtype=str) + + +def str_translate(arr, table): + return _na_map(lambda x: x.translate(table), arr, dtype=str) + + +def str_get(arr, i): + def f(x): + if isinstance(x, dict): + return x.get(i) + elif len(x) > i >= -len(x): + return x[i] + return np.nan + + return _na_map(f, arr) + + +def str_decode(arr, encoding, errors="strict"): + """ + Decode character string in the Series/Index using indicated encoding. + + Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in + python3. + + Parameters + ---------- + encoding : str + errors : str, optional + + Returns + ------- + Series or Index + """ + if encoding in _cpython_optimized_decoders: + # CPython optimized implementation + f = lambda x: x.decode(encoding, errors) + else: + decoder = codecs.getdecoder(encoding) + f = lambda x: decoder(x, errors)[0] + return _na_map(f, arr) + + +def str_encode(arr, encoding, errors="strict"): + if encoding in _cpython_optimized_encoders: + # CPython optimized implementation + f = lambda x: x.encode(encoding, errors) + else: + encoder = codecs.getencoder(encoding) + f = lambda x: encoder(x, errors)[0] + return _na_map(f, arr) + + +def forbid_nonstring_types(forbidden, name=None): + """ + Decorator to forbid specific types for a method of StringMethods. + + For calling `.str.{method}` on a Series or Index, it is necessary to first + initialize the :class:`StringMethods` object, and then call the method. + However, different methods allow different input types, and so this can not + be checked during :meth:`StringMethods.__init__`, but must be done on a + per-method basis. This decorator exists to facilitate this process, and + make it explicit which (inferred) types are disallowed by the method. + + :meth:`StringMethods.__init__` allows the *union* of types its different + methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), + namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. 
+ + The default string types ['string', 'empty'] are allowed for all methods. + For the additional types ['bytes', 'mixed', 'mixed-integer'], each method + then needs to forbid the types it is not intended for. + + Parameters + ---------- + forbidden : list-of-str or None + List of forbidden non-string types, may be one or more of + `['bytes', 'mixed', 'mixed-integer']`. + name : str, default None + Name of the method to use in the error message. By default, this is + None, in which case the name from the method being wrapped will be + copied. However, for working with further wrappers (like _pat_wrapper + and _noarg_wrapper), it is necessary to specify the name. + + Returns + ------- + func : wrapper + The method to which the decorator is applied, with an added check that + enforces the inferred type to not be in the list of forbidden types. + + Raises + ------ + TypeError + If the inferred type of the underlying data is in `forbidden`. + """ + # deal with None + forbidden = [] if forbidden is None else forbidden + + allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( + forbidden + ) + + def _forbid_nonstring_types(func): + func_name = func.__name__ if name is None else name + + @wraps(func) + def wrapper(self, *args, **kwargs): + if self._inferred_dtype not in allowed_types: + msg = ( + f"Cannot use .str.{func_name} with values of " + f"inferred dtype '{self._inferred_dtype}'." + ) + raise TypeError(msg) + return func(self, *args, **kwargs) + + wrapper.__name__ = func_name + return wrapper + + return _forbid_nonstring_types + + +def _noarg_wrapper( + f, + name=None, + docstring=None, + forbidden_types=["bytes"], + returns_string=True, + **kwargs, +): + @forbid_nonstring_types(forbidden_types, name=name) + def wrapper(self): + result = _na_map(f, self._parent, **kwargs) + return self._wrap_result(result, returns_string=returns_string) + + wrapper.__name__ = f.__name__ if name is None else name + if docstring is not None: + wrapper.__doc__ = docstring + else: + raise ValueError("Provide docstring") + + return wrapper + + +def _pat_wrapper( + f, + flags=False, + na=False, + name=None, + forbidden_types=["bytes"], + returns_string=True, + **kwargs, +): + @forbid_nonstring_types(forbidden_types, name=name) + def wrapper1(self, pat): + result = f(self._parent, pat) + return self._wrap_result(result, returns_string=returns_string) + + @forbid_nonstring_types(forbidden_types, name=name) + def wrapper2(self, pat, flags=0, **kwargs): + result = f(self._parent, pat, flags=flags, **kwargs) + return self._wrap_result(result, returns_string=returns_string) + + @forbid_nonstring_types(forbidden_types, name=name) + def wrapper3(self, pat, na=np.nan): + result = f(self._parent, pat, na=na) + return self._wrap_result(result, returns_string=returns_string) + + wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 + + wrapper.__name__ = f.__name__ if name is None else name + if f.__doc__: + wrapper.__doc__ = f.__doc__ + + return wrapper + + +def copy(source): + """Copy a docstring from another source function (if present)""" + + def do_copy(target): + if source.__doc__: + target.__doc__ = source.__doc__ + return target + + return do_copy From 89f8e6a38066a1cbcdc83d2592faef894100d0fe Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 11 Sep 2020 13:25:51 -0500 Subject: [PATCH 04/24] annoyed --- pandas/core/arrays/categorical.py | 5 +- pandas/core/arrays/string_.py | 206 ++--- pandas/core/strings/accessor.py | 446 ++++++++-- pandas/core/strings/base.py | 30 +- 
pandas/core/strings/categorical_strings.py | 17 + pandas/core/strings/object_array.py | 492 ++++++++--- pandas/core/strings_.py | 775 ------------------ .../tests/arrays/string_/test_string_arrow.py | 26 - pandas/tests/extension/arrow/test_string.py | 7 +- pandas/tests/extension/test_string_arrow.py | 150 ---- pandas/tests/test_strings.py | 124 +-- 11 files changed, 902 insertions(+), 1376 deletions(-) create mode 100644 pandas/core/strings/categorical_strings.py delete mode 100644 pandas/core/strings_.py delete mode 100644 pandas/tests/arrays/string_/test_string_arrow.py delete mode 100644 pandas/tests/extension/test_string_arrow.py diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 81f9456502bf0..33ba2c9cff985 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -41,7 +41,7 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna from pandas.core import ops -from pandas.core.accessor import PandasDelegate, delegate_names +from pandas.core.accessor import CachedAccessor, PandasDelegate, delegate_names import pandas.core.algorithms as algorithms from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d from pandas.core.arrays._mixins import NDArrayBackedExtensionArray @@ -52,6 +52,7 @@ from pandas.core.missing import interpolate_2d from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.sorting import nargsort +from pandas.core.strings.categorical_strings import CategoricalStringMethods from pandas.io.formats import console @@ -2336,6 +2337,8 @@ def replace(self, to_replace, value, inplace: bool = False): if not inplace: return cat + _str = CachedAccessor("_str", CategoricalStringMethods) + # The Series.cat accessor diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index d8c76d0615d45..7cc70e3660f81 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1,12 +1,9 @@ import operator -from typing import TYPE_CHECKING, Any, Callable, Dict, Type, Union +from typing import TYPE_CHECKING, Type, Union import numpy as np -from pandas._config import get_option - from pandas._libs import lib, missing as libmissing -from pandas._typing import ArrayLike, Dtype from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype from pandas.core.dtypes.common import ( @@ -22,12 +19,11 @@ from pandas.core import ops from pandas.core.accessor import CachedAccessor from pandas.core.arrays import IntegerArray, PandasArray -from pandas.core.arrays.boolean import BooleanDtype from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna -from pandas.core.strings.base import BaseStringArrayMethods +from pandas.core.strings.object_array import ObjectArrayMethods if TYPE_CHECKING: import pyarrow # noqa: F401 @@ -62,82 +58,16 @@ class StringDtype(ExtensionDtype): StringDtype """ + name = "string" + #: StringDtype.na_value uses pandas.NA na_value = libmissing.NA - _metadata = ("storage",) - - def __init__(self, storage=None): - if storage is None: - storage = get_option("mode.string_storage") - if storage not in {"python", "pyarrow"}: - raise ValueError( - f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." 
- ) - self.storage = storage - - @property - def name(self): - return f"StringDtype[{self.storage}]" @property def type(self) -> Type[str]: return str @classmethod - def construct_from_string(cls, string): - """ - Construct a StringDtype from a string. - - Parameters - ---------- - string : str - The type of the name. The storage type will be taking from `string`. - Valid options and their storage types are - - ========================== ============== - string result storage - ========================== ============== - ``'string'`` global default - ``'string[python]'`` python - ``'StringDtype[python]'`` python - ``'string[pyarrow]'`` pyarrow - ``'StringDtype[pyarrow]'`` pyarrow - ========================== ============= - - Returns - ------- - StringDtype - - Raise - ----- - TypeError - If the string is not a valid option. - - """ - if not isinstance(string, str): - raise TypeError( - f"'construct_from_string' expects a string, got {type(string)}" - ) - if string == "string": - # TODO: use global default - return cls() - elif string in {"string[python]", "StringDtype[python]"}: - return cls(storage="python") - elif string in {"string[pyarrow]", "StringDtype[pyarrow]"}: - return cls(storage="pyarrow") - else: - raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") - - def __eq__(self, other: Any) -> bool: - if isinstance(other, str) and other == "string": - return True - return super().__eq__(other) - - def __hash__(self) -> int: - # custom __eq__ so have to override __hash__ - return super().__hash__() - - # XXX: this is a classmethod, but we need to know the storage type. def construct_array_type(self) -> Type["StringArray"]: """ Return the array type associated with this dtype. @@ -146,14 +76,9 @@ def construct_array_type(self) -> Type["StringArray"]: ------- type """ - from .string_arrow import ArrowStringArray + return StringArray - if self.storage == "python": - return StringArray - else: - return ArrowStringArray - - def __repr__(self): + def __repr__(self) -> str: return self.name def __from_arrow__( @@ -163,7 +88,6 @@ def __from_arrow__( Construct StringArray from pyarrow Array/ChunkedArray. """ import pyarrow # noqa: F811 - from .string_arrow import ArrowStringArray if isinstance(array, pyarrow.Array): chunks = [array] @@ -177,87 +101,57 @@ def __from_arrow__( str_arr = StringArray._from_sequence(np.array(arr)) results.append(str_arr) - return ArrowStringArray._concat_same_type(results) - - -def _map_stringarray( - func: Callable[[str], Any], - arr: "StringArray", - na_value: Any = libmissing.NA, - dtype: Dtype = StringDtype(), -) -> ArrayLike: - """ - Map a callable over valid elements of a StringArray. - - Parameters - ---------- - func : Callable[[str], Any] - Apply to each valid element. - arr : StringArray - na_value : Any - The value to use for missing values. By default, this is - the original value (NA). - dtype : Dtype - The result dtype to use. Specifying this avoids an intermediate - object-dtype allocation. - - Returns - ------- - ArrayLike - An ExtensionArray for integer or string dtypes, otherwise - an ndarray. 
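# A plain-NumPy stand-in for the masked-map behavior described above (a hedged
# sketch; the real code uses lib.map_infer_mask and pandas NA semantics, not
# this helper):
import numpy as np

def masked_map_sketch(values, func, na_value):
    values = np.asarray(values, dtype=object)
    mask = np.array([v is None for v in values], dtype=bool)
    result = np.empty(len(values), dtype=object)
    result[mask] = na_value                            # missing slots keep NA
    result[~mask] = [func(v) for v in values[~mask]]   # map valid elements only
    return result

masked_map_sketch(["a", None, "b"], str.upper, None)   # ['A', None, 'B']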
- - """ - from pandas.arrays import BooleanArray, IntegerArray, StringArray + return StringArray._concat_same_type(results) - mask = isna(arr) - assert isinstance(arr, StringArray) - arr = np.asarray(arr) - if na_value is None: - na_value = libmissing.NA +class StringArrayMethods(ObjectArrayMethods): + def _map(self, f, na_value=libmissing.NA, dtype=StringDtype()): + from pandas.arrays import BooleanArray, IntegerArray, StringArray - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - result = lib.map_infer_mask( - arr, - func, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=np.dtype(dtype), - ) + arr = self._array + mask = isna(arr) - if not na_value_is_na: - mask[:] = False + assert isinstance(arr, StringArray) + arr = np.asarray(arr) + if na_value is None: + na_value = libmissing.NA - return constructor(result, mask) + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: Union[Type[IntegerArray], Type[BooleanArray]] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + dtype=np.dtype(dtype), + ) - elif is_string_dtype(dtype) and not is_object_dtype(dtype): - # i.e. StringDtype - result = lib.map_infer_mask( - arr, func, mask.view("uint8"), convert=False, na_value=na_value - ) - return StringArray(result) - else: - # This is when the result type is object. We reach this when - # -> We know the result type is truly object (e.g. .encode returns bytes - # or .findall returns a list). - # -> We don't know the result type. E.g. `.get` can return anything. - return lib.map_infer_mask(arr, func, mask.view("uint8")) + if not na_value_is_na: + mask[:] = False + return constructor(result, mask) -class StringArrayMethods(BaseStringArrayMethods): - def _map(self, f, na_result=libmissing.NA, dtype=StringDtype()): - return _map_stringarray(f, self._array, na_result, dtype) + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. StringDtype + result = lib.map_infer_mask( + arr, f, mask.view("uint8"), convert=False, na_value=na_value + ) + return StringArray(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. 
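        # Recap of the dtype dispatch implemented in this method:
        #   integer / boolean result dtype -> IntegerArray / BooleanArray,
        #       built from the mapped values plus the NA mask
        #   string (non-object) result dtype -> StringArray
        #   anything else -> the object-dtype fallthrough directly below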
+ return lib.map_infer_mask(arr, f, mask.view("uint8")) class StringArray(PandasArray): @@ -507,7 +401,7 @@ def _add_arithmetic_ops(cls): cls.__rmul__ = cls._create_arithmetic_method(ops.rmul) _create_comparison_method = _create_arithmetic_method - _str = CachedAccessor("str", StringArrayMethods) + _str = CachedAccessor("_str", StringArrayMethods) StringArray._add_arithmetic_ops() diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 84630066ef871..334769468e671 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1,7 +1,8 @@ import codecs from functools import wraps import operator -from typing import Dict +import re +from typing import Dict, List import warnings import numpy as np @@ -9,15 +10,21 @@ import pandas._libs.lib as lib from pandas.util._decorators import Appender -from pandas.core.dtypes.common import is_bool_dtype, is_categorical_dtype, is_list_like +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_integer, + is_list_like, +) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCSeries, ) +from pandas.core.dtypes.missing import isna -from pandas.core.algorithms import take_1d from pandas.core.arrays.numpy_ import PandasArray from pandas.core.base import NoNewAttributesMixin from pandas.core.strings.object_array import ObjectProxy @@ -103,6 +110,7 @@ def wrapper(self, *args, **kwargs): def _map_and_wrap(name, docstring): + @forbid_nonstring_types(["bytes"], name=name) def wrapper(self): result = operator.methodcaller(name)(self._array._str) return self._wrap_result(result) @@ -143,7 +151,7 @@ def __init__(self, data): self._is_string = isinstance(data.dtype, StringDtype) array = data.array - if isinstance(array, PandasArray): + if type(array) is PandasArray: # wrap in an object proxy to get the str methods. # Alternatively, just add _str to PandasArray. array = ObjectProxy(array._ndarray) @@ -167,7 +175,7 @@ def _validate(data): Auxiliary function for StringMethods, infers and checks dtype of data. This is a "first line of defence" at the creation of the StringMethods- - object (see _make_accessor), and just checks that the dtype is in the + object, and just checks that the dtype is in the *union* of the allowed types over all string methods below; this restriction is then refined on a per-method basis using the decorator @forbid_nonstring_types (more info in the corresponding docstring). @@ -212,11 +220,8 @@ def _validate(data): return inferred_dtype def __getitem__(self, key): - return self._array._str[key] - # if isinstance(key, slice): - # return self.slice(start=key.start, stop=key.stop, step=key.step) - # else: - # return self.get(key) + result = self._array._str[key] + return self._wrap_result(result) def __iter__(self): warnings.warn( @@ -232,26 +237,9 @@ def __iter__(self): g = self.get(i) def _wrap_result( - self, - result, - use_codes=True, - name=None, - expand=None, - fill_value=np.nan, - returns_string=True, + self, result, name=None, expand=None, fill_value=np.nan, returns_string=True, ): - from pandas import Index, MultiIndex, Series - - # for category, we do the stuff on the categories, so blow it up - # to the full series again - # But for some operations, we have to do the stuff on the full values, - # so make it possible to skip this step as the method already did this - # before the transformation... 
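# The `_str = CachedAccessor("_str", StringArrayMethods)` hookup above relies
# on pandas' caching-descriptor pattern. A simplified sketch of that pattern
# (illustrative, not the exact pandas source):
class _CachedAccessorSketch:
    def __init__(self, name, accessor):
        self._name = name
        self._accessor = accessor

    def __get__(self, obj, cls):
        if obj is None:
            return self._accessor                     # accessed on the class
        value = self._accessor(obj)                   # build the accessor once...
        object.__setattr__(obj, self._name, value)    # ...and cache it on the instance
        return value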
- if use_codes and self._is_categorical: - # if self._orig is a CategoricalIndex, there is no .cat-accessor - result = take_1d( - result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value - ) + from pandas import Index, MultiIndex if not hasattr(result, "ndim") or not hasattr(result, "dtype"): return result @@ -261,11 +249,8 @@ def _wrap_result( # case we'll want to return the same dtype as the input. # Or we can be wrapping a numeric output, in which case we don't want # to return a StringArray. - if self._is_string and returns_string: - dtype = "string" - else: - dtype = None - + # XXX: see if this can be removed. + # Ideally the array method returns the right array type. if expand is None: # infer from ndim if expand is not specified expand = result.ndim != 1 @@ -321,13 +306,19 @@ def cons_row(x): return Index(result, name=name) else: index = self._orig.index + # This is a mess. + if self._is_string and returns_string: + dtype = "string" + else: + dtype = None + if expand: cons = self._orig._constructor_expanddim result = cons(result, columns=name, index=index, dtype=dtype) else: # Must be a Series cons = self._orig._constructor - result = cons(result, name=name, index=index, dtype=dtype) + result = cons(result, name=name, index=index) return result def _get_series_list(self, others): @@ -522,7 +513,86 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): For more examples, see :ref:`here `. """ - return self._array._str.cat(others, sep, na_rep, join) + # XXX: not dispatched yet. + from pandas import Index, Series, concat + + if isinstance(others, str): + raise ValueError("Did you mean to supply a `sep` keyword?") + if sep is None: + sep = "" + + if isinstance(self._orig, ABCIndexClass): + data = Series(self._orig, index=self._orig) + else: # Series + data = self._orig + + # concatenate Series/Index with itself if no "others" + if others is None: + data = ensure_object(data) + na_mask = isna(data) + if na_rep is None and na_mask.any(): + data = data[~na_mask] + elif na_rep is not None and na_mask.any(): + data = np.where(na_mask, na_rep, data) + return sep.join(data) + + try: + # turn anything in "others" into lists of Series + others = self._get_series_list(others) + except ValueError as err: # do not catch TypeError raised by _get_series_list + raise ValueError( + "If `others` contains arrays or lists (or other " + "list-likes without an index), these must all be " + "of the same length as the calling Series/Index." 
+ ) from err + + # align if required + if any(not data.index.equals(x.index) for x in others): + # Need to add keys for uniqueness in case of duplicate columns + others = concat( + others, + axis=1, + join=(join if join == "inner" else "outer"), + keys=range(len(others)), + sort=False, + copy=False, + ) + data, others = data.align(others, join=join) + others = [others[x] for x in others] # again list of Series + + all_cols = [ensure_object(x) for x in [data] + others] + na_masks = np.array([isna(x) for x in all_cols]) + union_mask = np.logical_or.reduce(na_masks, axis=0) + + if na_rep is None and union_mask.any(): + # no na_rep means NaNs for all rows where any column has a NaN + # only necessary if there are actually any NaNs + result = np.empty(len(data), dtype=object) + np.putmask(result, union_mask, np.nan) + + not_masked = ~union_mask + result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) + elif na_rep is not None and union_mask.any(): + # fill NaNs with na_rep in case there are actually any NaNs + all_cols = [ + np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) + ] + result = cat_safe(all_cols, sep) + else: + # no NaNs - can just concatenate + result = cat_safe(all_cols, sep) + + if isinstance(self._orig, ABCIndexClass): + # add dtype for case that result is all-NA + result = Index(result, dtype=object, name=self._orig.name) + else: # Series + if is_categorical_dtype(self._orig.dtype): + # We need to infer the new categories. + dtype = None + else: + dtype = self._orig.dtype + result = Series(result, dtype=dtype, index=data.index, name=self._orig.name) + return result _shared_docs[ "str_split" @@ -663,7 +733,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): @forbid_nonstring_types(["bytes"]) def split(self, pat=None, n=-1, expand=False): result = self._array._str.split(pat, n, expand) - return self._wrap_result(result, expand, returns_string=expand) + return self._wrap_result(result, returns_string=expand, expand=expand) @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"}) @forbid_nonstring_types(["bytes"]) @@ -778,7 +848,7 @@ def partition(self, sep=" ", expand=True): ) @forbid_nonstring_types(["bytes"]) def rpartition(self, sep=" ", expand=True): - result = self._array._str.rpartion(sep, expand) + result = self._array._str.rpartition(sep, expand) return self._wrap_result(result, expand=expand, returns_string=expand) def get(self, i): @@ -1058,6 +1128,7 @@ def match(self, pat, case=True, flags=0, na=np.nan): result = self._array._str.match(pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na, returns_string=False) + @forbid_nonstring_types(["bytes"]) def fullmatch(self, pat, case=True, flags=0, na=np.nan): """ Determine if each string entirely matches a regular expression. @@ -1093,7 +1164,8 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): r""" Replace each occurrence of pattern/regex in the Series/Index. - Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on the regex value. + Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on + the regex value. 
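# Concretely (a small sketch of the two modes documented above):
#   s.str.replace("f.o", "new", regex=True)    # "f.o" is a regular expression
#   s.str.replace("f.o", "new", regex=False)   # "f.o" is a literal substring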
Parameters ---------- @@ -1309,6 +1381,17 @@ def pad(self, width, side="left", fillchar=" "): 1 --tiger--- dtype: object """ + if not isinstance(fillchar, str): + msg = f"fillchar must be a character, not {type(fillchar).__name__}" + raise TypeError(msg) + + if len(fillchar) != 1: + raise TypeError("fillchar must be a character, not str") + + if not is_integer(width): + msg = f"width must be of integer type, not {type(width).__name__}" + raise TypeError(msg) + result = self._array._str.pad(width, side=side, fillchar=fillchar) return self._wrap_result(result) @@ -1409,7 +1492,7 @@ def zfill(self, width): 4 NaN dtype: object """ - result = self._array._str.pad(width, side="left", fillchar="0") + result = self.pad(width, side="left", fillchar="0") return self._wrap_result(result) def slice(self, start=None, stop=None, step=None): @@ -1588,7 +1671,8 @@ def decode(self, encoding, errors="strict"): f = lambda x: decoder(x, errors)[0] arr = self._array # assert isinstance(arr, (StringArray,)) - return arr._str._map(f) + result = arr._str._map(f) + return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) def encode(self, encoding, errors="strict"): @@ -1682,7 +1766,7 @@ def encode(self, encoding, errors="strict"): ) @forbid_nonstring_types(["bytes"]) def strip(self, to_strip=None): - result = self._array._str.strip(self._parent, to_strip) + result = self._array._str.strip(to_strip) return self._wrap_result(result) @Appender( @@ -1691,7 +1775,7 @@ def strip(self, to_strip=None): ) @forbid_nonstring_types(["bytes"]) def lstrip(self, to_strip=None): - result = self._array._str.lstrip(self._parent, to_strip) + result = self._array._str.lstrip(to_strip) return self._wrap_result(result) @Appender( @@ -1700,7 +1784,7 @@ def lstrip(self, to_strip=None): ) @forbid_nonstring_types(["bytes"]) def rstrip(self, to_strip=None): - result = self._array._str.rstip(self._parent, to_strip) + result = self._array._str.rstrip(to_strip) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1759,7 +1843,7 @@ def wrap(self, width, **kwargs): 1 another line\nto be\nwrapped dtype: object """ - result = self._array._str.wrap(self._parent, width, **kwargs) + result = self._array._str.wrap(width, **kwargs) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1804,15 +1888,8 @@ def get_dummies(self, sep="|"): # XXX: data = self._orig.astype(str) if self._is_categorical else self._parent result, name = self._array._str.get_dummies(sep) # result, name = str_get_dummies(data, sep) - return self._wrap_result( - result, - use_codes=(not self._is_categorical), - name=name, - expand=True, - returns_string=False, - ) + return self._wrap_result(result, name=name, expand=True, returns_string=False,) - # @copy(str_translate) @forbid_nonstring_types(["bytes"]) def translate(self, table): """ @@ -1835,7 +1912,8 @@ def translate(self, table): result = self._array._str.translate(table) return self._wrap_result(result) - def count(self, pat, flags): + @forbid_nonstring_types(["bytes"]) + def count(self, pat, flags=0): """ Count occurrences of pattern in each string of the Series/Index. @@ -1902,6 +1980,7 @@ def count(self, pat, flags): result = self._array._str.count(pat, flags) return self._wrap_result(result, returns_string=False) + @forbid_nonstring_types(["bytes"]) def startswith(self, pat, na=None): """ Test if the start of each string element matches a pattern. 
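# Note on the zfill change above: it now delegates to
# pad(width, side="left", fillchar="0"), i.e. str.rjust under the hood:
#   "42".rjust(5, "0")    # '00042'
# Unlike str.zfill, this gives no special treatment to a leading sign:
#   "-1".zfill(5)         # '-0001'
#   "-1".rjust(5, "0")    # '000-1'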
@@ -1957,6 +2036,7 @@ def startswith(self, pat, na=None): result = self._array._str.startswith(pat, na=na) return self._wrap_result(result, returns_string=False) + @forbid_nonstring_types(["bytes"]) def endswith(self, pat, na=None): """ Test if the end of each string element matches a pattern. @@ -2012,6 +2092,7 @@ def endswith(self, pat, na=None): result = self._array._str.endswith(pat, na=na) return self._wrap_result(result, returns_string=False) + @forbid_nonstring_types(["bytes"]) def findall(self, pat, flags=0): """ Find all occurrences of pattern or regular expression in the Series/Index. @@ -2186,8 +2267,8 @@ def extract(self, pat, flags=0, expand=True): 2 NaN dtype: object """ - result = self._array._str.extract(pat, flags) - return self._wrap_result(result, expand=expand) + # XXX: not dispatched + return str_extract(self, pat, flags, expand=expand) @forbid_nonstring_types(["bytes"]) def extractall(self, pat, flags=0): @@ -2264,8 +2345,8 @@ def extractall(self, pat, flags=0): B 0 b 1 C 0 NaN 1 """ - result = self._array._str.extractall(pat, flags) - return self._wrap_result(result, expand=True) + # XXX: not dispatched + return str_extractall(self._orig, pat, flags) _shared_docs[ "find" @@ -2304,8 +2385,11 @@ def extractall(self, pat, flags=0): ) @forbid_nonstring_types(["bytes"]) def find(self, sub, start=0, end=None): - # result = str_find(self._parent, sub, start=start, end=end, side="left") - result = self._array.str.find(sub, start, end) + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + result = self._array._str.find(sub, start, end) return self._wrap_result(result, returns_string=False) @Appender( @@ -2318,6 +2402,10 @@ def find(self, sub, start=0, end=None): ) @forbid_nonstring_types(["bytes"]) def rfind(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + result = self._array._str.rfind(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) @@ -2381,6 +2469,10 @@ def normalize(self, form): ) @forbid_nonstring_types(["bytes"]) def index(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + result = self._array._str.index(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) @@ -2395,6 +2487,10 @@ def index(self, sub, start=0, end=None): ) @forbid_nonstring_types(["bytes"]) def rindex(self, sub, start=0, end=None): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + result = self._array._str.rindex(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) @@ -2536,31 +2632,37 @@ def len(self): ) @Appender(_shared_docs["casemethods"] % _doc_args["lower"]) + @forbid_nonstring_types(["bytes"]) def lower(self): result = self._array._str.lower() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["upper"]) + @forbid_nonstring_types(["bytes"]) def upper(self): result = self._array._str.upper() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["title"]) + @forbid_nonstring_types(["bytes"]) def title(self): result = self._array._str.title() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["capitalize"]) + @forbid_nonstring_types(["bytes"]) def capitalize(self): result = 
self._array._str.capitalize() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["swapcase"]) + @forbid_nonstring_types(["bytes"]) def swapcase(self): result = self._array._str.swapcase() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["casefold"]) + @forbid_nonstring_types(["bytes"]) def casefold(self): result = self._array._str.casefold() return self._wrap_result(result) @@ -2746,7 +2848,223 @@ def casefold(self): "isdecimal", docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"] ) - @classmethod - def _make_accessor(cls, data): - cls._validate(data) - return cls(data) + +def cat_safe(list_of_columns: List, sep: str): + """ + Auxiliary function for :meth:`str.cat`. + + Same signature as cat_core, but handles TypeErrors in concatenation, which + happen if the arrays in list_of columns have the wrong dtypes or content. + + Parameters + ---------- + list_of_columns : list of numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. + """ + try: + result = cat_core(list_of_columns, sep) + except TypeError: + # if there are any non-string values (wrong dtype or hidden behind + # object dtype), np.sum will fail; catch and return with better message + for column in list_of_columns: + dtype = lib.infer_dtype(column, skipna=True) + if dtype not in ["string", "empty"]: + raise TypeError( + "Concatenation requires list-likes containing only " + "strings (or missing values). Offending values found in " + f"column {dtype}" + ) from None + return result + + +def cat_core(list_of_columns: List, sep: str): + """ + Auxiliary function for :meth:`str.cat` + + Parameters + ---------- + list_of_columns : list of numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. + """ + if sep == "": + # no need to interleave sep if it is empty + arr_of_cols = np.asarray(list_of_columns, dtype=object) + return np.sum(arr_of_cols, axis=0) + list_with_sep = [sep] * (2 * len(list_of_columns) - 1) + list_with_sep[::2] = list_of_columns + arr_with_sep = np.asarray(list_with_sep, dtype=object) + return np.sum(arr_with_sep, axis=0) + + +def _groups_or_na_fun(regex): + """Used in both extract_noexpand and extract_frame""" + if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + empty_row = [np.nan] * regex.groups + + def f(x): + if not isinstance(x, str): + return empty_row + m = regex.search(x) + if m: + return [np.nan if item is None else item for item in m.groups()] + else: + return empty_row + + return f + + +def _result_dtype(arr): + # workaround #27953 + # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails + # when the list of values is empty. + from pandas.core.arrays.string_ import StringDtype + + if isinstance(arr.dtype, StringDtype): + return arr.dtype.name + else: + return object + + +def _get_single_group_name(rx): + try: + return list(rx.groupindex.keys()).pop() + except IndexError: + return None + + +def _str_extract_noexpand(arr, pat, flags=0): + """ + Find groups in each string in the Series using passed regular + expression. 
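# How cat_core's separator interleaving (defined above) plays out, as a
# minimal two-column sketch:
import numpy as np

cols = [np.array(["a", "b"], dtype=object), np.array(["x", "y"], dtype=object)]
with_sep = [cols[0], np.array(["-", "-"], dtype=object), cols[1]]  # sep on odd slots
out = np.sum(np.asarray(with_sep, dtype=object), axis=0)  # elementwise str concat
list(out)  # ['a-x', 'b-y']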
This function is called from + str_extract(expand=False), and can return Series, DataFrame, or + Index. + + """ + from pandas import DataFrame, array + + regex = re.compile(pat, flags=flags) + groups_or_na = _groups_or_na_fun(regex) + result_dtype = _result_dtype(arr) + + if regex.groups == 1: + result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) + name = _get_single_group_name(regex) + # not dispatching, so we have to reconstruct here. + result = array(result, dtype=result_dtype) + else: + if isinstance(arr, ABCIndexClass): + raise ValueError("only one regex group is supported with Index") + name = None + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + if arr.size == 0: + result = DataFrame(columns=columns, dtype=object) + else: + dtype = _result_dtype(arr) + result = DataFrame( + [groups_or_na(val) for val in arr], + columns=columns, + index=arr.index, + dtype=dtype, + ) + return result, name + + +def _str_extract_frame(arr, pat, flags=0): + """ + For each subject string in the Series, extract groups from the + first match of regular expression pat. This function is called from + str_extract(expand=True), and always returns a DataFrame. + + """ + from pandas import DataFrame + + regex = re.compile(pat, flags=flags) + groups_or_na = _groups_or_na_fun(regex) + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + + if len(arr) == 0: + return DataFrame(columns=columns, dtype=object) + try: + result_index = arr.index + except AttributeError: + result_index = None + dtype = _result_dtype(arr) + return DataFrame( + [groups_or_na(val) for val in arr], + columns=columns, + index=result_index, + dtype=dtype, + ) + + +def str_extract(arr, pat, flags=0, expand=True): + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + if expand: + return _str_extract_frame(arr._orig, pat, flags=flags) + else: + # XXX: some dead code now. + # arr = arr._array + # if isinstance(arr, ObjectProxy): + # arr = arr._ndarray + result, name = _str_extract_noexpand(arr._orig, pat, flags=flags) + return arr._wrap_result(result, name=name, expand=expand) + + +def str_extractall(arr, pat, flags=0): + regex = re.compile(pat, flags=flags) + # the regex must contain capture groups. 
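# The `names.get(1 + i, i)` idiom above maps named capture groups to column
# labels and leaves unnamed groups as positional integers (sketch):
import re

regex = re.compile(r"(?P<letter>[ab])(\d)")
names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
[names.get(1 + i, i) for i in range(regex.groups)]  # ['letter', 1]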
+ if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + + if isinstance(arr, ABCIndexClass): + arr = arr.to_series().reset_index(drop=True) + + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + match_list = [] + index_list = [] + is_mi = arr.index.nlevels > 1 + + for subject_key, subject in arr.items(): + if isinstance(subject, str): + + if not is_mi: + subject_key = (subject_key,) + + for match_i, match_tuple in enumerate(regex.findall(subject)): + if isinstance(match_tuple, str): + match_tuple = (match_tuple,) + na_tuple = [np.NaN if group == "" else group for group in match_tuple] + match_list.append(na_tuple) + result_key = tuple(subject_key + (match_i,)) + index_list.append(result_key) + + from pandas import MultiIndex + + index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) + dtype = _result_dtype(arr) + + result = arr._constructor_expanddim( + match_list, index=index, columns=columns, dtype=dtype + ) + return result diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index c32cfffa98126..9d484f449b14b 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -1,33 +1,5 @@ -import unicodedata - - class BaseStringArrayMethods: - """Base class for it.""" + """Base class for array _str accessor.""" def __init__(self, array): self._array = array - - def upper(self): - return self._map(lambda x: x.upper()) - - def isalnum(self): - return self._map(str.isalnum, dtype="bool") - - def capitalize(self): - return self._map(str.capitalize) - - def casefold(self): - return self._map(str.casefold) - - def title(self): - return self._map(str.title) - - def swapcase(self): - return self._map(str.swapcase) - - def lower(self): - return self._map(str.lower) - - def normalize(self, form): - f = lambda x: unicodedata.normalize(form, x) - return self._map(f) diff --git a/pandas/core/strings/categorical_strings.py b/pandas/core/strings/categorical_strings.py new file mode 100644 index 0000000000000..3be3825032988 --- /dev/null +++ b/pandas/core/strings/categorical_strings.py @@ -0,0 +1,17 @@ +import numpy as np + +from pandas.core.algorithms import take_1d +from pandas.core.strings.object_array import ObjectArrayMethods + + +class CategoricalStringMethods(ObjectArrayMethods): + def _map(self, f, na_value=np.nan, dtype=np.dtype(object)): + arr = self._array # Categorical + categories = arr.categories + codes = arr.codes + result = ObjectArrayMethods(categories)._map(f, na_value, dtype) + return take_1d(result, codes, fill_value=na_value) + + def get_dummies(self, sep="|"): + # sep may not be in categories. Just bail on this. 
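# The categorical _map above runs the function once per category and then
# re-expands through the codes. A NumPy-only sketch of that take-with-fill:
import numpy as np

categories = np.array(["a", "b"], dtype=object)
codes = np.array([0, 1, -1, 0])                    # -1 encodes a missing value
mapped = np.array([c.upper() for c in categories], dtype=object)
full = mapped[codes]                               # expand back to full length
full[codes == -1] = np.nan                         # restore the missing slots
list(full)                                         # ['A', 'B', nan, 'A']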
+ return ObjectArrayMethods(self._array.astype(str)).get_dummies(sep) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index acf234b711062..116b53778ac9d 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,10 +1,17 @@ import re +import textwrap +from typing import Pattern, Union +import unicodedata +import warnings import numpy as np import pandas._libs.lib as lib +import pandas._libs.missing as libmissing +import pandas._libs.ops as libops +from pandas._typing import Scalar -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.common import is_re, is_scalar from pandas.core.dtypes.missing import isna from pandas.core.accessor import CachedAccessor @@ -13,9 +20,7 @@ class ObjectArrayMethods(BaseStringArrayMethods): - def _map(self, f, na_mask=True, na_value=np.nan, dtype=np.dtype(object)): - # n.b.: na_mask is the default. - # need to figure out when it was false, maybe split + def _map(self, f, na_value=np.nan, dtype=np.dtype(object)): arr = self._array # object-dtype ndarray. if not len(arr): @@ -23,123 +28,388 @@ def _map(self, f, na_mask=True, na_value=np.nan, dtype=np.dtype(object)): if na_value is None: na_value = np.nan - if isinstance(arr, ABCSeries): - arr = arr._values # TODO: extract_array? if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) - if na_mask: - mask = isna(arr) - convert = not np.all(mask) - try: - result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) - except (TypeError, AttributeError) as e: - # Reraise the exception if callable `f` got wrong number of args. - # The user may want to be warned by this, instead of getting NaN - p_err = ( - r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " - r"(?(3)required )positional arguments?" + mask = isna(arr) + convert = not np.all(mask) + try: + result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) + except (TypeError, AttributeError) as e: + # Reraise the exception if callable `f` got wrong number of args. + # The user may want to be warned by this, instead of getting NaN + p_err = ( + r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " + r"(?(3)required )positional arguments?" + ) + + if len(e.args) >= 1 and re.search(p_err, e.args[0]): + # FIXME: this should be totally avoidable + raise e + + def g(x): + # This type of fallback behavior can be removed once + # we remove object-dtype .str accessor. 
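# The p_err pattern above recognizes CPython's wrong-arity TypeError messages,
# so genuine signature errors re-raise instead of silently becoming NA (sketch):
import re

p_err = (
    r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
    r"(?(3)required )positional arguments?"
)
re.search(p_err, "f() missing 1 required positional argument: 'y'")  # matches
re.search(p_err, "unrelated TypeError")                              # None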
+ try: + return f(x) + except (TypeError, AttributeError): + return na_value + + return self._map(g, na_value=na_value, dtype=dtype) + if na_value is not np.nan: + np.putmask(result, mask, na_value) + if result.dtype == object: + result = lib.maybe_convert_objects(result) + return result + + def __getitem__(self, key): + if isinstance(key, slice): + return self.slice(start=key.start, stop=key.stop, step=key.step) + else: + return self.get(key) + + def count(self, pat, flags=0): + regex = re.compile(pat, flags=flags) + f = lambda x: len(regex.findall(x)) + return self._map(f, dtype="int64") + + def pad(self, width, side="left", fillchar=" "): + if side == "left": + f = lambda x: x.rjust(width, fillchar) + elif side == "right": + f = lambda x: x.ljust(width, fillchar) + elif side == "both": + f = lambda x: x.center(width, fillchar) + else: # pragma: no cover + raise ValueError("Invalid side") + return self._map(f) + + def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + if regex: + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + if regex.groups > 0: + warnings.warn( + "This pattern has match groups. To actually get the " + "groups, use str.extract.", + UserWarning, + stacklevel=3, ) - if len(e.args) >= 1 and re.search(p_err, e.args[0]): - # FIXME: this should be totally avoidable - raise e - - def g(x): - try: - return f(x) - except (TypeError, AttributeError): - return na_value - - return self._map_object(g, dtype=dtype) - if na_value is not np.nan: - np.putmask(result, mask, na_value) - if result.dtype == object: - result = lib.maybe_convert_objects(result) + f = lambda x: regex.search(x) is not None + else: + if case: + f = lambda x: pat in x + else: + upper_pat = pat.upper() + f = lambda x: upper_pat in x.upper() + return self._map(f, na, dtype=np.dtype("bool")) + + def startswith(self, pat, na=np.nan): + f = lambda x: x.startswith(pat) + return self._map(f, na_value=na, dtype=np.dtype(bool)) + + def endswith(self, pat, na=np.nan): + f = lambda x: x.endswith(pat) + return self._map(f, na_value=na, dtype=np.dtype(bool)) + + def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): + # Check whether repl is valid (GH 13438, GH 15055) + if not (isinstance(repl, str) or callable(repl)): + raise TypeError("repl must be a string or callable") + + is_compiled_re = is_re(pat) + if regex: + if is_compiled_re: + if (case is not None) or (flags != 0): + raise ValueError( + "case and flags cannot be set when pat is a compiled regex" + ) + else: + # not a compiled regex + # set default case + if case is None: + case = True + + # add case flag, if provided + if case is False: + flags |= re.IGNORECASE + if is_compiled_re or len(pat) > 1 or flags or callable(repl): + n = n if n >= 0 else 0 + compiled = re.compile(pat, flags=flags) + f = lambda x: compiled.sub(repl=repl, string=x, count=n) + else: + f = lambda x: x.replace(pat, repl, n) + else: + if is_compiled_re: + raise ValueError( + "Cannot use a compiled regex as replacement pattern with " + "regex=False" + ) + if callable(repl): + raise ValueError("Cannot use a callable replacement when regex=False") + f = lambda x: x.replace(pat, repl, n) + + return self._map(f, dtype=str) + + def repeat(self, repeats): + if is_scalar(repeats): + + def scalar_rep(x): + try: + return bytes.__mul__(x, repeats) + except TypeError: + return str.__mul__(x, repeats) + + return self._map(scalar_rep, dtype=str) + else: + from pandas.core.arrays.string_ import StringArray + + def rep(x, r): + if x is 
libmissing.NA: + return x + try: + return bytes.__mul__(x, r) + except TypeError: + return str.__mul__(x, r) + + repeats = np.asarray(repeats, dtype=object) + result = libops.vec_binop(np.asarray(self._array), repeats, rep) + if isinstance(self._array, StringArray): + # Not going through map, so we have to do this here. + result = StringArray._from_sequence(result) return result + + def match( + self, + pat: Union[str, Pattern], + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, + ): + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + f = lambda x: regex.match(x) is not None + return self._map(f, na_value=na, dtype=np.dtype(bool)) + + def fullmatch( + self, + pat: Union[str, Pattern], + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, + ): + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + f = lambda x: regex.fullmatch(x) is not None + return self._map(f, na_value=na, dtype=np.dtype(bool)) + + def encode(self, encoding, errors="strict"): + f = lambda x: x.encode(encoding, errors=errors) + return self._map(f, dtype=object) + + def find(self, sub, start=0, end=None): + return self._find(sub, start, end, side="left") + + def rfind(self, sub, start=0, end=None): + return self._find(sub, start, end, side="right") + + def _find(self, sub, start, end, side): + if side == "left": + method = "find" + elif side == "right": + method = "rfind" + else: # pragma: no cover + raise ValueError("Invalid side") + + if end is None: + f = lambda x: getattr(x, method)(sub, start) else: - return lib.map_infer(arr, f) - - def cat(self, others=None, sep=None, na_rep=None, join="left"): - from pandas import Index, Series, concat - - if isinstance(others, str): - raise ValueError("Did you mean to supply a `sep` keyword?") - if sep is None: - sep = "" - - if isinstance(self._orig, ABCIndexClass): - data = Series(self._orig, index=self._orig) - else: # Series - data = self._orig - - # concatenate Series/Index with itself if no "others" - if others is None: - data = ensure_object(data) - na_mask = isna(data) - if na_rep is None and na_mask.any(): - data = data[~na_mask] - elif na_rep is not None and na_mask.any(): - data = np.where(na_mask, na_rep, data) - return sep.join(data) + f = lambda x: getattr(x, method)(sub, start, end) + return self._map(f, dtype="int64") - try: - # turn anything in "others" into lists of Series - others = self._get_series_list(others) - except ValueError as err: # do not catch TypeError raised by _get_series_list - raise ValueError( - "If `others` contains arrays or lists (or other " - "list-likes without an index), these must all be " - "of the same length as the calling Series/Index." 
- ) from err - - # align if required - if any(not data.index.equals(x.index) for x in others): - # Need to add keys for uniqueness in case of duplicate columns - others = concat( - others, - axis=1, - join=(join if join == "inner" else "outer"), - keys=range(len(others)), - sort=False, - copy=False, - ) - data, others = data.align(others, join=join) - others = [others[x] for x in others] # again list of Series - - all_cols = [ensure_object(x) for x in [data] + others] - na_masks = np.array([isna(x) for x in all_cols]) - union_mask = np.logical_or.reduce(na_masks, axis=0) - - if na_rep is None and union_mask.any(): - # no na_rep means NaNs for all rows where any column has a NaN - # only necessary if there are actually any NaNs - result = np.empty(len(data), dtype=object) - np.putmask(result, union_mask, np.nan) - - not_masked = ~union_mask - result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) - elif na_rep is not None and union_mask.any(): - # fill NaNs with na_rep in case there are actually any NaNs - all_cols = [ - np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) - ] - result = cat_safe(all_cols, sep) + def findall(self, pat, flags=0): + regex = re.compile(pat, flags=flags) + return self._map(regex.findall, dtype="object") + + def get(self, i): + def f(x): + if isinstance(x, dict): + return x.get(i) + elif len(x) > i >= -len(x): + return x[i] + return np.nan + + return self._map(f) + + def index(self, sub, start=0, end=None): + if end: + f = lambda x: x.index(sub, start, end) else: - # no NaNs - can just concatenate - result = cat_safe(all_cols, sep) - - if isinstance(self._orig, ABCIndexClass): - # add dtype for case that result is all-NA - result = Index(result, dtype=object, name=self._orig.name) - else: # Series - if is_categorical_dtype(self._orig.dtype): - # We need to infer the new categories. 
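# The find/rfind pair funnels into one _find helper that picks the str method
# by name; the same getattr trick also backs index/rindex (sketch):
#   method = "find" if side == "left" else "rfind"
#   getattr("abcabc", "find")("b", 1)    # 1
#   getattr("abcabc", "rfind")("b", 1)   # 4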
- dtype = None - else: - dtype = self._orig.dtype - result = Series(result, dtype=dtype, index=data.index, name=self._orig.name) + f = lambda x: x.index(sub, start, end) + return self._map(f, dtype="int64") + + def rindex(self, sub, start=0, end=None): + if end: + f = lambda x: x.rindex(sub, start, end) + else: + f = lambda x: x.rindex(sub, start, end) + return self._map(f, dtype="int64") + + def join(self, sep): + return self._map(sep.join) + + def partition(self, sep, expand): + result = self._map(lambda x: x.partition(sep), dtype="object") return result + def rpartition(self, sep, expand): + return self._map(lambda x: x.rpartition(sep), dtype="object") + + def len(self): + return self._map(len, dtype="int64") + + def slice(self, start=None, stop=None, step=None): + obj = slice(start, stop, step) + return self._map(lambda x: x[obj]) + + def slice_replace(self, start=None, stop=None, repl=None): + if repl is None: + repl = "" + + def f(x): + if x[start:stop] == "": + local_stop = start + else: + local_stop = stop + y = "" + if start is not None: + y += x[:start] + y += repl + if stop is not None: + y += x[local_stop:] + return y + + return self._map(f) + + def split(self, pat=None, n=-1, expand=False): + if pat is None: + if n is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + else: + if len(pat) == 1: + if n is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + else: + if n is None or n == -1: + n = 0 + regex = re.compile(pat) + f = lambda x: regex.split(x, maxsplit=n) + return self._map(f, dtype=object) + + def rsplit(self, pat=None, n=-1): + if n is None or n == 0: + n = -1 + f = lambda x: x.rsplit(pat, n) + return self._map(f, dtype="object") + + def translate(self, table): + return self._map(lambda x: x.translate(table)) + + def wrap(self, width, **kwargs): + kwargs["width"] = width + tw = textwrap.TextWrapper(**kwargs) + return self._map(lambda s: "\n".join(tw.wrap(s))) + + def get_dummies(self, sep="|"): + from pandas import Series + + arr = Series(self._array).fillna("") + try: + arr = sep + arr + sep + except TypeError: + arr = sep + arr.astype(str) + sep + + tags = set() + for ts in Series(arr).str.split(sep): + tags.update(ts) + tags = sorted(tags - {""}) + + dummies = np.empty((len(arr), len(tags)), dtype=np.int64) + + for i, t in enumerate(tags): + pat = sep + t + sep + dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) + return dummies, tags + + def upper(self): + return self._map(lambda x: x.upper()) + + def isalnum(self): + return self._map(str.isalnum, dtype="bool") + + def isalpha(self): + return self._map(str.isalpha, dtype="bool") + + def isdecimal(self): + return self._map(str.isdecimal, dtype="bool") + + def isdigit(self): + return self._map(str.isdigit, dtype="bool") + + def islower(self): + return self._map(str.islower, dtype="bool") + + def isnumeric(self): + return self._map(str.isnumeric, dtype="bool") + + def isspace(self): + return self._map(str.isspace, dtype="bool") + + def istitle(self): + return self._map(str.istitle, dtype="bool") + + def isupper(self): + return self._map(str.isupper, dtype="bool") + + def capitalize(self): + return self._map(str.capitalize) + + def casefold(self): + return self._map(str.casefold) + + def title(self): + return self._map(str.title) + + def swapcase(self): + return self._map(str.swapcase) + + def lower(self): + return self._map(str.lower) + + def normalize(self, form): + f = lambda x: unicodedata.normalize(form, x) + return self._map(f) + + def strip(self, to_strip=None): + return 
self._map(lambda x: x.strip(to_strip)) + + def lstrip(self, to_strip=None): + return self._map(lambda x: x.lstrip(to_strip)) + + def rstrip(self, to_strip=None): + return self._map(lambda x: x.rstrip(to_strip)) + class ObjectProxy(PandasArray): _str = CachedAccessor("str", ObjectArrayMethods) diff --git a/pandas/core/strings_.py b/pandas/core/strings_.py deleted file mode 100644 index 09ed48e59a489..0000000000000 --- a/pandas/core/strings_.py +++ /dev/null @@ -1,775 +0,0 @@ -import codecs -from functools import wraps -import re -import textwrap -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Pattern, Type, Union -import unicodedata -import warnings - -import numpy as np - -import pandas._libs.lib as lib -import pandas._libs.missing as libmissing -import pandas._libs.ops as libops -from pandas._typing import ArrayLike, Dtype, Scalar -from pandas.util._decorators import Appender - -from pandas.core.dtypes.common import ( - ensure_object, - is_bool_dtype, - is_categorical_dtype, - is_extension_array_dtype, - is_integer, - is_integer_dtype, - is_list_like, - is_object_dtype, - is_re, - is_scalar, - is_string_dtype, -) -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCIndexClass, - ABCMultiIndex, - ABCSeries, -) -from pandas.core.dtypes.missing import isna - -from pandas.core.accessor import CachedAccessor -from pandas.core.algorithms import take_1d -from pandas.core.arrays.numpy_ import PandasArray - -if TYPE_CHECKING: - from pandas.arrays import StringArray - -_cpython_optimized_encoders = ( - "utf-8", - "utf8", - "latin-1", - "latin1", - "iso-8859-1", - "mbcs", - "ascii", -) -_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") - -_shared_docs: Dict[str, str] = dict() - - -def cat_core(list_of_columns: List, sep: str): - """ - Auxiliary function for :meth:`str.cat` - - Parameters - ---------- - list_of_columns : list of numpy arrays - List of arrays to be concatenated with sep; - these arrays may not contain NaNs! - sep : string - The separator string for concatenating the columns. - - Returns - ------- - nd.array - The concatenation of list_of_columns with sep. - """ - if sep == "": - # no need to interleave sep if it is empty - arr_of_cols = np.asarray(list_of_columns, dtype=object) - return np.sum(arr_of_cols, axis=0) - list_with_sep = [sep] * (2 * len(list_of_columns) - 1) - list_with_sep[::2] = list_of_columns - arr_with_sep = np.asarray(list_with_sep, dtype=object) - return np.sum(arr_with_sep, axis=0) - - -def cat_safe(list_of_columns: List, sep: str): - """ - Auxiliary function for :meth:`str.cat`. - - Same signature as cat_core, but handles TypeErrors in concatenation, which - happen if the arrays in list_of columns have the wrong dtypes or content. - - Parameters - ---------- - list_of_columns : list of numpy arrays - List of arrays to be concatenated with sep; - these arrays may not contain NaNs! - sep : string - The separator string for concatenating the columns. - - Returns - ------- - nd.array - The concatenation of list_of_columns with sep. - """ - try: - result = cat_core(list_of_columns, sep) - except TypeError: - # if there are any non-string values (wrong dtype or hidden behind - # object dtype), np.sum will fail; catch and return with better message - for column in list_of_columns: - dtype = lib.infer_dtype(column, skipna=True) - if dtype not in ["string", "empty"]: - raise TypeError( - "Concatenation requires list-likes containing only " - "strings (or missing values). 
Offending values found in " - f"column {dtype}" - ) from None - return result - - -def str_count(arr, pat, flags=0): - regex = re.compile(pat, flags=flags) - f = lambda x: len(regex.findall(x)) - return _na_map(f, arr, dtype="int64") - - -def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): - if regex: - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - if regex.groups > 0: - warnings.warn( - "This pattern has match groups. To actually get the " - "groups, use str.extract.", - UserWarning, - stacklevel=3, - ) - - f = lambda x: regex.search(x) is not None - else: - if case: - f = lambda x: pat in x - else: - upper_pat = pat.upper() - f = lambda x: upper_pat in x - uppered = _na_map(lambda x: x.upper(), arr) - return _na_map(f, uppered, na, dtype=np.dtype(bool)) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_startswith(arr, pat, na=np.nan): - f = lambda x: x.startswith(pat) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_endswith(arr, pat, na=np.nan): - f = lambda x: x.endswith(pat) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): - # Check whether repl is valid (GH 13438, GH 15055) - if not (isinstance(repl, str) or callable(repl)): - raise TypeError("repl must be a string or callable") - - is_compiled_re = is_re(pat) - if regex: - if is_compiled_re: - if (case is not None) or (flags != 0): - raise ValueError( - "case and flags cannot be set when pat is a compiled regex" - ) - else: - # not a compiled regex - # set default case - if case is None: - case = True - - # add case flag, if provided - if case is False: - flags |= re.IGNORECASE - if is_compiled_re or len(pat) > 1 or flags or callable(repl): - n = n if n >= 0 else 0 - compiled = re.compile(pat, flags=flags) - f = lambda x: compiled.sub(repl=repl, string=x, count=n) - else: - f = lambda x: x.replace(pat, repl, n) - else: - if is_compiled_re: - raise ValueError( - "Cannot use a compiled regex as replacement pattern with regex=False" - ) - if callable(repl): - raise ValueError("Cannot use a callable replacement when regex=False") - f = lambda x: x.replace(pat, repl, n) - - return _na_map(f, arr, dtype=str) - - -def str_repeat(arr, repeats): - if is_scalar(repeats): - - def scalar_rep(x): - try: - return bytes.__mul__(x, repeats) - except TypeError: - return str.__mul__(x, repeats) - - return _na_map(scalar_rep, arr, dtype=str) - else: - - def rep(x, r): - if x is libmissing.NA: - return x - try: - return bytes.__mul__(x, r) - except TypeError: - return str.__mul__(x, r) - - repeats = np.asarray(repeats, dtype=object) - result = libops.vec_binop(np.asarray(arr), repeats, rep) - return result - - -def str_match( - arr: ArrayLike, - pat: Union[str, Pattern], - case: bool = True, - flags: int = 0, - na: Scalar = np.nan, -): - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - f = lambda x: regex.match(x) is not None - - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_fullmatch( - arr: ArrayLike, - pat: Union[str, Pattern], - case: bool = True, - flags: int = 0, - na: Scalar = np.nan, -): - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - f = lambda x: regex.fullmatch(x) is not None - - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def _get_single_group_name(rx): - try: - return list(rx.groupindex.keys()).pop() - except IndexError: - return None - - -def _groups_or_na_fun(regex): - """Used in 
both extract_noexpand and extract_frame""" - if regex.groups == 0: - raise ValueError("pattern contains no capture groups") - empty_row = [np.nan] * regex.groups - - def f(x): - if not isinstance(x, str): - return empty_row - m = regex.search(x) - if m: - return [np.nan if item is None else item for item in m.groups()] - else: - return empty_row - - return f - - -def _result_dtype(arr): - # workaround #27953 - # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails - # when the list of values is empty. - from pandas.core.arrays.string_ import StringDtype - - if isinstance(arr.dtype, StringDtype): - return arr.dtype.name - else: - return object - - -def _str_extract_noexpand(arr, pat, flags=0): - """ - Find groups in each string in the Series using passed regular - expression. This function is called from - str_extract(expand=False), and can return Series, DataFrame, or - Index. - - """ - from pandas import DataFrame - - regex = re.compile(pat, flags=flags) - groups_or_na = _groups_or_na_fun(regex) - - if regex.groups == 1: - result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) - name = _get_single_group_name(regex) - else: - if isinstance(arr, ABCIndexClass): - raise ValueError("only one regex group is supported with Index") - name = None - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - if arr.empty: - result = DataFrame(columns=columns, dtype=object) - else: - dtype = _result_dtype(arr) - result = DataFrame( - [groups_or_na(val) for val in arr], - columns=columns, - index=arr.index, - dtype=dtype, - ) - return result, name - - -def _str_extract_frame(arr, pat, flags=0): - """ - For each subject string in the Series, extract groups from the - first match of regular expression pat. This function is called from - str_extract(expand=True), and always returns a DataFrame. - - """ - from pandas import DataFrame - - regex = re.compile(pat, flags=flags) - groups_or_na = _groups_or_na_fun(regex) - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - - if len(arr) == 0: - return DataFrame(columns=columns, dtype=object) - try: - result_index = arr.index - except AttributeError: - result_index = None - dtype = _result_dtype(arr) - return DataFrame( - [groups_or_na(val) for val in arr], - columns=columns, - index=result_index, - dtype=dtype, - ) - - -def str_extract(arr, pat, flags=0, expand=True): - if not isinstance(expand, bool): - raise ValueError("expand must be True or False") - if expand: - return _str_extract_frame(arr._orig, pat, flags=flags) - else: - result, name = _str_extract_noexpand(arr._parent, pat, flags=flags) - return arr._wrap_result(result, name=name, expand=expand) - - -def str_extractall(arr, pat, flags=0): - regex = re.compile(pat, flags=flags) - # the regex must contain capture groups. 
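For orientation, the helper being removed here backs `Series.str.extractall`, which pulls every regex match out of each string and stacks the results under an extra "match" index level. A minimal sketch of that user-facing behaviour, using only public pandas API (the sample data is illustrative):

    import pandas as pd

    s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
    # two named capture groups -> two result columns; every match becomes a row
    result = s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
    print(result)
    #          letter digit
    #   match
    # A 0          a     1
    #   1          a     2
    # B 0          b     1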
- if regex.groups == 0: - raise ValueError("pattern contains no capture groups") - - if isinstance(arr, ABCIndexClass): - arr = arr.to_series().reset_index(drop=True) - - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - match_list = [] - index_list = [] - is_mi = arr.index.nlevels > 1 - - for subject_key, subject in arr.items(): - if isinstance(subject, str): - - if not is_mi: - subject_key = (subject_key,) - - for match_i, match_tuple in enumerate(regex.findall(subject)): - if isinstance(match_tuple, str): - match_tuple = (match_tuple,) - na_tuple = [np.NaN if group == "" else group for group in match_tuple] - match_list.append(na_tuple) - result_key = tuple(subject_key + (match_i,)) - index_list.append(result_key) - - from pandas import MultiIndex - - index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) - dtype = _result_dtype(arr) - - result = arr._constructor_expanddim( - match_list, index=index, columns=columns, dtype=dtype - ) - return result - - -def str_get_dummies(arr, sep="|"): - arr = arr.fillna("") - try: - arr = sep + arr + sep - except TypeError: - arr = sep + arr.astype(str) + sep - - tags = set() - for ts in arr.str.split(sep): - tags.update(ts) - tags = sorted(tags - {""}) - - dummies = np.empty((len(arr), len(tags)), dtype=np.int64) - - for i, t in enumerate(tags): - pat = sep + t + sep - dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) - return dummies, tags - - -def str_join(arr, sep): - return _na_map(sep.join, arr, dtype=str) - - -def str_findall(arr, pat, flags=0): - regex = re.compile(pat, flags=flags) - return _na_map(regex.findall, arr) - - -def str_find(arr, sub, start=0, end=None, side="left"): - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "find" - elif side == "right": - method = "rfind" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_index(arr, sub, start=0, end=None, side="left"): - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "index" - elif side == "right": - method = "rindex" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_pad(arr, width, side="left", fillchar=" "): - if not isinstance(fillchar, str): - msg = f"fillchar must be a character, not {type(fillchar).__name__}" - raise TypeError(msg) - - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - - if not is_integer(width): - msg = f"width must be of integer type, not {type(width).__name__}" - raise TypeError(msg) - - if side == "left": - f = lambda x: x.rjust(width, fillchar) - elif side == "right": - f = lambda x: x.ljust(width, fillchar) - elif side == "both": - f = lambda x: x.center(width, fillchar) - else: # pragma: no cover - raise ValueError("Invalid side") - - return _na_map(f, arr, dtype=str) - - -def str_split(arr, pat=None, n=None): - - if pat is None: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - 
if len(pat) == 1: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - if n is None or n == -1: - n = 0 - regex = re.compile(pat) - f = lambda x: regex.split(x, maxsplit=n) - res = _na_map(f, arr) - return res - - -def str_rsplit(arr, pat=None, n=None): - - if n is None or n == 0: - n = -1 - f = lambda x: x.rsplit(pat, n) - res = _na_map(f, arr) - return res - - -def str_slice(arr, start=None, stop=None, step=None): - obj = slice(start, stop, step) - f = lambda x: x[obj] - return _na_map(f, arr, dtype=str) - - -def str_slice_replace(arr, start=None, stop=None, repl=None): - if repl is None: - repl = "" - - def f(x): - if x[start:stop] == "": - local_stop = start - else: - local_stop = stop - y = "" - if start is not None: - y += x[:start] - y += repl - if stop is not None: - y += x[local_stop:] - return y - - return _na_map(f, arr, dtype=str) - - -def str_strip(arr, to_strip=None, side="both"): - """ - Strip whitespace (including newlines) from each string in the - Series/Index. - - Parameters - ---------- - to_strip : str or unicode - side : {'left', 'right', 'both'}, default 'both' - - Returns - ------- - Series or Index - """ - if side == "both": - f = lambda x: x.strip(to_strip) - elif side == "left": - f = lambda x: x.lstrip(to_strip) - elif side == "right": - f = lambda x: x.rstrip(to_strip) - else: # pragma: no cover - raise ValueError("Invalid side") - return _na_map(f, arr, dtype=str) - - -def str_wrap(arr, width, **kwargs): - kwargs["width"] = width - - tw = textwrap.TextWrapper(**kwargs) - - return _na_map(lambda s: "\n".join(tw.wrap(s)), arr, dtype=str) - - -def str_translate(arr, table): - return _na_map(lambda x: x.translate(table), arr, dtype=str) - - -def str_get(arr, i): - def f(x): - if isinstance(x, dict): - return x.get(i) - elif len(x) > i >= -len(x): - return x[i] - return np.nan - - return _na_map(f, arr) - - -def str_decode(arr, encoding, errors="strict"): - """ - Decode character string in the Series/Index using indicated encoding. - - Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in - python3. - - Parameters - ---------- - encoding : str - errors : str, optional - - Returns - ------- - Series or Index - """ - if encoding in _cpython_optimized_decoders: - # CPython optimized implementation - f = lambda x: x.decode(encoding, errors) - else: - decoder = codecs.getdecoder(encoding) - f = lambda x: decoder(x, errors)[0] - return _na_map(f, arr) - - -def str_encode(arr, encoding, errors="strict"): - if encoding in _cpython_optimized_encoders: - # CPython optimized implementation - f = lambda x: x.encode(encoding, errors) - else: - encoder = codecs.getencoder(encoding) - f = lambda x: encoder(x, errors)[0] - return _na_map(f, arr) - - -def forbid_nonstring_types(forbidden, name=None): - """ - Decorator to forbid specific types for a method of StringMethods. - - For calling `.str.{method}` on a Series or Index, it is necessary to first - initialize the :class:`StringMethods` object, and then call the method. - However, different methods allow different input types, and so this can not - be checked during :meth:`StringMethods.__init__`, but must be done on a - per-method basis. This decorator exists to facilitate this process, and - make it explicit which (inferred) types are disallowed by the method. - - :meth:`StringMethods.__init__` allows the *union* of types its different - methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), - namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. 
- - The default string types ['string', 'empty'] are allowed for all methods. - For the additional types ['bytes', 'mixed', 'mixed-integer'], each method - then needs to forbid the types it is not intended for. - - Parameters - ---------- - forbidden : list-of-str or None - List of forbidden non-string types, may be one or more of - `['bytes', 'mixed', 'mixed-integer']`. - name : str, default None - Name of the method to use in the error message. By default, this is - None, in which case the name from the method being wrapped will be - copied. However, for working with further wrappers (like _pat_wrapper - and _noarg_wrapper), it is necessary to specify the name. - - Returns - ------- - func : wrapper - The method to which the decorator is applied, with an added check that - enforces the inferred type to not be in the list of forbidden types. - - Raises - ------ - TypeError - If the inferred type of the underlying data is in `forbidden`. - """ - # deal with None - forbidden = [] if forbidden is None else forbidden - - allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( - forbidden - ) - - def _forbid_nonstring_types(func): - func_name = func.__name__ if name is None else name - - @wraps(func) - def wrapper(self, *args, **kwargs): - if self._inferred_dtype not in allowed_types: - msg = ( - f"Cannot use .str.{func_name} with values of " - f"inferred dtype '{self._inferred_dtype}'." - ) - raise TypeError(msg) - return func(self, *args, **kwargs) - - wrapper.__name__ = func_name - return wrapper - - return _forbid_nonstring_types - - -def _noarg_wrapper( - f, - name=None, - docstring=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper(self): - result = _na_map(f, self._parent, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - wrapper.__name__ = f.__name__ if name is None else name - if docstring is not None: - wrapper.__doc__ = docstring - else: - raise ValueError("Provide docstring") - - return wrapper - - -def _pat_wrapper( - f, - flags=False, - na=False, - name=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper1(self, pat): - result = f(self._parent, pat) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper2(self, pat, flags=0, **kwargs): - result = f(self._parent, pat, flags=flags, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper3(self, pat, na=np.nan): - result = f(self._parent, pat, na=na) - return self._wrap_result(result, returns_string=returns_string) - - wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 - - wrapper.__name__ = f.__name__ if name is None else name - if f.__doc__: - wrapper.__doc__ = f.__doc__ - - return wrapper - - -def copy(source): - """Copy a docstring from another source function (if present)""" - - def do_copy(target): - if source.__doc__: - target.__doc__ = source.__doc__ - return target - - return do_copy diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py deleted file mode 100644 index 40e3f21670ea0..0000000000000 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ /dev/null @@ -1,26 +0,0 @@ -import pytest - -import pandas as pd -import pandas.testing as tm - - -def 
test_eq_all_na(): - a = pd.array([pd.NA, pd.NA], dtype=pd.StringDtype("pyarrow")) - result = a == a - expected = pd.array([pd.NA, pd.NA], dtype="boolean") - tm.assert_extension_array_equal(result, expected) - - -def test_config(): - # python by default - assert pd.StringDtype().storage == "python" - arr = pd.array(["a", "b"]) - assert arr.dtype.storage == "python" - - with pd.option_context("mode.string_storage", "pyarrow"): - assert pd.StringDtype().storage == "pyarrow" - arr = pd.array(["a", "b"]) - assert arr.dtype.storage == "pyarrow" - - with pytest.raises(ValueError): - pd.options.mode.string_storage = "foo" diff --git a/pandas/tests/extension/arrow/test_string.py b/pandas/tests/extension/arrow/test_string.py index f32f1e415ddc7..abd5c1f386dc5 100644 --- a/pandas/tests/extension/arrow/test_string.py +++ b/pandas/tests/extension/arrow/test_string.py @@ -4,9 +4,10 @@ pytest.importorskip("pyarrow", minversion="0.13.0") +from .arrays import ArrowStringDtype # isort:skip + def test_constructor_from_list(): # GH 27673 - result = pd.Series(["E"], dtype=pd.StringDtype(storage="pyarrow")) - assert isinstance(result.dtype, pd.StringDtype) - assert result.dtype.storage == "pyarrow" + result = pd.Series(["E"], dtype=ArrowStringDtype()) + assert isinstance(result.dtype, ArrowStringDtype) diff --git a/pandas/tests/extension/test_string_arrow.py b/pandas/tests/extension/test_string_arrow.py deleted file mode 100644 index 848e8a435b530..0000000000000 --- a/pandas/tests/extension/test_string_arrow.py +++ /dev/null @@ -1,150 +0,0 @@ -import string - -import numpy as np -import pytest - -import pandas as pd -from pandas.core.arrays.string_arrow import ArrowStringArray -from pandas.tests.extension import base - - -@pytest.fixture -def dtype(): - return pd.StringDtype(storage="pyarrow") - - -@pytest.fixture -def data(): - strings = np.random.choice(list(string.ascii_letters), size=100) - while strings[0] == strings[1]: - strings = np.random.choice(list(string.ascii_letters), size=100) - - return ArrowStringArray._from_sequence(strings) - - -@pytest.fixture -def data_missing(): - """Length 2 array with [NA, Valid]""" - return ArrowStringArray._from_sequence([pd.NA, "A"]) - - -@pytest.fixture -def data_for_sorting(): - return ArrowStringArray._from_sequence(["B", "C", "A"]) - - -@pytest.fixture -def data_missing_for_sorting(): - return ArrowStringArray._from_sequence(["B", pd.NA, "A"]) - - -@pytest.fixture -def na_value(): - return pd.NA - - -@pytest.fixture -def data_for_grouping(): - return ArrowStringArray._from_sequence(["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]) - - -class TestDtype(base.BaseDtypeTests): - pass - - -class TestInterface(base.BaseInterfaceTests): - @pytest.mark.xfail(reason="Fails until implement, remove before merge") - def test_view(self, data): - base.BaseInterfaceTests.test_view(self, data) - - -class TestConstructors(base.BaseConstructorsTests): - pass - - -class TestReshaping(base.BaseReshapingTests): - pass - - -class TestGetitem(base.BaseGetitemTests): - @pytest.mark.xfail( - reason="pyarrow.lib.ArrowNotImplementedError: Function " - "fill_null has no kernel matching input types " - "(array[string], scalar[string])" - ) - def test_take_non_na_fill_value(self, data_missing): - super().test_take_non_na_fill_value(data_missing) - - @pytest.mark.xfail( - reason="pyarrow.lib.ArrowNotImplementedError: Function fill_null has no " - "kernel matching input types (array[string], scalar[string])" - ) - def test_reindex_non_na_fill_value(self, data_missing): - 
super().test_reindex_non_na_fill_value(self, data_missing) - - -class TestSetitem(base.BaseSetitemTests): - @pytest.mark.xfail(reason="TODO") - def test_setitem_preserves_views(self, data): - # Unclear where the issue is (pyarrow getitem, our getitem, our slice) - # and what to do here. - super().test_setitem_preserves_views(data) - - -class TestMissing(base.BaseMissingTests): - pass - - -class TestNoReduce(base.BaseNoReduceTests): - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): - op_name = all_numeric_reductions - - if op_name in ["min", "max"]: - return None - - s = pd.Series(data) - with pytest.raises(TypeError): - getattr(s, op_name)(skipna=skipna) - - -class TestMethods(base.BaseMethodsTests): - @pytest.mark.skip(reason="returns nullable") - def test_value_counts(self, all_data, dropna): - return super().test_value_counts(all_data, dropna) - - -class TestCasting(base.BaseCastingTests): - pass - - -class TestComparisonOps(base.BaseComparisonOpsTests): - def _compare_other(self, s, data, op_name, other): - if op_name not in {"__eq__", "__ne__"}: - pytest.skip(f"{op_name} is not implemented.") - result = getattr(s, op_name)(other) - expected = getattr(s.astype(object), op_name)(other).astype("boolean") - self.assert_series_equal(result, expected) - - def test_compare_scalar(self, data, all_compare_operators): - op_name = all_compare_operators - s = pd.Series(data) - self._compare_other(s, data, op_name, "abc") - - def test_compare_array(self, data, all_compare_operators): - op_name = all_compare_operators - s = pd.Series(data) - other = pd.Series([data[0]] * len(data), dtype=data.dtype) - self._compare_other(s, data, op_name, other) - - -class TestParsing(base.BaseParsingTests): - pass - - -class TestPrinting(base.BasePrintingTests): - pass - - -class TestGroupBy(base.BaseGroupbyTests): - pass diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d9396d70f9112..7eee4e5deb2d1 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -724,10 +724,6 @@ def test_count(self): ["foo", "foofoo", np.nan, "foooofooofommmfoo"], dtype=np.object_ ) - result = strings.str_count(values, "f[o]+") - exp = np.array([1, 2, np.nan, 4]) - tm.assert_numpy_array_equal(result, exp) - result = Series(values).str.count("f[o]+") exp = Series([1, 2, np.nan, 4]) assert isinstance(result, Series) @@ -738,10 +734,6 @@ def test_count(self): ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], dtype=object, ) - rs = strings.str_count(mixed, "a") - xp = np.array([1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]) - tm.assert_numpy_array_equal(rs, xp) - rs = Series(mixed).str.count("a") xp = Series([1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]) assert isinstance(rs, Series) @@ -751,46 +743,55 @@ def test_contains(self): values = np.array( ["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_ ) + values = Series(values) pat = "mmm[_]+" - result = strings.str_contains(values, pat) - expected = np.array([False, np.nan, True, True, False], dtype=np.object_) - tm.assert_numpy_array_equal(result, expected) + result = values.str.contains(pat) + expected = Series( + np.array([False, np.nan, True, True, False], dtype=np.object_) + ) + tm.assert_series_equal(result, expected) - result = strings.str_contains(values, pat, regex=False) - expected = np.array([False, np.nan, False, False, True], dtype=np.object_) - tm.assert_numpy_array_equal(result, 
expected) + result = values.str.contains(pat, regex=False) + expected = Series( + np.array([False, np.nan, False, False, True], dtype=np.object_) + ) + tm.assert_series_equal(result, expected) - values = np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=object) - result = strings.str_contains(values, pat) - expected = np.array([False, False, True, True]) + values = Series(np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=object)) + result = values.str.contains(pat) + expected = Series(np.array([False, False, True, True])) assert result.dtype == np.bool_ - tm.assert_numpy_array_equal(result, expected) + tm.assert_series_equal(result, expected) # case insensitive using regex - values = np.array(["Foo", "xYz", "fOOomMm__fOo", "MMM_"], dtype=object) - result = strings.str_contains(values, "FOO|mmm", case=False) - expected = np.array([True, False, True, True]) - tm.assert_numpy_array_equal(result, expected) + values = Series(np.array(["Foo", "xYz", "fOOomMm__fOo", "MMM_"], dtype=object)) + result = values.str.contains("FOO|mmm", case=False) + expected = Series(np.array([True, False, True, True])) + tm.assert_series_equal(result, expected) # case insensitive without regex - result = strings.str_contains(values, "foo", regex=False, case=False) - expected = np.array([True, False, True, False]) - tm.assert_numpy_array_equal(result, expected) + result = Series(values).str.contains("foo", regex=False, case=False) + expected = Series(np.array([True, False, True, False])) + tm.assert_series_equal(result, expected) # mixed - mixed = np.array( - ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], - dtype=object, + mixed = Series( + np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=object, + ) ) - rs = strings.str_contains(mixed, "o") - xp = np.array( - [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], - dtype=np.object_, + rs = mixed.str.contains("o") + xp = Series( + np.array( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], + dtype=np.object_, + ) ) - tm.assert_numpy_array_equal(rs, xp) + tm.assert_series_equal(rs, xp) - rs = Series(mixed).str.contains("o") + rs = mixed.str.contains("o") xp = Series( [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan] ) @@ -798,22 +799,26 @@ def test_contains(self): tm.assert_series_equal(rs, xp) # unicode - values = np.array(["foo", np.nan, "fooommm__foo", "mmm_"], dtype=np.object_) + values = Series( + np.array(["foo", np.nan, "fooommm__foo", "mmm_"], dtype=np.object_) + ) pat = "mmm[_]+" - result = strings.str_contains(values, pat) - expected = np.array([False, np.nan, True, True], dtype=np.object_) - tm.assert_numpy_array_equal(result, expected) + result = values.str.contains(pat) + expected = Series(np.array([False, np.nan, True, True], dtype=np.object_)) + tm.assert_series_equal(result, expected) - result = strings.str_contains(values, pat, na=False) - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) + result = values.str.contains(pat, na=False) + expected = Series(np.array([False, False, True, True])) + tm.assert_series_equal(result, expected) - values = np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=np.object_) - result = strings.str_contains(values, pat) - expected = np.array([False, False, True, True]) + values = Series( + np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=np.object_) + ) + result = values.str.contains(pat) + expected = Series(np.array([False, False, True, True])) 
assert result.dtype == np.bool_ - tm.assert_numpy_array_equal(result, expected) + tm.assert_series_equal(result, expected) def test_contains_for_object_category(self): # gh 22158 @@ -853,15 +858,7 @@ def test_startswith(self): ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], dtype=np.object_, ) - rs = strings.str_startswith(mixed, "f") - xp = np.array( - [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], - dtype=np.object_, - ) - tm.assert_numpy_array_equal(rs, xp) - rs = Series(mixed).str.startswith("f") - assert isinstance(rs, Series) xp = Series( [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan] ) @@ -882,18 +879,10 @@ def test_endswith(self): ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], dtype=object, ) - rs = strings.str_endswith(mixed, "f") - xp = np.array( - [False, np.nan, False, np.nan, np.nan, False, np.nan, np.nan, np.nan], - dtype=np.object_, - ) - tm.assert_numpy_array_equal(rs, xp) - rs = Series(mixed).str.endswith("f") xp = Series( [False, np.nan, False, np.nan, np.nan, False, np.nan, np.nan, np.nan] ) - assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) def test_title(self): @@ -1193,6 +1182,11 @@ def test_match(self): exp = Series([True, np.nan, np.nan]) tm.assert_series_equal(exp, res) + values = Series(["ab", "AB", "abc", "ABC"]) + result = values.str.match("ab", case=False) + expected = Series([True, True, True, True]) + tm.assert_series_equal(result, expected) + def test_fullmatch(self): # GH 32806 values = Series(["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"]) @@ -1209,6 +1203,11 @@ def test_fullmatch(self): string_exp = Series([True, False, np.nan, False], dtype="boolean") tm.assert_series_equal(result, string_exp) + values = Series(["ab", "AB", "abc", "ABC"]) + result = values.str.fullmatch("ab", case=False) + expected = Series([True, True, False, False]) + tm.assert_series_equal(result, expected) + def test_extract_expand_None(self): values = Series(["fooBAD__barBAD", np.nan, "foo"]) with pytest.raises(ValueError, match="expand must be True or False"): @@ -2232,6 +2231,9 @@ def _check(result, expected): with pytest.raises(TypeError, match=msg): result = s.str.index(0) + with pytest.raises(TypeError, match=msg): + result = s.str.rindex(0) + # test with nan s = Series(["abcb", "ab", "bcbe", np.nan]) result = s.str.index("b") From fabc01e2a81767a84d7108fca228e942658e2322 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Sep 2020 07:41:07 -0500 Subject: [PATCH 05/24] wip --- pandas/core/arrays/base.py | 9 +- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/string_.py | 65 +-- pandas/core/arrays/string_arrow.py | 450 ------------------ pandas/core/config_init.py | 13 - pandas/core/strings/__init__.py | 3 + pandas/core/strings/accessor.py | 23 +- pandas/core/strings/base.py | 230 ++++++++- ...{categorical_strings.py => categorical.py} | 0 pandas/core/strings/object_array.py | 6 +- pandas/core/strings/string_array.py | 70 +++ 11 files changed, 322 insertions(+), 549 deletions(-) delete mode 100644 pandas/core/arrays/string_arrow.py rename pandas/core/strings/{categorical_strings.py => categorical.py} (100%) create mode 100644 pandas/core/strings/string_array.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index de22af70643c2..e93cdb608dffb 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -354,8 +354,6 @@ def __ne__(self, other: Any) -> ArrayLike: """ Return for `self != other` (element-wise in-equality). 
""" - if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): - return NotImplemented return ~(self == other) def to_numpy( @@ -459,7 +457,6 @@ def astype(self, dtype, copy=True): from pandas.core.arrays.string_ import StringDtype dtype = pandas_dtype(dtype) - # FIXME: Really hard-code here? if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) @@ -925,9 +922,9 @@ def take( from the right (the default). This is similar to :func:`numpy.take`. - * True: ``-1`` in `indices` indicate missing values. - These values are set to `fill_value`. Any other other negative - value raise a ``ValueError``. + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. fill_value : any, optional Fill value to use for NA-indices when `allow_fill` is True. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f2a27da79ddbb..12d6d21f19e04 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -52,7 +52,7 @@ from pandas.core.missing import interpolate_2d from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.sorting import nargsort -from pandas.core.strings.categorical_strings import CategoricalStringMethods +from pandas.core.strings.categorical import CategoricalStringMethods from pandas.io.formats import console diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 7cc70e3660f81..46d0fedbe8f39 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -6,14 +6,7 @@ from pandas._libs import lib, missing as libmissing from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype -from pandas.core.dtypes.common import ( - is_array_like, - is_bool_dtype, - is_integer_dtype, - is_object_dtype, - is_string_dtype, - pandas_dtype, -) +from pandas.core.dtypes.common import is_array_like, pandas_dtype from pandas import compat from pandas.core import ops @@ -23,7 +16,7 @@ from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna -from pandas.core.strings.object_array import ObjectArrayMethods +from pandas.core.strings.string_array import StringArrayMethods if TYPE_CHECKING: import pyarrow # noqa: F401 @@ -68,7 +61,7 @@ def type(self) -> Type[str]: return str @classmethod - def construct_array_type(self) -> Type["StringArray"]: + def construct_array_type(cls) -> Type["StringArray"]: """ Return the array type associated with this dtype. 
@@ -79,7 +72,7 @@ def construct_array_type(self) -> Type["StringArray"]: return StringArray def __repr__(self) -> str: - return self.name + return "StringDtype" def __from_arrow__( self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"] @@ -104,56 +97,6 @@ def __from_arrow__( return StringArray._concat_same_type(results) -class StringArrayMethods(ObjectArrayMethods): - def _map(self, f, na_value=libmissing.NA, dtype=StringDtype()): - from pandas.arrays import BooleanArray, IntegerArray, StringArray - - arr = self._array - mask = isna(arr) - - assert isinstance(arr, StringArray) - arr = np.asarray(arr) - if na_value is None: - na_value = libmissing.NA - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - result = lib.map_infer_mask( - arr, - f, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=np.dtype(dtype), - ) - - if not na_value_is_na: - mask[:] = False - - return constructor(result, mask) - - elif is_string_dtype(dtype) and not is_object_dtype(dtype): - # i.e. StringDtype - result = lib.map_infer_mask( - arr, f, mask.view("uint8"), convert=False, na_value=na_value - ) - return StringArray(result) - else: - # This is when the result type is object. We reach this when - # -> We know the result type is truly object (e.g. .encode returns bytes - # or .findall returns a list). - # -> We don't know the result type. E.g. `.get` can return anything. - return lib.map_infer_mask(arr, f, mask.view("uint8")) - - class StringArray(PandasArray): """ Extension array for string data. diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py deleted file mode 100644 index 65aa38db4f6f6..0000000000000 --- a/pandas/core/arrays/string_arrow.py +++ /dev/null @@ -1,450 +0,0 @@ -from collections.abc import Iterable -from typing import Any, Optional, Sequence, Tuple, Union - -import numpy as np -import pyarrow as pa -import pyarrow.compute as pc - -from pandas._libs import missing as libmissing -from pandas._typing import ArrayLike - -from pandas.core.dtypes.missing import isna - -from pandas.api.types import ( - is_array_like, - is_bool_dtype, - is_int64_dtype, - is_integer, - is_integer_dtype, - is_scalar, -) -from pandas.core.accessor import CachedAccessor -from pandas.core.algorithms import factorize -from pandas.core.arrays.base import ExtensionArray -from pandas.core.arrays.string_ import StringDtype -from pandas.core.indexers import check_array_indexer - - -def _as_pandas_scalar(arrow_scalar: pa.Scalar) -> Optional[str]: - scalar = arrow_scalar.as_py() - if scalar is None: - return libmissing.NA - else: - return scalar - - -class ArrowStringMethods: - def __init__(self, arr): - self._data = arr - - def upper(self): - import pyarrow.compute as pc - - result = pc.utf8_upper(self._data.data) - return ArrowStringArray(result) - - -class ArrowStringArray(ExtensionArray): - """ - Extension array for string data in a ``pyarrow.ChunkedArray``. - - .. versionadded:: 1.1.0 - - .. warning:: - - ArrowStringArray is considered experimental. The implementation and - parts of the API may change without warning. - - Parameters - ---------- - values : pyarrow.Array or pyarrow.ChunkedArray - The array of data. 
- - Attributes - ---------- - None - - Methods - ------- - None - - See Also - -------- - array - The recommended function for creating a ArrowStringArray. - Series.str - The string methods are available on Series backed by - a ArrowStringArray. - - Notes - ----- - ArrowStringArray returns a BooleanArray for comparison methods. - - Examples - -------- - >>> pd.array(['This is', 'some text', None, 'data.'], dtype="arrow_string") - - ['This is', 'some text', , 'data.'] - Length: 4, dtype: arrow_string - """ - - def __init__(self, values): - if isinstance(values, pa.Array): - self.data = pa.chunked_array([values]) - elif isinstance(values, pa.ChunkedArray): - self.data = values - else: - raise ValueError(f"Unsupported type '{type(values)}' for ArrowStringArray") - self._dtype = StringDtype(storage="pyarrow") - - @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): - # TODO(ARROW-9407): Accept pd.NA in Arrow - scalars_corrected = [None if isna(x) else x for x in scalars] - return cls(pa.array(scalars_corrected, type=pa.string())) - - @property - def dtype(self) -> StringDtype: - """ - An instance of 'StringDtype'. - """ - return self._dtype - - def __array__(self, *args, **kwargs) -> "np.ndarray": - """Correctly construct numpy arrays when passed to `np.asarray()`.""" - return self.data.__array__(*args, **kwargs) - - def __arrow_array__(self, type=None): - """Convert myself to a pyarrow Array or ChunkedArray.""" - return self.data - - @property - def size(self) -> int: - """ - Return the number of elements in this array. - - Returns - ------- - size : int - """ - return len(self.data) - - @property - def shape(self) -> Tuple[int]: - """Return the shape of the data.""" - # This may be patched by pandas to support pseudo-2D operations. - return (len(self.data),) - - @property - def ndim(self) -> int: - """Return the number of dimensions of the underlying data.""" - return 1 - - def __len__(self) -> int: - """ - Length of this array. - - Returns - ------- - length : int - """ - return len(self.data) - - @classmethod - def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): - return cls._from_sequence(strings, dtype=dtype, copy=copy) - - def __getitem__(self, item): - # type (Any) -> Any - """Select a subset of self. - - Parameters - ---------- - item : int, slice, or ndarray - * int: The position in 'self' to get. - * slice: A slice object, where 'start', 'stop', and 'step' are - integers or None - * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' - - Returns - ------- - item : scalar or ExtensionArray - - Notes - ----- - For scalar ``item``, return a scalar value suitable for the array's - type. This should be an instance of ``self.dtype.type``. - For slice ``key``, return an instance of ``ExtensionArray``, even - if the slice is length 0 or 1. - For a boolean mask, return an instance of ``ExtensionArray``, filtered - to the values where ``item`` is True. - """ - item = check_array_indexer(self, item) - - if isinstance(item, Iterable): - if not is_array_like(item): - item = np.array(item) - if len(item) == 0: - return type(self)(pa.chunked_array([], type=pa.string())) - elif is_integer_dtype(item): - return self.take(item) - elif is_bool_dtype(item): - return type(self)(self.data.filter(item)) - else: - raise IndexError( - "Only integers, slices and integer or " - "boolean arrays are valid indices." 
- ) - elif is_integer(item): - if item < 0: - item += len(self) - if item >= len(self): - raise IndexError("index out of bounds") - - value = self.data[item] - if isinstance(value, pa.ChunkedArray): - return type(self)(value) - else: - return _as_pandas_scalar(value) - - @property - def nbytes(self) -> int: - """ - The number of bytes needed to store this object in memory. - """ - return self.data.nbytes - - def isna(self) -> np.ndarray: - """ - Boolean NumPy array indicating if each value is missing. - - This should return a 1-D array the same length as 'self'. - """ - # TODO: Implement .to_numpy for ChunkedArray - return self.data.is_null().to_pandas().values - - def copy(self) -> ExtensionArray: - """ - Return a copy of the array. - - Parameters - ---------- - deep : bool, default False - Also copy the underlying data backing this array. - - Returns - ------- - ExtensionArray - """ - return type(self)(self.data) - - def __eq__(self, other: Any) -> ArrayLike: - """ - Return for `self == other` (element-wise equality). - """ - from pandas import array, Series, DataFrame, Index - - if isinstance(other, (Series, DataFrame, Index)): - return NotImplemented - if isinstance(other, ArrowStringArray): - result = pc.equal(self.data, other.data) - elif is_scalar(other): - result = pc.equal(self.data, pa.scalar(other)) - else: - raise NotImplementedError("Neither scalar nor ArrowStringArray") - - # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray - return array(result.to_pandas().values, dtype="boolean") - - def __setitem__(self, key, value): - # type: (Union[int, np.ndarray], Any) -> None - """Set one or more values inplace. - - Parameters - ---------- - key : int, ndarray, or slice - When called from, e.g. ``Series.__setitem__``, ``key`` will be - one of - - * scalar int - * ndarray of integers. - * boolean ndarray - * slice object - - value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object - value or values to be set of ``key``. - - Returns - ------- - None - """ - key = check_array_indexer(self, key) - - if is_integer(key): - if not is_scalar(value): - raise ValueError("Must pass scalars with scalar indexer") - elif isna(value): - value = None - elif not isinstance(value, str): - raise ValueError("Scalar must be NA or str") - - # Slice data and insert inbetween - new_data = [ - *self.data[0:key].chunks, - pa.array([value], type=pa.string()), - *self.data[(key + 1) :].chunks, - ] - self.data = pa.chunked_array(new_data) - else: - # Convert to integer indices and iteratively assign. - # TODO: Make a faster variant of this in Arrow upstream. - # This is probably extremely slow. - - # Convert all possible input key types to an array of integers - if is_bool_dtype(key): - # TODO(ARROW-9430): Directly support setitem(booleans) - key_array = np.argwhere(key).flatten() - elif isinstance(key, slice): - key_array = np.array(range(len(self))[key]) - else: - # TODO(ARROW-9431): Directly support setitem(integers) - key_array = np.asanyarray(key) - - if is_scalar(value): - value = np.broadcast_to(value, len(key_array)) - else: - value = np.asarray(value) - - if len(key_array) != len(value): - raise ValueError("Length of indexer and values mismatch") - - for k, v in zip(key_array, value): - self[k] = v - - def take( - self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None - ) -> "ExtensionArray": - """ - Take elements from an array. - - Parameters - ---------- - indices : sequence of int - Indices to be taken. 
- allow_fill : bool, default False - How to handle negative values in `indices`. - - * False: negative values in `indices` indicate positional indices - from the right (the default). This is similar to - :func:`numpy.take`. - - * True: negative values in `indices` indicate - missing values. These values are set to `fill_value`. Any other - other negative values raise a ``ValueError``. - - fill_value : any, optional - Fill value to use for NA-indices when `allow_fill` is True. - This may be ``None``, in which case the default NA value for - the type, ``self.dtype.na_value``, is used. - - For many ExtensionArrays, there will be two representations of - `fill_value`: a user-facing "boxed" scalar, and a low-level - physical NA value. `fill_value` should be the user-facing version, - and the implementation should handle translating that to the - physical version for processing the take if necessary. - - Returns - ------- - ExtensionArray - - Raises - ------ - IndexError - When the indices are out of bounds for the array. - ValueError - When `indices` contains negative values other than ``-1`` - and `allow_fill` is True. - - See Also - -------- - numpy.take - api.extensions.take - - Notes - ----- - ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, - ``iloc``, when `indices` is a sequence of values. Additionally, - it's called by :meth:`Series.reindex`, or any other method - that causes realignment, with a `fill_value`. - """ - # TODO: Remove once we got rid of the (indices < 0) check - if not is_array_like(indices): - indices_array = np.asanyarray(indices) - else: - indices_array = indices - - if len(self.data) == 0 and (indices_array >= 0).any(): - raise IndexError("cannot do a non-empty take") - if len(indices_array) > 0 and indices_array.max() >= len(self.data): - raise IndexError("out of bounds value in 'indices'.") - - if allow_fill: - if (indices_array < 0).any(): - if indices_array.min() < -1: - raise ValueError( - "'indicies' contains negative values other " - "-1 with 'allow_fill=True." - ) - # TODO(ARROW-9433): Treat negative indices as NULL - indices_array = pa.array(indices_array, mask=indices_array < 0) - result = self.data.take(indices_array) - if isna(fill_value): - return type(self)(result) - return type(self)(pc.fill_null(result, pa.scalar(fill_value))) - else: - # Nothing to fill - return type(self)(self.data.take(indices)) - else: # allow_fill=False - # TODO(ARROW-9432): Treat negative indices as indices from the right. - if (indices_array < 0).any(): - # Don't modify in-place - indices_array = np.copy(indices_array) - indices_array[indices_array < 0] += len(self.data) - return type(self)(self.data.take(indices_array)) - - def value_counts(self, dropna=True): - from pandas import Series - - if dropna: - na = self.isna() - self = self[~na] - counts = self.data.value_counts() - return Series(counts.field(1), counts.field(0)) - - def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray"]: - # see https://github.com/xhochy/fletcher/blob/master/fletcher/base.py - # doesn't handle dictionary types. 
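The factorize implementation that follows leans on Arrow's dictionary encoding. A short sketch of just the pyarrow part, to show where the codes and uniques come from (plain pyarrow API, independent of this patch):

    import pyarrow as pa

    chunk = pa.array(["a", "b", "a", None])
    encoded = chunk.dictionary_encode()
    # indices are the factorize codes, with null for missing values
    # (mapped to na_sentinel by the method below)
    print(encoded.indices)     # [0, 1, 0, null]
    print(encoded.dictionary)  # ["a", "b"]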
- if self.data.num_chunks == 1: - encoded = self.data.chunk(0).dictionary_encode() - indices = encoded.indices.to_pandas() - if indices.dtype.kind == "f": - indices[np.isnan(indices)] = na_sentinel - indices = indices.astype(int) - if not is_int64_dtype(indices): - indices = indices.astype(np.int64) - return indices.values, type(self)(encoded.dictionary) - else: - np_array = self.data.to_pandas().values - return factorize(np_array, na_sentinel=na_sentinel) - - @classmethod - def _concat_same_type( - cls, to_concat: Sequence["ArrowStringArray"] - ) -> "ArrowStringArray": - return cls( - pa.chunked_array( - [array for ea in to_concat for array in ea.data.iterchunks()] - ) - ) - - str = CachedAccessor("str", ArrowStringMethods) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index c7e0e7ef19010..bfe20551cbcfc 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -504,19 +504,6 @@ def use_inf_as_na_cb(key): ) -string_storage_doc = """ -: string - The default storage for StringDtype. -""" - -with cf.config_prefix("mode"): - cf.register_option( - "string_storage", - "python", - string_storage_doc, - validator=is_one_of_factory(["python", "pyarrow"]), - ) - # Set up the io.excel specific reader configuration. reader_engine_doc = """ : string diff --git a/pandas/core/strings/__init__.py b/pandas/core/strings/__init__.py index c36a96edf7125..bf0ac8ef872ca 100644 --- a/pandas/core/strings/__init__.py +++ b/pandas/core/strings/__init__.py @@ -1 +1,4 @@ from .accessor import StringMethods +from .base import BaseStringArrayMethods + +__all__ = ["StringMethods", "BaseStringArrayMethods"] diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 334769468e671..f5c91b692d0e7 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -143,6 +143,12 @@ class StringMethods(NoNewAttributesMixin): dtype: object """ + # TODO: Dispatch all the methods + # Currently the following are not dispatched to the array + # * cat + # * extract + # * extractall + def __init__(self, data): from pandas.core.arrays.string_ import StringDtype @@ -249,7 +255,6 @@ def _wrap_result( # case we'll want to return the same dtype as the input. # Or we can be wrapping a numeric output, in which case we don't want # to return a StringArray. - # XXX: see if this can be removed. # Ideally the array method returns the right array type. if expand is None: # infer from ndim if expand is not specified @@ -513,7 +518,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): For more examples, see :ref:`here `. """ - # XXX: not dispatched yet. + # TODO: dispatch from pandas import Index, Series, concat if isinstance(others, str): @@ -968,7 +973,6 @@ def join(self, sep): 4 NaN dtype: object """ - # XXX: Does this use the Series? If so then we can't dispatch. result = self._array._str.join(sep) return self._wrap_result(result) @@ -1414,7 +1418,6 @@ def pad(self, width, side="left", fillchar=" "): ------- filled : Series/Index of objects. """ - # XXX: Do we need to dispatch to center, etc, or is it equivalent? @Appender(_shared_docs["str_pad"] % dict(side="left and right", method="center")) @forbid_nonstring_types(["bytes"]) @@ -1885,9 +1888,7 @@ def get_dummies(self, sep="|"): """ # we need to cast to Series of strings as only that has all # methods available for making the dummies... 
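As a reminder of the user-facing contract now being dispatched to the array, `get_dummies` splits on the separator and one-hot encodes the resulting tags (standard pandas API; data illustrative):

    import pandas as pd

    s = pd.Series(["a|b", "a", "a|c"])
    print(s.str.get_dummies(sep="|"))
    #    a  b  c
    # 0  1  1  0
    # 1  1  0  0
    # 2  1  0  1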
-        # XXX: data = self._orig.astype(str) if self._is_categorical else self._parent
         result, name = self._array._str.get_dummies(sep)
-        # result, name = str_get_dummies(data, sep)
         return self._wrap_result(result, name=name, expand=True, returns_string=False,)
 
     @forbid_nonstring_types(["bytes"])
@@ -2032,7 +2033,6 @@ def startswith(self, pat, na=None):
         3    False
         dtype: bool
         """
-        # XXX: changed default na to None
         result = self._array._str.startswith(pat, na=na)
         return self._wrap_result(result, returns_string=False)
 
@@ -2088,7 +2088,6 @@ def endswith(self, pat, na=None):
         3    False
         dtype: bool
         """
-        # XXX: changed default na to None
         result = self._array._str.endswith(pat, na=na)
         return self._wrap_result(result, returns_string=False)
 
@@ -2267,7 +2266,7 @@ def extract(self, pat, flags=0, expand=True):
         2        NaN
         dtype: object
         """
-        # XXX: not dispatched
+        # TODO: dispatch
         return str_extract(self, pat, flags, expand=expand)
 
     @forbid_nonstring_types(["bytes"])
@@ -2345,7 +2344,7 @@ def extractall(self, pat, flags=0):
         B 0          b     1
         C 0        NaN     1
         """
-        # XXX: not dispatched
+        # TODO: dispatch
         return str_extractall(self._orig, pat, flags)
 
 _shared_docs[
@@ -3022,10 +3021,6 @@ def str_extract(arr, pat, flags=0, expand=True):
     if expand:
         return _str_extract_frame(arr._orig, pat, flags=flags)
     else:
-        # XXX: some dead code now.
-        # arr = arr._array
-        # if isinstance(arr, ObjectProxy):
-        #     arr = arr._ndarray
         result, name = _str_extract_noexpand(arr._orig, pat, flags=flags)
         return arr._wrap_result(result, name=name, expand=expand)
 
diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py
index 9d484f449b14b..f9e70f8559fd9 100644
--- a/pandas/core/strings/base.py
+++ b/pandas/core/strings/base.py
@@ -1,5 +1,229 @@
-class BaseStringArrayMethods:
-    """Base class for array _str accessor."""
+import abc
+from typing import Pattern, Union
+
+import numpy as np
+
+from pandas._typing import Scalar
+
+from pandas.core.arrays.base import ExtensionArray
+
+
+class BaseStringArrayMethods(abc.ABC):
+    """
+    Base class for array _str accessor.
+
+    This is where ExtensionArrays can override the implementation of
+    Series.str.<method>. The rough layout is
+
+    * User calls Series.str.<method>
+    * pandas extracts the extension array from the Series
+    * pandas calls ``extension_array._str.<method>(*args, **kwargs)``
+    * pandas wraps the result, to return to the user.
+
+    See :ref:`Series.str` for the docstring of each method.
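Concretely, the dispatch described above looks roughly like the following. Note that `upper` stands in for any string method, and the `_str` accessor spelling is the one introduced on this branch, so this is a sketch of the intended flow rather than released pandas API:

    import pandas as pd

    s = pd.Series(["a", None, "c"], dtype="string")
    arr = s.array              # 1. pandas extracts the extension array
    result = arr._str.upper()  # 2. the array-level accessor does the work
    print(pd.Series(result))   # 3. pandas wraps the result for the user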
+ """ + + def __init__(self, array: ExtensionArray): self._array = array + + def __getitem__(self, key): + if isinstance(key, slice): + return self.slice(start=key.start, stop=key.stop, step=key.step) + else: + return self.get(key) + + @abc.abstractmethod + def count(self, pat, flags=0): + pass + + @abc.abstractmethod + def pad(self, width, side="left", fillchar=" "): + pass + + @abc.abstractmethod + def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + pass + + @abc.abstractmethod + def startswith(self, pat, na=np.nan): + pass + + @abc.abstractmethod + def endswith(self, pat, na=np.nan): + pass + + @abc.abstractmethod + def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): + pass + + @abc.abstractmethod + def repeat(self, repeats): + pass + + @abc.abstractmethod + def match( + self, + pat: Union[str, Pattern], + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, + ): + pass + + @abc.abstractmethod + def fullmatch( + self, + pat: Union[str, Pattern], + case: bool = True, + flags: int = 0, + na: Scalar = np.nan, + ): + pass + + @abc.abstractmethod + def encode(self, encoding, errors="strict"): + pass + + @abc.abstractmethod + def find(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def rfind(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def findall(self, pat, flags=0): + pass + + @abc.abstractmethod + def get(self, i): + pass + + @abc.abstractmethod + def index(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def rindex(self, sub, start=0, end=None): + pass + + @abc.abstractmethod + def join(self, sep): + pass + + @abc.abstractmethod + def partition(self, sep, expand): + pass + + @abc.abstractmethod + def rpartition(self, sep, expand): + pass + + @abc.abstractmethod + def len(self): + pass + + @abc.abstractmethod + def slice(self, start=None, stop=None, step=None): + pass + + @abc.abstractmethod + def slice_replace(self, start=None, stop=None, repl=None): + pass + + @abc.abstractmethod + def translate(self, table): + pass + + @abc.abstractmethod + def wrap(self, width, **kwargs): + pass + + @abc.abstractmethod + def get_dummies(self, sep="|"): + pass + + @abc.abstractmethod + def isalnum(self): + pass + + @abc.abstractmethod + def isalpha(self): + pass + + @abc.abstractmethod + def isdecimal(self): + pass + + @abc.abstractmethod + def isdigit(self): + pass + + @abc.abstractmethod + def islower(self): + pass + + @abc.abstractmethod + def isnumeric(self): + pass + + @abc.abstractmethod + def isspace(self): + pass + + @abc.abstractmethod + def istitle(self): + pass + + @abc.abstractmethod + def isupper(self): + pass + + @abc.abstractmethod + def capitalize(self): + pass + + @abc.abstractmethod + def casefold(self): + pass + + @abc.abstractmethod + def title(self): + pass + + @abc.abstractmethod + def swapcase(self): + pass + + @abc.abstractmethod + def lower(self): + pass + + @abc.abstractmethod + def upper(self): + pass + + @abc.abstractmethod + def normalize(self, form): + pass + + @abc.abstractmethod + def strip(self, to_strip=None): + pass + + @abc.abstractmethod + def lstrip(self, to_strip=None): + pass + + @abc.abstractmethod + def rstrip(self, to_strip=None): + pass + + @abc.abstractmethod + def split(self, pat=None, n=-1, expand=False): + pass + + @abc.abstractmethod + def rsplit(self, pat=None, n=-1): + pass diff --git a/pandas/core/strings/categorical_strings.py b/pandas/core/strings/categorical.py similarity index 100% rename from pandas/core/strings/categorical_strings.py rename to 
pandas/core/strings/categorical.py
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 116b53778ac9d..dc6144f0320cf 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -20,8 +20,12 @@
 
 
 class ObjectArrayMethods(BaseStringArrayMethods):
-    def _map(self, f, na_value=np.nan, dtype=np.dtype(object)):
+    def _map(self, f, na_value=None, dtype=None):
         arr = self._array  # object-dtype ndarray.
+        if dtype is None:
+            dtype = np.dtype("object")
+        if na_value is None:
+            na_value = np.nan
 
         if not len(arr):
             return np.ndarray(0, dtype=dtype)
diff --git a/pandas/core/strings/string_array.py b/pandas/core/strings/string_array.py
new file mode 100644
index 0000000000000..cfc7cb5db9c64
--- /dev/null
+++ b/pandas/core/strings/string_array.py
@@ -0,0 +1,68 @@
+from typing import Type, Union
+
+import numpy as np
+
+from pandas._libs import lib, missing as libmissing
+
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_integer_dtype,
+    is_object_dtype,
+    is_string_dtype,
+)
+
+from pandas.core.missing import isna
+from pandas.core.strings.object_array import ObjectArrayMethods
+
+
+class StringArrayMethods(ObjectArrayMethods):
+    def _map(self, f, na_value=None, dtype=None):
+        from pandas.arrays import BooleanArray, IntegerArray, StringArray
+        from pandas.core.arrays.string_ import StringDtype
+
+        if dtype is None:
+            dtype = StringDtype()
+        if na_value is None:
+            na_value = libmissing.NA
+
+        arr = self._array
+        mask = isna(arr)
+
+        arr = np.asarray(arr)
+
+        if is_integer_dtype(dtype) or is_bool_dtype(dtype):
+            constructor: Union[Type[IntegerArray], Type[BooleanArray]]
+            if is_integer_dtype(dtype):
+                constructor = IntegerArray
+            else:
+                constructor = BooleanArray
+
+            na_value_is_na = isna(na_value)
+            if na_value_is_na:
+                na_value = 1
+            result = lib.map_infer_mask(
+                arr,
+                f,
+                mask.view("uint8"),
+                convert=False,
+                na_value=na_value,
+                dtype=np.dtype(dtype),
+            )
+
+            if not na_value_is_na:
+                mask[:] = False
+
+            return constructor(result, mask)
+
+        elif is_string_dtype(dtype) and not is_object_dtype(dtype):
+            # i.e. StringDtype
+            result = lib.map_infer_mask(
+                arr, f, mask.view("uint8"), convert=False, na_value=na_value
+            )
+            return StringArray(result)
+        else:
+            # This is when the result type is object. We reach this when
+            # -> We know the result type is truly object (e.g. .encode returns bytes
+            # or .findall returns a list).
+            # -> We don't know the result type. E.g. `.get` can return anything.
+ return lib.map_infer_mask(arr, f, mask.view("uint8")) From a4d4ad5b7e68c4b06fa257cd4abc8dbfb8441591 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Sep 2020 08:18:21 -0500 Subject: [PATCH 06/24] remove old --- pandas/core/strings.py | 3650 ---------------------------------------- 1 file changed, 3650 deletions(-) delete mode 100644 pandas/core/strings.py diff --git a/pandas/core/strings.py b/pandas/core/strings.py deleted file mode 100644 index ab6c9cfb51414..0000000000000 --- a/pandas/core/strings.py +++ /dev/null @@ -1,3650 +0,0 @@ -import codecs -from functools import wraps -import re -import textwrap -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Pattern, Type, Union -import warnings - -import numpy as np - -import pandas._libs.lib as lib -import pandas._libs.missing as libmissing -import pandas._libs.ops as libops -from pandas._typing import ArrayLike, Dtype, Scalar -from pandas.util._decorators import Appender - -from pandas.core.dtypes.common import ( - ensure_object, - is_bool_dtype, - is_categorical_dtype, - is_extension_array_dtype, - is_integer, - is_integer_dtype, - is_list_like, - is_object_dtype, - is_re, - is_scalar, - is_string_dtype, -) -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCIndexClass, - ABCMultiIndex, - ABCSeries, -) -from pandas.core.dtypes.missing import isna - -from pandas.core.algorithms import take_1d -from pandas.core.base import NoNewAttributesMixin -from pandas.core.construction import extract_array - -if TYPE_CHECKING: - from pandas.arrays import StringArray - -_cpython_optimized_encoders = ( - "utf-8", - "utf8", - "latin-1", - "latin1", - "iso-8859-1", - "mbcs", - "ascii", -) -_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") - -_shared_docs: Dict[str, str] = dict() - - -def cat_core(list_of_columns: List, sep: str): - """ - Auxiliary function for :meth:`str.cat` - - Parameters - ---------- - list_of_columns : list of numpy arrays - List of arrays to be concatenated with sep; - these arrays may not contain NaNs! - sep : string - The separator string for concatenating the columns. - - Returns - ------- - nd.array - The concatenation of list_of_columns with sep. - """ - if sep == "": - # no need to interleave sep if it is empty - arr_of_cols = np.asarray(list_of_columns, dtype=object) - return np.sum(arr_of_cols, axis=0) - list_with_sep = [sep] * (2 * len(list_of_columns) - 1) - list_with_sep[::2] = list_of_columns - arr_with_sep = np.asarray(list_with_sep, dtype=object) - return np.sum(arr_with_sep, axis=0) - - -def cat_safe(list_of_columns: List, sep: str): - """ - Auxiliary function for :meth:`str.cat`. - - Same signature as cat_core, but handles TypeErrors in concatenation, which - happen if the arrays in list_of columns have the wrong dtypes or content. - - Parameters - ---------- - list_of_columns : list of numpy arrays - List of arrays to be concatenated with sep; - these arrays may not contain NaNs! - sep : string - The separator string for concatenating the columns. - - Returns - ------- - nd.array - The concatenation of list_of_columns with sep. 
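For reference, the interleave-and-sum trick in `cat_core` above concatenates columns element-wise by alternating the data arrays with the separator. An equivalent, slightly more explicit sketch of the same idea (using `functools.reduce` in place of `np.sum` over an interleaved object array):

    from functools import reduce

    import numpy as np

    cols = [np.array(["x", "y"], dtype=object), np.array(["1", "2"], dtype=object)]
    sep = "-"
    parts = [sep] * (2 * len(cols) - 1)
    parts[::2] = cols                        # [col0, "-", col1]
    out = reduce(lambda a, b: a + b, parts)  # elementwise string concatenation
    print(out)                               # ['x-1' 'y-2']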
- """ - try: - result = cat_core(list_of_columns, sep) - except TypeError: - # if there are any non-string values (wrong dtype or hidden behind - # object dtype), np.sum will fail; catch and return with better message - for column in list_of_columns: - dtype = lib.infer_dtype(column, skipna=True) - if dtype not in ["string", "empty"]: - raise TypeError( - "Concatenation requires list-likes containing only " - "strings (or missing values). Offending values found in " - f"column {dtype}" - ) from None - return result - - -def _na_map(f, arr, na_result=None, dtype=np.dtype(object)): - if is_extension_array_dtype(arr.dtype): - if na_result is None: - na_result = libmissing.NA - # just StringDtype - arr = extract_array(arr) - return _map_stringarray(f, arr, na_value=na_result, dtype=dtype) - if na_result is None: - na_result = np.nan - return _map_object(f, arr, na_mask=True, na_value=na_result, dtype=dtype) - - -def _map_stringarray( - func: Callable[[str], Any], arr: "StringArray", na_value: Any, dtype: Dtype -) -> ArrayLike: - """ - Map a callable over valid elements of a StringArray. - - Parameters - ---------- - func : Callable[[str], Any] - Apply to each valid element. - arr : StringArray - na_value : Any - The value to use for missing values. By default, this is - the original value (NA). - dtype : Dtype - The result dtype to use. Specifying this avoids an intermediate - object-dtype allocation. - - Returns - ------- - ArrayLike - An ExtensionArray for integer or string dtypes, otherwise - an ndarray. - - """ - from pandas.arrays import BooleanArray, IntegerArray, StringArray - - mask = isna(arr) - - assert isinstance(arr, StringArray) - arr = np.asarray(arr) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - result = lib.map_infer_mask( - arr, - func, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=np.dtype(dtype), - ) - - if not na_value_is_na: - mask[:] = False - - return constructor(result, mask) - - elif is_string_dtype(dtype) and not is_object_dtype(dtype): - # i.e. StringDtype - result = lib.map_infer_mask( - arr, func, mask.view("uint8"), convert=False, na_value=na_value - ) - return StringArray(result) - else: - # This is when the result type is object. We reach this when - # -> We know the result type is truly object (e.g. .encode returns bytes - # or .findall returns a list). - # -> We don't know the result type. E.g. `.get` can return anything. - return lib.map_infer_mask(arr, func, mask.view("uint8")) - - -def _map_object(f, arr, na_mask=False, na_value=np.nan, dtype=np.dtype(object)): - if not len(arr): - return np.ndarray(0, dtype=dtype) - - if isinstance(arr, ABCSeries): - arr = arr._values # TODO: extract_array? - if not isinstance(arr, np.ndarray): - arr = np.asarray(arr, dtype=object) - if na_mask: - mask = isna(arr) - convert = not np.all(mask) - try: - result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) - except (TypeError, AttributeError) as e: - # Reraise the exception if callable `f` got wrong number of args. - # The user may want to be warned by this, instead of getting NaN - p_err = ( - r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " - r"(?(3)required )positional arguments?" 
- ) - - if len(e.args) >= 1 and re.search(p_err, e.args[0]): - # FIXME: this should be totally avoidable - raise e - - def g(x): - try: - return f(x) - except (TypeError, AttributeError): - return na_value - - return _map_object(g, arr, dtype=dtype) - if na_value is not np.nan: - np.putmask(result, mask, na_value) - if result.dtype == object: - result = lib.maybe_convert_objects(result) - return result - else: - return lib.map_infer(arr, f) - - -def str_count(arr, pat, flags=0): - """ - Count occurrences of pattern in each string of the Series/Index. - - This function is used to count the number of times a particular regex - pattern is repeated in each of the string elements of the - :class:`~pandas.Series`. - - Parameters - ---------- - pat : str - Valid regular expression. - flags : int, default 0, meaning no flags - Flags for the `re` module. For a complete list, `see here - `_. - **kwargs - For compatibility with other string methods. Not used. - - Returns - ------- - Series or Index - Same type as the calling object containing the integer counts. - - See Also - -------- - re : Standard library module for regular expressions. - str.count : Standard library version, without regular expression support. - - Notes - ----- - Some characters need to be escaped when passing in `pat`. - eg. ``'$'`` has a special meaning in regex and must be escaped when - finding this literal character. - - Examples - -------- - >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) - >>> s.str.count('a') - 0 0.0 - 1 0.0 - 2 2.0 - 3 2.0 - 4 NaN - 5 0.0 - 6 1.0 - dtype: float64 - - Escape ``'$'`` to find the literal dollar sign. - - >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) - >>> s.str.count('\\$') - 0 1 - 1 0 - 2 1 - 3 2 - 4 2 - 5 0 - dtype: int64 - - This is also available on Index - - >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') - Int64Index([0, 0, 2, 1], dtype='int64') - """ - regex = re.compile(pat, flags=flags) - f = lambda x: len(regex.findall(x)) - return _na_map(f, arr, dtype="int64") - - -def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): - """ - Test if pattern or regex is contained within a string of a Series or Index. - - Return boolean Series or Index based on whether a given pattern or regex is - contained within a string of a Series or Index. - - Parameters - ---------- - pat : str - Character sequence or regular expression. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Flags to pass through to the re module, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. - regex : bool, default True - If True, assumes the pat is a regular expression. - - If False, treats the pat as a literal string. - - Returns - ------- - Series or Index of boolean values - A Series or Index of boolean values indicating whether the - given pattern is contained within the string of each element - of the Series or Index. - - See Also - -------- - match : Analogous, but stricter, relying on re.match instead of re.search. - Series.str.startswith : Test if the start of each string element matches a - pattern. - Series.str.endswith : Same as startswith, but tests the end of string. - - Examples - -------- - Returning a Series of booleans using only a literal pattern. 
- - >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) - >>> s1.str.contains('og', regex=False) - 0 False - 1 True - 2 False - 3 False - 4 NaN - dtype: object - - Returning an Index of booleans using only a literal pattern. - - >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) - >>> ind.str.contains('23', regex=False) - Index([False, False, False, True, nan], dtype='object') - - Specifying case sensitivity using `case`. - - >>> s1.str.contains('oG', case=True, regex=True) - 0 False - 1 False - 2 False - 3 False - 4 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN` replaces NaN values - with `False`. If Series or Index does not contain NaN values - the resultant dtype will be `bool`, otherwise, an `object` dtype. - - >>> s1.str.contains('og', na=False, regex=True) - 0 False - 1 True - 2 False - 3 False - 4 False - dtype: bool - - Returning 'house' or 'dog' when either expression occurs in a string. - - >>> s1.str.contains('house|dog', regex=True) - 0 False - 1 True - 2 True - 3 False - 4 NaN - dtype: object - - Ignoring case sensitivity using `flags` with regex. - - >>> import re - >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True) - 0 False - 1 False - 2 True - 3 False - 4 NaN - dtype: object - - Returning any digit using regular expression. - - >>> s1.str.contains('\\d', regex=True) - 0 False - 1 False - 2 False - 3 True - 4 NaN - dtype: object - - Ensure `pat` is a not a literal pattern when `regex` is set to True. - Note in the following example one might expect only `s2[1]` and `s2[3]` to - return `True`. However, '.0' as a regex matches any character - followed by a 0. - - >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) - >>> s2.str.contains('.0', regex=True) - 0 True - 1 True - 2 False - 3 True - 4 False - dtype: bool - """ - if regex: - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - if regex.groups > 0: - warnings.warn( - "This pattern has match groups. To actually get the " - "groups, use str.extract.", - UserWarning, - stacklevel=3, - ) - - f = lambda x: regex.search(x) is not None - else: - if case: - f = lambda x: pat in x - else: - upper_pat = pat.upper() - f = lambda x: upper_pat in x - uppered = _na_map(lambda x: x.upper(), arr) - return _na_map(f, uppered, na, dtype=np.dtype(bool)) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_startswith(arr, pat, na=np.nan): - """ - Test if the start of each string element matches a pattern. - - Equivalent to :meth:`str.startswith`. - - Parameters - ---------- - pat : str - Character sequence. Regular expressions are not accepted. - na : object, default NaN - Object shown if element tested is not a string. - - Returns - ------- - Series or Index of bool - A Series of booleans indicating whether the given pattern matches - the start of each string element. - - See Also - -------- - str.startswith : Python standard library string method. - Series.str.endswith : Same as startswith, but tests the end of string. - Series.str.contains : Tests if string element contains a pattern. - - Examples - -------- - >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) - >>> s - 0 bat - 1 Bear - 2 cat - 3 NaN - dtype: object - - >>> s.str.startswith('b') - 0 True - 1 False - 2 False - 3 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN`. 
- - >>> s.str.startswith('b', na=False) - 0 True - 1 False - 2 False - 3 False - dtype: bool - """ - f = lambda x: x.startswith(pat) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_endswith(arr, pat, na=np.nan): - """ - Test if the end of each string element matches a pattern. - - Equivalent to :meth:`str.endswith`. - - Parameters - ---------- - pat : str - Character sequence. Regular expressions are not accepted. - na : object, default NaN - Object shown if element tested is not a string. - - Returns - ------- - Series or Index of bool - A Series of booleans indicating whether the given pattern matches - the end of each string element. - - See Also - -------- - str.endswith : Python standard library string method. - Series.str.startswith : Same as endswith, but tests the start of string. - Series.str.contains : Tests if string element contains a pattern. - - Examples - -------- - >>> s = pd.Series(['bat', 'bear', 'caT', np.nan]) - >>> s - 0 bat - 1 bear - 2 caT - 3 NaN - dtype: object - - >>> s.str.endswith('t') - 0 True - 1 False - 2 False - 3 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN`. - - >>> s.str.endswith('t', na=False) - 0 True - 1 False - 2 False - 3 False - dtype: bool - """ - f = lambda x: x.endswith(pat) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): - r""" - Replace each occurrence of pattern/regex in the Series/Index. - - Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on the regex value. - - Parameters - ---------- - pat : str or compiled regex - String can be a character sequence or regular expression. - repl : str or callable - Replacement string or a callable. The callable is passed the regex - match object and must return a replacement string to be used. - See :func:`re.sub`. - n : int, default -1 (all) - Number of replacements to make from start. - case : bool, default None - Determines if replace is case sensitive: - - - If True, case sensitive (the default if `pat` is a string) - - Set to False for case insensitive - - Cannot be set if `pat` is a compiled regex. - - flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled - regex. - regex : bool, default True - Determines if assumes the passed-in pattern is a regular expression: - - - If True, assumes the passed-in pattern is a regular expression. - - If False, treats the pattern as a literal string - - Cannot be set to False if `pat` is a compiled regex or `repl` is - a callable. - - Returns - ------- - Series or Index of object - A copy of the object with all matching occurrences of `pat` replaced by - `repl`. - - Raises - ------ - ValueError - * if `regex` is False and `repl` is a callable or `pat` is a compiled - regex - * if `pat` is a compiled regex and `case` or `flags` is set - - Notes - ----- - When `pat` is a compiled regex, all flags should be included in the - compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled - regex will raise an error. - - Examples - -------- - When `pat` is a string and `regex` is True (the default), the given `pat` - is compiled as a regex. When `repl` is a string, it replaces matching - regex patterns as with :meth:`re.sub`. 
NaN value(s) in the Series are
-    left as is:
-
-    >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
-    0    bao
-    1    baz
-    2    NaN
-    dtype: object
-
-    When `pat` is a string and `regex` is False, every `pat` is replaced with
-    `repl` as with :meth:`str.replace`:
-
-    >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
-    0    bao
-    1    fuz
-    2    NaN
-    dtype: object
-
-    When `repl` is a callable, it is called on every `pat` using
-    :func:`re.sub`. The callable should expect one positional argument
-    (a regex object) and return a string.
-
-    To get the idea:
-
-    >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)
-    0    <re.Match object; span=(0, 1), match='f'>oo
-    1    <re.Match object; span=(0, 1), match='f'>uz
-    2                                              NaN
-    dtype: object
-
-    Reverse every lowercase alphabetic word:
-
-    >>> repl = lambda m: m.group(0)[::-1]
-    >>> pd.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl)
-    0    oof 123
-    1    rab zab
-    2        NaN
-    dtype: object
-
-    Using regex groups (extract second group and swap case):
-
-    >>> pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)"
-    >>> repl = lambda m: m.group('two').swapcase()
-    >>> pd.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl)
-    0    tWO
-    1    bAR
-    dtype: object
-
-    Using a compiled regex with flags
-
-    >>> import re
-    >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)
-    >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar')
-    0    foo
-    1    bar
-    2    NaN
-    dtype: object
-    """
-    # Check whether repl is valid (GH 13438, GH 15055)
-    if not (isinstance(repl, str) or callable(repl)):
-        raise TypeError("repl must be a string or callable")
-
-    is_compiled_re = is_re(pat)
-    if regex:
-        if is_compiled_re:
-            if (case is not None) or (flags != 0):
-                raise ValueError(
-                    "case and flags cannot be set when pat is a compiled regex"
-                )
-        else:
-            # not a compiled regex
-            # set default case
-            if case is None:
-                case = True
-
-            # add case flag, if provided
-            if case is False:
-                flags |= re.IGNORECASE
-        if is_compiled_re or len(pat) > 1 or flags or callable(repl):
-            n = n if n >= 0 else 0
-            compiled = re.compile(pat, flags=flags)
-            f = lambda x: compiled.sub(repl=repl, string=x, count=n)
-        else:
-            f = lambda x: x.replace(pat, repl, n)
-    else:
-        if is_compiled_re:
-            raise ValueError(
-                "Cannot use a compiled regex as replacement pattern with regex=False"
-            )
-        if callable(repl):
-            raise ValueError("Cannot use a callable replacement when regex=False")
-        f = lambda x: x.replace(pat, repl, n)
-
-    return _na_map(f, arr, dtype=str)
-
-
-def str_repeat(arr, repeats):
-    """
-    Duplicate each string in the Series or Index.
-
-    Parameters
-    ----------
-    repeats : int or sequence of int
-        Same value for all (int) or different value per (sequence).
-
-    Returns
-    -------
-    Series or Index of object
-        Series or Index of repeated string objects specified by
-        input parameter repeats.
- - Examples - -------- - >>> s = pd.Series(['a', 'b', 'c']) - >>> s - 0 a - 1 b - 2 c - dtype: object - - Single int repeats string in Series - - >>> s.str.repeat(repeats=2) - 0 aa - 1 bb - 2 cc - dtype: object - - Sequence of int repeats corresponding string in Series - - >>> s.str.repeat(repeats=[1, 2, 3]) - 0 a - 1 bb - 2 ccc - dtype: object - """ - if is_scalar(repeats): - - def scalar_rep(x): - try: - return bytes.__mul__(x, repeats) - except TypeError: - return str.__mul__(x, repeats) - - return _na_map(scalar_rep, arr, dtype=str) - else: - - def rep(x, r): - if x is libmissing.NA: - return x - try: - return bytes.__mul__(x, r) - except TypeError: - return str.__mul__(x, r) - - repeats = np.asarray(repeats, dtype=object) - result = libops.vec_binop(np.asarray(arr), repeats, rep) - return result - - -def str_match( - arr: ArrayLike, - pat: Union[str, Pattern], - case: bool = True, - flags: int = 0, - na: Scalar = np.nan, -): - """ - Determine if each string starts with a match of a regular expression. - - Parameters - ---------- - pat : str - Character sequence or regular expression. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. - - Returns - ------- - Series/array of boolean values - - See Also - -------- - fullmatch : Stricter matching that requires the entire string to match. - contains : Analogous, but less strict, relying on re.search instead of - re.match. - extract : Extract matched groups. - """ - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - f = lambda x: regex.match(x) is not None - - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_fullmatch( - arr: ArrayLike, - pat: Union[str, Pattern], - case: bool = True, - flags: int = 0, - na: Scalar = np.nan, -): - """ - Determine if each string entirely matches a regular expression. - - .. versionadded:: 1.1.0 - - Parameters - ---------- - pat : str - Character sequence or regular expression. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. - - Returns - ------- - Series/array of boolean values - - See Also - -------- - match : Similar, but also returns `True` when only a *prefix* of the string - matches the regular expression. - extract : Extract matched groups. - """ - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - f = lambda x: regex.fullmatch(x) is not None - - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def _get_single_group_name(rx): - try: - return list(rx.groupindex.keys()).pop() - except IndexError: - return None - - -def _groups_or_na_fun(regex): - """Used in both extract_noexpand and extract_frame""" - if regex.groups == 0: - raise ValueError("pattern contains no capture groups") - empty_row = [np.nan] * regex.groups - - def f(x): - if not isinstance(x, str): - return empty_row - m = regex.search(x) - if m: - return [np.nan if item is None else item for item in m.groups()] - else: - return empty_row - - return f - - -def _result_dtype(arr): - # workaround #27953 - # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails - # when the list of values is empty. 
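
(A quick illustration of the helper's contract, assuming it is in scope in a
REPL; extract results keep the nullable string dtype only when the input
already has it:

>>> _result_dtype(pd.Series(["a1"], dtype="string"))
'string'
>>> _result_dtype(pd.Series(["a1"]))
<class 'object'>
)
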
- if arr.dtype.name == "string": - return "string" - else: - return object - - -def _str_extract_noexpand(arr, pat, flags=0): - """ - Find groups in each string in the Series using passed regular - expression. This function is called from - str_extract(expand=False), and can return Series, DataFrame, or - Index. - - """ - from pandas import DataFrame - - regex = re.compile(pat, flags=flags) - groups_or_na = _groups_or_na_fun(regex) - - if regex.groups == 1: - result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) - name = _get_single_group_name(regex) - else: - if isinstance(arr, ABCIndexClass): - raise ValueError("only one regex group is supported with Index") - name = None - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - if arr.empty: - result = DataFrame(columns=columns, dtype=object) - else: - dtype = _result_dtype(arr) - result = DataFrame( - [groups_or_na(val) for val in arr], - columns=columns, - index=arr.index, - dtype=dtype, - ) - return result, name - - -def _str_extract_frame(arr, pat, flags=0): - """ - For each subject string in the Series, extract groups from the - first match of regular expression pat. This function is called from - str_extract(expand=True), and always returns a DataFrame. - - """ - from pandas import DataFrame - - regex = re.compile(pat, flags=flags) - groups_or_na = _groups_or_na_fun(regex) - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - - if len(arr) == 0: - return DataFrame(columns=columns, dtype=object) - try: - result_index = arr.index - except AttributeError: - result_index = None - dtype = _result_dtype(arr) - return DataFrame( - [groups_or_na(val) for val in arr], - columns=columns, - index=result_index, - dtype=dtype, - ) - - -def str_extract(arr, pat, flags=0, expand=True): - r""" - Extract capture groups in the regex `pat` as columns in a DataFrame. - - For each subject string in the Series, extract groups from the - first match of regular expression `pat`. - - Parameters - ---------- - pat : str - Regular expression pattern with capturing groups. - flags : int, default 0 (no flags) - Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that - modify regular expression matching for things like case, - spaces, etc. For more details, see :mod:`re`. - expand : bool, default True - If True, return DataFrame with one column per capture group. - If False, return a Series/Index if there is one capture group - or DataFrame if there are multiple capture groups. - - Returns - ------- - DataFrame or Series or Index - A DataFrame with one row for each subject string, and one - column for each group. Any capture group names in regular - expression pat will be used for column names; otherwise - capture group numbers will be used. The dtype of each result - column is always object, even when no match is found. If - ``expand=False`` and pat has only one capture group, then - return a Series (if subject is a Series) or Index (if subject - is an Index). - - See Also - -------- - extractall : Returns all matches (not just the first match). - - Examples - -------- - A pattern with two groups will return a DataFrame with two columns. - Non-matches will be NaN. - - >>> s = pd.Series(['a1', 'b2', 'c3']) - >>> s.str.extract(r'([ab])(\d)') - 0 1 - 0 a 1 - 1 b 2 - 2 NaN NaN - - A pattern may contain optional groups. 
-
-    >>> s.str.extract(r'([ab])?(\d)')
-         0  1
-    0    a  1
-    1    b  2
-    2  NaN  3
-
-    Named groups will become column names in the result.
-
-    >>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
-      letter digit
-    0      a     1
-    1      b     2
-    2    NaN   NaN
-
-    A pattern with one group will return a DataFrame with one column
-    if expand=True.
-
-    >>> s.str.extract(r'[ab](\d)', expand=True)
-         0
-    0    1
-    1    2
-    2  NaN
-
-    A pattern with one group will return a Series if expand=False.
-
-    >>> s.str.extract(r'[ab](\d)', expand=False)
-    0      1
-    1      2
-    2    NaN
-    dtype: object
-    """
-    if not isinstance(expand, bool):
-        raise ValueError("expand must be True or False")
-    if expand:
-        return _str_extract_frame(arr._orig, pat, flags=flags)
-    else:
-        result, name = _str_extract_noexpand(arr._parent, pat, flags=flags)
-        return arr._wrap_result(result, name=name, expand=expand)
-
-
-def str_extractall(arr, pat, flags=0):
-    r"""
-    Extract capture groups in the regex `pat` as columns in DataFrame.
-
-    For each subject string in the Series, extract groups from all
-    matches of regular expression pat. When each subject string in the
-    Series has exactly one match, extractall(pat).xs(0, level='match')
-    is the same as extract(pat).
-
-    Parameters
-    ----------
-    pat : str
-        Regular expression pattern with capturing groups.
-    flags : int, default 0 (no flags)
-        A ``re`` module flag, for example ``re.IGNORECASE``. These allow
-        to modify regular expression matching for things like case, spaces,
-        etc. Multiple flags can be combined with the bitwise OR operator,
-        for example ``re.IGNORECASE | re.MULTILINE``.
-
-    Returns
-    -------
-    DataFrame
-        A ``DataFrame`` with one row for each match, and one column for each
-        group. Its rows have a ``MultiIndex`` with first levels that come from
-        the subject ``Series``. The last level is named 'match' and indexes the
-        matches in each item of the ``Series``. Any capture group names in
-        regular expression pat will be used for column names; otherwise capture
-        group numbers will be used.
-
-    See Also
-    --------
-    extract : Returns first match only (not all matches).
-
-    Examples
-    --------
-    A pattern with one group will return a DataFrame with one column.
-    Indices with no matches will not appear in the result.
-
-    >>> s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
-    >>> s.str.extractall(r"[ab](\d)")
-             0
-      match
-    A 0      1
-      1      2
-    B 0      1
-
-    Capture group names are used for column names of the result.
-
-    >>> s.str.extractall(r"[ab](?P<digit>\d)")
-            digit
-      match
-    A 0         1
-      1         2
-    B 0         1
-
-    A pattern with two groups will return a DataFrame with two columns.
-
-    >>> s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")
-            letter digit
-      match
-    A 0          a     1
-      1          a     2
-    B 0          b     1
-
-    Optional groups that do not match are NaN in the result.
-
-    >>> s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")
-            letter digit
-      match
-    A 0          a     1
-      1          a     2
-    B 0          b     1
-    C 0        NaN     1
-    """
-    regex = re.compile(pat, flags=flags)
-    # the regex must contain capture groups.
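
(The guard below fails fast on group-less patterns — a behavior sketch,
assuming a plain Series:

>>> pd.Series(["a1"]).str.extractall(r"[ab]")
Traceback (most recent call last):
  ...
ValueError: pattern contains no capture groups
)
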
- if regex.groups == 0: - raise ValueError("pattern contains no capture groups") - - if isinstance(arr, ABCIndexClass): - arr = arr.to_series().reset_index(drop=True) - - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - match_list = [] - index_list = [] - is_mi = arr.index.nlevels > 1 - - for subject_key, subject in arr.items(): - if isinstance(subject, str): - - if not is_mi: - subject_key = (subject_key,) - - for match_i, match_tuple in enumerate(regex.findall(subject)): - if isinstance(match_tuple, str): - match_tuple = (match_tuple,) - na_tuple = [np.NaN if group == "" else group for group in match_tuple] - match_list.append(na_tuple) - result_key = tuple(subject_key + (match_i,)) - index_list.append(result_key) - - from pandas import MultiIndex - - index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) - dtype = _result_dtype(arr) - - result = arr._constructor_expanddim( - match_list, index=index, columns=columns, dtype=dtype - ) - return result - - -def str_get_dummies(arr, sep="|"): - """ - Return DataFrame of dummy/indicator variables for Series. - - Each string in Series is split by sep and returned as a DataFrame - of dummy/indicator variables. - - Parameters - ---------- - sep : str, default "|" - String to split on. - - Returns - ------- - DataFrame - Dummy variables corresponding to values of the Series. - - See Also - -------- - get_dummies : Convert categorical variable into dummy/indicator - variables. - - Examples - -------- - >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies() - a b c - 0 1 1 0 - 1 1 0 0 - 2 1 0 1 - - >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies() - a b c - 0 1 1 0 - 1 0 0 0 - 2 1 0 1 - """ - arr = arr.fillna("") - try: - arr = sep + arr + sep - except TypeError: - arr = sep + arr.astype(str) + sep - - tags = set() - for ts in arr.str.split(sep): - tags.update(ts) - tags = sorted(tags - {""}) - - dummies = np.empty((len(arr), len(tags)), dtype=np.int64) - - for i, t in enumerate(tags): - pat = sep + t + sep - dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) - return dummies, tags - - -def str_join(arr, sep): - """ - Join lists contained as elements in the Series/Index with passed delimiter. - - If the elements of a Series are lists themselves, join the content of these - lists using the delimiter passed to the function. - This function is an equivalent to :meth:`str.join`. - - Parameters - ---------- - sep : str - Delimiter to use between list entries. - - Returns - ------- - Series/Index: object - The list entries concatenated by intervening occurrences of the - delimiter. - - Raises - ------ - AttributeError - If the supplied Series contains neither strings nor lists. - - See Also - -------- - str.join : Standard library version of this method. - Series.str.split : Split strings around given separator/delimiter. - - Notes - ----- - If any of the list items is not a string object, the result of the join - will be `NaN`. - - Examples - -------- - Example with a list that contains non-string elements. - - >>> s = pd.Series([['lion', 'elephant', 'zebra'], - ... [1.1, 2.2, 3.3], - ... ['cat', np.nan, 'dog'], - ... ['cow', 4.5, 'goat'], - ... ['duck', ['swan', 'fish'], 'guppy']]) - >>> s - 0 [lion, elephant, zebra] - 1 [1.1, 2.2, 3.3] - 2 [cat, nan, dog] - 3 [cow, 4.5, goat] - 4 [duck, [swan, fish], guppy] - dtype: object - - Join all lists using a '-'. 
The lists containing object(s) of types other - than str will produce a NaN. - - >>> s.str.join('-') - 0 lion-elephant-zebra - 1 NaN - 2 NaN - 3 NaN - 4 NaN - dtype: object - """ - return _na_map(sep.join, arr, dtype=str) - - -def str_findall(arr, pat, flags=0): - """ - Find all occurrences of pattern or regular expression in the Series/Index. - - Equivalent to applying :func:`re.findall` to all the elements in the - Series/Index. - - Parameters - ---------- - pat : str - Pattern or regular expression. - flags : int, default 0 - Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which - means no flags). - - Returns - ------- - Series/Index of lists of strings - All non-overlapping matches of pattern or regular expression in each - string of this Series/Index. - - See Also - -------- - count : Count occurrences of pattern or regular expression in each string - of the Series/Index. - extractall : For each string in the Series, extract groups from all matches - of regular expression and return a DataFrame with one row for each - match and one column for each group. - re.findall : The equivalent ``re`` function to all non-overlapping matches - of pattern or regular expression in string, as a list of strings. - - Examples - -------- - >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) - - The search for the pattern 'Monkey' returns one match: - - >>> s.str.findall('Monkey') - 0 [] - 1 [Monkey] - 2 [] - dtype: object - - On the other hand, the search for the pattern 'MONKEY' doesn't return any - match: - - >>> s.str.findall('MONKEY') - 0 [] - 1 [] - 2 [] - dtype: object - - Flags can be added to the pattern or regular expression. For instance, - to find the pattern 'MONKEY' ignoring the case: - - >>> import re - >>> s.str.findall('MONKEY', flags=re.IGNORECASE) - 0 [] - 1 [Monkey] - 2 [] - dtype: object - - When the pattern matches more than one string in the Series, all matches - are returned: - - >>> s.str.findall('on') - 0 [on] - 1 [on] - 2 [] - dtype: object - - Regular expressions are supported too. For instance, the search for all the - strings ending with the word 'on' is shown next: - - >>> s.str.findall('on$') - 0 [on] - 1 [] - 2 [] - dtype: object - - If the pattern is found more than once in the same string, then a list of - multiple strings is returned: - - >>> s.str.findall('b') - 0 [] - 1 [] - 2 [b, b] - dtype: object - """ - regex = re.compile(pat, flags=flags) - return _na_map(regex.findall, arr) - - -def str_find(arr, sub, start=0, end=None, side="left"): - """ - Return indexes in each strings in the Series/Index where the - substring is fully contained between [start:end]. Return -1 on failure. - - Parameters - ---------- - sub : str - Substring being searched. - start : int - Left edge index. - end : int - Right edge index. - side : {'left', 'right'}, default 'left' - Specifies a starting side, equivalent to ``find`` or ``rfind``. - - Returns - ------- - Series or Index - Indexes where substring is found. 
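
(The ``side`` switch maps to :meth:`str.find` vs :meth:`str.rfind` — a small
doctest, assuming a plain object-dtype Series:

>>> s = pd.Series(["abcab"])
>>> s.str.find("ab")
0    0
dtype: int64
>>> s.str.rfind("ab")
0    3
dtype: int64
)
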
- """ - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "find" - elif side == "right": - method = "rfind" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_index(arr, sub, start=0, end=None, side="left"): - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "index" - elif side == "right": - method = "rindex" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_pad(arr, width, side="left", fillchar=" "): - """ - Pad strings in the Series/Index up to width. - - Parameters - ---------- - width : int - Minimum width of resulting string; additional characters will be filled - with character defined in `fillchar`. - side : {'left', 'right', 'both'}, default 'left' - Side from which to fill resulting string. - fillchar : str, default ' ' - Additional character for filling, default is whitespace. - - Returns - ------- - Series or Index of object - Returns Series or Index with minimum number of char in object. - - See Also - -------- - Series.str.rjust : Fills the left side of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='left')``. - Series.str.ljust : Fills the right side of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='right')``. - Series.str.center : Fills boths sides of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='both')``. - Series.str.zfill : Pad strings in the Series/Index by prepending '0' - character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``. 
- - Examples - -------- - >>> s = pd.Series(["caribou", "tiger"]) - >>> s - 0 caribou - 1 tiger - dtype: object - - >>> s.str.pad(width=10) - 0 caribou - 1 tiger - dtype: object - - >>> s.str.pad(width=10, side='right', fillchar='-') - 0 caribou--- - 1 tiger----- - dtype: object - - >>> s.str.pad(width=10, side='both', fillchar='-') - 0 -caribou-- - 1 --tiger--- - dtype: object - """ - if not isinstance(fillchar, str): - msg = f"fillchar must be a character, not {type(fillchar).__name__}" - raise TypeError(msg) - - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - - if not is_integer(width): - msg = f"width must be of integer type, not {type(width).__name__}" - raise TypeError(msg) - - if side == "left": - f = lambda x: x.rjust(width, fillchar) - elif side == "right": - f = lambda x: x.ljust(width, fillchar) - elif side == "both": - f = lambda x: x.center(width, fillchar) - else: # pragma: no cover - raise ValueError("Invalid side") - - return _na_map(f, arr, dtype=str) - - -def str_split(arr, pat=None, n=None): - - if pat is None: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - if len(pat) == 1: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - if n is None or n == -1: - n = 0 - regex = re.compile(pat) - f = lambda x: regex.split(x, maxsplit=n) - res = _na_map(f, arr) - return res - - -def str_rsplit(arr, pat=None, n=None): - - if n is None or n == 0: - n = -1 - f = lambda x: x.rsplit(pat, n) - res = _na_map(f, arr) - return res - - -def str_slice(arr, start=None, stop=None, step=None): - """ - Slice substrings from each element in the Series or Index. - - Parameters - ---------- - start : int, optional - Start position for slice operation. - stop : int, optional - Stop position for slice operation. - step : int, optional - Step size for slice operation. - - Returns - ------- - Series or Index of object - Series or Index from sliced substring from original string object. - - See Also - -------- - Series.str.slice_replace : Replace a slice with a string. - Series.str.get : Return element at position. - Equivalent to `Series.str.slice(start=i, stop=i+1)` with `i` - being the position. - - Examples - -------- - >>> s = pd.Series(["koala", "fox", "chameleon"]) - >>> s - 0 koala - 1 fox - 2 chameleon - dtype: object - - >>> s.str.slice(start=1) - 0 oala - 1 ox - 2 hameleon - dtype: object - - >>> s.str.slice(start=-1) - 0 a - 1 x - 2 n - dtype: object - - >>> s.str.slice(stop=2) - 0 ko - 1 fo - 2 ch - dtype: object - - >>> s.str.slice(step=2) - 0 kaa - 1 fx - 2 caeen - dtype: object - - >>> s.str.slice(start=0, stop=5, step=3) - 0 kl - 1 f - 2 cm - dtype: object - - Equivalent behaviour to: - - >>> s.str[0:5:3] - 0 kl - 1 f - 2 cm - dtype: object - """ - obj = slice(start, stop, step) - f = lambda x: x[obj] - return _na_map(f, arr, dtype=str) - - -def str_slice_replace(arr, start=None, stop=None, repl=None): - """ - Replace a positional slice of a string with another value. - - Parameters - ---------- - start : int, optional - Left index position to use for the slice. If not specified (None), - the slice is unbounded on the left, i.e. slice from the start - of the string. - stop : int, optional - Right index position to use for the slice. If not specified (None), - the slice is unbounded on the right, i.e. slice until the - end of the string. - repl : str, optional - String for replacement. If not specified (None), the sliced region - is replaced with an empty string. 
- - Returns - ------- - Series or Index - Same type as the original object. - - See Also - -------- - Series.str.slice : Just slicing without replacement. - - Examples - -------- - >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde']) - >>> s - 0 a - 1 ab - 2 abc - 3 abdc - 4 abcde - dtype: object - - Specify just `start`, meaning replace `start` until the end of the - string with `repl`. - - >>> s.str.slice_replace(1, repl='X') - 0 aX - 1 aX - 2 aX - 3 aX - 4 aX - dtype: object - - Specify just `stop`, meaning the start of the string to `stop` is replaced - with `repl`, and the rest of the string is included. - - >>> s.str.slice_replace(stop=2, repl='X') - 0 X - 1 X - 2 Xc - 3 Xdc - 4 Xcde - dtype: object - - Specify `start` and `stop`, meaning the slice from `start` to `stop` is - replaced with `repl`. Everything before or after `start` and `stop` is - included as is. - - >>> s.str.slice_replace(start=1, stop=3, repl='X') - 0 aX - 1 aX - 2 aX - 3 aXc - 4 aXde - dtype: object - """ - if repl is None: - repl = "" - - def f(x): - if x[start:stop] == "": - local_stop = start - else: - local_stop = stop - y = "" - if start is not None: - y += x[:start] - y += repl - if stop is not None: - y += x[local_stop:] - return y - - return _na_map(f, arr, dtype=str) - - -def str_strip(arr, to_strip=None, side="both"): - """ - Strip whitespace (including newlines) from each string in the - Series/Index. - - Parameters - ---------- - to_strip : str or unicode - side : {'left', 'right', 'both'}, default 'both' - - Returns - ------- - Series or Index - """ - if side == "both": - f = lambda x: x.strip(to_strip) - elif side == "left": - f = lambda x: x.lstrip(to_strip) - elif side == "right": - f = lambda x: x.rstrip(to_strip) - else: # pragma: no cover - raise ValueError("Invalid side") - return _na_map(f, arr, dtype=str) - - -def str_wrap(arr, width, **kwargs): - r""" - Wrap strings in Series/Index at specified line width. - - This method has the same keyword parameters and defaults as - :class:`textwrap.TextWrapper`. - - Parameters - ---------- - width : int - Maximum line width. - expand_tabs : bool, optional - If True, tab characters will be expanded to spaces (default: True). - replace_whitespace : bool, optional - If True, each whitespace character (as defined by string.whitespace) - remaining after tab expansion will be replaced by a single space - (default: True). - drop_whitespace : bool, optional - If True, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True). - break_long_words : bool, optional - If True, then words longer than width will be broken in order to ensure - that no lines are longer than width. If it is false, long words will - not be broken, and some lines may be longer than width (default: True). - break_on_hyphens : bool, optional - If True, wrapping will occur preferably on whitespace and right after - hyphens in compound words, as it is customary in English. If false, - only whitespaces will be considered as potentially good places for line - breaks, but you need to set break_long_words to false if you want truly - insecable words (default: True). - - Returns - ------- - Series or Index - - Notes - ----- - Internally, this method uses a :class:`textwrap.TextWrapper` instance with - default settings. 
To achieve behavior matching R's stringr library str_wrap - function, use the arguments: - - - expand_tabs = False - - replace_whitespace = True - - drop_whitespace = True - - break_long_words = False - - break_on_hyphens = False - - Examples - -------- - >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) - >>> s.str.wrap(12) - 0 line to be\nwrapped - 1 another line\nto be\nwrapped - dtype: object - """ - kwargs["width"] = width - - tw = textwrap.TextWrapper(**kwargs) - - return _na_map(lambda s: "\n".join(tw.wrap(s)), arr, dtype=str) - - -def str_translate(arr, table): - """ - Map all characters in the string through the given mapping table. - - Equivalent to standard :meth:`str.translate`. - - Parameters - ---------- - table : dict - Table is a mapping of Unicode ordinals to Unicode ordinals, strings, or - None. Unmapped characters are left untouched. - Characters mapped to None are deleted. :meth:`str.maketrans` is a - helper function for making translation tables. - - Returns - ------- - Series or Index - """ - return _na_map(lambda x: x.translate(table), arr, dtype=str) - - -def str_get(arr, i): - """ - Extract element from each component at specified position. - - Extract element from lists, tuples, or strings in each element in the - Series/Index. - - Parameters - ---------- - i : int - Position of element to extract. - - Returns - ------- - Series or Index - - Examples - -------- - >>> s = pd.Series(["String", - ... (1, 2, 3), - ... ["a", "b", "c"], - ... 123, - ... -456, - ... {1: "Hello", "2": "World"}]) - >>> s - 0 String - 1 (1, 2, 3) - 2 [a, b, c] - 3 123 - 4 -456 - 5 {1: 'Hello', '2': 'World'} - dtype: object - - >>> s.str.get(1) - 0 t - 1 2 - 2 b - 3 NaN - 4 NaN - 5 Hello - dtype: object - - >>> s.str.get(-1) - 0 g - 1 3 - 2 c - 3 NaN - 4 NaN - 5 None - dtype: object - """ - - def f(x): - if isinstance(x, dict): - return x.get(i) - elif len(x) > i >= -len(x): - return x[i] - return np.nan - - return _na_map(f, arr) - - -def str_decode(arr, encoding, errors="strict"): - """ - Decode character string in the Series/Index using indicated encoding. - - Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in - python3. - - Parameters - ---------- - encoding : str - errors : str, optional - - Returns - ------- - Series or Index - """ - if encoding in _cpython_optimized_decoders: - # CPython optimized implementation - f = lambda x: x.decode(encoding, errors) - else: - decoder = codecs.getdecoder(encoding) - f = lambda x: decoder(x, errors)[0] - return _na_map(f, arr) - - -def str_encode(arr, encoding, errors="strict"): - """ - Encode character string in the Series/Index using indicated encoding. - - Equivalent to :meth:`str.encode`. - - Parameters - ---------- - encoding : str - errors : str, optional - - Returns - ------- - encoded : Series/Index of objects - """ - if encoding in _cpython_optimized_encoders: - # CPython optimized implementation - f = lambda x: x.encode(encoding, errors) - else: - encoder = codecs.getencoder(encoding) - f = lambda x: encoder(x, errors)[0] - return _na_map(f, arr) - - -def forbid_nonstring_types(forbidden, name=None): - """ - Decorator to forbid specific types for a method of StringMethods. - - For calling `.str.{method}` on a Series or Index, it is necessary to first - initialize the :class:`StringMethods` object, and then call the method. - However, different methods allow different input types, and so this can not - be checked during :meth:`StringMethods.__init__`, but must be done on a - per-method basis. 
This decorator exists to facilitate this process, and - make it explicit which (inferred) types are disallowed by the method. - - :meth:`StringMethods.__init__` allows the *union* of types its different - methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), - namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. - - The default string types ['string', 'empty'] are allowed for all methods. - For the additional types ['bytes', 'mixed', 'mixed-integer'], each method - then needs to forbid the types it is not intended for. - - Parameters - ---------- - forbidden : list-of-str or None - List of forbidden non-string types, may be one or more of - `['bytes', 'mixed', 'mixed-integer']`. - name : str, default None - Name of the method to use in the error message. By default, this is - None, in which case the name from the method being wrapped will be - copied. However, for working with further wrappers (like _pat_wrapper - and _noarg_wrapper), it is necessary to specify the name. - - Returns - ------- - func : wrapper - The method to which the decorator is applied, with an added check that - enforces the inferred type to not be in the list of forbidden types. - - Raises - ------ - TypeError - If the inferred type of the underlying data is in `forbidden`. - """ - # deal with None - forbidden = [] if forbidden is None else forbidden - - allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( - forbidden - ) - - def _forbid_nonstring_types(func): - func_name = func.__name__ if name is None else name - - @wraps(func) - def wrapper(self, *args, **kwargs): - if self._inferred_dtype not in allowed_types: - msg = ( - f"Cannot use .str.{func_name} with values of " - f"inferred dtype '{self._inferred_dtype}'." - ) - raise TypeError(msg) - return func(self, *args, **kwargs) - - wrapper.__name__ = func_name - return wrapper - - return _forbid_nonstring_types - - -def _noarg_wrapper( - f, - name=None, - docstring=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper(self): - result = _na_map(f, self._parent, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - wrapper.__name__ = f.__name__ if name is None else name - if docstring is not None: - wrapper.__doc__ = docstring - else: - raise ValueError("Provide docstring") - - return wrapper - - -def _pat_wrapper( - f, - flags=False, - na=False, - name=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper1(self, pat): - result = f(self._parent, pat) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper2(self, pat, flags=0, **kwargs): - result = f(self._parent, pat, flags=flags, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper3(self, pat, na=np.nan): - result = f(self._parent, pat, na=na) - return self._wrap_result(result, returns_string=returns_string, fill_value=na) - - wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 - - wrapper.__name__ = f.__name__ if name is None else name - if f.__doc__: - wrapper.__doc__ = f.__doc__ - - return wrapper - - -def copy(source): - """Copy a docstring from another source function (if present)""" - - def do_copy(target): - if source.__doc__: - target.__doc__ = source.__doc__ - return target - 
- return do_copy - - -class StringMethods(NoNewAttributesMixin): - """ - Vectorized string functions for Series and Index. - - NAs stay NA unless handled otherwise by a particular method. - Patterned after Python's string methods, with some inspiration from - R's stringr package. - - Examples - -------- - >>> s = pd.Series(["A_Str_Series"]) - >>> s - 0 A_Str_Series - dtype: object - - >>> s.str.split("_") - 0 [A, Str, Series] - dtype: object - - >>> s.str.replace("_", "") - 0 AStrSeries - dtype: object - """ - - def __init__(self, data): - self._inferred_dtype = self._validate(data) - self._is_categorical = is_categorical_dtype(data.dtype) - self._is_string = data.dtype.name == "string" - - # ._values.categories works for both Series/Index - self._parent = data._values.categories if self._is_categorical else data - # save orig to blow up categoricals to the right type - self._orig = data - self._freeze() - - @staticmethod - def _validate(data): - """ - Auxiliary function for StringMethods, infers and checks dtype of data. - - This is a "first line of defence" at the creation of the StringMethods- - object (see _make_accessor), and just checks that the dtype is in the - *union* of the allowed types over all string methods below; this - restriction is then refined on a per-method basis using the decorator - @forbid_nonstring_types (more info in the corresponding docstring). - - This really should exclude all series/index with any non-string values, - but that isn't practical for performance reasons until we have a str - dtype (GH 9343 / 13877) - - Parameters - ---------- - data : The content of the Series - - Returns - ------- - dtype : inferred dtype of data - """ - from pandas import StringDtype - - if isinstance(data, ABCMultiIndex): - raise AttributeError( - "Can only use .str accessor with Index, not MultiIndex" - ) - - # see _libs/lib.pyx for list of inferred types - allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - - values = getattr(data, "values", data) # Series / Index - values = getattr(values, "categories", values) # categorical / normal - - # explicitly allow StringDtype - if isinstance(values.dtype, StringDtype): - return "string" - - try: - inferred_dtype = lib.infer_dtype(values, skipna=True) - except ValueError: - # GH#27571 mostly occurs with ExtensionArray - inferred_dtype = None - - if inferred_dtype not in allowed_types: - raise AttributeError("Can only use .str accessor with string values!") - return inferred_dtype - - def __getitem__(self, key): - if isinstance(key, slice): - return self.slice(start=key.start, stop=key.stop, step=key.step) - else: - return self.get(key) - - def __iter__(self): - warnings.warn( - "Columnar iteration over characters will be deprecated in future releases.", - FutureWarning, - stacklevel=2, - ) - i = 0 - g = self.get(i) - while g.notna().any(): - yield g - i += 1 - g = self.get(i) - - def _wrap_result( - self, - result, - use_codes=True, - name=None, - expand=None, - fill_value=np.nan, - returns_string=True, - ): - - from pandas import Index, MultiIndex, Series - - # for category, we do the stuff on the categories, so blow it up - # to the full series again - # But for some operations, we have to do the stuff on the full values, - # so make it possible to skip this step as the method already did this - # before the transformation... 
- if use_codes and self._is_categorical: - # if self._orig is a CategoricalIndex, there is no .cat-accessor - result = take_1d( - result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value - ) - - if not hasattr(result, "ndim") or not hasattr(result, "dtype"): - return result - assert result.ndim < 3 - - # We can be wrapping a string / object / categorical result, in which - # case we'll want to return the same dtype as the input. - # Or we can be wrapping a numeric output, in which case we don't want - # to return a StringArray. - if self._is_string and returns_string: - dtype = "string" - else: - dtype = None - - if expand is None: - # infer from ndim if expand is not specified - expand = result.ndim != 1 - - elif expand is True and not isinstance(self._orig, ABCIndexClass): - # required when expand=True is explicitly specified - # not needed when inferred - - def cons_row(x): - if is_list_like(x): - return x - else: - return [x] - - result = [cons_row(x) for x in result] - if result: - # propagate nan values to match longest sequence (GH 18450) - max_len = max(len(x) for x in result) - result = [ - x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result - ] - - if not isinstance(expand, bool): - raise ValueError("expand must be True or False") - - if expand is False: - # if expand is False, result should have the same name - # as the original otherwise specified - if name is None: - name = getattr(result, "name", None) - if name is None: - # do not use logical or, _orig may be a DataFrame - # which has "name" column - name = self._orig.name - - # Wait until we are sure result is a Series or Index before - # checking attributes (GH 12180) - if isinstance(self._orig, ABCIndexClass): - # if result is a boolean np.array, return the np.array - # instead of wrapping it into a boolean Index (GH 8875) - if is_bool_dtype(result): - return result - - if expand: - result = list(result) - out = MultiIndex.from_tuples(result, names=name) - if out.nlevels == 1: - # We had all tuples of length-one, which are - # better represented as a regular Index. - out = out.get_level_values(0) - return out - else: - return Index(result, name=name) - else: - index = self._orig.index - if expand: - cons = self._orig._constructor_expanddim - result = cons(result, columns=name, index=index, dtype=dtype) - else: - # Must be a Series - cons = self._orig._constructor - result = cons(result, name=name, index=index, dtype=dtype) - return result - - def _get_series_list(self, others): - """ - Auxiliary function for :meth:`str.cat`. Turn potentially mixed input - into a list of Series (elements without an index must match the length - of the calling Series/Index). - - Parameters - ---------- - others : Series, DataFrame, np.ndarray, list-like or list-like of - Objects that are either Series, Index or np.ndarray (1-dim). - - Returns - ------- - list of Series - Others transformed into list of Series. - """ - from pandas import DataFrame, Series - - # self._orig is either Series or Index - idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index - - # Generally speaking, all objects without an index inherit the index - # `idx` of the calling Series/Index - i.e. must have matching length. - # Objects with an index (i.e. Series/Index/DataFrame) keep their own. 
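
(Concretely: a bare list must match the caller's length and inherits its
index, while an indexed object keeps its own and is aligned later by ``cat``'s
join step — a small doctest, assuming the default ``join='left'``:

>>> s = pd.Series(["a", "b", "c"])
>>> s.str.cat(pd.Series(["x", "y", "z"], index=[2, 1, 0]))
0    az
1    by
2    cx
dtype: object
)
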
- if isinstance(others, ABCSeries): - return [others] - elif isinstance(others, ABCIndexClass): - return [Series(others._values, index=idx)] - elif isinstance(others, ABCDataFrame): - return [others[x] for x in others] - elif isinstance(others, np.ndarray) and others.ndim == 2: - others = DataFrame(others, index=idx) - return [others[x] for x in others] - elif is_list_like(others, allow_sets=False): - others = list(others) # ensure iterators do not get read twice etc - - # in case of list-like `others`, all elements must be - # either Series/Index/np.ndarray (1-dim)... - if all( - isinstance(x, (ABCSeries, ABCIndexClass)) - or (isinstance(x, np.ndarray) and x.ndim == 1) - for x in others - ): - los = [] - while others: # iterate through list and append each element - los = los + self._get_series_list(others.pop(0)) - return los - # ... or just strings - elif all(not is_list_like(x) for x in others): - return [Series(others, index=idx)] - raise TypeError( - "others must be Series, Index, DataFrame, np.ndarray " - "or list-like (either containing only strings or " - "containing only objects of type Series/Index/" - "np.ndarray[1-dim])" - ) - - @forbid_nonstring_types(["bytes", "mixed", "mixed-integer"]) - def cat(self, others=None, sep=None, na_rep=None, join="left"): - """ - Concatenate strings in the Series/Index with given separator. - - If `others` is specified, this function concatenates the Series/Index - and elements of `others` element-wise. - If `others` is not passed, then all values in the Series/Index are - concatenated into a single string with a given `sep`. - - Parameters - ---------- - others : Series, Index, DataFrame, np.ndarray or list-like - Series, Index, DataFrame, np.ndarray (one- or two-dimensional) and - other list-likes of strings must have the same length as the - calling Series/Index, with the exception of indexed objects (i.e. - Series/Index/DataFrame) if `join` is not None. - - If others is a list-like that contains a combination of Series, - Index or np.ndarray (1-dim), then all elements will be unpacked and - must satisfy the above criteria individually. - - If others is None, the method returns the concatenation of all - strings in the calling Series/Index. - sep : str, default '' - The separator between the different elements/columns. By default - the empty string `''` is used. - na_rep : str or None, default None - Representation that is inserted for all missing values: - - - If `na_rep` is None, and `others` is None, missing values in the - Series/Index are omitted from the result. - - If `na_rep` is None, and `others` is not None, a row containing a - missing value in any of the columns (before concatenation) will - have a missing value in the result. - join : {'left', 'right', 'outer', 'inner'}, default 'left' - Determines the join-style between the calling Series/Index and any - Series/Index/DataFrame in `others` (objects without an index need - to match the length of the calling Series/Index). To disable - alignment, use `.values` on any Series/Index/DataFrame in `others`. - - .. versionchanged:: 1.0.0 - Changed default of `join` from None to `'left'`. - - Returns - ------- - str, Series or Index - If `others` is None, `str` is returned, otherwise a `Series/Index` - (same type as caller) of objects is returned. - - See Also - -------- - split : Split each string in the Series/Index. - join : Join lists contained as elements in the Series/Index. 
- - Examples - -------- - When not passing `others`, all values are concatenated into a single - string: - - >>> s = pd.Series(['a', 'b', np.nan, 'd']) - >>> s.str.cat(sep=' ') - 'a b d' - - By default, NA values in the Series are ignored. Using `na_rep`, they - can be given a representation: - - >>> s.str.cat(sep=' ', na_rep='?') - 'a b ? d' - - If `others` is specified, corresponding values are concatenated with - the separator. Result will be a Series of strings. - - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',') - 0 a,A - 1 b,B - 2 NaN - 3 d,D - dtype: object - - Missing values will remain missing in the result, but can again be - represented using `na_rep` - - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-') - 0 a,A - 1 b,B - 2 -,C - 3 d,D - dtype: object - - If `sep` is not specified, the values are concatenated without - separation. - - >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-') - 0 aA - 1 bB - 2 -C - 3 dD - dtype: object - - Series with different indexes can be aligned before concatenation. The - `join`-keyword works as in other methods. - - >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) - >>> s.str.cat(t, join='left', na_rep='-') - 0 aa - 1 b- - 2 -c - 3 dd - dtype: object - >>> - >>> s.str.cat(t, join='outer', na_rep='-') - 0 aa - 1 b- - 2 -c - 3 dd - 4 -e - dtype: object - >>> - >>> s.str.cat(t, join='inner', na_rep='-') - 0 aa - 2 -c - 3 dd - dtype: object - >>> - >>> s.str.cat(t, join='right', na_rep='-') - 3 dd - 0 aa - 4 -e - 2 -c - dtype: object - - For more examples, see :ref:`here `. - """ - from pandas import Index, Series, concat - - if isinstance(others, str): - raise ValueError("Did you mean to supply a `sep` keyword?") - if sep is None: - sep = "" - - if isinstance(self._orig, ABCIndexClass): - data = Series(self._orig, index=self._orig) - else: # Series - data = self._orig - - # concatenate Series/Index with itself if no "others" - if others is None: - data = ensure_object(data) - na_mask = isna(data) - if na_rep is None and na_mask.any(): - data = data[~na_mask] - elif na_rep is not None and na_mask.any(): - data = np.where(na_mask, na_rep, data) - return sep.join(data) - - try: - # turn anything in "others" into lists of Series - others = self._get_series_list(others) - except ValueError as err: # do not catch TypeError raised by _get_series_list - raise ValueError( - "If `others` contains arrays or lists (or other " - "list-likes without an index), these must all be " - "of the same length as the calling Series/Index." 
- ) from err - - # align if required - if any(not data.index.equals(x.index) for x in others): - # Need to add keys for uniqueness in case of duplicate columns - others = concat( - others, - axis=1, - join=(join if join == "inner" else "outer"), - keys=range(len(others)), - sort=False, - copy=False, - ) - data, others = data.align(others, join=join) - others = [others[x] for x in others] # again list of Series - - all_cols = [ensure_object(x) for x in [data] + others] - na_masks = np.array([isna(x) for x in all_cols]) - union_mask = np.logical_or.reduce(na_masks, axis=0) - - if na_rep is None and union_mask.any(): - # no na_rep means NaNs for all rows where any column has a NaN - # only necessary if there are actually any NaNs - result = np.empty(len(data), dtype=object) - np.putmask(result, union_mask, np.nan) - - not_masked = ~union_mask - result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) - elif na_rep is not None and union_mask.any(): - # fill NaNs with na_rep in case there are actually any NaNs - all_cols = [ - np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) - ] - result = cat_safe(all_cols, sep) - else: - # no NaNs - can just concatenate - result = cat_safe(all_cols, sep) - - if isinstance(self._orig, ABCIndexClass): - # add dtype for case that result is all-NA - result = Index(result, dtype=object, name=self._orig.name) - else: # Series - if is_categorical_dtype(self._orig.dtype): - # We need to infer the new categories. - dtype = None - else: - dtype = self._orig.dtype - result = Series(result, dtype=dtype, index=data.index, name=self._orig.name) - return result - - _shared_docs[ - "str_split" - ] = r""" - Split strings around given separator/delimiter. - - Splits the string in the Series/Index from the %(side)s, - at the specified delimiter string. Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - pat : str, optional - String or regular expression to split on. - If not specified, split on whitespace. - n : int, default -1 (all) - Limit number of splits in output. - ``None``, 0 and -1 will be interpreted as return all splits. - expand : bool, default False - Expand the split strings into separate columns. - - * If ``True``, return DataFrame/MultiIndex expanding dimensionality. - * If ``False``, return Series/Index, containing lists of strings. - - Returns - ------- - Series, Index, DataFrame or MultiIndex - Type matches caller unless ``expand=True`` (see Notes). - - See Also - -------- - Series.str.split : Split strings around given separator/delimiter. - Series.str.rsplit : Splits string around given separator/delimiter, - starting from the right. - Series.str.join : Join lists contained as elements in the Series/Index - with passed delimiter. - str.split : Standard library version for split. - str.rsplit : Standard library version for rsplit. - - Notes - ----- - The handling of the `n` keyword depends on the number of found splits: - - - If found splits > `n`, make first `n` splits only - - If found splits <= `n`, make all splits - - If for a certain row the number of found splits < `n`, - append `None` for padding up to `n` if ``expand=True`` - - If using ``expand=True``, Series and Index callers return DataFrame and - MultiIndex objects, respectively. - - Examples - -------- - >>> s = pd.Series( - ... [ - ... "this is a regular sentence", - ... "https://docs.python.org/3/tutorial/index.html", - ... np.nan - ... ] - ... 
) - >>> s - 0 this is a regular sentence - 1 https://docs.python.org/3/tutorial/index.html - 2 NaN - dtype: object - - In the default setting, the string is split by whitespace. - - >>> s.str.split() - 0 [this, is, a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - Without the `n` parameter, the outputs of `rsplit` and `split` - are identical. - - >>> s.str.rsplit() - 0 [this, is, a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - The `n` parameter can be used to limit the number of splits on the - delimiter. The outputs of `split` and `rsplit` are different. - - >>> s.str.split(n=2) - 0 [this, is, a regular sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - >>> s.str.rsplit(n=2) - 0 [this is a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - The `pat` parameter can be used to split by other characters. - - >>> s.str.split(pat="/") - 0 [this is a regular sentence] - 1 [https:, , docs.python.org, 3, tutorial, index... - 2 NaN - dtype: object - - When using ``expand=True``, the split elements will expand out into - separate columns. If NaN is present, it is propagated throughout - the columns during the split. - - >>> s.str.split(expand=True) - 0 1 2 3 4 - 0 this is a regular sentence - 1 https://docs.python.org/3/tutorial/index.html None None None None - 2 NaN NaN NaN NaN NaN - - For slightly more complex use cases like splitting the html document name - from a url, a combination of parameter settings can be used. - - >>> s.str.rsplit("/", n=1, expand=True) - 0 1 - 0 this is a regular sentence None - 1 https://docs.python.org/3/tutorial index.html - 2 NaN NaN - - Remember to escape special characters when explicitly using regular - expressions. - - >>> s = pd.Series(["1+1=2"]) - >>> s - 0 1+1=2 - dtype: object - >>> s.str.split(r"\+|=", expand=True) - 0 1 2 - 0 1 1 2 - """ - - @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"}) - @forbid_nonstring_types(["bytes"]) - def split(self, pat=None, n=-1, expand=False): - result = str_split(self._parent, pat, n=n) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"}) - @forbid_nonstring_types(["bytes"]) - def rsplit(self, pat=None, n=-1, expand=False): - result = str_rsplit(self._parent, pat, n=n) - return self._wrap_result(result, expand=expand, returns_string=expand) - - _shared_docs[ - "str_partition" - ] = """ - Split the string at the %(side)s occurrence of `sep`. - - This method splits the string at the %(side)s occurrence of `sep`, - and returns 3 elements containing the part before the separator, - the separator itself, and the part after the separator. - If the separator is not found, return %(return)s. - - Parameters - ---------- - sep : str, default whitespace - String to split on. - expand : bool, default True - If True, return DataFrame/MultiIndex expanding dimensionality. - If False, return Series/Index. - - Returns - ------- - DataFrame/MultiIndex or Series/Index of objects - - See Also - -------- - %(also)s - Series.str.split : Split strings around given separators. - str.partition : Standard library version. 
- - Examples - -------- - - >>> s = pd.Series(['Linda van der Berg', 'George Pitt-Rivers']) - >>> s - 0 Linda van der Berg - 1 George Pitt-Rivers - dtype: object - - >>> s.str.partition() - 0 1 2 - 0 Linda van der Berg - 1 George Pitt-Rivers - - To partition by the last space instead of the first one: - - >>> s.str.rpartition() - 0 1 2 - 0 Linda van der Berg - 1 George Pitt-Rivers - - To partition by something different than a space: - - >>> s.str.partition('-') - 0 1 2 - 0 Linda van der Berg - 1 George Pitt - Rivers - - To return a Series containing tuples instead of a DataFrame: - - >>> s.str.partition('-', expand=False) - 0 (Linda van der Berg, , ) - 1 (George Pitt, -, Rivers) - dtype: object - - Also available on indices: - - >>> idx = pd.Index(['X 123', 'Y 999']) - >>> idx - Index(['X 123', 'Y 999'], dtype='object') - - Which will create a MultiIndex: - - >>> idx.str.partition() - MultiIndex([('X', ' ', '123'), - ('Y', ' ', '999')], - ) - - Or an index with tuples with ``expand=False``: - - >>> idx.str.partition(expand=False) - Index([('X', ' ', '123'), ('Y', ' ', '999')], dtype='object') - """ - - @Appender( - _shared_docs["str_partition"] - % { - "side": "first", - "return": "3 elements containing the string itself, followed by two " - "empty strings", - "also": "rpartition : Split the string at the last occurrence of `sep`.", - } - ) - @forbid_nonstring_types(["bytes"]) - def partition(self, sep=" ", expand=True): - f = lambda x: x.partition(sep) - result = _na_map(f, self._parent) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @Appender( - _shared_docs["str_partition"] - % { - "side": "last", - "return": "3 elements containing two empty strings, followed by the " - "string itself", - "also": "partition : Split the string at the first occurrence of `sep`.", - } - ) - @forbid_nonstring_types(["bytes"]) - def rpartition(self, sep=" ", expand=True): - f = lambda x: x.rpartition(sep) - result = _na_map(f, self._parent) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @copy(str_get) - def get(self, i): - result = str_get(self._parent, i) - return self._wrap_result(result) - - @copy(str_join) - @forbid_nonstring_types(["bytes"]) - def join(self, sep): - result = str_join(self._parent, sep) - return self._wrap_result(result) - - @copy(str_contains) - @forbid_nonstring_types(["bytes"]) - def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): - result = str_contains( - self._parent, pat, case=case, flags=flags, na=na, regex=regex - ) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_match) - @forbid_nonstring_types(["bytes"]) - def match(self, pat, case=True, flags=0, na=np.nan): - result = str_match(self._parent, pat, case=case, flags=flags, na=na) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_fullmatch) - @forbid_nonstring_types(["bytes"]) - def fullmatch(self, pat, case=True, flags=0, na=np.nan): - result = str_fullmatch(self._parent, pat, case=case, flags=flags, na=na) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_replace) - @forbid_nonstring_types(["bytes"]) - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): - result = str_replace( - self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex - ) - return self._wrap_result(result) - - @copy(str_repeat) - @forbid_nonstring_types(["bytes"]) - def repeat(self, repeats): - result = str_repeat(self._parent, repeats) - return 
self._wrap_result(result) - - @copy(str_pad) - @forbid_nonstring_types(["bytes"]) - def pad(self, width, side="left", fillchar=" "): - result = str_pad(self._parent, width, side=side, fillchar=fillchar) - return self._wrap_result(result) - - _shared_docs[ - "str_pad" - ] = """ - Pad %(side)s side of strings in the Series/Index. - - Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - width : int - Minimum width of resulting string; additional characters will be filled - with ``fillchar``. - fillchar : str - Additional character for filling, default is whitespace. - - Returns - ------- - filled : Series/Index of objects. - """ - - @Appender(_shared_docs["str_pad"] % dict(side="left and right", method="center")) - @forbid_nonstring_types(["bytes"]) - def center(self, width, fillchar=" "): - return self.pad(width, side="both", fillchar=fillchar) - - @Appender(_shared_docs["str_pad"] % dict(side="right", method="ljust")) - @forbid_nonstring_types(["bytes"]) - def ljust(self, width, fillchar=" "): - return self.pad(width, side="right", fillchar=fillchar) - - @Appender(_shared_docs["str_pad"] % dict(side="left", method="rjust")) - @forbid_nonstring_types(["bytes"]) - def rjust(self, width, fillchar=" "): - return self.pad(width, side="left", fillchar=fillchar) - - @forbid_nonstring_types(["bytes"]) - def zfill(self, width): - """ - Pad strings in the Series/Index by prepending '0' characters. - - Strings in the Series/Index are padded with '0' characters on the - left of the string to reach a total string length `width`. Strings - in the Series/Index with length greater or equal to `width` are - unchanged. - - Parameters - ---------- - width : int - Minimum length of resulting string; strings with length less - than `width` be prepended with '0' characters. - - Returns - ------- - Series/Index of objects. - - See Also - -------- - Series.str.rjust : Fills the left side of strings with an arbitrary - character. - Series.str.ljust : Fills the right side of strings with an arbitrary - character. - Series.str.pad : Fills the specified sides of strings with an arbitrary - character. - Series.str.center : Fills boths sides of strings with an arbitrary - character. - - Notes - ----- - Differs from :meth:`str.zfill` which has special handling - for '+'/'-' in the string. - - Examples - -------- - >>> s = pd.Series(['-1', '1', '1000', 10, np.nan]) - >>> s - 0 -1 - 1 1 - 2 1000 - 3 10 - 4 NaN - dtype: object - - Note that ``10`` and ``NaN`` are not strings, therefore they are - converted to ``NaN``. The minus sign in ``'-1'`` is treated as a - regular character and the zero is added to the left of it - (:meth:`str.zfill` would have moved it to the left). ``1000`` - remains unchanged as it is longer than `width`. - - >>> s.str.zfill(3) - 0 0-1 - 1 001 - 2 1000 - 3 NaN - 4 NaN - dtype: object - """ - result = str_pad(self._parent, width, side="left", fillchar="0") - return self._wrap_result(result) - - @copy(str_slice) - def slice(self, start=None, stop=None, step=None): - result = str_slice(self._parent, start, stop, step) - return self._wrap_result(result) - - @copy(str_slice_replace) - @forbid_nonstring_types(["bytes"]) - def slice_replace(self, start=None, stop=None, repl=None): - result = str_slice_replace(self._parent, start, stop, repl) - return self._wrap_result(result) - - @copy(str_decode) - def decode(self, encoding, errors="strict"): - # need to allow bytes here - result = str_decode(self._parent, encoding, errors) - # TODO: Not sure how to handle this. 
- return self._wrap_result(result, returns_string=False) - - @copy(str_encode) - @forbid_nonstring_types(["bytes"]) - def encode(self, encoding, errors="strict"): - result = str_encode(self._parent, encoding, errors) - return self._wrap_result(result, returns_string=False) - - _shared_docs[ - "str_strip" - ] = r""" - Remove %(position)s characters. - - Strip whitespaces (including newlines) or a set of specified characters - from each string in the Series/Index from %(side)s. - Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - to_strip : str or None, default None - Specifying the set of characters to be removed. - All combinations of this set of characters will be stripped. - If None then whitespaces are removed. - - Returns - ------- - Series or Index of object - - See Also - -------- - Series.str.strip : Remove leading and trailing characters in Series/Index. - Series.str.lstrip : Remove leading characters in Series/Index. - Series.str.rstrip : Remove trailing characters in Series/Index. - - Examples - -------- - >>> s = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', np.nan]) - >>> s - 0 1. Ant. - 1 2. Bee!\n - 2 3. Cat?\t - 3 NaN - dtype: object - - >>> s.str.strip() - 0 1. Ant. - 1 2. Bee! - 2 3. Cat? - 3 NaN - dtype: object - - >>> s.str.lstrip('123.') - 0 Ant. - 1 Bee!\n - 2 Cat?\t - 3 NaN - dtype: object - - >>> s.str.rstrip('.!? \n\t') - 0 1. Ant - 1 2. Bee - 2 3. Cat - 3 NaN - dtype: object - - >>> s.str.strip('123.!? \n\t') - 0 Ant - 1 Bee - 2 Cat - 3 NaN - dtype: object - """ - - @Appender( - _shared_docs["str_strip"] - % dict( - side="left and right sides", method="strip", position="leading and trailing" - ) - ) - @forbid_nonstring_types(["bytes"]) - def strip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="both") - return self._wrap_result(result) - - @Appender( - _shared_docs["str_strip"] - % dict(side="left side", method="lstrip", position="leading") - ) - @forbid_nonstring_types(["bytes"]) - def lstrip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="left") - return self._wrap_result(result) - - @Appender( - _shared_docs["str_strip"] - % dict(side="right side", method="rstrip", position="trailing") - ) - @forbid_nonstring_types(["bytes"]) - def rstrip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="right") - return self._wrap_result(result) - - @copy(str_wrap) - @forbid_nonstring_types(["bytes"]) - def wrap(self, width, **kwargs): - result = str_wrap(self._parent, width, **kwargs) - return self._wrap_result(result) - - @copy(str_get_dummies) - @forbid_nonstring_types(["bytes"]) - def get_dummies(self, sep="|"): - # we need to cast to Series of strings as only that has all - # methods available for making the dummies... 
- data = self._orig.astype(str) if self._is_categorical else self._parent - result, name = str_get_dummies(data, sep) - return self._wrap_result( - result, - use_codes=(not self._is_categorical), - name=name, - expand=True, - returns_string=False, - ) - - @copy(str_translate) - @forbid_nonstring_types(["bytes"]) - def translate(self, table): - result = str_translate(self._parent, table) - return self._wrap_result(result) - - count = _pat_wrapper(str_count, flags=True, name="count", returns_string=False) - startswith = _pat_wrapper( - str_startswith, na=True, name="startswith", returns_string=False - ) - endswith = _pat_wrapper( - str_endswith, na=True, name="endswith", returns_string=False - ) - findall = _pat_wrapper( - str_findall, flags=True, name="findall", returns_string=False - ) - - @copy(str_extract) - @forbid_nonstring_types(["bytes"]) - def extract(self, pat, flags=0, expand=True): - return str_extract(self, pat, flags=flags, expand=expand) - - @copy(str_extractall) - @forbid_nonstring_types(["bytes"]) - def extractall(self, pat, flags=0): - return str_extractall(self._orig, pat, flags=flags) - - _shared_docs[ - "find" - ] = """ - Return %(side)s indexes in each strings in the Series/Index. - - Each of returned indexes corresponds to the position where the - substring is fully contained between [start:end]. Return -1 on - failure. Equivalent to standard :meth:`str.%(method)s`. - - Parameters - ---------- - sub : str - Substring being searched. - start : int - Left edge index. - end : int - Right edge index. - - Returns - ------- - Series or Index of int. - - See Also - -------- - %(also)s - """ - - @Appender( - _shared_docs["find"] - % dict( - side="lowest", - method="find", - also="rfind : Return highest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def find(self, sub, start=0, end=None): - result = str_find(self._parent, sub, start=start, end=end, side="left") - return self._wrap_result(result, returns_string=False) - - @Appender( - _shared_docs["find"] - % dict( - side="highest", - method="rfind", - also="find : Return lowest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def rfind(self, sub, start=0, end=None): - result = str_find(self._parent, sub, start=start, end=end, side="right") - return self._wrap_result(result, returns_string=False) - - @forbid_nonstring_types(["bytes"]) - def normalize(self, form): - """ - Return the Unicode normal form for the strings in the Series/Index. - - For more information on the forms, see the - :func:`unicodedata.normalize`. - - Parameters - ---------- - form : {'NFC', 'NFKC', 'NFD', 'NFKD'} - Unicode form. - - Returns - ------- - normalized : Series/Index of objects - """ - import unicodedata - - f = lambda x: unicodedata.normalize(form, x) - result = _na_map(f, self._parent, dtype=str) - return self._wrap_result(result) - - _shared_docs[ - "index" - ] = """ - Return %(side)s indexes in each string in Series/Index. - - Each of the returned indexes corresponds to the position where the - substring is fully contained between [start:end]. This is the same - as ``str.%(similar)s`` except instead of returning -1, it raises a - ValueError when the substring is not found. Equivalent to standard - ``str.%(method)s``. - - Parameters - ---------- - sub : str - Substring being searched. - start : int - Left edge index. - end : int - Right edge index. 
- - Returns - ------- - Series or Index of object - - See Also - -------- - %(also)s - """ - - @Appender( - _shared_docs["index"] - % dict( - side="lowest", - similar="find", - method="index", - also="rindex : Return highest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def index(self, sub, start=0, end=None): - result = str_index(self._parent, sub, start=start, end=end, side="left") - return self._wrap_result(result, returns_string=False) - - @Appender( - _shared_docs["index"] - % dict( - side="highest", - similar="rfind", - method="rindex", - also="index : Return lowest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def rindex(self, sub, start=0, end=None): - result = str_index(self._parent, sub, start=start, end=end, side="right") - return self._wrap_result(result, returns_string=False) - - _shared_docs[ - "len" - ] = """ - Compute the length of each element in the Series/Index. - - The element may be a sequence (such as a string, tuple or list) or a collection - (such as a dictionary). - - Returns - ------- - Series or Index of int - A Series or Index of integer values indicating the length of each - element in the Series or Index. - - See Also - -------- - str.len : Python built-in function returning the length of an object. - Series.size : Returns the length of the Series. - - Examples - -------- - Returns the length (number of characters) in a string. Returns the - number of entries for dictionaries, lists or tuples. - - >>> s = pd.Series(['dog', - ... '', - ... 5, - ... {'foo' : 'bar'}, - ... [2, 3, 5, 7], - ... ('one', 'two', 'three')]) - >>> s - 0 dog - 1 - 2 5 - 3 {'foo': 'bar'} - 4 [2, 3, 5, 7] - 5 (one, two, three) - dtype: object - >>> s.str.len() - 0 3.0 - 1 0.0 - 2 NaN - 3 1.0 - 4 4.0 - 5 3.0 - dtype: float64 - """ - len = _noarg_wrapper( - len, - docstring=_shared_docs["len"], - forbidden_types=None, - dtype=np.dtype("int64"), - returns_string=False, - ) - - _shared_docs[ - "casemethods" - ] = """ - Convert strings in the Series/Index to %(type)s. - %(version)s - Equivalent to :meth:`str.%(method)s`. - - Returns - ------- - Series or Index of object - - See Also - -------- - Series.str.lower : Converts all characters to lowercase. - Series.str.upper : Converts all characters to uppercase. - Series.str.title : Converts first character of each word to uppercase and - remaining to lowercase. - Series.str.capitalize : Converts first character to uppercase and - remaining to lowercase. - Series.str.swapcase : Converts uppercase to lowercase and lowercase to - uppercase. - Series.str.casefold: Removes all case distinctions in the string. 
- - Examples - -------- - >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe']) - >>> s - 0 lower - 1 CAPITALS - 2 this is a sentence - 3 SwApCaSe - dtype: object - - >>> s.str.lower() - 0 lower - 1 capitals - 2 this is a sentence - 3 swapcase - dtype: object - - >>> s.str.upper() - 0 LOWER - 1 CAPITALS - 2 THIS IS A SENTENCE - 3 SWAPCASE - dtype: object - - >>> s.str.title() - 0 Lower - 1 Capitals - 2 This Is A Sentence - 3 Swapcase - dtype: object - - >>> s.str.capitalize() - 0 Lower - 1 Capitals - 2 This is a sentence - 3 Swapcase - dtype: object - - >>> s.str.swapcase() - 0 LOWER - 1 capitals - 2 THIS IS A SENTENCE - 3 sWaPcAsE - dtype: object - """ - - # _doc_args holds dict of strings to use in substituting casemethod docs - _doc_args: Dict[str, Dict[str, str]] = {} - _doc_args["lower"] = dict(type="lowercase", method="lower", version="") - _doc_args["upper"] = dict(type="uppercase", method="upper", version="") - _doc_args["title"] = dict(type="titlecase", method="title", version="") - _doc_args["capitalize"] = dict( - type="be capitalized", method="capitalize", version="" - ) - _doc_args["swapcase"] = dict(type="be swapcased", method="swapcase", version="") - _doc_args["casefold"] = dict( - type="be casefolded", - method="casefold", - version="\n .. versionadded:: 0.25.0\n", - ) - lower = _noarg_wrapper( - lambda x: x.lower(), - name="lower", - docstring=_shared_docs["casemethods"] % _doc_args["lower"], - dtype=str, - ) - upper = _noarg_wrapper( - lambda x: x.upper(), - name="upper", - docstring=_shared_docs["casemethods"] % _doc_args["upper"], - dtype=str, - ) - title = _noarg_wrapper( - lambda x: x.title(), - name="title", - docstring=_shared_docs["casemethods"] % _doc_args["title"], - dtype=str, - ) - capitalize = _noarg_wrapper( - lambda x: x.capitalize(), - name="capitalize", - docstring=_shared_docs["casemethods"] % _doc_args["capitalize"], - dtype=str, - ) - swapcase = _noarg_wrapper( - lambda x: x.swapcase(), - name="swapcase", - docstring=_shared_docs["casemethods"] % _doc_args["swapcase"], - dtype=str, - ) - casefold = _noarg_wrapper( - lambda x: x.casefold(), - name="casefold", - docstring=_shared_docs["casemethods"] % _doc_args["casefold"], - dtype=str, - ) - - _shared_docs[ - "ismethods" - ] = """ - Check whether all characters in each string are %(type)s. - - This is equivalent to running the Python string method - :meth:`str.%(method)s` for each element of the Series/Index. If a string - has zero characters, ``False`` is returned for that check. - - Returns - ------- - Series or Index of bool - Series or Index of boolean values with the same length as the original - Series/Index. - - See Also - -------- - Series.str.isalpha : Check whether all characters are alphabetic. - Series.str.isnumeric : Check whether all characters are numeric. - Series.str.isalnum : Check whether all characters are alphanumeric. - Series.str.isdigit : Check whether all characters are digits. - Series.str.isdecimal : Check whether all characters are decimal. - Series.str.isspace : Check whether all characters are whitespace. - Series.str.islower : Check whether all characters are lowercase. - Series.str.isupper : Check whether all characters are uppercase. - Series.str.istitle : Check whether all characters are titlecase. 
- - Examples - -------- - **Checks for Alphabetic and Numeric Characters** - - >>> s1 = pd.Series(['one', 'one1', '1', '']) - - >>> s1.str.isalpha() - 0 True - 1 False - 2 False - 3 False - dtype: bool - - >>> s1.str.isnumeric() - 0 False - 1 False - 2 True - 3 False - dtype: bool - - >>> s1.str.isalnum() - 0 True - 1 True - 2 True - 3 False - dtype: bool - - Note that checks against characters mixed with any additional punctuation - or whitespace will evaluate to false for an alphanumeric check. - - >>> s2 = pd.Series(['A B', '1.5', '3,000']) - >>> s2.str.isalnum() - 0 False - 1 False - 2 False - dtype: bool - - **More Detailed Checks for Numeric Characters** - - There are several different but overlapping sets of numeric characters that - can be checked for. - - >>> s3 = pd.Series(['23', '³', '⅕', '']) - - The ``s3.str.isdecimal`` method checks for characters used to form numbers - in base 10. - - >>> s3.str.isdecimal() - 0 True - 1 False - 2 False - 3 False - dtype: bool - - The ``s.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also - includes special digits, like superscripted and subscripted digits in - unicode. - - >>> s3.str.isdigit() - 0 True - 1 True - 2 False - 3 False - dtype: bool - - The ``s.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also - includes other characters that can represent quantities such as unicode - fractions. - - >>> s3.str.isnumeric() - 0 True - 1 True - 2 True - 3 False - dtype: bool - - **Checks for Whitespace** - - >>> s4 = pd.Series([' ', '\\t\\r\\n ', '']) - >>> s4.str.isspace() - 0 True - 1 True - 2 False - dtype: bool - - **Checks for Character Case** - - >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) - - >>> s5.str.islower() - 0 True - 1 False - 2 False - 3 False - dtype: bool - - >>> s5.str.isupper() - 0 False - 1 False - 2 True - 3 False - dtype: bool - - The ``s5.str.istitle`` method checks for whether all words are in title - case (whether only the first letter of each word is capitalized). Words are - assumed to be as any sequence of non-numeric characters separated by - whitespace characters. 
- - >>> s5.str.istitle() - 0 False - 1 True - 2 False - 3 False - dtype: bool - """ - _doc_args["isalnum"] = dict(type="alphanumeric", method="isalnum") - _doc_args["isalpha"] = dict(type="alphabetic", method="isalpha") - _doc_args["isdigit"] = dict(type="digits", method="isdigit") - _doc_args["isspace"] = dict(type="whitespace", method="isspace") - _doc_args["islower"] = dict(type="lowercase", method="islower") - _doc_args["isupper"] = dict(type="uppercase", method="isupper") - _doc_args["istitle"] = dict(type="titlecase", method="istitle") - _doc_args["isnumeric"] = dict(type="numeric", method="isnumeric") - _doc_args["isdecimal"] = dict(type="decimal", method="isdecimal") - # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624) - isalnum = _noarg_wrapper( - lambda x: x.isalnum(), - name="isalnum", - docstring=_shared_docs["ismethods"] % _doc_args["isalnum"], - returns_string=False, - dtype=np.dtype(bool), - ) - isalpha = _noarg_wrapper( - lambda x: x.isalpha(), - name="isalpha", - docstring=_shared_docs["ismethods"] % _doc_args["isalpha"], - returns_string=False, - dtype=np.dtype(bool), - ) - isdigit = _noarg_wrapper( - lambda x: x.isdigit(), - name="isdigit", - docstring=_shared_docs["ismethods"] % _doc_args["isdigit"], - returns_string=False, - dtype=np.dtype(bool), - ) - isspace = _noarg_wrapper( - lambda x: x.isspace(), - name="isspace", - docstring=_shared_docs["ismethods"] % _doc_args["isspace"], - returns_string=False, - dtype=np.dtype(bool), - ) - islower = _noarg_wrapper( - lambda x: x.islower(), - name="islower", - docstring=_shared_docs["ismethods"] % _doc_args["islower"], - returns_string=False, - dtype=np.dtype(bool), - ) - isupper = _noarg_wrapper( - lambda x: x.isupper(), - name="isupper", - docstring=_shared_docs["ismethods"] % _doc_args["isupper"], - returns_string=False, - dtype=np.dtype(bool), - ) - istitle = _noarg_wrapper( - lambda x: x.istitle(), - name="istitle", - docstring=_shared_docs["ismethods"] % _doc_args["istitle"], - returns_string=False, - dtype=np.dtype(bool), - ) - isnumeric = _noarg_wrapper( - lambda x: x.isnumeric(), - name="isnumeric", - docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"], - returns_string=False, - dtype=np.dtype(bool), - ) - isdecimal = _noarg_wrapper( - lambda x: x.isdecimal(), - name="isdecimal", - docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"], - returns_string=False, - dtype=np.dtype(bool), - ) - - @classmethod - def _make_accessor(cls, data): - cls._validate(data) - return cls(data) From e76a3c1e9dd88bd75da767dbe3f0d698eb22f4ed Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Sep 2020 15:21:57 -0500 Subject: [PATCH 07/24] fixup --- pandas/core/strings/accessor.py | 32 +++++++++++++++++++---------- pandas/core/strings/base.py | 6 +++--- pandas/core/strings/object_array.py | 16 ++++++++------- pandas/core/strings/string_array.py | 6 +++--- pandas/tests/test_strings.py | 7 +++++++ 5 files changed, 43 insertions(+), 24 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index f5c91b692d0e7..66bedd15b91b4 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -977,7 +977,7 @@ def join(self, sep): return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) - def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + def contains(self, pat, case=True, flags=0, na=None, regex=True): """ Test if pattern or regex is contained within a string of a Series or Index. 
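The hunks below document the new dtype-dependent ``na`` default for these
methods; roughly, assuming pandas 1.x semantics for object and ``string``
dtypes, the intended behaviour is:

    >>> s = pd.Series(["apple", None, "banana"])
    >>> s.str.contains("an")                   # object dtype: missing -> numpy.nan
    0    False
    1      NaN
    2     True
    dtype: object
    >>> s.astype("string").str.contains("an")  # StringDtype: missing -> pandas.NA
    0    False
    1     <NA>
    2     True
    dtype: boolean
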
@@ -992,8 +992,10 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): If True, case sensitive. flags : int, default 0 (no flags) Flags to pass through to the re module, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. + na : scalar, optional. + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. regex : bool, default True If True, assumes the pat is a regular expression. @@ -1103,7 +1105,7 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) - def match(self, pat, case=True, flags=0, na=np.nan): + def match(self, pat, case=True, flags=0, na=None): """ Determine if each string starts with a match of a regular expression. @@ -1115,8 +1117,10 @@ def match(self, pat, case=True, flags=0, na=np.nan): If True, case sensitive. flags : int, default 0 (no flags) Regex module flags, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. + na : scalar, optional. + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. Returns ------- @@ -1133,7 +1137,7 @@ def match(self, pat, case=True, flags=0, na=np.nan): return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) - def fullmatch(self, pat, case=True, flags=0, na=np.nan): + def fullmatch(self, pat, case=True, flags=0, na=None): """ Determine if each string entirely matches a regular expression. @@ -1147,8 +1151,10 @@ def fullmatch(self, pat, case=True, flags=0, na=np.nan): If True, case sensitive. flags : int, default 0 (no flags) Regex module flags, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. + na : scalar, optional. + Fill value for missing values. The default depends on dtype of the + array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, + ``pandas.NA`` is used. Returns ------- @@ -1993,7 +1999,9 @@ def startswith(self, pat, na=None): pat : str Character sequence. Regular expressions are not accepted. na : object, default NaN - Object shown if element tested is not a string. + Object shown if element tested is not a string. The default depends + on dtype of the array. For object-dtype, ``numpy.nan`` is used. + For ``StringDtype``, ``pandas.NA`` is used. Returns ------- @@ -2048,7 +2056,9 @@ def endswith(self, pat, na=None): pat : str Character sequence. Regular expressions are not accepted. na : object, default NaN - Object shown if element tested is not a string. + Object shown if element tested is not a string. The default depends + on dtype of the array. For object-dtype, ``numpy.nan`` is used. + For ``StringDtype``, ``pandas.NA`` is used. 
Returns ------- diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index f9e70f8559fd9..d21e8f78ce9ba 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -41,15 +41,15 @@ def pad(self, width, side="left", fillchar=" "): pass @abc.abstractmethod - def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + def contains(self, pat, case=True, flags=0, na=None, regex=True): pass @abc.abstractmethod - def startswith(self, pat, na=np.nan): + def startswith(self, pat, na=None): pass @abc.abstractmethod - def endswith(self, pat, na=np.nan): + def endswith(self, pat, na=None): pass @abc.abstractmethod diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index dc6144f0320cf..cf6922880ddd2 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -20,17 +20,19 @@ class ObjectArrayMethods(BaseStringArrayMethods): + _default_na_value = np.nan + def _map(self, f, na_value=None, dtype=None): arr = self._array # object-dtype ndarray. if dtype is None: dtype = np.dtype("object") if na_value is None: - na_value = np.nan + na_value = self._default_na_value if not len(arr): return np.ndarray(0, dtype=dtype) if na_value is None: - na_value = np.nan + na_value = self._default_na_value if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) @@ -111,11 +113,11 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): f = lambda x: upper_pat in x.upper() return self._map(f, na, dtype=np.dtype("bool")) - def startswith(self, pat, na=np.nan): + def startswith(self, pat, na=None): f = lambda x: x.startswith(pat) return self._map(f, na_value=na, dtype=np.dtype(bool)) - def endswith(self, pat, na=np.nan): + def endswith(self, pat, na=None): f = lambda x: x.endswith(pat) return self._map(f, na_value=na, dtype=np.dtype(bool)) @@ -191,7 +193,7 @@ def match( pat: Union[str, Pattern], case: bool = True, flags: int = 0, - na: Scalar = np.nan, + na: Scalar = None, ): if not case: flags |= re.IGNORECASE @@ -206,7 +208,7 @@ def fullmatch( pat: Union[str, Pattern], case: bool = True, flags: int = 0, - na: Scalar = np.nan, + na: Scalar = None, ): if not case: flags |= re.IGNORECASE @@ -250,7 +252,7 @@ def f(x): return x.get(i) elif len(x) > i >= -len(x): return x[i] - return np.nan + return self._default_na_value return self._map(f) diff --git a/pandas/core/strings/string_array.py b/pandas/core/strings/string_array.py index cfc7cb5db9c64..269e85b71161a 100644 --- a/pandas/core/strings/string_array.py +++ b/pandas/core/strings/string_array.py @@ -16,6 +16,8 @@ class StringArrayMethods(ObjectArrayMethods): + _default_na_value = libmissing.NA + def _map(self, f, na_value=None, dtype=None): from pandas.arrays import BooleanArray, IntegerArray, StringArray from pandas.core.arrays.string_ import StringDtype @@ -23,14 +25,12 @@ def _map(self, f, na_value=None, dtype=None): if dtype is None: dtype = StringDtype() if na_value is None: - na_value = libmissing.NA + na_value = self._default_na_value arr = self._array mask = isna(arr) arr = np.asarray(arr) - if na_value is None: - na_value = libmissing.NA if is_integer_dtype(dtype) or is_bool_dtype(dtype): constructor: Union[Type[IntegerArray], Type[BooleanArray]] diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f3983a58719aa..df0e775e8750b 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3643,3 +3643,10 @@ def test_cat_different_classes(klass): result = s.str.cat(klass(["x", 
"y", "z"])) expected = pd.Series(["ax", "by", "cz"]) tm.assert_series_equal(result, expected) + + +def test_str_get_stringarray_multiple_nans(): + s = pd.Series(pd.array(["a", "ab", pd.NA, "abc"])) + result = s.str.get(2) + expected = pd.Series(pd.array([pd.NA, pd.NA, pd.NA, "c"])) + tm.assert_series_equal(result, expected) From 75831b3e112ea9b24ca98b4e31a08abc77115b0f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 17 Sep 2020 06:57:57 -0500 Subject: [PATCH 08/24] fixup --- pandas/core/strings/accessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 66bedd15b91b4..432f6dcd7fb9d 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1203,7 +1203,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): - If True, assumes the passed-in pattern is a regular expression. - If False, treats the pattern as a literal string - Cannot be set to False if `pat` is a compiled regex or `repl` is - a callable. + a callable. .. versionadded:: 0.23.0 @@ -1217,7 +1217,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): ------ ValueError * if `regex` is False and `repl` is a callable or `pat` is a compiled - regex + regex * if `pat` is a compiled regex and `case` or `flags` is set Notes From 1cf54cc513e53f2ff629e86da4c6bba61520a073 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 17 Sep 2020 08:26:31 -0500 Subject: [PATCH 09/24] doctest --- ci/code_checks.sh | 2 +- pandas/core/strings/accessor.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 54aa830379c07..b8f6bd53d4a59 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -335,7 +335,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests strings.py' ; echo $MSG - pytest -q --doctest-modules pandas/core/strings.py + pytest -q --doctest-modules pandas/core/strings/ RET=$(($RET + $?)) ; echo $MSG "DONE" # Directories diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 432f6dcd7fb9d..ae05f878aee84 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -978,7 +978,7 @@ def join(self, sep): @forbid_nonstring_types(["bytes"]) def contains(self, pat, case=True, flags=0, na=None, regex=True): - """ + r""" Test if pattern or regex is contained within a string of a Series or Index. Return boolean Series or Index based on whether a given pattern or regex is @@ -1921,7 +1921,7 @@ def translate(self, table): @forbid_nonstring_types(["bytes"]) def count(self, pat, flags=0): - """ + r""" Count occurrences of pattern in each string of the Series/Index. This function is used to count the number of times a particular regex From fc81ebe4503bfcbc02f1e12a01cfe80891dad903 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 17 Sep 2020 08:27:27 -0500 Subject: [PATCH 10/24] docstrings --- pandas/core/strings/accessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index ae05f878aee84..18d0a8ef41e36 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -992,7 +992,7 @@ def contains(self, pat, case=True, flags=0, na=None, regex=True): If True, case sensitive. flags : int, default 0 (no flags) Flags to pass through to the re module, e.g. re.IGNORECASE. - na : scalar, optional. 
+ na : scalar, optional Fill value for missing values. The default depends on dtype of the array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, ``pandas.NA`` is used. @@ -1117,7 +1117,7 @@ def match(self, pat, case=True, flags=0, na=None): If True, case sensitive. flags : int, default 0 (no flags) Regex module flags, e.g. re.IGNORECASE. - na : scalar, optional. + na : scalar, optional Fill value for missing values. The default depends on dtype of the array. For object-dtype, ``numpy.nan`` is used. For ``StringDtype``, ``pandas.NA`` is used. From 6be1af6b02cbfa5953a0e486426290ccbd49ca25 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 17 Sep 2020 08:40:23 -0500 Subject: [PATCH 11/24] typing --- pandas/core/strings/accessor.py | 5 +++-- pandas/core/strings/object_array.py | 13 +++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 18d0a8ef41e36..2c436740624b5 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2,7 +2,7 @@ from functools import wraps import operator import re -from typing import Dict, List +from typing import Dict, List, Optional import warnings import numpy as np @@ -312,6 +312,7 @@ def cons_row(x): else: index = self._orig.index # This is a mess. + dtype: Optional[str] if self._is_string and returns_string: dtype = "string" else: @@ -369,7 +370,7 @@ def _get_series_list(self, others): or (isinstance(x, np.ndarray) and x.ndim == 1) for x in others ): - los = [] + los: List[Series] = [] while others: # iterate through list and append each element los = los + self._get_series_list(others.pop(0)) return los diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index cf6922880ddd2..ec07c0b1dc534 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,6 +1,6 @@ import re import textwrap -from typing import Pattern, Union +from typing import Pattern, Set, Union import unicodedata import warnings @@ -341,22 +341,23 @@ def get_dummies(self, sep="|"): from pandas import Series arr = Series(self._array).fillna("") + assert isinstance(arr, Series) # fillna returns Optional[Series] try: arr = sep + arr + sep except TypeError: arr = sep + arr.astype(str) + sep - tags = set() + tags: Set[str] = set() for ts in Series(arr).str.split(sep): tags.update(ts) - tags = sorted(tags - {""}) + tags2 = sorted(tags - {""}) - dummies = np.empty((len(arr), len(tags)), dtype=np.int64) + dummies = np.empty((len(arr), len(tags2)), dtype=np.int64) - for i, t in enumerate(tags): + for i, t in enumerate(tags2): pat = sep + t + sep dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) - return dummies, tags + return dummies, tags2 def upper(self): return self._map(lambda x: x.upper()) From 95b33101827a692faa0c3e09fd586ec51adc8c04 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 17 Sep 2020 13:40:28 -0500 Subject: [PATCH 12/24] typing --- pandas/core/strings/categorical.py | 7 ++++++- pandas/core/strings/object_array.py | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings/categorical.py b/pandas/core/strings/categorical.py index 3be3825032988..f7d7e564ca43e 100644 --- a/pandas/core/strings/categorical.py +++ b/pandas/core/strings/categorical.py @@ -1,3 +1,5 @@ +from typing import cast + import numpy as np from pandas.core.algorithms import take_1d @@ -6,7 +8,10 @@ class CategoricalStringMethods(ObjectArrayMethods): def _map(self, 
f, na_value=np.nan, dtype=np.dtype(object)): - arr = self._array # Categorical + from pandas import Categorical + + arr = cast(Categorical, self._array) + categories = arr.categories codes = arr.codes result = ObjectArrayMethods(categories)._map(f, na_value, dtype) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index ec07c0b1dc534..2219a4af3f51b 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,6 +1,6 @@ import re import textwrap -from typing import Pattern, Set, Union +from typing import Pattern, Set, Union, cast import unicodedata import warnings @@ -341,11 +341,12 @@ def get_dummies(self, sep="|"): from pandas import Series arr = Series(self._array).fillna("") - assert isinstance(arr, Series) # fillna returns Optional[Series] try: arr = sep + arr + sep except TypeError: + arr = cast(Series, arr) arr = sep + arr.astype(str) + sep + arr = cast(Series, arr) tags: Set[str] = set() for ts in Series(arr).str.split(sep): From 20a870500a52902c6e30ed516e2556c941129305 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Sep 2020 06:35:34 -0500 Subject: [PATCH 13/24] wip --- pandas/core/arrays/string_.py | 5 +- pandas/core/strings/base.py | 99 ++++++++++++------------ pandas/core/strings/object_array.py | 114 ++++++++++++++-------------- 3 files changed, 106 insertions(+), 112 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 46d0fedbe8f39..72d5e8cc862af 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -16,7 +16,7 @@ from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna -from pandas.core.strings.string_array import StringArrayMethods +from pandas.core.strings.object_array import ObjectStringArray if TYPE_CHECKING: import pyarrow # noqa: F401 @@ -97,6 +97,9 @@ def __from_arrow__( return StringArray._concat_same_type(results) +# Uhmmm, this is going to have to dispatch on the dtype if we want +# to share StringArray between python / arrow. + class StringArray(PandasArray): """ Extension array for string data. diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index d21e8f78ce9ba..d16bec1053f95 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -5,8 +5,6 @@ from pandas._typing import Scalar -from pandas.core.arrays.base import ExtensionArray - class BaseStringArrayMethods(abc.ABC): """ @@ -23,45 +21,42 @@ class BaseStringArrayMethods(abc.ABC): See :ref:`Series.str` for the docstring of each method. 
""" - def __init__(self, array: ExtensionArray): - self._array = array - - def __getitem__(self, key): + def _str_getitem(self, key): if isinstance(key, slice): return self.slice(start=key.start, stop=key.stop, step=key.step) else: return self.get(key) @abc.abstractmethod - def count(self, pat, flags=0): + def _str_count(self, pat, flags=0): pass @abc.abstractmethod - def pad(self, width, side="left", fillchar=" "): + def _str_pad(self, width, side="left", fillchar=" "): pass @abc.abstractmethod - def contains(self, pat, case=True, flags=0, na=None, regex=True): + def _str_contains(self, pat, case=True, flags=0, na=None, regex=True): pass @abc.abstractmethod - def startswith(self, pat, na=None): + def _str_startswith(self, pat, na=None): pass @abc.abstractmethod - def endswith(self, pat, na=None): + def _str_endswith(self, pat, na=None): pass @abc.abstractmethod - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): + def _str_replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): pass @abc.abstractmethod - def repeat(self, repeats): + def _str_repeat(self, repeats): pass @abc.abstractmethod - def match( + def _str_match( self, pat: Union[str, Pattern], case: bool = True, @@ -71,7 +66,7 @@ def match( pass @abc.abstractmethod - def fullmatch( + def _str_fullmatch( self, pat: Union[str, Pattern], case: bool = True, @@ -81,149 +76,149 @@ def fullmatch( pass @abc.abstractmethod - def encode(self, encoding, errors="strict"): + def _str_encode(self, encoding, errors="strict"): pass @abc.abstractmethod - def find(self, sub, start=0, end=None): + def _str_find(self, sub, start=0, end=None): pass @abc.abstractmethod - def rfind(self, sub, start=0, end=None): + def _str_rfind(self, sub, start=0, end=None): pass @abc.abstractmethod - def findall(self, pat, flags=0): + def _str_findall(self, pat, flags=0): pass @abc.abstractmethod - def get(self, i): + def _str_get(self, i): pass @abc.abstractmethod - def index(self, sub, start=0, end=None): + def _str_index(self, sub, start=0, end=None): pass @abc.abstractmethod - def rindex(self, sub, start=0, end=None): + def _str_rindex(self, sub, start=0, end=None): pass @abc.abstractmethod - def join(self, sep): + def _str_join(self, sep): pass @abc.abstractmethod - def partition(self, sep, expand): + def _str_partition(self, sep, expand): pass @abc.abstractmethod - def rpartition(self, sep, expand): + def _str_rpartition(self, sep, expand): pass @abc.abstractmethod - def len(self): + def _str_len(self): pass @abc.abstractmethod - def slice(self, start=None, stop=None, step=None): + def _str_slice(self, start=None, stop=None, step=None): pass @abc.abstractmethod - def slice_replace(self, start=None, stop=None, repl=None): + def _str_slice_replace(self, start=None, stop=None, repl=None): pass @abc.abstractmethod - def translate(self, table): + def _str_translate(self, table): pass @abc.abstractmethod - def wrap(self, width, **kwargs): + def _str_wrap(self, width, **kwargs): pass @abc.abstractmethod - def get_dummies(self, sep="|"): + def _str_get_dummies(self, sep="|"): pass @abc.abstractmethod - def isalnum(self): + def _str_isalnum(self): pass @abc.abstractmethod - def isalpha(self): + def _str_isalpha(self): pass @abc.abstractmethod - def isdecimal(self): + def _str_isdecimal(self): pass @abc.abstractmethod - def isdigit(self): + def _str_isdigit(self): pass @abc.abstractmethod - def islower(self): + def _str_islower(self): pass @abc.abstractmethod - def isnumeric(self): + def _str_isnumeric(self): pass @abc.abstractmethod - def 
isspace(self): + def _str_isspace(self): pass @abc.abstractmethod - def istitle(self): + def _str_istitle(self): pass @abc.abstractmethod - def isupper(self): + def _str_isupper(self): pass @abc.abstractmethod - def capitalize(self): + def _str_capitalize(self): pass @abc.abstractmethod - def casefold(self): + def _str_casefold(self): pass @abc.abstractmethod - def title(self): + def _str_title(self): pass @abc.abstractmethod - def swapcase(self): + def _str_swapcase(self): pass @abc.abstractmethod - def lower(self): + def _str_lower(self): pass @abc.abstractmethod - def upper(self): + def _str_upper(self): pass @abc.abstractmethod - def normalize(self, form): + def _str_normalize(self, form): pass @abc.abstractmethod - def strip(self, to_strip=None): + def _str_strip(self, to_strip=None): pass @abc.abstractmethod - def lstrip(self, to_strip=None): + def _str_lstrip(self, to_strip=None): pass @abc.abstractmethod - def rstrip(self, to_strip=None): + def _str_rstrip(self, to_strip=None): pass @abc.abstractmethod - def split(self, pat=None, n=-1, expand=False): + def _str_split(self, pat=None, n=-1, expand=False): pass @abc.abstractmethod - def rsplit(self, pat=None, n=-1): + def _str_rsplit(self, pat=None, n=-1): pass diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 2219a4af3f51b..c97943da41cbf 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -19,10 +19,10 @@ from pandas.core.strings.base import BaseStringArrayMethods -class ObjectArrayMethods(BaseStringArrayMethods): +class ObjectStringArray(PandasArray): _default_na_value = np.nan - def _map(self, f, na_value=None, dtype=None): + def _str_map(self, f, na_value=None, dtype=None): arr = self._array # object-dtype ndarray. if dtype is None: dtype = np.dtype("object") @@ -52,7 +52,7 @@ def _map(self, f, na_value=None, dtype=None): # FIXME: this should be totally avoidable raise e - def g(x): + def _str_g(x): # This type of fallback behavior can be removed once # we remove object-dtype .str accessor. 
try: @@ -67,18 +67,18 @@ def g(x): result = lib.maybe_convert_objects(result) return result - def __getitem__(self, key): + def _str_getitem(self, key): if isinstance(key, slice): return self.slice(start=key.start, stop=key.stop, step=key.step) else: return self.get(key) - def count(self, pat, flags=0): + def _str_count(self, pat, flags=0): regex = re.compile(pat, flags=flags) f = lambda x: len(regex.findall(x)) return self._map(f, dtype="int64") - def pad(self, width, side="left", fillchar=" "): + def _str_pad(self, width, side="left", fillchar=" "): if side == "left": f = lambda x: x.rjust(width, fillchar) elif side == "right": @@ -89,7 +89,7 @@ def pad(self, width, side="left", fillchar=" "): raise ValueError("Invalid side") return self._map(f) - def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex=True): if regex: if not case: flags |= re.IGNORECASE @@ -113,15 +113,15 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): f = lambda x: upper_pat in x.upper() return self._map(f, na, dtype=np.dtype("bool")) - def startswith(self, pat, na=None): + def _str_startswith(self, pat, na=None): f = lambda x: x.startswith(pat) return self._map(f, na_value=na, dtype=np.dtype(bool)) - def endswith(self, pat, na=None): + def _str_endswith(self, pat, na=None): f = lambda x: x.endswith(pat) return self._map(f, na_value=na, dtype=np.dtype(bool)) - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): + def _str_replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): # Check whether repl is valid (GH 13438, GH 15055) if not (isinstance(repl, str) or callable(repl)): raise TypeError("repl must be a string or callable") @@ -160,10 +160,10 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): return self._map(f, dtype=str) - def repeat(self, repeats): + def _str_repeat(self, repeats): if is_scalar(repeats): - def scalar_rep(x): + def _str_scalar_rep(x): try: return bytes.__mul__(x, repeats) except TypeError: @@ -173,7 +173,7 @@ def scalar_rep(x): else: from pandas.core.arrays.string_ import StringArray - def rep(x, r): + def _str_rep(x, r): if x is libmissing.NA: return x try: @@ -188,7 +188,7 @@ def rep(x, r): result = StringArray._from_sequence(result) return result - def match( + def _str_match( self, pat: Union[str, Pattern], case: bool = True, @@ -203,7 +203,7 @@ def match( f = lambda x: regex.match(x) is not None return self._map(f, na_value=na, dtype=np.dtype(bool)) - def fullmatch( + def _str_fullmatch( self, pat: Union[str, Pattern], case: bool = True, @@ -218,17 +218,17 @@ def fullmatch( f = lambda x: regex.fullmatch(x) is not None return self._map(f, na_value=na, dtype=np.dtype(bool)) - def encode(self, encoding, errors="strict"): + def _str_encode(self, encoding, errors="strict"): f = lambda x: x.encode(encoding, errors=errors) return self._map(f, dtype=object) - def find(self, sub, start=0, end=None): + def _str_find(self, sub, start=0, end=None): return self._find(sub, start, end, side="left") - def rfind(self, sub, start=0, end=None): + def _str_rfind(self, sub, start=0, end=None): return self._find(sub, start, end, side="right") - def _find(self, sub, start, end, side): + def _str__find(self, sub, start, end, side): if side == "left": method = "find" elif side == "right": @@ -242,12 +242,12 @@ def _find(self, sub, start, end, side): f = lambda x: getattr(x, method)(sub, start, end) return self._map(f, dtype="int64") - def findall(self, pat, flags=0): + 
def _str_findall(self, pat, flags=0): regex = re.compile(pat, flags=flags) return self._map(regex.findall, dtype="object") - def get(self, i): - def f(x): + def _str_get(self, i): + def _str_f(x): if isinstance(x, dict): return x.get(i) elif len(x) > i >= -len(x): @@ -256,42 +256,42 @@ def f(x): return self._map(f) - def index(self, sub, start=0, end=None): + def _str_index(self, sub, start=0, end=None): if end: f = lambda x: x.index(sub, start, end) else: f = lambda x: x.index(sub, start, end) return self._map(f, dtype="int64") - def rindex(self, sub, start=0, end=None): + def _str_rindex(self, sub, start=0, end=None): if end: f = lambda x: x.rindex(sub, start, end) else: f = lambda x: x.rindex(sub, start, end) return self._map(f, dtype="int64") - def join(self, sep): + def _str_join(self, sep): return self._map(sep.join) - def partition(self, sep, expand): + def _str_partition(self, sep, expand): result = self._map(lambda x: x.partition(sep), dtype="object") return result - def rpartition(self, sep, expand): + def _str_rpartition(self, sep, expand): return self._map(lambda x: x.rpartition(sep), dtype="object") - def len(self): + def _str_len(self): return self._map(len, dtype="int64") - def slice(self, start=None, stop=None, step=None): + def _str_slice(self, start=None, stop=None, step=None): obj = slice(start, stop, step) return self._map(lambda x: x[obj]) - def slice_replace(self, start=None, stop=None, repl=None): + def _str_slice_replace(self, start=None, stop=None, repl=None): if repl is None: repl = "" - def f(x): + def _str_f(x): if x[start:stop] == "": local_stop = start else: @@ -306,7 +306,7 @@ def f(x): return self._map(f) - def split(self, pat=None, n=-1, expand=False): + def _str_split(self, pat=None, n=-1, expand=False): if pat is None: if n is None or n == 0: n = -1 @@ -323,21 +323,21 @@ def split(self, pat=None, n=-1, expand=False): f = lambda x: regex.split(x, maxsplit=n) return self._map(f, dtype=object) - def rsplit(self, pat=None, n=-1): + def _str_rsplit(self, pat=None, n=-1): if n is None or n == 0: n = -1 f = lambda x: x.rsplit(pat, n) return self._map(f, dtype="object") - def translate(self, table): + def _str_translate(self, table): return self._map(lambda x: x.translate(table)) - def wrap(self, width, **kwargs): + def _str_wrap(self, width, **kwargs): kwargs["width"] = width tw = textwrap.TextWrapper(**kwargs) return self._map(lambda s: "\n".join(tw.wrap(s))) - def get_dummies(self, sep="|"): + def _str_get_dummies(self, sep="|"): from pandas import Series arr = Series(self._array).fillna("") @@ -360,64 +360,60 @@ def get_dummies(self, sep="|"): dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) return dummies, tags2 - def upper(self): + def _str_upper(self): return self._map(lambda x: x.upper()) - def isalnum(self): + def _str_isalnum(self): return self._map(str.isalnum, dtype="bool") - def isalpha(self): + def _str_isalpha(self): return self._map(str.isalpha, dtype="bool") - def isdecimal(self): + def _str_isdecimal(self): return self._map(str.isdecimal, dtype="bool") - def isdigit(self): + def _str_isdigit(self): return self._map(str.isdigit, dtype="bool") - def islower(self): + def _str_islower(self): return self._map(str.islower, dtype="bool") - def isnumeric(self): + def _str_isnumeric(self): return self._map(str.isnumeric, dtype="bool") - def isspace(self): + def _str_isspace(self): return self._map(str.isspace, dtype="bool") - def istitle(self): + def _str_istitle(self): return self._map(str.istitle, dtype="bool") - def isupper(self): + 
def _str_isupper(self): return self._map(str.isupper, dtype="bool") - def capitalize(self): + def _str_capitalize(self): return self._map(str.capitalize) - def casefold(self): + def _str_casefold(self): return self._map(str.casefold) - def title(self): + def _str_title(self): return self._map(str.title) - def swapcase(self): + def _str_swapcase(self): return self._map(str.swapcase) - def lower(self): + def _str_lower(self): return self._map(str.lower) - def normalize(self, form): + def _str_normalize(self, form): f = lambda x: unicodedata.normalize(form, x) return self._map(f) - def strip(self, to_strip=None): + def _str_strip(self, to_strip=None): return self._map(lambda x: x.strip(to_strip)) - def lstrip(self, to_strip=None): + def _str_lstrip(self, to_strip=None): return self._map(lambda x: x.lstrip(to_strip)) - def rstrip(self, to_strip=None): + def _str_rstrip(self, to_strip=None): return self._map(lambda x: x.rstrip(to_strip)) - - -class ObjectProxy(PandasArray): - _str = CachedAccessor("str", ObjectArrayMethods) From 38c16111a71e74a0557e5fc80a13652cfbfc2010 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 22 Sep 2020 06:44:28 -0500 Subject: [PATCH 14/24] wip --- pandas/core/arrays/categorical.py | 5 +- pandas/core/arrays/string_.py | 4 +- pandas/core/strings/accessor.py | 90 +++++++++---------- pandas/core/strings/categorical.py | 19 +++-- pandas/core/strings/object_array.py | 128 +++++++++++++++------------- 5 files changed, 129 insertions(+), 117 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index e94e91cd240c9..ef69d6565cfeb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -41,7 +41,7 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna from pandas.core import ops -from pandas.core.accessor import CachedAccessor, PandasDelegate, delegate_names +from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms from pandas.core.algorithms import factorize, get_data_algo, take_1d, unique1d from pandas.core.arrays._mixins import NDArrayBackedExtensionArray @@ -52,7 +52,6 @@ from pandas.core.missing import interpolate_2d from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.sorting import nargsort -from pandas.core.strings.categorical import CategoricalStringMethods from pandas.io.formats import console @@ -2336,8 +2335,6 @@ def replace(self, to_replace, value, inplace: bool = False): if not inplace: return cat - _str = CachedAccessor("_str", CategoricalStringMethods) - # The Series.cat accessor diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index c407cb764f23f..5bf9838f6b964 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -10,13 +10,12 @@ from pandas import compat from pandas.core import ops -from pandas.core.accessor import CachedAccessor from pandas.core.arrays import IntegerArray, PandasArray from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna -from pandas.core.strings.object_array import ObjectStringArray +# from pandas.core.strings.object_array import ObjectStringArray if TYPE_CHECKING: import pyarrow # noqa: F401 @@ -351,7 +350,6 @@ def _add_arithmetic_ops(cls): cls.__rmul__ = cls._create_arithmetic_method(ops.rmul) _create_comparison_method = _create_arithmetic_method - _str = 
CachedAccessor("_str", StringArrayMethods) StringArray._add_arithmetic_ops() diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 2c436740624b5..97de112114520 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1,6 +1,5 @@ import codecs from functools import wraps -import operator import re from typing import Dict, List, Optional import warnings @@ -27,7 +26,7 @@ from pandas.core.arrays.numpy_ import PandasArray from pandas.core.base import NoNewAttributesMixin -from pandas.core.strings.object_array import ObjectProxy +from pandas.core.strings.object_array import ObjectStringArray _shared_docs: Dict[str, str] = dict() _cpython_optimized_encoders = ( @@ -112,7 +111,7 @@ def wrapper(self, *args, **kwargs): def _map_and_wrap(name, docstring): @forbid_nonstring_types(["bytes"], name=name) def wrapper(self): - result = operator.methodcaller(name)(self._array._str) + result = getattr(self._array, f"_str_{name}")() return self._wrap_result(result) wrapper.__doc__ = docstring @@ -151,6 +150,8 @@ class StringMethods(NoNewAttributesMixin): def __init__(self, data): from pandas.core.arrays.string_ import StringDtype + from pandas.core.arrays import Categorical + from pandas.core.strings.categorical import CategoricalStringMethods self._inferred_dtype = self._validate(data) self._is_categorical = is_categorical_dtype(data.dtype) @@ -159,8 +160,9 @@ def __init__(self, data): if type(array) is PandasArray: # wrap in an object proxy to get the str methods. - # Alternatively, just add _str to PandasArray. - array = ObjectProxy(array._ndarray) + array = ObjectStringArray(array._ndarray) + elif isinstance(array, Categorical): + array = CategoricalStringMethods(array) self._array = array if isinstance(data, ABCSeries): @@ -226,7 +228,7 @@ def _validate(data): return inferred_dtype def __getitem__(self, key): - result = self._array._str[key] + result = self._array._str_getitem(key) return self._wrap_result(result) def __iter__(self): @@ -738,13 +740,13 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"}) @forbid_nonstring_types(["bytes"]) def split(self, pat=None, n=-1, expand=False): - result = self._array._str.split(pat, n, expand) + result = self._array._str_split(pat, n, expand) return self._wrap_result(result, returns_string=expand, expand=expand) @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"}) @forbid_nonstring_types(["bytes"]) def rsplit(self, pat=None, n=-1, expand=False): - result = self._array._str.rsplit(pat, n=n) + result = self._array._str_rsplit(pat, n=n) return self._wrap_result(result, expand=expand, returns_string=expand) _shared_docs[ @@ -840,7 +842,7 @@ def rsplit(self, pat=None, n=-1, expand=False): ) @forbid_nonstring_types(["bytes"]) def partition(self, sep=" ", expand=True): - result = self._array._str.partition(sep, expand) + result = self._array._str_partition(sep, expand) return self._wrap_result(result, expand=expand, returns_string=expand) @Appender( @@ -854,7 +856,7 @@ def partition(self, sep=" ", expand=True): ) @forbid_nonstring_types(["bytes"]) def rpartition(self, sep=" ", expand=True): - result = self._array._str.rpartition(sep, expand) + result = self._array._str_rpartition(sep, expand) return self._wrap_result(result, expand=expand, returns_string=expand) def get(self, i): @@ -908,7 +910,7 @@ def get(self, i): 5 None dtype: object """ - result = self._array._str.get(i) + result = 
self._array._str_get(i) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -974,7 +976,7 @@ def join(self, sep): 4 NaN dtype: object """ - result = self._array._str.join(sep) + result = self._array._str_join(sep) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1102,7 +1104,7 @@ def contains(self, pat, case=True, flags=0, na=None, regex=True): 4 False dtype: bool """ - result = self._array._str.contains(pat, case, flags, na, regex) + result = self._array._str_contains(pat, case, flags, na, regex) return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -1134,7 +1136,7 @@ def match(self, pat, case=True, flags=0, na=None): re.match. extract : Extract matched groups. """ - result = self._array._str.match(pat, case=case, flags=flags, na=na) + result = self._array._str_match(pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -1167,7 +1169,7 @@ def fullmatch(self, pat, case=True, flags=0, na=None): matches the regular expression. extract : Extract matched groups. """ - result = self._array._str.fullmatch(pat, case=case, flags=flags, na=na) + result = self._array._str_fullmatch(pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -1289,7 +1291,7 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): 2 NaN dtype: object """ - result = self._array._str.replace( + result = self._array._str_replace( pat, repl, n=n, case=case, flags=flags, regex=regex ) return self._wrap_result(result) @@ -1335,7 +1337,7 @@ def repeat(self, repeats): 2 ccc dtype: object """ - result = self._array._str.repeat(repeats) + result = self._array._str_repeat(repeats) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1403,7 +1405,7 @@ def pad(self, width, side="left", fillchar=" "): msg = f"width must be of integer type, not {type(width).__name__}" raise TypeError(msg) - result = self._array._str.pad(width, side=side, fillchar=fillchar) + result = self._array._str_pad(width, side=side, fillchar=fillchar) return self._wrap_result(result) _shared_docs[ @@ -1577,7 +1579,7 @@ def slice(self, start=None, stop=None, step=None): 2 cm dtype: object """ - result = self._array._str.slice(start, stop, step) + result = self._array._str_slice(start, stop, step) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1653,7 +1655,7 @@ def slice_replace(self, start=None, stop=None, repl=None): 4 aXde dtype: object """ - result = self._array._str.slice_replace(start, stop, repl) + result = self._array._str_slice_replace(start, stop, repl) return self._wrap_result(result) def decode(self, encoding, errors="strict"): @@ -1681,7 +1683,7 @@ def decode(self, encoding, errors="strict"): f = lambda x: decoder(x, errors)[0] arr = self._array # assert isinstance(arr, (StringArray,)) - result = arr._str._map(f) + result = arr._str_map(f) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1700,7 +1702,7 @@ def encode(self, encoding, errors="strict"): ------- encoded : Series/Index of objects """ - result = self._array._str.encode(encoding, errors) + result = self._array._str_encode(encoding, errors) return self._wrap_result(result, returns_string=False) _shared_docs[ @@ -1776,7 +1778,7 @@ def encode(self, encoding, errors="strict"): ) @forbid_nonstring_types(["bytes"]) def strip(self, to_strip=None): - 
result = self._array._str.strip(to_strip) + result = self._array._str_strip(to_strip) return self._wrap_result(result) @Appender( @@ -1785,7 +1787,7 @@ def strip(self, to_strip=None): ) @forbid_nonstring_types(["bytes"]) def lstrip(self, to_strip=None): - result = self._array._str.lstrip(to_strip) + result = self._array._str_lstrip(to_strip) return self._wrap_result(result) @Appender( @@ -1794,7 +1796,7 @@ def lstrip(self, to_strip=None): ) @forbid_nonstring_types(["bytes"]) def rstrip(self, to_strip=None): - result = self._array._str.rstrip(to_strip) + result = self._array._str_rstrip(to_strip) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1853,7 +1855,7 @@ def wrap(self, width, **kwargs): 1 another line\nto be\nwrapped dtype: object """ - result = self._array._str.wrap(width, **kwargs) + result = self._array._str_wrap(width, **kwargs) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1895,7 +1897,7 @@ def get_dummies(self, sep="|"): """ # we need to cast to Series of strings as only that has all # methods available for making the dummies... - result, name = self._array._str.get_dummies(sep) + result, name = self._array._str_get_dummies(sep) return self._wrap_result(result, name=name, expand=True, returns_string=False,) @forbid_nonstring_types(["bytes"]) @@ -1917,7 +1919,7 @@ def translate(self, table): ------- Series or Index """ - result = self._array._str.translate(table) + result = self._array._str_translate(table) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) @@ -1985,7 +1987,7 @@ def count(self, pat, flags=0): >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') Int64Index([0, 0, 2, 1], dtype='int64') """ - result = self._array._str.count(pat, flags) + result = self._array._str_count(pat, flags) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2042,7 +2044,7 @@ def startswith(self, pat, na=None): 3 False dtype: bool """ - result = self._array._str.startswith(pat, na=na) + result = self._array._str_startswith(pat, na=na) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2099,7 +2101,7 @@ def endswith(self, pat, na=None): 3 False dtype: bool """ - result = self._array._str.endswith(pat, na=na) + result = self._array._str_endswith(pat, na=na) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2192,7 +2194,7 @@ def findall(self, pat, flags=0): 2 [b, b] dtype: object """ - result = self._array._str.findall(pat, flags) + result = self._array._str_findall(pat, flags) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2399,7 +2401,7 @@ def find(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str.find(sub, start, end) + result = self._array._str_find(sub, start, end) return self._wrap_result(result, returns_string=False) @Appender( @@ -2416,7 +2418,7 @@ def rfind(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str.rfind(sub, start=start, end=end) + result = self._array._str_rfind(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) @forbid_nonstring_types(["bytes"]) @@ -2436,7 +2438,7 @@ def normalize(self, form): ------- normalized : Series/Index of objects """ - result = self._array._str.normalize(form) + result = self._array._str_normalize(form) 
return self._wrap_result(result) _shared_docs[ @@ -2483,7 +2485,7 @@ def index(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str.index(sub, start=start, end=end) + result = self._array._str_index(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) @Appender( @@ -2501,7 +2503,7 @@ def rindex(self, sub, start=0, end=None): msg = f"expected a string object, not {type(sub).__name__}" raise TypeError(msg) - result = self._array._str.rindex(sub, start=start, end=end) + result = self._array._str_rindex(sub, start=start, end=end) return self._wrap_result(result, returns_string=False) def len(self): @@ -2550,7 +2552,7 @@ def len(self): 5 3.0 dtype: float64 """ - result = self._array._str.len() + result = self._array._str_len() return self._wrap_result(result, returns_string=False) _shared_docs[ @@ -2644,37 +2646,37 @@ def len(self): @Appender(_shared_docs["casemethods"] % _doc_args["lower"]) @forbid_nonstring_types(["bytes"]) def lower(self): - result = self._array._str.lower() + result = self._array._str_lower() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["upper"]) @forbid_nonstring_types(["bytes"]) def upper(self): - result = self._array._str.upper() + result = self._array._str_upper() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["title"]) @forbid_nonstring_types(["bytes"]) def title(self): - result = self._array._str.title() + result = self._array._str_title() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["capitalize"]) @forbid_nonstring_types(["bytes"]) def capitalize(self): - result = self._array._str.capitalize() + result = self._array._str_capitalize() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["swapcase"]) @forbid_nonstring_types(["bytes"]) def swapcase(self): - result = self._array._str.swapcase() + result = self._array._str_swapcase() return self._wrap_result(result) @Appender(_shared_docs["casemethods"] % _doc_args["casefold"]) @forbid_nonstring_types(["bytes"]) def casefold(self): - result = self._array._str.casefold() + result = self._array._str_casefold() return self._wrap_result(result) _shared_docs[ diff --git a/pandas/core/strings/categorical.py b/pandas/core/strings/categorical.py index f7d7e564ca43e..e2a2cc731c0c3 100644 --- a/pandas/core/strings/categorical.py +++ b/pandas/core/strings/categorical.py @@ -3,10 +3,19 @@ import numpy as np from pandas.core.algorithms import take_1d -from pandas.core.strings.object_array import ObjectArrayMethods +from pandas.core.strings.object_array import ObjectStringArray +from pandas.core.arrays import Categorical -class CategoricalStringMethods(ObjectArrayMethods): +class CategoricalStringMethods(Categorical, ObjectStringArray): + """ + Extension array implementing _str methods for Categorical. + + We implement this just to avoid mucking up the inheritance chain + for Categorical. This inherits from ObjectStringArray just for + convenience. + """ + # Probably lots of room for improvement here. 
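A rough standalone sketch of the optimization ``_map`` performs below: apply the string function once per unique category, then rebuild the full-length result from the codes, filling missing entries (code ``-1``) with the NA value, the way ``take_1d`` does (plain NumPy stand-ins; the names are illustrative):

import numpy as np
import pandas as pd

cat = pd.Categorical(["low", "high", "low", None, "high"])

# Map over the (few) categories instead of every element.
mapped = np.array([s.upper() for s in cat.categories], dtype=object)

# Rebuild by taking with the codes; -1 marks a missing entry.
codes = cat.codes
result = np.empty(len(codes), dtype=object)
mask = codes == -1
result[~mask] = mapped[codes[~mask]]
result[mask] = np.nan
# result: ['LOW', 'HIGH', 'LOW', nan, 'HIGH']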
def _map(self, f, na_value=np.nan, dtype=np.dtype(object)): from pandas import Categorical @@ -14,9 +23,9 @@ def _map(self, f, na_value=np.nan, dtype=np.dtype(object)): categories = arr.categories codes = arr.codes - result = ObjectArrayMethods(categories)._map(f, na_value, dtype) + result = ObjectStringArray(categories)._map(f, na_value, dtype) return take_1d(result, codes, fill_value=na_value) - def get_dummies(self, sep="|"): + def _str_get_dummies(self, sep="|"): # sep may not be in categories. Just bail on this. - return ObjectArrayMethods(self._array.astype(str)).get_dummies(sep) + return ObjectStringArray(self.astype(str))._str_get_dummies(sep) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index c97943da41cbf..c1851993e24a7 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -14,16 +14,22 @@ from pandas.core.dtypes.common import is_re, is_scalar from pandas.core.dtypes.missing import isna -from pandas.core.accessor import CachedAccessor from pandas.core.arrays.numpy_ import PandasArray from pandas.core.strings.base import BaseStringArrayMethods -class ObjectStringArray(PandasArray): +class ObjectStringArray(PandasArray, BaseStringArrayMethods): + """ + PandasArray subclass with _str methods. + + We don't want to put the _str methods on all PandasArrays + so we use this subclass with the BaseStringArrayMethods + mixin. + """ _default_na_value = np.nan def _str_map(self, f, na_value=None, dtype=None): - arr = self._array # object-dtype ndarray. + arr = self if dtype is None: dtype = np.dtype("object") if na_value is None: @@ -52,7 +58,7 @@ def _str_map(self, f, na_value=None, dtype=None): # FIXME: this should be totally avoidable raise e - def _str_g(x): + def g(x): # This type of fallback behavior can be removed once # we remove object-dtype .str accessor. 
try: @@ -60,7 +66,7 @@ def _str_g(x): except (TypeError, AttributeError): return na_value - return self._map(g, na_value=na_value, dtype=dtype) + return self._str_map(g, na_value=na_value, dtype=dtype) if na_value is not np.nan: np.putmask(result, mask, na_value) if result.dtype == object: @@ -69,14 +75,14 @@ def _str_g(x): def _str_getitem(self, key): if isinstance(key, slice): - return self.slice(start=key.start, stop=key.stop, step=key.step) + return self._str_slice(start=key.start, stop=key.stop, step=key.step) else: - return self.get(key) + return self._str_get(key) def _str_count(self, pat, flags=0): regex = re.compile(pat, flags=flags) f = lambda x: len(regex.findall(x)) - return self._map(f, dtype="int64") + return self._str_map(f, dtype="int64") def _str_pad(self, width, side="left", fillchar=" "): if side == "left": @@ -87,7 +93,7 @@ def _str_pad(self, width, side="left", fillchar=" "): f = lambda x: x.center(width, fillchar) else: # pragma: no cover raise ValueError("Invalid side") - return self._map(f) + return self._str_map(f) def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex=True): if regex: @@ -111,15 +117,15 @@ def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex=True): else: upper_pat = pat.upper() f = lambda x: upper_pat in x.upper() - return self._map(f, na, dtype=np.dtype("bool")) + return self._str_map(f, na, dtype=np.dtype("bool")) def _str_startswith(self, pat, na=None): f = lambda x: x.startswith(pat) - return self._map(f, na_value=na, dtype=np.dtype(bool)) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_endswith(self, pat, na=None): f = lambda x: x.endswith(pat) - return self._map(f, na_value=na, dtype=np.dtype(bool)) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): # Check whether repl is valid (GH 13438, GH 15055) @@ -158,22 +164,22 @@ def _str_replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): raise ValueError("Cannot use a callable replacement when regex=False") f = lambda x: x.replace(pat, repl, n) - return self._map(f, dtype=str) + return self._str_map(f, dtype=str) def _str_repeat(self, repeats): if is_scalar(repeats): - def _str_scalar_rep(x): + def scalar_rep(x): try: return bytes.__mul__(x, repeats) except TypeError: return str.__mul__(x, repeats) - return self._map(scalar_rep, dtype=str) + return self._str_map(scalar_rep, dtype=str) else: from pandas.core.arrays.string_ import StringArray - def _str_rep(x, r): + def rep(x, r): if x is libmissing.NA: return x try: @@ -182,8 +188,8 @@ def _str_rep(x, r): return str.__mul__(x, r) repeats = np.asarray(repeats, dtype=object) - result = libops.vec_binop(np.asarray(self._array), repeats, rep) - if isinstance(self._array, StringArray): + result = libops.vec_binop(np.asarray(self), repeats, rep) + if isinstance(self, StringArray): # Not going through map, so we have to do this here. 
result = StringArray._from_sequence(result) return result @@ -201,7 +207,7 @@ def _str_match( regex = re.compile(pat, flags=flags) f = lambda x: regex.match(x) is not None - return self._map(f, na_value=na, dtype=np.dtype(bool)) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_fullmatch( self, @@ -216,19 +222,19 @@ def _str_fullmatch( regex = re.compile(pat, flags=flags) f = lambda x: regex.fullmatch(x) is not None - return self._map(f, na_value=na, dtype=np.dtype(bool)) + return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_encode(self, encoding, errors="strict"): f = lambda x: x.encode(encoding, errors=errors) - return self._map(f, dtype=object) + return self._str_map(f, dtype=object) def _str_find(self, sub, start=0, end=None): - return self._find(sub, start, end, side="left") + return self._str_find_(sub, start, end, side="left") def _str_rfind(self, sub, start=0, end=None): - return self._find(sub, start, end, side="right") + return self._str_find_(sub, start, end, side="right") - def _str__find(self, sub, start, end, side): + def _str_find_(self, sub, start, end, side): if side == "left": method = "find" elif side == "right": @@ -240,58 +246,58 @@ def _str__find(self, sub, start, end, side): f = lambda x: getattr(x, method)(sub, start) else: f = lambda x: getattr(x, method)(sub, start, end) - return self._map(f, dtype="int64") + return self._str_map(f, dtype="int64") def _str_findall(self, pat, flags=0): regex = re.compile(pat, flags=flags) - return self._map(regex.findall, dtype="object") + return self._str_map(regex.findall, dtype="object") def _str_get(self, i): - def _str_f(x): + def f(x): if isinstance(x, dict): return x.get(i) elif len(x) > i >= -len(x): return x[i] return self._default_na_value - return self._map(f) + return self._str_map(f) def _str_index(self, sub, start=0, end=None): if end: f = lambda x: x.index(sub, start, end) else: f = lambda x: x.index(sub, start, end) - return self._map(f, dtype="int64") + return self._str_map(f, dtype="int64") def _str_rindex(self, sub, start=0, end=None): if end: f = lambda x: x.rindex(sub, start, end) else: f = lambda x: x.rindex(sub, start, end) - return self._map(f, dtype="int64") + return self._str_map(f, dtype="int64") def _str_join(self, sep): - return self._map(sep.join) + return self._str_map(sep.join) def _str_partition(self, sep, expand): - result = self._map(lambda x: x.partition(sep), dtype="object") + result = self._str_map(lambda x: x.partition(sep), dtype="object") return result def _str_rpartition(self, sep, expand): - return self._map(lambda x: x.rpartition(sep), dtype="object") + return self._str_map(lambda x: x.rpartition(sep), dtype="object") def _str_len(self): - return self._map(len, dtype="int64") + return self._str_map(len, dtype="int64") def _str_slice(self, start=None, stop=None, step=None): obj = slice(start, stop, step) - return self._map(lambda x: x[obj]) + return self._str_map(lambda x: x[obj]) def _str_slice_replace(self, start=None, stop=None, repl=None): if repl is None: repl = "" - def _str_f(x): + def f(x): if x[start:stop] == "": local_stop = start else: @@ -304,7 +310,7 @@ def _str_f(x): y += x[local_stop:] return y - return self._map(f) + return self._str_map(f) def _str_split(self, pat=None, n=-1, expand=False): if pat is None: @@ -321,26 +327,26 @@ def _str_split(self, pat=None, n=-1, expand=False): n = 0 regex = re.compile(pat) f = lambda x: regex.split(x, maxsplit=n) - return self._map(f, dtype=object) + return self._str_map(f, dtype=object) def 
_str_rsplit(self, pat=None, n=-1): if n is None or n == 0: n = -1 f = lambda x: x.rsplit(pat, n) - return self._map(f, dtype="object") + return self._str_map(f, dtype="object") def _str_translate(self, table): - return self._map(lambda x: x.translate(table)) + return self._str_map(lambda x: x.translate(table)) def _str_wrap(self, width, **kwargs): kwargs["width"] = width tw = textwrap.TextWrapper(**kwargs) - return self._map(lambda s: "\n".join(tw.wrap(s))) + return self._str_map(lambda s: "\n".join(tw.wrap(s))) def _str_get_dummies(self, sep="|"): from pandas import Series - arr = Series(self._array).fillna("") + arr = Series(self).fillna("") try: arr = sep + arr + sep except TypeError: @@ -361,59 +367,59 @@ def _str_get_dummies(self, sep="|"): return dummies, tags2 def _str_upper(self): - return self._map(lambda x: x.upper()) + return self._str_map(lambda x: x.upper()) def _str_isalnum(self): - return self._map(str.isalnum, dtype="bool") + return self._str_map(str.isalnum, dtype="bool") def _str_isalpha(self): - return self._map(str.isalpha, dtype="bool") + return self._str_map(str.isalpha, dtype="bool") def _str_isdecimal(self): - return self._map(str.isdecimal, dtype="bool") + return self._str_map(str.isdecimal, dtype="bool") def _str_isdigit(self): - return self._map(str.isdigit, dtype="bool") + return self._str_map(str.isdigit, dtype="bool") def _str_islower(self): - return self._map(str.islower, dtype="bool") + return self._str_map(str.islower, dtype="bool") def _str_isnumeric(self): - return self._map(str.isnumeric, dtype="bool") + return self._str_map(str.isnumeric, dtype="bool") def _str_isspace(self): - return self._map(str.isspace, dtype="bool") + return self._str_map(str.isspace, dtype="bool") def _str_istitle(self): - return self._map(str.istitle, dtype="bool") + return self._str_map(str.istitle, dtype="bool") def _str_isupper(self): - return self._map(str.isupper, dtype="bool") + return self._str_map(str.isupper, dtype="bool") def _str_capitalize(self): - return self._map(str.capitalize) + return self._str_map(str.capitalize) def _str_casefold(self): - return self._map(str.casefold) + return self._str_map(str.casefold) def _str_title(self): - return self._map(str.title) + return self._str_map(str.title) def _str_swapcase(self): - return self._map(str.swapcase) + return self._str_map(str.swapcase) def _str_lower(self): - return self._map(str.lower) + return self._str_map(str.lower) def _str_normalize(self, form): f = lambda x: unicodedata.normalize(form, x) - return self._map(f) + return self._str_map(f) def _str_strip(self, to_strip=None): - return self._map(lambda x: x.strip(to_strip)) + return self._str_map(lambda x: x.strip(to_strip)) def _str_lstrip(self, to_strip=None): - return self._map(lambda x: x.lstrip(to_strip)) + return self._str_map(lambda x: x.lstrip(to_strip)) def _str_rstrip(self, to_strip=None): - return self._map(lambda x: x.rstrip(to_strip)) + return self._str_map(lambda x: x.rstrip(to_strip)) From 8d3aecd996636f6a947547fdd400fd5337505650 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 22 Sep 2020 09:38:15 -0500 Subject: [PATCH 15/24] Move to arrays --- pandas/core/arrays/categorical.py | 19 +++++++- pandas/core/arrays/numpy_.py | 10 ++++- pandas/core/arrays/string_.py | 69 +++++++++++++++++++++++++--- pandas/core/strings/accessor.py | 10 ----- pandas/core/strings/categorical.py | 31 ------------- pandas/core/strings/object_array.py | 18 +++----- pandas/core/strings/string_array.py | 70 ----------------------------- 7 files changed, 97 
insertions(+), 130 deletions(-) delete mode 100644 pandas/core/strings/categorical.py delete mode 100644 pandas/core/strings/string_array.py diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ef69d6565cfeb..9210bd88362de 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -52,6 +52,7 @@ from pandas.core.missing import interpolate_2d from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.sorting import nargsort +from pandas.core.strings.object_array import ObjectStringArrayMixin from pandas.io.formats import console @@ -177,7 +178,7 @@ def contains(cat, key, container): return any(loc_ in container for loc_ in loc) -class Categorical(NDArrayBackedExtensionArray, PandasObject): +class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin): """ Represent a categorical variable in classic R / S-plus fashion. @@ -2335,6 +2336,22 @@ def replace(self, to_replace, value, inplace: bool = False): if not inplace: return cat + # ------------------------------------------------------------------------ + # String methods interface + def _str_map(self, f, na_value=np.nan, dtype=np.dtype(object)): + from pandas.core.arrays import PandasArray + + categories = self.categories + codes = self.codes + result = PandasArray(categories.to_numpy())._str_map(f, na_value, dtype) + return take_1d(result, codes, fill_value=na_value) + + def _str_get_dummies(self, sep="|"): + # sep may not be in categories. Just bail on this. + from pandas.core.arrays import PandasArray + + return PandasArray(self.astype(str))._str_get_dummies(sep) + # The Series.cat accessor diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 61076132b24cd..0a299b3473ef0 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -17,6 +17,7 @@ from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionOpsMixin from pandas.core.construction import extract_array +from pandas.core.strings.object_array import ObjectStringArrayMixin class PandasDtype(ExtensionDtype): @@ -115,7 +116,10 @@ def itemsize(self) -> int: class PandasArray( - NDArrayBackedExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin + NDArrayBackedExtensionArray, + ExtensionOpsMixin, + NDArrayOperatorsMixin, + ObjectStringArrayMixin, ): """ A pandas ExtensionArray for NumPy data. 
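With ``ObjectStringArrayMixin`` mixed into ``PandasArray``, the accessor can dispatch straight to the array. A quick sketch of the pattern this series sets up (``_str_*`` is the private interface introduced here, so this assumes a build that includes these patches):

import pandas as pd

s = pd.Series(["a", "b", None])  # object dtype -> PandasArray

s.str.upper()          # accessor path, returns a Series
s.array._str_upper()   # what it dispatches to: same values, unwrapped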
@@ -398,6 +402,10 @@ def arithmetic_method(self, other): _create_comparison_method = _create_arithmetic_method + # ------------------------------------------------------------------------ + # String methods interface + _str_na_value = np.nan + PandasArray._add_arithmetic_ops() PandasArray._add_comparison_ops() diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 5bf9838f6b964..6f3eed9c93301 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -6,7 +6,14 @@ from pandas._libs import lib, missing as libmissing from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype -from pandas.core.dtypes.common import is_array_like, pandas_dtype +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_integer_dtype, + is_object_dtype, + is_string_dtype, + pandas_dtype, +) from pandas import compat from pandas.core import ops @@ -15,7 +22,7 @@ from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna -# from pandas.core.strings.object_array import ObjectStringArray +from pandas.core.strings.object_array import ObjectStringArrayMixin if TYPE_CHECKING: import pyarrow # noqa: F401 @@ -96,10 +103,7 @@ def __from_arrow__( return StringArray._concat_same_type(results) -# Uhmmm, this is going to have to dispatch on the dtype if we want -# to share StringArray between python / arrow. - -class StringArray(PandasArray): +class StringArray(PandasArray, ObjectStringArrayMixin): """ Extension array for string data. @@ -350,6 +354,59 @@ def _add_arithmetic_ops(cls): cls.__rmul__ = cls._create_arithmetic_method(ops.rmul) _create_comparison_method = _create_arithmetic_method + # ------------------------------------------------------------------------ + # String methods interface + _str_na_value = StringDtype.na_value + + def _str_map(self, f, na_value=None, dtype=None): + from pandas.arrays import BooleanArray, IntegerArray, StringArray + from pandas.core.arrays.string_ import StringDtype + + if dtype is None: + dtype = StringDtype() + if na_value is None: + na_value = self.dtype.na_value + + mask = isna(self) + arr = self + arr = np.asarray(self) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: Union[Type[IntegerArray], Type[BooleanArray]] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + f, + mask.view("uint8"), + convert=False, + na_value=na_value, + dtype=np.dtype(dtype), + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. StringDtype + result = lib.map_infer_mask( + arr, f, mask.view("uint8"), convert=False, na_value=na_value + ) + return StringArray(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. 
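On a build with this ``_str_map``, the requested dtype decides the wrapping: integer and boolean results come back as the mask-aware ``IntegerArray``/``BooleanArray``, string results as ``StringArray``, so ``pd.NA`` survives the round trip. For example:

import pandas as pd

s = pd.Series(["a", pd.NA, "abc"], dtype="string")

s.str.len()      # Int64 (IntegerArray), pd.NA preserved
s.str.isalpha()  # boolean (BooleanArray), pd.NA preserved
s.str.upper()    # string (StringArray)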
+ return lib.map_infer_mask(arr, f, mask.view("uint8")) StringArray._add_arithmetic_ops() diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 97de112114520..6578ab99ee885 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -24,9 +24,7 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core.arrays.numpy_ import PandasArray from pandas.core.base import NoNewAttributesMixin -from pandas.core.strings.object_array import ObjectStringArray _shared_docs: Dict[str, str] = dict() _cpython_optimized_encoders = ( @@ -150,19 +148,11 @@ class StringMethods(NoNewAttributesMixin): def __init__(self, data): from pandas.core.arrays.string_ import StringDtype - from pandas.core.arrays import Categorical - from pandas.core.strings.categorical import CategoricalStringMethods self._inferred_dtype = self._validate(data) self._is_categorical = is_categorical_dtype(data.dtype) self._is_string = isinstance(data.dtype, StringDtype) array = data.array - - if type(array) is PandasArray: - # wrap in an object proxy to get the str methods. - array = ObjectStringArray(array._ndarray) - elif isinstance(array, Categorical): - array = CategoricalStringMethods(array) self._array = array if isinstance(data, ABCSeries): diff --git a/pandas/core/strings/categorical.py b/pandas/core/strings/categorical.py deleted file mode 100644 index e2a2cc731c0c3..0000000000000 --- a/pandas/core/strings/categorical.py +++ /dev/null @@ -1,31 +0,0 @@ -from typing import cast - -import numpy as np - -from pandas.core.algorithms import take_1d -from pandas.core.strings.object_array import ObjectStringArray -from pandas.core.arrays import Categorical - - -class CategoricalStringMethods(Categorical, ObjectStringArray): - """ - Extension array implementing _str methods for Categorical. - - We implement this just to avoid mucking up the inheritance chain - for Categorical. This inherits from ObjectStringArray just for - convenience. - """ - # Probably lots of room for improvement here. - def _map(self, f, na_value=np.nan, dtype=np.dtype(object)): - from pandas import Categorical - - arr = cast(Categorical, self._array) - - categories = arr.categories - codes = arr.codes - result = ObjectStringArray(categories)._map(f, na_value, dtype) - return take_1d(result, codes, fill_value=na_value) - - def _str_get_dummies(self, sep="|"): - # sep may not be in categories. Just bail on this. - return ObjectStringArray(self.astype(str))._str_get_dummies(sep) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index c1851993e24a7..d47513b18d6b9 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -14,31 +14,27 @@ from pandas.core.dtypes.common import is_re, is_scalar from pandas.core.dtypes.missing import isna -from pandas.core.arrays.numpy_ import PandasArray from pandas.core.strings.base import BaseStringArrayMethods -class ObjectStringArray(PandasArray, BaseStringArrayMethods): +class ObjectStringArrayMixin(BaseStringArrayMethods): """ - PandasArray subclass with _str methods. - - We don't want to put the _str methods on all PandasArrays - so we use this subclass with the BaseStringArrayMethods - mixin. + String Methods operating on object-dtype ndarrays. 
""" - _default_na_value = np.nan + + _str_na_value = np.nan def _str_map(self, f, na_value=None, dtype=None): arr = self if dtype is None: dtype = np.dtype("object") if na_value is None: - na_value = self._default_na_value + na_value = self._str_na_value if not len(arr): return np.ndarray(0, dtype=dtype) if na_value is None: - na_value = self._default_na_value + na_value = self._str_na_value if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) @@ -258,7 +254,7 @@ def f(x): return x.get(i) elif len(x) > i >= -len(x): return x[i] - return self._default_na_value + return self._str_na_value return self._str_map(f) diff --git a/pandas/core/strings/string_array.py b/pandas/core/strings/string_array.py deleted file mode 100644 index 269e85b71161a..0000000000000 --- a/pandas/core/strings/string_array.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Type, Union - -import numpy as np - -from pandas._libs import lib, missing as libmissing - -from pandas.core.dtypes.common import ( - is_bool_dtype, - is_integer_dtype, - is_object_dtype, - is_string_dtype, -) - -from pandas.core.missing import isna -from pandas.core.strings.object_array import ObjectArrayMethods - - -class StringArrayMethods(ObjectArrayMethods): - _default_na_value = libmissing.NA - - def _map(self, f, na_value=None, dtype=None): - from pandas.arrays import BooleanArray, IntegerArray, StringArray - from pandas.core.arrays.string_ import StringDtype - - if dtype is None: - dtype = StringDtype() - if na_value is None: - na_value = self._default_na_value - - arr = self._array - mask = isna(arr) - - arr = np.asarray(arr) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - result = lib.map_infer_mask( - arr, - f, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=np.dtype(dtype), - ) - - if not na_value_is_na: - mask[:] = False - - return constructor(result, mask) - - elif is_string_dtype(dtype) and not is_object_dtype(dtype): - # i.e. StringDtype - result = lib.map_infer_mask( - arr, f, mask.view("uint8"), convert=False, na_value=na_value - ) - return StringArray(result) - else: - # This is when the result type is object. We reach this when - # -> We know the result type is truly object (e.g. .encode returns bytes - # or .findall returns a list). - # -> We don't know the result type. E.g. `.get` can return anything. 
- return lib.map_infer_mask(arr, f, mask.view("uint8")) From d11c2baabc6c23ec85b6f3b0cdd3ad5df578cac8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 22 Sep 2020 09:43:20 -0500 Subject: [PATCH 16/24] Fixup types --- pandas/core/strings/base.py | 4 ++-- pandas/core/strings/object_array.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index d16bec1053f95..89de0fd531e89 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -23,9 +23,9 @@ class BaseStringArrayMethods(abc.ABC): def _str_getitem(self, key): if isinstance(key, slice): - return self.slice(start=key.start, stop=key.stop, step=key.step) + return self._str_slice(start=key.start, stop=key.stop, step=key.step) else: - return self.get(key) + return self._str_get(key) @abc.abstractmethod def _str_count(self, pat, flags=0): diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index d47513b18d6b9..b9d9a4c8e3828 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -24,6 +24,10 @@ class ObjectStringArrayMixin(BaseStringArrayMethods): _str_na_value = np.nan + def __len__(self): + # For typing, _str_map relies on the object being sized. + raise NotImplementedError + def _str_map(self, f, na_value=None, dtype=None): arr = self if dtype is None: From 349e281884f99740c555d9b80cd9af8e1df5ddc1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 22 Sep 2020 09:51:43 -0500 Subject: [PATCH 17/24] test coverage --- pandas/core/strings/object_array.py | 8 -------- pandas/tests/test_strings.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index b9d9a4c8e3828..af097a283818e 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -37,8 +37,6 @@ def _str_map(self, f, na_value=None, dtype=None): if not len(arr): return np.ndarray(0, dtype=dtype) - if na_value is None: - na_value = self._str_na_value if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) @@ -73,12 +71,6 @@ def g(x): result = lib.maybe_convert_objects(result) return result - def _str_getitem(self, key): - if isinstance(key, slice): - return self._str_slice(start=key.start, stop=key.stop, step=key.step) - else: - return self._str_get(key) - def _str_count(self, pat, flags=0): regex = re.compile(pat, flags=flags) f = lambda x: len(regex.findall(x)) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index df0e775e8750b..6ad55639ae5d8 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2541,6 +2541,18 @@ def test_split(self): exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) tm.assert_series_equal(result, exp) + @pytest.mark.parametrize("dtype", [object, "string"]) + @pytest.mark.parametrize("method", ["split", "rsplit"]) + def test_split_n(self, dtype, method): + s = pd.Series(["a b", pd.NA, "b c"], dtype=dtype) + expected = pd.Series([["a", "b"], pd.NA, ["b", "c"]]) + + result = getattr(s.str, method)(" ", n=None) + tm.assert_series_equal(result, expected) + + result = getattr(s.str, method)(" ", n=0) + tm.assert_series_equal(result, expected) + def test_rsplit(self): values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) result = values.str.rsplit("_") From b7ab130c0ac041aaeb6be826b31b4b585e492082 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 22 Sep 2020 
11:21:10 -0500 Subject: [PATCH 18/24] fixup --- pandas/core/strings/accessor.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 6578ab99ee885..a921250bf3eb6 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -235,7 +235,12 @@ def __iter__(self): g = self.get(i) def _wrap_result( - self, result, name=None, expand=None, fill_value=np.nan, returns_string=True, + self, + result, + name=None, + expand=None, + fill_value=np.nan, + returns_string=True, ): from pandas import Index, MultiIndex @@ -1888,7 +1893,12 @@ def get_dummies(self, sep="|"): # we need to cast to Series of strings as only that has all # methods available for making the dummies... result, name = self._array._str_get_dummies(sep) - return self._wrap_result(result, name=name, expand=True, returns_string=False,) + return self._wrap_result( + result, + name=name, + expand=True, + returns_string=False, + ) @forbid_nonstring_types(["bytes"]) def translate(self, table): From 6dcd44e4fc5be091fca56d5cd41bc3e32ecb0043 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 23 Sep 2020 08:13:37 -0500 Subject: [PATCH 19/24] update docstring --- pandas/core/strings/base.py | 9 +++++---- pandas/core/strings/object_array.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index 89de0fd531e89..3d0e6bf69d6f9 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -8,14 +8,15 @@ class BaseStringArrayMethods(abc.ABC): """ - Base class for array _str accessor. + Base class for extension arrays implementing string methods. - This is where ExtensionArrays can override the implementation of - Series.str.. The rough layout is + This is our ExtensionArrays can override the implementation of + Series.str.. We don't currenlty expect this to work with + 3rd-party extension arrays. * User calls Series.str. * pandas extracts the extension array from the Series - * pandas calls ``extension_array._str.(*args, **kwargs)`` + * pandas calls ``extension_array._str_(*args, **kwargs)`` * pandas wraps the result, to return to the user. See :ref:`Series.str` for the docstring of each method. diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index af097a283818e..a29d84edd3a77 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -29,6 +29,21 @@ def __len__(self): raise NotImplementedError def _str_map(self, f, na_value=None, dtype=None): + """ + Map a callable over valid element of the array. + + Parameters + ---------- + f : Callable + A function to call on each non-NA element. + na_value : Scalar, optional + The value to set for NA values. Might also be used for the + fill value if the callable `f` raises an exception. + This defaults to ``self._str_na_value`` which is ``np.nan`` + for object-dtype and Categorical and ``pd.NA`` for StringArray. + dtype : Dtype, optional + The dtype of the result array. 
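To make the ``na_value`` parameter concrete: methods like ``_str_startswith`` pass the user's ``na`` through, which on object-dtype data also controls the result dtype. For example:

import pandas as pd

s = pd.Series(["apple", None, "avocado"])

s.str.startswith("a")            # NA stays NaN -> object result
s.str.startswith("a", na=False)  # na_value=False -> plain bool dtype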
+ """ arr = self if dtype is None: dtype = np.dtype("object") From efb3e3df28b6320593d9c22937af0678ad4921fe Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 24 Sep 2020 09:43:15 -0500 Subject: [PATCH 20/24] document current implementation --- pandas/core/arrays/categorical.py | 3 +++ pandas/core/strings/__init__.py | 28 ++++++++++++++++++++++++++++ pandas/core/strings/accessor.py | 2 ++ 3 files changed, 33 insertions(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c76b5314ee937..0930183f0bf2f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2316,6 +2316,9 @@ def replace(self, to_replace, value, inplace: bool = False): # ------------------------------------------------------------------------ # String methods interface def _str_map(self, f, na_value=np.nan, dtype=np.dtype(object)): + # Optimization to apply the callable `f` to the categories once + # and rebuild the result by `take`ing from the result with the codes. + # Returns the same type as the object-dtype impelmentation though. from pandas.core.arrays import PandasArray categories = self.categories diff --git a/pandas/core/strings/__init__.py b/pandas/core/strings/__init__.py index bf0ac8ef872ca..ec44c9d7053e7 100644 --- a/pandas/core/strings/__init__.py +++ b/pandas/core/strings/__init__.py @@ -1,3 +1,31 @@ +""" +Implementation of pandas.Series.str and its interface. + +* strings.accessor.StringMethods : Accessor for Series.str +* strings.base.BaseStringArrayMethods: Mixin ABC for EAs to implement str methods + +Most methods on the StringMethods accessor follow the pattern: + + 1. extract the array from the series (or index) + 2. Call that array's impelmentation of the string method + 3. Wrap the result (in a Series, index, or DataFrame) + +Pandas extension arrays implementing string methods should inherit from +pandas.core.strings.base.BaseStringArrayMethods. This is an ABC defining +the various string methods. To avoid namespace clashes and pollution, +these are prefixed with `_str_`. So ``Series.str.upper()`` calls +``Series.array._str_upper()``. The interface isn't currently public +to other string extension arrays. +""" +# Pandas current implementation is in ObjectStringArrayMixin. This is designed +# to work on object-dtype ndarrays. +# +# BaseStringArrayMethods +# - ObjectStringArrayMixin +# - StringArray +# - PandasArray +# - Categorical + from .accessor import StringMethods from .base import BaseStringArrayMethods diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index a921250bf3eb6..2aec354223da0 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -140,6 +140,8 @@ class StringMethods(NoNewAttributesMixin): dtype: object """ + # Note: see the docstring in pandas.core.strings.__init__ + # for an explanation of the implementation. # TODO: Dispatch all the methods # Currently the following are not dispatched to the array # * cat From 0da70311b4741a5588213aeec510d3a50ec179ef Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 24 Sep 2020 09:44:06 -0500 Subject: [PATCH 21/24] typo --- pandas/core/strings/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index 3d0e6bf69d6f9..08064244a2ff9 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -10,8 +10,8 @@ class BaseStringArrayMethods(abc.ABC): """ Base class for extension arrays implementing string methods. 
- This is our ExtensionArrays can override the implementation of - Series.str.. We don't currenlty expect this to work with + This is where our ExtensionArrays can override the implementation of + Series.str.. We don't expect this to work with 3rd-party extension arrays. * User calls Series.str. From d681f99859a17a2ab214a4c47143cdd9241faac1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 25 Sep 2020 15:38:19 -0500 Subject: [PATCH 22/24] fixup --- pandas/core/arrays/categorical.py | 2 +- pandas/core/strings/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f36de9cb972ee..1e06bf1b6ce96 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2312,7 +2312,7 @@ def replace(self, to_replace, value, inplace: bool = False): def _str_map(self, f, na_value=np.nan, dtype=np.dtype(object)): # Optimization to apply the callable `f` to the categories once # and rebuild the result by `take`ing from the result with the codes. - # Returns the same type as the object-dtype impelmentation though. + # Returns the same type as the object-dtype implementation though. from pandas.core.arrays import PandasArray categories = self.categories diff --git a/pandas/core/strings/__init__.py b/pandas/core/strings/__init__.py index ec44c9d7053e7..243250f0360a0 100644 --- a/pandas/core/strings/__init__.py +++ b/pandas/core/strings/__init__.py @@ -7,7 +7,7 @@ Most methods on the StringMethods accessor follow the pattern: 1. extract the array from the series (or index) - 2. Call that array's impelmentation of the string method + 2. Call that array's implementation of the string method 3. Wrap the result (in a Series, index, or DataFrame) Pandas extension arrays implementing string methods should inherit from From 457c1122429af282983b302a62be4822b5eaef6b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Sep 2020 08:20:36 -0500 Subject: [PATCH 23/24] fixup --- pandas/core/arrays/string_.py | 1 - pandas/core/strings.py | 3650 --------------------------------- 2 files changed, 3651 deletions(-) delete mode 100644 pandas/core/strings.py diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 7ad97c546f720..0db5fadce614e 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -368,7 +368,6 @@ def _str_map(self, f, na_value=None, dtype=None): na_value = self.dtype.na_value mask = isna(self) - arr = self arr = np.asarray(self) if is_integer_dtype(dtype) or is_bool_dtype(dtype): diff --git a/pandas/core/strings.py b/pandas/core/strings.py deleted file mode 100644 index 4467c96041dc7..0000000000000 --- a/pandas/core/strings.py +++ /dev/null @@ -1,3650 +0,0 @@ -import codecs -from functools import wraps -import re -import textwrap -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Pattern, Type, Union -import warnings - -import numpy as np - -import pandas._libs.lib as lib -import pandas._libs.missing as libmissing -import pandas._libs.ops as libops -from pandas._typing import ArrayLike, Dtype, Scalar -from pandas.util._decorators import Appender - -from pandas.core.dtypes.common import ( - ensure_object, - is_bool_dtype, - is_categorical_dtype, - is_extension_array_dtype, - is_integer, - is_integer_dtype, - is_list_like, - is_object_dtype, - is_re, - is_scalar, - is_string_dtype, -) -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCIndexClass, - ABCMultiIndex, - ABCSeries, -) -from 
-
-from pandas.core.algorithms import take_1d
-from pandas.core.base import NoNewAttributesMixin
-from pandas.core.construction import extract_array
-
-if TYPE_CHECKING:
-    from pandas.arrays import StringArray
-
-_cpython_optimized_encoders = (
-    "utf-8",
-    "utf8",
-    "latin-1",
-    "latin1",
-    "iso-8859-1",
-    "mbcs",
-    "ascii",
-)
-_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32")
-
-_shared_docs: Dict[str, str] = dict()
-
-
-def cat_core(list_of_columns: List, sep: str):
-    """
-    Auxiliary function for :meth:`str.cat`
-
-    Parameters
-    ----------
-    list_of_columns : list of numpy arrays
-        List of arrays to be concatenated with sep;
-        these arrays may not contain NaNs!
-    sep : string
-        The separator string for concatenating the columns.
-
-    Returns
-    -------
-    nd.array
-        The concatenation of list_of_columns with sep.
-    """
-    if sep == "":
-        # no need to interleave sep if it is empty
-        arr_of_cols = np.asarray(list_of_columns, dtype=object)
-        return np.sum(arr_of_cols, axis=0)
-    list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
-    list_with_sep[::2] = list_of_columns
-    arr_with_sep = np.asarray(list_with_sep, dtype=object)
-    return np.sum(arr_with_sep, axis=0)
-
-
-def cat_safe(list_of_columns: List, sep: str):
-    """
-    Auxiliary function for :meth:`str.cat`.
-
-    Same signature as cat_core, but handles TypeErrors in concatenation, which
-    happen if the arrays in list_of columns have the wrong dtypes or content.
-
-    Parameters
-    ----------
-    list_of_columns : list of numpy arrays
-        List of arrays to be concatenated with sep;
-        these arrays may not contain NaNs!
-    sep : string
-        The separator string for concatenating the columns.
-
-    Returns
-    -------
-    nd.array
-        The concatenation of list_of_columns with sep.
-    """
-    try:
-        result = cat_core(list_of_columns, sep)
-    except TypeError:
-        # if there are any non-string values (wrong dtype or hidden behind
-        # object dtype), np.sum will fail; catch and return with better message
-        for column in list_of_columns:
-            dtype = lib.infer_dtype(column, skipna=True)
-            if dtype not in ["string", "empty"]:
-                raise TypeError(
-                    "Concatenation requires list-likes containing only "
-                    "strings (or missing values). Offending values found in "
-                    f"column {dtype}"
-                ) from None
-    return result
-
-
-def _na_map(f, arr, na_result=None, dtype=np.dtype(object)):
-    if is_extension_array_dtype(arr.dtype):
-        if na_result is None:
-            na_result = libmissing.NA
-        # just StringDtype
-        arr = extract_array(arr)
-        return _map_stringarray(f, arr, na_value=na_result, dtype=dtype)
-    if na_result is None:
-        na_result = np.nan
-    return _map_object(f, arr, na_mask=True, na_value=na_result, dtype=dtype)
-
-
-def _map_stringarray(
-    func: Callable[[str], Any], arr: "StringArray", na_value: Any, dtype: Dtype
-) -> ArrayLike:
-    """
-    Map a callable over valid elements of a StringArray.
-
-    Parameters
-    ----------
-    func : Callable[[str], Any]
-        Apply to each valid element.
-    arr : StringArray
-    na_value : Any
-        The value to use for missing values. By default, this is
-        the original value (NA).
-    dtype : Dtype
-        The result dtype to use. Specifying this avoids an intermediate
-        object-dtype allocation.
-
-    Returns
-    -------
-    ArrayLike
-        An ExtensionArray for integer or string dtypes, otherwise
-        an ndarray.
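The interleaving trick in ``cat_core`` above leans on ``np.sum`` over an object-dtype array, where ``+`` is elementwise string concatenation. The same idea on hypothetical toy data:

    import numpy as np

    cols = [np.array(["a", "b"], dtype=object), np.array(["1", "2"], dtype=object)]
    sep = "-"

    # Interleave the separator between the columns: [col0, sep, col1].
    with_sep = [sep] * (2 * len(cols) - 1)
    with_sep[::2] = cols

    # Object-dtype np.sum reduces with elementwise +, i.e. concatenation.
    print(np.sum(np.asarray(with_sep, dtype=object), axis=0))  # ['a-1' 'b-2']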
- - """ - from pandas.arrays import BooleanArray, IntegerArray, StringArray - - mask = isna(arr) - - assert isinstance(arr, StringArray) - arr = np.asarray(arr) - - if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: Union[Type[IntegerArray], Type[BooleanArray]] - if is_integer_dtype(dtype): - constructor = IntegerArray - else: - constructor = BooleanArray - - na_value_is_na = isna(na_value) - if na_value_is_na: - na_value = 1 - result = lib.map_infer_mask( - arr, - func, - mask.view("uint8"), - convert=False, - na_value=na_value, - dtype=np.dtype(dtype), - ) - - if not na_value_is_na: - mask[:] = False - - return constructor(result, mask) - - elif is_string_dtype(dtype) and not is_object_dtype(dtype): - # i.e. StringDtype - result = lib.map_infer_mask( - arr, func, mask.view("uint8"), convert=False, na_value=na_value - ) - return StringArray(result) - else: - # This is when the result type is object. We reach this when - # -> We know the result type is truly object (e.g. .encode returns bytes - # or .findall returns a list). - # -> We don't know the result type. E.g. `.get` can return anything. - return lib.map_infer_mask(arr, func, mask.view("uint8")) - - -def _map_object(f, arr, na_mask=False, na_value=np.nan, dtype=np.dtype(object)): - if not len(arr): - return np.ndarray(0, dtype=dtype) - - if isinstance(arr, ABCSeries): - arr = arr._values # TODO: extract_array? - if not isinstance(arr, np.ndarray): - arr = np.asarray(arr, dtype=object) - if na_mask: - mask = isna(arr) - convert = not np.all(mask) - try: - result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) - except (TypeError, AttributeError) as e: - # Reraise the exception if callable `f` got wrong number of args. - # The user may want to be warned by this, instead of getting NaN - p_err = ( - r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " - r"(?(3)required )positional arguments?" - ) - - if len(e.args) >= 1 and re.search(p_err, e.args[0]): - # FIXME: this should be totally avoidable - raise e - - def g(x): - try: - return f(x) - except (TypeError, AttributeError): - return na_value - - return _map_object(g, arr, dtype=dtype) - if na_value is not np.nan: - np.putmask(result, mask, na_value) - if result.dtype == object: - result = lib.maybe_convert_objects(result) - return result - else: - return lib.map_infer(arr, f) - - -def str_count(arr, pat, flags=0): - """ - Count occurrences of pattern in each string of the Series/Index. - - This function is used to count the number of times a particular regex - pattern is repeated in each of the string elements of the - :class:`~pandas.Series`. - - Parameters - ---------- - pat : str - Valid regular expression. - flags : int, default 0, meaning no flags - Flags for the `re` module. For a complete list, `see here - `_. - **kwargs - For compatibility with other string methods. Not used. - - Returns - ------- - Series or Index - Same type as the calling object containing the integer counts. - - See Also - -------- - re : Standard library module for regular expressions. - str.count : Standard library version, without regular expression support. - - Notes - ----- - Some characters need to be escaped when passing in `pat`. - eg. ``'$'`` has a special meaning in regex and must be escaped when - finding this literal character. - - Examples - -------- - >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) - >>> s.str.count('a') - 0 0.0 - 1 0.0 - 2 2.0 - 3 2.0 - 4 NaN - 5 0.0 - 6 1.0 - dtype: float64 - - Escape ``'$'`` to find the literal dollar sign. 
- - >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) - >>> s.str.count('\\$') - 0 1 - 1 0 - 2 1 - 3 2 - 4 2 - 5 0 - dtype: int64 - - This is also available on Index - - >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') - Int64Index([0, 0, 2, 1], dtype='int64') - """ - regex = re.compile(pat, flags=flags) - f = lambda x: len(regex.findall(x)) - return _na_map(f, arr, dtype="int64") - - -def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): - """ - Test if pattern or regex is contained within a string of a Series or Index. - - Return boolean Series or Index based on whether a given pattern or regex is - contained within a string of a Series or Index. - - Parameters - ---------- - pat : str - Character sequence or regular expression. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Flags to pass through to the re module, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. - regex : bool, default True - If True, assumes the pat is a regular expression. - - If False, treats the pat as a literal string. - - Returns - ------- - Series or Index of boolean values - A Series or Index of boolean values indicating whether the - given pattern is contained within the string of each element - of the Series or Index. - - See Also - -------- - match : Analogous, but stricter, relying on re.match instead of re.search. - Series.str.startswith : Test if the start of each string element matches a - pattern. - Series.str.endswith : Same as startswith, but tests the end of string. - - Examples - -------- - Returning a Series of booleans using only a literal pattern. - - >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) - >>> s1.str.contains('og', regex=False) - 0 False - 1 True - 2 False - 3 False - 4 NaN - dtype: object - - Returning an Index of booleans using only a literal pattern. - - >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) - >>> ind.str.contains('23', regex=False) - Index([False, False, False, True, nan], dtype='object') - - Specifying case sensitivity using `case`. - - >>> s1.str.contains('oG', case=True, regex=True) - 0 False - 1 False - 2 False - 3 False - 4 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN` replaces NaN values - with `False`. If Series or Index does not contain NaN values - the resultant dtype will be `bool`, otherwise, an `object` dtype. - - >>> s1.str.contains('og', na=False, regex=True) - 0 False - 1 True - 2 False - 3 False - 4 False - dtype: bool - - Returning 'house' or 'dog' when either expression occurs in a string. - - >>> s1.str.contains('house|dog', regex=True) - 0 False - 1 True - 2 True - 3 False - 4 NaN - dtype: object - - Ignoring case sensitivity using `flags` with regex. - - >>> import re - >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True) - 0 False - 1 False - 2 True - 3 False - 4 NaN - dtype: object - - Returning any digit using regular expression. - - >>> s1.str.contains('\\d', regex=True) - 0 False - 1 False - 2 False - 3 True - 4 NaN - dtype: object - - Ensure `pat` is a not a literal pattern when `regex` is set to True. - Note in the following example one might expect only `s2[1]` and `s2[3]` to - return `True`. However, '.0' as a regex matches any character - followed by a 0. 
- - >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) - >>> s2.str.contains('.0', regex=True) - 0 True - 1 True - 2 False - 3 True - 4 False - dtype: bool - """ - if regex: - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - if regex.groups > 0: - warnings.warn( - "This pattern has match groups. To actually get the " - "groups, use str.extract.", - UserWarning, - stacklevel=3, - ) - - f = lambda x: regex.search(x) is not None - else: - if case: - f = lambda x: pat in x - else: - upper_pat = pat.upper() - f = lambda x: upper_pat in x - uppered = _na_map(lambda x: x.upper(), arr) - return _na_map(f, uppered, na, dtype=np.dtype(bool)) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_startswith(arr, pat, na=np.nan): - """ - Test if the start of each string element matches a pattern. - - Equivalent to :meth:`str.startswith`. - - Parameters - ---------- - pat : str - Character sequence. Regular expressions are not accepted. - na : object, default NaN - Object shown if element tested is not a string. - - Returns - ------- - Series or Index of bool - A Series of booleans indicating whether the given pattern matches - the start of each string element. - - See Also - -------- - str.startswith : Python standard library string method. - Series.str.endswith : Same as startswith, but tests the end of string. - Series.str.contains : Tests if string element contains a pattern. - - Examples - -------- - >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) - >>> s - 0 bat - 1 Bear - 2 cat - 3 NaN - dtype: object - - >>> s.str.startswith('b') - 0 True - 1 False - 2 False - 3 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN`. - - >>> s.str.startswith('b', na=False) - 0 True - 1 False - 2 False - 3 False - dtype: bool - """ - f = lambda x: x.startswith(pat) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_endswith(arr, pat, na=np.nan): - """ - Test if the end of each string element matches a pattern. - - Equivalent to :meth:`str.endswith`. - - Parameters - ---------- - pat : str - Character sequence. Regular expressions are not accepted. - na : object, default NaN - Object shown if element tested is not a string. - - Returns - ------- - Series or Index of bool - A Series of booleans indicating whether the given pattern matches - the end of each string element. - - See Also - -------- - str.endswith : Python standard library string method. - Series.str.startswith : Same as endswith, but tests the start of string. - Series.str.contains : Tests if string element contains a pattern. - - Examples - -------- - >>> s = pd.Series(['bat', 'bear', 'caT', np.nan]) - >>> s - 0 bat - 1 bear - 2 caT - 3 NaN - dtype: object - - >>> s.str.endswith('t') - 0 True - 1 False - 2 False - 3 NaN - dtype: object - - Specifying `na` to be `False` instead of `NaN`. - - >>> s.str.endswith('t', na=False) - 0 True - 1 False - 2 False - 3 False - dtype: bool - """ - f = lambda x: x.endswith(pat) - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): - r""" - Replace each occurrence of pattern/regex in the Series/Index. - - Equivalent to :meth:`str.replace` or :func:`re.sub`, depending on the regex value. - - Parameters - ---------- - pat : str or compiled regex - String can be a character sequence or regular expression. - repl : str or callable - Replacement string or a callable. The callable is passed the regex - match object and must return a replacement string to be used. 
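The case-insensitive literal branch of ``str_contains`` above avoids compiling a regex by uppercasing both the pattern and the values first; roughly equivalent, via the public API:

    import pandas as pd

    s = pd.Series(["Mouse", "dog", None])
    pat = "ous"

    # Same result as s.str.contains(pat, case=False, regex=False): a plain
    # substring test on uppercased values instead of the regex engine.
    print(s.str.upper().str.contains(pat.upper(), regex=False))
    # 0 True, 1 False, 2 NaN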
- See :func:`re.sub`. - n : int, default -1 (all) - Number of replacements to make from start. - case : bool, default None - Determines if replace is case sensitive: - - - If True, case sensitive (the default if `pat` is a string) - - Set to False for case insensitive - - Cannot be set if `pat` is a compiled regex. - - flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled - regex. - regex : bool, default True - Determines if assumes the passed-in pattern is a regular expression: - - - If True, assumes the passed-in pattern is a regular expression. - - If False, treats the pattern as a literal string - - Cannot be set to False if `pat` is a compiled regex or `repl` is - a callable. - - Returns - ------- - Series or Index of object - A copy of the object with all matching occurrences of `pat` replaced by - `repl`. - - Raises - ------ - ValueError - * if `regex` is False and `repl` is a callable or `pat` is a compiled - regex - * if `pat` is a compiled regex and `case` or `flags` is set - - Notes - ----- - When `pat` is a compiled regex, all flags should be included in the - compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled - regex will raise an error. - - Examples - -------- - When `pat` is a string and `regex` is True (the default), the given `pat` - is compiled as a regex. When `repl` is a string, it replaces matching - regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are - left as is: - - >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True) - 0 bao - 1 baz - 2 NaN - dtype: object - - When `pat` is a string and `regex` is False, every `pat` is replaced with - `repl` as with :meth:`str.replace`: - - >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False) - 0 bao - 1 fuz - 2 NaN - dtype: object - - When `repl` is a callable, it is called on every `pat` using - :func:`re.sub`. The callable should expect one positional argument - (a regex object) and return a string. 
- - To get the idea: - - >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr) - 0 oo - 1 uz - 2 NaN - dtype: object - - Reverse every lowercase alphabetic word: - - >>> repl = lambda m: m.group(0)[::-1] - >>> pd.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl) - 0 oof 123 - 1 rab zab - 2 NaN - dtype: object - - Using regex groups (extract second group and swap case): - - >>> pat = r"(?P\w+) (?P\w+) (?P\w+)" - >>> repl = lambda m: m.group('two').swapcase() - >>> pd.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl) - 0 tWO - 1 bAR - dtype: object - - Using a compiled regex with flags - - >>> import re - >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) - >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar') - 0 foo - 1 bar - 2 NaN - dtype: object - """ - # Check whether repl is valid (GH 13438, GH 15055) - if not (isinstance(repl, str) or callable(repl)): - raise TypeError("repl must be a string or callable") - - is_compiled_re = is_re(pat) - if regex: - if is_compiled_re: - if (case is not None) or (flags != 0): - raise ValueError( - "case and flags cannot be set when pat is a compiled regex" - ) - else: - # not a compiled regex - # set default case - if case is None: - case = True - - # add case flag, if provided - if case is False: - flags |= re.IGNORECASE - if is_compiled_re or len(pat) > 1 or flags or callable(repl): - n = n if n >= 0 else 0 - compiled = re.compile(pat, flags=flags) - f = lambda x: compiled.sub(repl=repl, string=x, count=n) - else: - f = lambda x: x.replace(pat, repl, n) - else: - if is_compiled_re: - raise ValueError( - "Cannot use a compiled regex as replacement pattern with regex=False" - ) - if callable(repl): - raise ValueError("Cannot use a callable replacement when regex=False") - f = lambda x: x.replace(pat, repl, n) - - return _na_map(f, arr, dtype=str) - - -def str_repeat(arr, repeats): - """ - Duplicate each string in the Series or Index. - - Parameters - ---------- - repeats : int or sequence of int - Same value for all (int) or different value per (sequence). - - Returns - ------- - Series or Index of object - Series or Index of repeated string objects specified by - input parameter repeats. - - Examples - -------- - >>> s = pd.Series(['a', 'b', 'c']) - >>> s - 0 a - 1 b - 2 c - dtype: object - - Single int repeats string in Series - - >>> s.str.repeat(repeats=2) - 0 aa - 1 bb - 2 cc - dtype: object - - Sequence of int repeats corresponding string in Series - - >>> s.str.repeat(repeats=[1, 2, 3]) - 0 a - 1 bb - 2 ccc - dtype: object - """ - if is_scalar(repeats): - - def scalar_rep(x): - try: - return bytes.__mul__(x, repeats) - except TypeError: - return str.__mul__(x, repeats) - - return _na_map(scalar_rep, arr, dtype=str) - else: - - def rep(x, r): - if x is libmissing.NA: - return x - try: - return bytes.__mul__(x, r) - except TypeError: - return str.__mul__(x, r) - - repeats = np.asarray(repeats, dtype=object) - result = libops.vec_binop(np.asarray(arr), repeats, rep) - return result - - -def str_match( - arr: ArrayLike, - pat: Union[str, Pattern], - case: bool = True, - flags: int = 0, - na: Scalar = np.nan, -): - """ - Determine if each string starts with a match of a regular expression. - - Parameters - ---------- - pat : str - Character sequence or regular expression. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. 
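Note the branch in ``str_replace`` above: only a compiled pattern, a multi-character pattern, flags, or a callable replacement force the regex engine, while a single-character literal goes through plain ``str.replace``. The two branches side by side:

    import re

    pat, repl, n = "f.", "ba", -1

    # Regex branch: compile once, then re.sub per element (count=0 means all).
    compiled = re.compile(pat)
    print(compiled.sub(repl, "foo", count=n if n >= 0 else 0))  # 'bao'

    # Literal fast path for a single character and a string replacement.
    print("foo".replace("f", "b", n))  # 'boo'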
- - Returns - ------- - Series/array of boolean values - - See Also - -------- - fullmatch : Stricter matching that requires the entire string to match. - contains : Analogous, but less strict, relying on re.search instead of - re.match. - extract : Extract matched groups. - """ - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - f = lambda x: regex.match(x) is not None - - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def str_fullmatch( - arr: ArrayLike, - pat: Union[str, Pattern], - case: bool = True, - flags: int = 0, - na: Scalar = np.nan, -): - """ - Determine if each string entirely matches a regular expression. - - .. versionadded:: 1.1.0 - - Parameters - ---------- - pat : str - Character sequence or regular expression. - case : bool, default True - If True, case sensitive. - flags : int, default 0 (no flags) - Regex module flags, e.g. re.IGNORECASE. - na : default NaN - Fill value for missing values. - - Returns - ------- - Series/array of boolean values - - See Also - -------- - match : Similar, but also returns `True` when only a *prefix* of the string - matches the regular expression. - extract : Extract matched groups. - """ - if not case: - flags |= re.IGNORECASE - - regex = re.compile(pat, flags=flags) - - f = lambda x: regex.fullmatch(x) is not None - - return _na_map(f, arr, na, dtype=np.dtype(bool)) - - -def _get_single_group_name(rx): - try: - return list(rx.groupindex.keys()).pop() - except IndexError: - return None - - -def _groups_or_na_fun(regex): - """Used in both extract_noexpand and extract_frame""" - if regex.groups == 0: - raise ValueError("pattern contains no capture groups") - empty_row = [np.nan] * regex.groups - - def f(x): - if not isinstance(x, str): - return empty_row - m = regex.search(x) - if m: - return [np.nan if item is None else item for item in m.groups()] - else: - return empty_row - - return f - - -def _result_dtype(arr): - # workaround #27953 - # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails - # when the list of values is empty. - if arr.dtype.name == "string": - return "string" - else: - return object - - -def _str_extract_noexpand(arr, pat, flags=0): - """ - Find groups in each string in the Series using passed regular - expression. This function is called from - str_extract(expand=False), and can return Series, DataFrame, or - Index. - - """ - from pandas import DataFrame - - regex = re.compile(pat, flags=flags) - groups_or_na = _groups_or_na_fun(regex) - - if regex.groups == 1: - result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) - name = _get_single_group_name(regex) - else: - if isinstance(arr, ABCIndexClass): - raise ValueError("only one regex group is supported with Index") - name = None - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - if arr.empty: - result = DataFrame(columns=columns, dtype=object) - else: - dtype = _result_dtype(arr) - result = DataFrame( - [groups_or_na(val) for val in arr], - columns=columns, - index=arr.index, - dtype=dtype, - ) - return result, name - - -def _str_extract_frame(arr, pat, flags=0): - """ - For each subject string in the Series, extract groups from the - first match of regular expression pat. This function is called from - str_extract(expand=True), and always returns a DataFrame. 
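The contrast drawn between ``str_match`` and ``str_fullmatch`` above comes straight from the ``re`` module: ``re.match`` anchors only at the start, while ``re.fullmatch`` requires the entire string to match.

    import re

    regex = re.compile(r"ab")

    print(regex.match("abc") is not None)      # True  (a prefix suffices)
    print(regex.fullmatch("abc") is not None)  # False (whole string required)
    print(regex.fullmatch("ab") is not None)   # True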
- - """ - from pandas import DataFrame - - regex = re.compile(pat, flags=flags) - groups_or_na = _groups_or_na_fun(regex) - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - - if len(arr) == 0: - return DataFrame(columns=columns, dtype=object) - try: - result_index = arr.index - except AttributeError: - result_index = None - dtype = _result_dtype(arr) - return DataFrame( - [groups_or_na(val) for val in arr], - columns=columns, - index=result_index, - dtype=dtype, - ) - - -def str_extract(arr, pat, flags=0, expand=True): - r""" - Extract capture groups in the regex `pat` as columns in a DataFrame. - - For each subject string in the Series, extract groups from the - first match of regular expression `pat`. - - Parameters - ---------- - pat : str - Regular expression pattern with capturing groups. - flags : int, default 0 (no flags) - Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that - modify regular expression matching for things like case, - spaces, etc. For more details, see :mod:`re`. - expand : bool, default True - If True, return DataFrame with one column per capture group. - If False, return a Series/Index if there is one capture group - or DataFrame if there are multiple capture groups. - - Returns - ------- - DataFrame or Series or Index - A DataFrame with one row for each subject string, and one - column for each group. Any capture group names in regular - expression pat will be used for column names; otherwise - capture group numbers will be used. The dtype of each result - column is always object, even when no match is found. If - ``expand=False`` and pat has only one capture group, then - return a Series (if subject is a Series) or Index (if subject - is an Index). - - See Also - -------- - extractall : Returns all matches (not just the first match). - - Examples - -------- - A pattern with two groups will return a DataFrame with two columns. - Non-matches will be NaN. - - >>> s = pd.Series(['a1', 'b2', 'c3']) - >>> s.str.extract(r'([ab])(\d)') - 0 1 - 0 a 1 - 1 b 2 - 2 NaN NaN - - A pattern may contain optional groups. - - >>> s.str.extract(r'([ab])?(\d)') - 0 1 - 0 a 1 - 1 b 2 - 2 NaN 3 - - Named groups will become column names in the result. - - >>> s.str.extract(r'(?P[ab])(?P\d)') - letter digit - 0 a 1 - 1 b 2 - 2 NaN NaN - - A pattern with one group will return a DataFrame with one column - if expand=True. - - >>> s.str.extract(r'[ab](\d)', expand=True) - 0 - 0 1 - 1 2 - 2 NaN - - A pattern with one group will return a Series if expand=False. - - >>> s.str.extract(r'[ab](\d)', expand=False) - 0 1 - 1 2 - 2 NaN - dtype: object - """ - if not isinstance(expand, bool): - raise ValueError("expand must be True or False") - if expand: - return _str_extract_frame(arr._orig, pat, flags=flags) - else: - result, name = _str_extract_noexpand(arr._parent, pat, flags=flags) - return arr._wrap_result(result, name=name, expand=expand) - - -def str_extractall(arr, pat, flags=0): - r""" - Extract capture groups in the regex `pat` as columns in DataFrame. - - For each subject string in the Series, extract groups from all - matches of regular expression pat. When each subject string in the - Series has exactly one match, extractall(pat).xs(0, level='match') - is the same as extract(pat). - - Parameters - ---------- - pat : str - Regular expression pattern with capturing groups. - flags : int, default 0 (no flags) - A ``re`` module flag, for example ``re.IGNORECASE``. 
These allow - to modify regular expression matching for things like case, spaces, - etc. Multiple flags can be combined with the bitwise OR operator, - for example ``re.IGNORECASE | re.MULTILINE``. - - Returns - ------- - DataFrame - A ``DataFrame`` with one row for each match, and one column for each - group. Its rows have a ``MultiIndex`` with first levels that come from - the subject ``Series``. The last level is named 'match' and indexes the - matches in each item of the ``Series``. Any capture group names in - regular expression pat will be used for column names; otherwise capture - group numbers will be used. - - See Also - -------- - extract : Returns first match only (not all matches). - - Examples - -------- - A pattern with one group will return a DataFrame with one column. - Indices with no matches will not appear in the result. - - >>> s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"]) - >>> s.str.extractall(r"[ab](\d)") - 0 - match - A 0 1 - 1 2 - B 0 1 - - Capture group names are used for column names of the result. - - >>> s.str.extractall(r"[ab](?P\d)") - digit - match - A 0 1 - 1 2 - B 0 1 - - A pattern with two groups will return a DataFrame with two columns. - - >>> s.str.extractall(r"(?P[ab])(?P\d)") - letter digit - match - A 0 a 1 - 1 a 2 - B 0 b 1 - - Optional groups that do not match are NaN in the result. - - >>> s.str.extractall(r"(?P[ab])?(?P\d)") - letter digit - match - A 0 a 1 - 1 a 2 - B 0 b 1 - C 0 NaN 1 - """ - regex = re.compile(pat, flags=flags) - # the regex must contain capture groups. - if regex.groups == 0: - raise ValueError("pattern contains no capture groups") - - if isinstance(arr, ABCIndexClass): - arr = arr.to_series().reset_index(drop=True) - - names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) - columns = [names.get(1 + i, i) for i in range(regex.groups)] - match_list = [] - index_list = [] - is_mi = arr.index.nlevels > 1 - - for subject_key, subject in arr.items(): - if isinstance(subject, str): - - if not is_mi: - subject_key = (subject_key,) - - for match_i, match_tuple in enumerate(regex.findall(subject)): - if isinstance(match_tuple, str): - match_tuple = (match_tuple,) - na_tuple = [np.NaN if group == "" else group for group in match_tuple] - match_list.append(na_tuple) - result_key = tuple(subject_key + (match_i,)) - index_list.append(result_key) - - from pandas import MultiIndex - - index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) - dtype = _result_dtype(arr) - - result = arr._constructor_expanddim( - match_list, index=index, columns=columns, dtype=dtype - ) - return result - - -def str_get_dummies(arr, sep="|"): - """ - Return DataFrame of dummy/indicator variables for Series. - - Each string in Series is split by sep and returned as a DataFrame - of dummy/indicator variables. - - Parameters - ---------- - sep : str, default "|" - String to split on. - - Returns - ------- - DataFrame - Dummy variables corresponding to values of the Series. - - See Also - -------- - get_dummies : Convert categorical variable into dummy/indicator - variables. 
- - Examples - -------- - >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies() - a b c - 0 1 1 0 - 1 1 0 0 - 2 1 0 1 - - >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies() - a b c - 0 1 1 0 - 1 0 0 0 - 2 1 0 1 - """ - arr = arr.fillna("") - try: - arr = sep + arr + sep - except TypeError: - arr = sep + arr.astype(str) + sep - - tags = set() - for ts in arr.str.split(sep): - tags.update(ts) - tags = sorted(tags - {""}) - - dummies = np.empty((len(arr), len(tags)), dtype=np.int64) - - for i, t in enumerate(tags): - pat = sep + t + sep - dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) - return dummies, tags - - -def str_join(arr, sep): - """ - Join lists contained as elements in the Series/Index with passed delimiter. - - If the elements of a Series are lists themselves, join the content of these - lists using the delimiter passed to the function. - This function is an equivalent to :meth:`str.join`. - - Parameters - ---------- - sep : str - Delimiter to use between list entries. - - Returns - ------- - Series/Index: object - The list entries concatenated by intervening occurrences of the - delimiter. - - Raises - ------ - AttributeError - If the supplied Series contains neither strings nor lists. - - See Also - -------- - str.join : Standard library version of this method. - Series.str.split : Split strings around given separator/delimiter. - - Notes - ----- - If any of the list items is not a string object, the result of the join - will be `NaN`. - - Examples - -------- - Example with a list that contains non-string elements. - - >>> s = pd.Series([['lion', 'elephant', 'zebra'], - ... [1.1, 2.2, 3.3], - ... ['cat', np.nan, 'dog'], - ... ['cow', 4.5, 'goat'], - ... ['duck', ['swan', 'fish'], 'guppy']]) - >>> s - 0 [lion, elephant, zebra] - 1 [1.1, 2.2, 3.3] - 2 [cat, nan, dog] - 3 [cow, 4.5, goat] - 4 [duck, [swan, fish], guppy] - dtype: object - - Join all lists using a '-'. The lists containing object(s) of types other - than str will produce a NaN. - - >>> s.str.join('-') - 0 lion-elephant-zebra - 1 NaN - 2 NaN - 3 NaN - 4 NaN - dtype: object - """ - return _na_map(sep.join, arr, dtype=str) - - -def str_findall(arr, pat, flags=0): - """ - Find all occurrences of pattern or regular expression in the Series/Index. - - Equivalent to applying :func:`re.findall` to all the elements in the - Series/Index. - - Parameters - ---------- - pat : str - Pattern or regular expression. - flags : int, default 0 - Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which - means no flags). - - Returns - ------- - Series/Index of lists of strings - All non-overlapping matches of pattern or regular expression in each - string of this Series/Index. - - See Also - -------- - count : Count occurrences of pattern or regular expression in each string - of the Series/Index. - extractall : For each string in the Series, extract groups from all matches - of regular expression and return a DataFrame with one row for each - match and one column for each group. - re.findall : The equivalent ``re`` function to all non-overlapping matches - of pattern or regular expression in string, as a list of strings. 
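The separator-wrapping in ``str_get_dummies`` above is what keeps a plain substring test exact: ``'|a|'`` can only match the complete tag ``'a'``, never a partial one. The core idea in isolation:

    values = ["a|b", "a", "a|c"]
    sep = "|"

    wrapped = [sep + v + sep for v in values]
    tags = sorted({t for v in values for t in v.split(sep)} - {""})

    # Membership of '|tag|' in the wrapped string marks the dummy column.
    print(tags)  # ['a', 'b', 'c']
    print([[int(sep + t + sep in w) for t in tags] for w in wrapped])
    # [[1, 1, 0], [1, 0, 0], [1, 0, 1]]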
- - Examples - -------- - >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) - - The search for the pattern 'Monkey' returns one match: - - >>> s.str.findall('Monkey') - 0 [] - 1 [Monkey] - 2 [] - dtype: object - - On the other hand, the search for the pattern 'MONKEY' doesn't return any - match: - - >>> s.str.findall('MONKEY') - 0 [] - 1 [] - 2 [] - dtype: object - - Flags can be added to the pattern or regular expression. For instance, - to find the pattern 'MONKEY' ignoring the case: - - >>> import re - >>> s.str.findall('MONKEY', flags=re.IGNORECASE) - 0 [] - 1 [Monkey] - 2 [] - dtype: object - - When the pattern matches more than one string in the Series, all matches - are returned: - - >>> s.str.findall('on') - 0 [on] - 1 [on] - 2 [] - dtype: object - - Regular expressions are supported too. For instance, the search for all the - strings ending with the word 'on' is shown next: - - >>> s.str.findall('on$') - 0 [on] - 1 [] - 2 [] - dtype: object - - If the pattern is found more than once in the same string, then a list of - multiple strings is returned: - - >>> s.str.findall('b') - 0 [] - 1 [] - 2 [b, b] - dtype: object - """ - regex = re.compile(pat, flags=flags) - return _na_map(regex.findall, arr) - - -def str_find(arr, sub, start=0, end=None, side="left"): - """ - Return indexes in each strings in the Series/Index where the - substring is fully contained between [start:end]. Return -1 on failure. - - Parameters - ---------- - sub : str - Substring being searched. - start : int - Left edge index. - end : int - Right edge index. - side : {'left', 'right'}, default 'left' - Specifies a starting side, equivalent to ``find`` or ``rfind``. - - Returns - ------- - Series or Index - Indexes where substring is found. - """ - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "find" - elif side == "right": - method = "rfind" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_index(arr, sub, start=0, end=None, side="left"): - if not isinstance(sub, str): - msg = f"expected a string object, not {type(sub).__name__}" - raise TypeError(msg) - - if side == "left": - method = "index" - elif side == "right": - method = "rindex" - else: # pragma: no cover - raise ValueError("Invalid side") - - if end is None: - f = lambda x: getattr(x, method)(sub, start) - else: - f = lambda x: getattr(x, method)(sub, start, end) - - return _na_map(f, arr, dtype=np.dtype("int64")) - - -def str_pad(arr, width, side="left", fillchar=" "): - """ - Pad strings in the Series/Index up to width. - - Parameters - ---------- - width : int - Minimum width of resulting string; additional characters will be filled - with character defined in `fillchar`. - side : {'left', 'right', 'both'}, default 'left' - Side from which to fill resulting string. - fillchar : str, default ' ' - Additional character for filling, default is whitespace. - - Returns - ------- - Series or Index of object - Returns Series or Index with minimum number of char in object. - - See Also - -------- - Series.str.rjust : Fills the left side of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='left')``. - Series.str.ljust : Fills the right side of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='right')``. 
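``str_find`` and ``str_index`` above each fold their left/right variants into one body by looking the string method up by name; the pattern by itself:

    def find_one(x: str, sub: str, side: str = "left") -> int:
        # str.find and str.rfind share a signature, so getattr can
        # select the side at call time.
        method = "find" if side == "left" else "rfind"
        return getattr(x, method)(sub)

    print(find_one("abcabc", "b"))                # 1
    print(find_one("abcabc", "b", side="right"))  # 4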
- Series.str.center : Fills both sides of strings with an arbitrary - character. Equivalent to ``Series.str.pad(side='both')``. - Series.str.zfill : Pad strings in the Series/Index by prepending '0' - character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``. - - Examples - -------- - >>> s = pd.Series(["caribou", "tiger"]) - >>> s - 0 caribou - 1 tiger - dtype: object - - >>> s.str.pad(width=10) - 0 caribou - 1 tiger - dtype: object - - >>> s.str.pad(width=10, side='right', fillchar='-') - 0 caribou--- - 1 tiger----- - dtype: object - - >>> s.str.pad(width=10, side='both', fillchar='-') - 0 -caribou-- - 1 --tiger--- - dtype: object - """ - if not isinstance(fillchar, str): - msg = f"fillchar must be a character, not {type(fillchar).__name__}" - raise TypeError(msg) - - if len(fillchar) != 1: - raise TypeError("fillchar must be a character, not str") - - if not is_integer(width): - msg = f"width must be of integer type, not {type(width).__name__}" - raise TypeError(msg) - - if side == "left": - f = lambda x: x.rjust(width, fillchar) - elif side == "right": - f = lambda x: x.ljust(width, fillchar) - elif side == "both": - f = lambda x: x.center(width, fillchar) - else: # pragma: no cover - raise ValueError("Invalid side") - - return _na_map(f, arr, dtype=str) - - -def str_split(arr, pat=None, n=None): - - if pat is None: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - if len(pat) == 1: - if n is None or n == 0: - n = -1 - f = lambda x: x.split(pat, n) - else: - if n is None or n == -1: - n = 0 - regex = re.compile(pat) - f = lambda x: regex.split(x, maxsplit=n) - res = _na_map(f, arr) - return res - - -def str_rsplit(arr, pat=None, n=None): - - if n is None or n == 0: - n = -1 - f = lambda x: x.rsplit(pat, n) - res = _na_map(f, arr) - return res - - -def str_slice(arr, start=None, stop=None, step=None): - """ - Slice substrings from each element in the Series or Index. - - Parameters - ---------- - start : int, optional - Start position for slice operation. - stop : int, optional - Stop position for slice operation. - step : int, optional - Step size for slice operation. - - Returns - ------- - Series or Index of object - Series or Index from sliced substring from original string object. - - See Also - -------- - Series.str.slice_replace : Replace a slice with a string. - Series.str.get : Return element at position. - Equivalent to `Series.str.slice(start=i, stop=i+1)` with `i` - being the position. - - Examples - -------- - >>> s = pd.Series(["koala", "fox", "chameleon"]) - >>> s - 0 koala - 1 fox - 2 chameleon - dtype: object - - >>> s.str.slice(start=1) - 0 oala - 1 ox - 2 hameleon - dtype: object - - >>> s.str.slice(start=-1) - 0 a - 1 x - 2 n - dtype: object - - >>> s.str.slice(stop=2) - 0 ko - 1 fo - 2 ch - dtype: object - - >>> s.str.slice(step=2) - 0 kaa - 1 fx - 2 caeen - dtype: object - - >>> s.str.slice(start=0, stop=5, step=3) - 0 kl - 1 f - 2 cm - dtype: object - - Equivalent behaviour to: - - >>> s.str[0:5:3] - 0 kl - 1 f - 2 cm - dtype: object - """ - obj = slice(start, stop, step) - f = lambda x: x[obj] - return _na_map(f, arr, dtype=str) - - -def str_slice_replace(arr, start=None, stop=None, repl=None): - """ - Replace a positional slice of a string with another value. - - Parameters - ---------- - start : int, optional - Left index position to use for the slice. If not specified (None), - the slice is unbounded on the left, i.e. slice from the start - of the string. 
- stop : int, optional - Right index position to use for the slice. If not specified (None), - the slice is unbounded on the right, i.e. slice until the - end of the string. - repl : str, optional - String for replacement. If not specified (None), the sliced region - is replaced with an empty string. - - Returns - ------- - Series or Index - Same type as the original object. - - See Also - -------- - Series.str.slice : Just slicing without replacement. - - Examples - -------- - >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde']) - >>> s - 0 a - 1 ab - 2 abc - 3 abdc - 4 abcde - dtype: object - - Specify just `start`, meaning replace `start` until the end of the - string with `repl`. - - >>> s.str.slice_replace(1, repl='X') - 0 aX - 1 aX - 2 aX - 3 aX - 4 aX - dtype: object - - Specify just `stop`, meaning the start of the string to `stop` is replaced - with `repl`, and the rest of the string is included. - - >>> s.str.slice_replace(stop=2, repl='X') - 0 X - 1 X - 2 Xc - 3 Xdc - 4 Xcde - dtype: object - - Specify `start` and `stop`, meaning the slice from `start` to `stop` is - replaced with `repl`. Everything before or after `start` and `stop` is - included as is. - - >>> s.str.slice_replace(start=1, stop=3, repl='X') - 0 aX - 1 aX - 2 aX - 3 aXc - 4 aXde - dtype: object - """ - if repl is None: - repl = "" - - def f(x): - if x[start:stop] == "": - local_stop = start - else: - local_stop = stop - y = "" - if start is not None: - y += x[:start] - y += repl - if stop is not None: - y += x[local_stop:] - return y - - return _na_map(f, arr, dtype=str) - - -def str_strip(arr, to_strip=None, side="both"): - """ - Strip whitespace (including newlines) from each string in the - Series/Index. - - Parameters - ---------- - to_strip : str or unicode - side : {'left', 'right', 'both'}, default 'both' - - Returns - ------- - Series or Index - """ - if side == "both": - f = lambda x: x.strip(to_strip) - elif side == "left": - f = lambda x: x.lstrip(to_strip) - elif side == "right": - f = lambda x: x.rstrip(to_strip) - else: # pragma: no cover - raise ValueError("Invalid side") - return _na_map(f, arr, dtype=str) - - -def str_wrap(arr, width, **kwargs): - r""" - Wrap strings in Series/Index at specified line width. - - This method has the same keyword parameters and defaults as - :class:`textwrap.TextWrapper`. - - Parameters - ---------- - width : int - Maximum line width. - expand_tabs : bool, optional - If True, tab characters will be expanded to spaces (default: True). - replace_whitespace : bool, optional - If True, each whitespace character (as defined by string.whitespace) - remaining after tab expansion will be replaced by a single space - (default: True). - drop_whitespace : bool, optional - If True, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True). - break_long_words : bool, optional - If True, then words longer than width will be broken in order to ensure - that no lines are longer than width. If it is false, long words will - not be broken, and some lines may be longer than width (default: True). - break_on_hyphens : bool, optional - If True, wrapping will occur preferably on whitespace and right after - hyphens in compound words, as it is customary in English. If false, - only whitespaces will be considered as potentially good places for line - breaks, but you need to set break_long_words to false if you want truly - insecable words (default: True). 
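The ``local_stop`` adjustment in ``str_slice_replace`` above handles slices that select nothing (for example, ``start`` beyond the end of a short string): splicing at ``start`` instead of ``stop`` keeps characters from being dropped. The per-element helper, extracted:

    def slice_replace(x: str, start=None, stop=None, repl=""):
        # An empty slice means start and stop collapse to the same spot;
        # resume copying at start so no text is swallowed.
        local_stop = start if x[start:stop] == "" else stop
        y = ""
        if start is not None:
            y += x[:start]
        y += repl
        if stop is not None:
            y += x[local_stop:]
        return y

    print(slice_replace("a", start=1, stop=3, repl="X"))      # 'aX'
    print(slice_replace("abcde", start=1, stop=3, repl="X"))  # 'aXde'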
- - Returns - ------- - Series or Index - - Notes - ----- - Internally, this method uses a :class:`textwrap.TextWrapper` instance with - default settings. To achieve behavior matching R's stringr library str_wrap - function, use the arguments: - - - expand_tabs = False - - replace_whitespace = True - - drop_whitespace = True - - break_long_words = False - - break_on_hyphens = False - - Examples - -------- - >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) - >>> s.str.wrap(12) - 0 line to be\nwrapped - 1 another line\nto be\nwrapped - dtype: object - """ - kwargs["width"] = width - - tw = textwrap.TextWrapper(**kwargs) - - return _na_map(lambda s: "\n".join(tw.wrap(s)), arr, dtype=str) - - -def str_translate(arr, table): - """ - Map all characters in the string through the given mapping table. - - Equivalent to standard :meth:`str.translate`. - - Parameters - ---------- - table : dict - Table is a mapping of Unicode ordinals to Unicode ordinals, strings, or - None. Unmapped characters are left untouched. - Characters mapped to None are deleted. :meth:`str.maketrans` is a - helper function for making translation tables. - - Returns - ------- - Series or Index - """ - return _na_map(lambda x: x.translate(table), arr, dtype=str) - - -def str_get(arr, i): - """ - Extract element from each component at specified position. - - Extract element from lists, tuples, or strings in each element in the - Series/Index. - - Parameters - ---------- - i : int - Position of element to extract. - - Returns - ------- - Series or Index - - Examples - -------- - >>> s = pd.Series(["String", - ... (1, 2, 3), - ... ["a", "b", "c"], - ... 123, - ... -456, - ... {1: "Hello", "2": "World"}]) - >>> s - 0 String - 1 (1, 2, 3) - 2 [a, b, c] - 3 123 - 4 -456 - 5 {1: 'Hello', '2': 'World'} - dtype: object - - >>> s.str.get(1) - 0 t - 1 2 - 2 b - 3 NaN - 4 NaN - 5 Hello - dtype: object - - >>> s.str.get(-1) - 0 g - 1 3 - 2 c - 3 NaN - 4 NaN - 5 None - dtype: object - """ - - def f(x): - if isinstance(x, dict): - return x.get(i) - elif len(x) > i >= -len(x): - return x[i] - return np.nan - - return _na_map(f, arr) - - -def str_decode(arr, encoding, errors="strict"): - """ - Decode character string in the Series/Index using indicated encoding. - - Equivalent to :meth:`str.decode` in python2 and :meth:`bytes.decode` in - python3. - - Parameters - ---------- - encoding : str - errors : str, optional - - Returns - ------- - Series or Index - """ - if encoding in _cpython_optimized_decoders: - # CPython optimized implementation - f = lambda x: x.decode(encoding, errors) - else: - decoder = codecs.getdecoder(encoding) - f = lambda x: decoder(x, errors)[0] - return _na_map(f, arr) - - -def str_encode(arr, encoding, errors="strict"): - """ - Encode character string in the Series/Index using indicated encoding. - - Equivalent to :meth:`str.encode`. - - Parameters - ---------- - encoding : str - errors : str, optional - - Returns - ------- - encoded : Series/Index of objects - """ - if encoding in _cpython_optimized_encoders: - # CPython optimized implementation - f = lambda x: x.encode(encoding, errors) - else: - encoder = codecs.getencoder(encoding) - f = lambda x: encoder(x, errors)[0] - return _na_map(f, arr) - - -def forbid_nonstring_types(forbidden, name=None): - """ - Decorator to forbid specific types for a method of StringMethods. - - For calling `.str.{method}` on a Series or Index, it is necessary to first - initialize the :class:`StringMethods` object, and then call the method. 
- However, different methods allow different input types, and so this can not - be checked during :meth:`StringMethods.__init__`, but must be done on a - per-method basis. This decorator exists to facilitate this process, and - make it explicit which (inferred) types are disallowed by the method. - - :meth:`StringMethods.__init__` allows the *union* of types its different - methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), - namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. - - The default string types ['string', 'empty'] are allowed for all methods. - For the additional types ['bytes', 'mixed', 'mixed-integer'], each method - then needs to forbid the types it is not intended for. - - Parameters - ---------- - forbidden : list-of-str or None - List of forbidden non-string types, may be one or more of - `['bytes', 'mixed', 'mixed-integer']`. - name : str, default None - Name of the method to use in the error message. By default, this is - None, in which case the name from the method being wrapped will be - copied. However, for working with further wrappers (like _pat_wrapper - and _noarg_wrapper), it is necessary to specify the name. - - Returns - ------- - func : wrapper - The method to which the decorator is applied, with an added check that - enforces the inferred type to not be in the list of forbidden types. - - Raises - ------ - TypeError - If the inferred type of the underlying data is in `forbidden`. - """ - # deal with None - forbidden = [] if forbidden is None else forbidden - - allowed_types = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set( - forbidden - ) - - def _forbid_nonstring_types(func): - func_name = func.__name__ if name is None else name - - @wraps(func) - def wrapper(self, *args, **kwargs): - if self._inferred_dtype not in allowed_types: - msg = ( - f"Cannot use .str.{func_name} with values of " - f"inferred dtype '{self._inferred_dtype}'." 
- ) - raise TypeError(msg) - return func(self, *args, **kwargs) - - wrapper.__name__ = func_name - return wrapper - - return _forbid_nonstring_types - - -def _noarg_wrapper( - f, - name=None, - docstring=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper(self): - result = _na_map(f, self._parent, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - wrapper.__name__ = f.__name__ if name is None else name - if docstring is not None: - wrapper.__doc__ = docstring - else: - raise ValueError("Provide docstring") - - return wrapper - - -def _pat_wrapper( - f, - flags=False, - na=False, - name=None, - forbidden_types=["bytes"], - returns_string=True, - **kwargs, -): - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper1(self, pat): - result = f(self._parent, pat) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper2(self, pat, flags=0, **kwargs): - result = f(self._parent, pat, flags=flags, **kwargs) - return self._wrap_result(result, returns_string=returns_string) - - @forbid_nonstring_types(forbidden_types, name=name) - def wrapper3(self, pat, na=np.nan): - result = f(self._parent, pat, na=na) - return self._wrap_result(result, returns_string=returns_string, fill_value=na) - - wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 - - wrapper.__name__ = f.__name__ if name is None else name - if f.__doc__: - wrapper.__doc__ = f.__doc__ - - return wrapper - - -def copy(source): - """Copy a docstring from another source function (if present)""" - - def do_copy(target): - if source.__doc__: - target.__doc__ = source.__doc__ - return target - - return do_copy - - -class StringMethods(NoNewAttributesMixin): - """ - Vectorized string functions for Series and Index. - - NAs stay NA unless handled otherwise by a particular method. - Patterned after Python's string methods, with some inspiration from - R's stringr package. - - Examples - -------- - >>> s = pd.Series(["A_Str_Series"]) - >>> s - 0 A_Str_Series - dtype: object - - >>> s.str.split("_") - 0 [A, Str, Series] - dtype: object - - >>> s.str.replace("_", "") - 0 AStrSeries - dtype: object - """ - - def __init__(self, data): - self._inferred_dtype = self._validate(data) - self._is_categorical = is_categorical_dtype(data.dtype) - self._is_string = data.dtype.name == "string" - - # ._values.categories works for both Series/Index - self._parent = data._values.categories if self._is_categorical else data - # save orig to blow up categoricals to the right type - self._orig = data - self._freeze() - - @staticmethod - def _validate(data): - """ - Auxiliary function for StringMethods, infers and checks dtype of data. - - This is a "first line of defence" at the creation of the StringMethods- - object (see _make_accessor), and just checks that the dtype is in the - *union* of the allowed types over all string methods below; this - restriction is then refined on a per-method basis using the decorator - @forbid_nonstring_types (more info in the corresponding docstring). 
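A stripped-down model of the gate that ``forbid_nonstring_types`` above installs on each accessor method (toy class and names are hypothetical):

    from functools import wraps

    def forbid(forbidden, name=None):
        allowed = {"string", "empty", "bytes", "mixed", "mixed-integer"} - set(
            forbidden or []
        )

        def deco(func):
            func_name = name or func.__name__

            @wraps(func)
            def wrapper(self, *args, **kwargs):
                # Reject inferred dtypes this particular method is not meant for.
                if self._inferred_dtype not in allowed:
                    raise TypeError(
                        f"Cannot use .str.{func_name} with values of "
                        f"inferred dtype '{self._inferred_dtype}'."
                    )
                return func(self, *args, **kwargs)

            return wrapper

        return deco

    class Toy:
        def __init__(self, inferred_dtype):
            self._inferred_dtype = inferred_dtype

        @forbid(["bytes"])
        def upper(self):
            return "ok"

    print(Toy("string").upper())  # 'ok'; Toy("bytes").upper() raises TypeError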
- - This really should exclude all series/index with any non-string values, - but that isn't practical for performance reasons until we have a str - dtype (GH 9343 / 13877) - - Parameters - ---------- - data : The content of the Series - - Returns - ------- - dtype : inferred dtype of data - """ - from pandas import StringDtype - - if isinstance(data, ABCMultiIndex): - raise AttributeError( - "Can only use .str accessor with Index, not MultiIndex" - ) - - # see _libs/lib.pyx for list of inferred types - allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - - values = getattr(data, "values", data) # Series / Index - values = getattr(values, "categories", values) # categorical / normal - - # explicitly allow StringDtype - if isinstance(values.dtype, StringDtype): - return "string" - - try: - inferred_dtype = lib.infer_dtype(values, skipna=True) - except ValueError: - # GH#27571 mostly occurs with ExtensionArray - inferred_dtype = None - - if inferred_dtype not in allowed_types: - raise AttributeError("Can only use .str accessor with string values!") - return inferred_dtype - - def __getitem__(self, key): - if isinstance(key, slice): - return self.slice(start=key.start, stop=key.stop, step=key.step) - else: - return self.get(key) - - def __iter__(self): - warnings.warn( - "Columnar iteration over characters will be deprecated in future releases.", - FutureWarning, - stacklevel=2, - ) - i = 0 - g = self.get(i) - while g.notna().any(): - yield g - i += 1 - g = self.get(i) - - def _wrap_result( - self, - result, - use_codes=True, - name=None, - expand=None, - fill_value=np.nan, - returns_string=True, - ): - - from pandas import Index, MultiIndex, Series - - # for category, we do the stuff on the categories, so blow it up - # to the full series again - # But for some operations, we have to do the stuff on the full values, - # so make it possible to skip this step as the method already did this - # before the transformation... - if use_codes and self._is_categorical: - # if self._orig is a CategoricalIndex, there is no .cat-accessor - result = take_1d( - result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value - ) - - if not hasattr(result, "ndim") or not hasattr(result, "dtype"): - return result - assert result.ndim < 3 - - # We can be wrapping a string / object / categorical result, in which - # case we'll want to return the same dtype as the input. - # Or we can be wrapping a numeric output, in which case we don't want - # to return a StringArray. 
- if self._is_string and returns_string: - dtype = "string" - else: - dtype = None - - if expand is None: - # infer from ndim if expand is not specified - expand = result.ndim != 1 - - elif expand is True and not isinstance(self._orig, ABCIndexClass): - # required when expand=True is explicitly specified - # not needed when inferred - - def cons_row(x): - if is_list_like(x): - return x - else: - return [x] - - result = [cons_row(x) for x in result] - if result: - # propagate nan values to match longest sequence (GH 18450) - max_len = max(len(x) for x in result) - result = [ - x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result - ] - - if not isinstance(expand, bool): - raise ValueError("expand must be True or False") - - if expand is False: - # if expand is False, result should have the same name - # as the original otherwise specified - if name is None: - name = getattr(result, "name", None) - if name is None: - # do not use logical or, _orig may be a DataFrame - # which has "name" column - name = self._orig.name - - # Wait until we are sure result is a Series or Index before - # checking attributes (GH 12180) - if isinstance(self._orig, ABCIndexClass): - # if result is a boolean np.array, return the np.array - # instead of wrapping it into a boolean Index (GH 8875) - if is_bool_dtype(result): - return result - - if expand: - result = list(result) - out = MultiIndex.from_tuples(result, names=name) - if out.nlevels == 1: - # We had all tuples of length-one, which are - # better represented as a regular Index. - out = out.get_level_values(0) - return out - else: - return Index(result, name=name) - else: - index = self._orig.index - if expand: - cons = self._orig._constructor_expanddim - result = cons(result, columns=name, index=index, dtype=dtype) - else: - # Must be a Series - cons = self._orig._constructor - result = cons(result, name=name, index=index, dtype=dtype) - return result - - def _get_series_list(self, others): - """ - Auxiliary function for :meth:`str.cat`. Turn potentially mixed input - into a list of Series (elements without an index must match the length - of the calling Series/Index). - - Parameters - ---------- - others : Series, DataFrame, np.ndarray, list-like or list-like of - Objects that are either Series, Index or np.ndarray (1-dim). - - Returns - ------- - list of Series - Others transformed into list of Series. - """ - from pandas import DataFrame, Series - - # self._orig is either Series or Index - idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index - - # Generally speaking, all objects without an index inherit the index - # `idx` of the calling Series/Index - i.e. must have matching length. - # Objects with an index (i.e. Series/Index/DataFrame) keep their own. - if isinstance(others, ABCSeries): - return [others] - elif isinstance(others, ABCIndexClass): - return [Series(others._values, index=idx)] - elif isinstance(others, ABCDataFrame): - return [others[x] for x in others] - elif isinstance(others, np.ndarray) and others.ndim == 2: - others = DataFrame(others, index=idx) - return [others[x] for x in others] - elif is_list_like(others, allow_sets=False): - others = list(others) # ensure iterators do not get read twice etc - - # in case of list-like `others`, all elements must be - # either Series/Index/np.ndarray (1-dim)... 
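The padding step in ``_wrap_result`` above (GH 18450) is what lets rows of unequal length line up as DataFrame columns when ``expand=True``; the list comprehension in isolation:

    import numpy as np

    rows = [["a", "b"], [np.nan]]

    # All-NaN (or empty) rows are repeated out to the longest row's length;
    # real rows pass through unchanged.
    max_len = max(len(x) for x in rows)
    print([x * max_len if len(x) == 0 or x[0] is np.nan else x for x in rows])
    # [['a', 'b'], [nan, nan]]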
- if all( - isinstance(x, (ABCSeries, ABCIndexClass)) - or (isinstance(x, np.ndarray) and x.ndim == 1) - for x in others - ): - los = [] - while others: # iterate through list and append each element - los = los + self._get_series_list(others.pop(0)) - return los - # ... or just strings - elif all(not is_list_like(x) for x in others): - return [Series(others, index=idx)] - raise TypeError( - "others must be Series, Index, DataFrame, np.ndarray " - "or list-like (either containing only strings or " - "containing only objects of type Series/Index/" - "np.ndarray[1-dim])" - ) - - @forbid_nonstring_types(["bytes", "mixed", "mixed-integer"]) - def cat(self, others=None, sep=None, na_rep=None, join="left"): - """ - Concatenate strings in the Series/Index with given separator. - - If `others` is specified, this function concatenates the Series/Index - and elements of `others` element-wise. - If `others` is not passed, then all values in the Series/Index are - concatenated into a single string with a given `sep`. - - Parameters - ---------- - others : Series, Index, DataFrame, np.ndarray or list-like - Series, Index, DataFrame, np.ndarray (one- or two-dimensional) and - other list-likes of strings must have the same length as the - calling Series/Index, with the exception of indexed objects (i.e. - Series/Index/DataFrame) if `join` is not None. - - If others is a list-like that contains a combination of Series, - Index or np.ndarray (1-dim), then all elements will be unpacked and - must satisfy the above criteria individually. - - If others is None, the method returns the concatenation of all - strings in the calling Series/Index. - sep : str, default '' - The separator between the different elements/columns. By default - the empty string `''` is used. - na_rep : str or None, default None - Representation that is inserted for all missing values: - - - If `na_rep` is None, and `others` is None, missing values in the - Series/Index are omitted from the result. - - If `na_rep` is None, and `others` is not None, a row containing a - missing value in any of the columns (before concatenation) will - have a missing value in the result. - join : {'left', 'right', 'outer', 'inner'}, default 'left' - Determines the join-style between the calling Series/Index and any - Series/Index/DataFrame in `others` (objects without an index need - to match the length of the calling Series/Index). To disable - alignment, use `.values` on any Series/Index/DataFrame in `others`. - - .. versionchanged:: 1.0.0 - Changed default of `join` from None to `'left'`. - - Returns - ------- - str, Series or Index - If `others` is None, `str` is returned, otherwise a `Series/Index` - (same type as caller) of objects is returned. - - See Also - -------- - split : Split each string in the Series/Index. - join : Join lists contained as elements in the Series/Index. - - Examples - -------- - When not passing `others`, all values are concatenated into a single - string: - - >>> s = pd.Series(['a', 'b', np.nan, 'd']) - >>> s.str.cat(sep=' ') - 'a b d' - - By default, NA values in the Series are ignored. Using `na_rep`, they - can be given a representation: - - >>> s.str.cat(sep=' ', na_rep='?') - 'a b ? d' - - If `others` is specified, corresponding values are concatenated with - the separator. Result will be a Series of strings. 
- - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',') - 0 a,A - 1 b,B - 2 NaN - 3 d,D - dtype: object - - Missing values will remain missing in the result, but can again be - represented using `na_rep` - - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-') - 0 a,A - 1 b,B - 2 -,C - 3 d,D - dtype: object - - If `sep` is not specified, the values are concatenated without - separation. - - >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-') - 0 aA - 1 bB - 2 -C - 3 dD - dtype: object - - Series with different indexes can be aligned before concatenation. The - `join`-keyword works as in other methods. - - >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) - >>> s.str.cat(t, join='left', na_rep='-') - 0 aa - 1 b- - 2 -c - 3 dd - dtype: object - >>> - >>> s.str.cat(t, join='outer', na_rep='-') - 0 aa - 1 b- - 2 -c - 3 dd - 4 -e - dtype: object - >>> - >>> s.str.cat(t, join='inner', na_rep='-') - 0 aa - 2 -c - 3 dd - dtype: object - >>> - >>> s.str.cat(t, join='right', na_rep='-') - 3 dd - 0 aa - 4 -e - 2 -c - dtype: object - - For more examples, see :ref:`here `. - """ - from pandas import Index, Series, concat - - if isinstance(others, str): - raise ValueError("Did you mean to supply a `sep` keyword?") - if sep is None: - sep = "" - - if isinstance(self._orig, ABCIndexClass): - data = Series(self._orig, index=self._orig) - else: # Series - data = self._orig - - # concatenate Series/Index with itself if no "others" - if others is None: - data = ensure_object(data) - na_mask = isna(data) - if na_rep is None and na_mask.any(): - data = data[~na_mask] - elif na_rep is not None and na_mask.any(): - data = np.where(na_mask, na_rep, data) - return sep.join(data) - - try: - # turn anything in "others" into lists of Series - others = self._get_series_list(others) - except ValueError as err: # do not catch TypeError raised by _get_series_list - raise ValueError( - "If `others` contains arrays or lists (or other " - "list-likes without an index), these must all be " - "of the same length as the calling Series/Index." - ) from err - - # align if required - if any(not data.index.equals(x.index) for x in others): - # Need to add keys for uniqueness in case of duplicate columns - others = concat( - others, - axis=1, - join=(join if join == "inner" else "outer"), - keys=range(len(others)), - sort=False, - copy=False, - ) - data, others = data.align(others, join=join) - others = [others[x] for x in others] # again list of Series - - all_cols = [ensure_object(x) for x in [data] + others] - na_masks = np.array([isna(x) for x in all_cols]) - union_mask = np.logical_or.reduce(na_masks, axis=0) - - if na_rep is None and union_mask.any(): - # no na_rep means NaNs for all rows where any column has a NaN - # only necessary if there are actually any NaNs - result = np.empty(len(data), dtype=object) - np.putmask(result, union_mask, np.nan) - - not_masked = ~union_mask - result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) - elif na_rep is not None and union_mask.any(): - # fill NaNs with na_rep in case there are actually any NaNs - all_cols = [ - np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) - ] - result = cat_safe(all_cols, sep) - else: - # no NaNs - can just concatenate - result = cat_safe(all_cols, sep) - - if isinstance(self._orig, ABCIndexClass): - # add dtype for case that result is all-NA - result = Index(result, dtype=object, name=self._orig.name) - else: # Series - if is_categorical_dtype(self._orig.dtype): - # We need to infer the new categories. 
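-                # (Illustrative: concatenation may produce values, e.g. "aa",
-                # that are not among the original categories, so dtype=None
-                # lets the Series constructor infer them.)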
- dtype = None - else: - dtype = self._orig.dtype - result = Series(result, dtype=dtype, index=data.index, name=self._orig.name) - return result - - _shared_docs[ - "str_split" - ] = r""" - Split strings around given separator/delimiter. - - Splits the string in the Series/Index from the %(side)s, - at the specified delimiter string. Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - pat : str, optional - String or regular expression to split on. - If not specified, split on whitespace. - n : int, default -1 (all) - Limit number of splits in output. - ``None``, 0 and -1 will be interpreted as return all splits. - expand : bool, default False - Expand the split strings into separate columns. - - * If ``True``, return DataFrame/MultiIndex expanding dimensionality. - * If ``False``, return Series/Index, containing lists of strings. - - Returns - ------- - Series, Index, DataFrame or MultiIndex - Type matches caller unless ``expand=True`` (see Notes). - - See Also - -------- - Series.str.split : Split strings around given separator/delimiter. - Series.str.rsplit : Splits string around given separator/delimiter, - starting from the right. - Series.str.join : Join lists contained as elements in the Series/Index - with passed delimiter. - str.split : Standard library version for split. - str.rsplit : Standard library version for rsplit. - - Notes - ----- - The handling of the `n` keyword depends on the number of found splits: - - - If found splits > `n`, make first `n` splits only - - If found splits <= `n`, make all splits - - If for a certain row the number of found splits < `n`, - append `None` for padding up to `n` if ``expand=True`` - - If using ``expand=True``, Series and Index callers return DataFrame and - MultiIndex objects, respectively. - - Examples - -------- - >>> s = pd.Series( - ... [ - ... "this is a regular sentence", - ... "https://docs.python.org/3/tutorial/index.html", - ... np.nan - ... ] - ... ) - >>> s - 0 this is a regular sentence - 1 https://docs.python.org/3/tutorial/index.html - 2 NaN - dtype: object - - In the default setting, the string is split by whitespace. - - >>> s.str.split() - 0 [this, is, a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - Without the `n` parameter, the outputs of `rsplit` and `split` - are identical. - - >>> s.str.rsplit() - 0 [this, is, a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - The `n` parameter can be used to limit the number of splits on the - delimiter. The outputs of `split` and `rsplit` are different. - - >>> s.str.split(n=2) - 0 [this, is, a regular sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - >>> s.str.rsplit(n=2) - 0 [this is a, regular, sentence] - 1 [https://docs.python.org/3/tutorial/index.html] - 2 NaN - dtype: object - - The `pat` parameter can be used to split by other characters. - - >>> s.str.split(pat="/") - 0 [this is a regular sentence] - 1 [https:, , docs.python.org, 3, tutorial, index... - 2 NaN - dtype: object - - When using ``expand=True``, the split elements will expand out into - separate columns. If NaN is present, it is propagated throughout - the columns during the split. 
- - >>> s.str.split(expand=True) - 0 1 2 3 4 - 0 this is a regular sentence - 1 https://docs.python.org/3/tutorial/index.html None None None None - 2 NaN NaN NaN NaN NaN - - For slightly more complex use cases like splitting the html document name - from a url, a combination of parameter settings can be used. - - >>> s.str.rsplit("/", n=1, expand=True) - 0 1 - 0 this is a regular sentence None - 1 https://docs.python.org/3/tutorial index.html - 2 NaN NaN - - Remember to escape special characters when explicitly using regular - expressions. - - >>> s = pd.Series(["1+1=2"]) - >>> s - 0 1+1=2 - dtype: object - >>> s.str.split(r"\+|=", expand=True) - 0 1 2 - 0 1 1 2 - """ - - @Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"}) - @forbid_nonstring_types(["bytes"]) - def split(self, pat=None, n=-1, expand=False): - result = str_split(self._parent, pat, n=n) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @Appender(_shared_docs["str_split"] % {"side": "end", "method": "rsplit"}) - @forbid_nonstring_types(["bytes"]) - def rsplit(self, pat=None, n=-1, expand=False): - result = str_rsplit(self._parent, pat, n=n) - return self._wrap_result(result, expand=expand, returns_string=expand) - - _shared_docs[ - "str_partition" - ] = """ - Split the string at the %(side)s occurrence of `sep`. - - This method splits the string at the %(side)s occurrence of `sep`, - and returns 3 elements containing the part before the separator, - the separator itself, and the part after the separator. - If the separator is not found, return %(return)s. - - Parameters - ---------- - sep : str, default whitespace - String to split on. - expand : bool, default True - If True, return DataFrame/MultiIndex expanding dimensionality. - If False, return Series/Index. - - Returns - ------- - DataFrame/MultiIndex or Series/Index of objects - - See Also - -------- - %(also)s - Series.str.split : Split strings around given separators. - str.partition : Standard library version. 
- - Examples - -------- - - >>> s = pd.Series(['Linda van der Berg', 'George Pitt-Rivers']) - >>> s - 0 Linda van der Berg - 1 George Pitt-Rivers - dtype: object - - >>> s.str.partition() - 0 1 2 - 0 Linda van der Berg - 1 George Pitt-Rivers - - To partition by the last space instead of the first one: - - >>> s.str.rpartition() - 0 1 2 - 0 Linda van der Berg - 1 George Pitt-Rivers - - To partition by something different than a space: - - >>> s.str.partition('-') - 0 1 2 - 0 Linda van der Berg - 1 George Pitt - Rivers - - To return a Series containing tuples instead of a DataFrame: - - >>> s.str.partition('-', expand=False) - 0 (Linda van der Berg, , ) - 1 (George Pitt, -, Rivers) - dtype: object - - Also available on indices: - - >>> idx = pd.Index(['X 123', 'Y 999']) - >>> idx - Index(['X 123', 'Y 999'], dtype='object') - - Which will create a MultiIndex: - - >>> idx.str.partition() - MultiIndex([('X', ' ', '123'), - ('Y', ' ', '999')], - ) - - Or an index with tuples with ``expand=False``: - - >>> idx.str.partition(expand=False) - Index([('X', ' ', '123'), ('Y', ' ', '999')], dtype='object') - """ - - @Appender( - _shared_docs["str_partition"] - % { - "side": "first", - "return": "3 elements containing the string itself, followed by two " - "empty strings", - "also": "rpartition : Split the string at the last occurrence of `sep`.", - } - ) - @forbid_nonstring_types(["bytes"]) - def partition(self, sep=" ", expand=True): - f = lambda x: x.partition(sep) - result = _na_map(f, self._parent) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @Appender( - _shared_docs["str_partition"] - % { - "side": "last", - "return": "3 elements containing two empty strings, followed by the " - "string itself", - "also": "partition : Split the string at the first occurrence of `sep`.", - } - ) - @forbid_nonstring_types(["bytes"]) - def rpartition(self, sep=" ", expand=True): - f = lambda x: x.rpartition(sep) - result = _na_map(f, self._parent) - return self._wrap_result(result, expand=expand, returns_string=expand) - - @copy(str_get) - def get(self, i): - result = str_get(self._parent, i) - return self._wrap_result(result) - - @copy(str_join) - @forbid_nonstring_types(["bytes"]) - def join(self, sep): - result = str_join(self._parent, sep) - return self._wrap_result(result) - - @copy(str_contains) - @forbid_nonstring_types(["bytes"]) - def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): - result = str_contains( - self._parent, pat, case=case, flags=flags, na=na, regex=regex - ) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_match) - @forbid_nonstring_types(["bytes"]) - def match(self, pat, case=True, flags=0, na=np.nan): - result = str_match(self._parent, pat, case=case, flags=flags, na=na) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_fullmatch) - @forbid_nonstring_types(["bytes"]) - def fullmatch(self, pat, case=True, flags=0, na=np.nan): - result = str_fullmatch(self._parent, pat, case=case, flags=flags, na=na) - return self._wrap_result(result, fill_value=na, returns_string=False) - - @copy(str_replace) - @forbid_nonstring_types(["bytes"]) - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): - result = str_replace( - self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex - ) - return self._wrap_result(result) - - @copy(str_repeat) - @forbid_nonstring_types(["bytes"]) - def repeat(self, repeats): - result = str_repeat(self._parent, repeats) - return 
self._wrap_result(result)
-
-    @copy(str_pad)
-    @forbid_nonstring_types(["bytes"])
-    def pad(self, width, side="left", fillchar=" "):
-        result = str_pad(self._parent, width, side=side, fillchar=fillchar)
-        return self._wrap_result(result)
-
-    _shared_docs[
-        "str_pad"
-    ] = """
-    Pad %(side)s side of strings in the Series/Index.
-
-    Equivalent to :meth:`str.%(method)s`.
-
-    Parameters
-    ----------
-    width : int
-        Minimum width of resulting string; additional characters will be filled
-        with ``fillchar``.
-    fillchar : str
-        Additional character for filling, default is whitespace.
-
-    Returns
-    -------
-    filled : Series/Index of objects.
-    """
-
-    @Appender(_shared_docs["str_pad"] % dict(side="left and right", method="center"))
-    @forbid_nonstring_types(["bytes"])
-    def center(self, width, fillchar=" "):
-        return self.pad(width, side="both", fillchar=fillchar)
-
-    @Appender(_shared_docs["str_pad"] % dict(side="right", method="ljust"))
-    @forbid_nonstring_types(["bytes"])
-    def ljust(self, width, fillchar=" "):
-        return self.pad(width, side="right", fillchar=fillchar)
-
-    @Appender(_shared_docs["str_pad"] % dict(side="left", method="rjust"))
-    @forbid_nonstring_types(["bytes"])
-    def rjust(self, width, fillchar=" "):
-        return self.pad(width, side="left", fillchar=fillchar)
-
-    @forbid_nonstring_types(["bytes"])
-    def zfill(self, width):
-        """
-        Pad strings in the Series/Index by prepending '0' characters.
-
-        Strings in the Series/Index are padded with '0' characters on the
-        left of the string to reach a total string length `width`. Strings
-        in the Series/Index with length greater than or equal to `width` are
-        unchanged.
-
-        Parameters
-        ----------
-        width : int
-            Minimum length of resulting string; strings with length less
-            than `width` will be prepended with '0' characters.
-
-        Returns
-        -------
-        Series/Index of objects.
-
-        See Also
-        --------
-        Series.str.rjust : Fills the left side of strings with an arbitrary
-            character.
-        Series.str.ljust : Fills the right side of strings with an arbitrary
-            character.
-        Series.str.pad : Fills the specified sides of strings with an arbitrary
-            character.
-        Series.str.center : Fills both sides of strings with an arbitrary
-            character.
-
-        Notes
-        -----
-        Differs from :meth:`str.zfill` which has special handling
-        for '+'/'-' in the string.
-
-        Examples
-        --------
-        >>> s = pd.Series(['-1', '1', '1000', 10, np.nan])
-        >>> s
-        0      -1
-        1       1
-        2    1000
-        3      10
-        4     NaN
-        dtype: object
-
-        Note that ``10`` and ``NaN`` are not strings, therefore they are
-        converted to ``NaN``. The minus sign in ``'-1'`` is treated as a
-        regular character and the zero is added to the left of it
-        (:meth:`str.zfill` would have moved it to the left). ``1000``
-        remains unchanged as it is longer than `width`.
-
-        >>> s.str.zfill(3)
-        0     0-1
-        1     001
-        2    1000
-        3     NaN
-        4     NaN
-        dtype: object
-        """
-        result = str_pad(self._parent, width, side="left", fillchar="0")
-        return self._wrap_result(result)
-
-    @copy(str_slice)
-    def slice(self, start=None, stop=None, step=None):
-        result = str_slice(self._parent, start, stop, step)
-        return self._wrap_result(result)
-
-    @copy(str_slice_replace)
-    @forbid_nonstring_types(["bytes"])
-    def slice_replace(self, start=None, stop=None, repl=None):
-        result = str_slice_replace(self._parent, start, stop, repl)
-        return self._wrap_result(result)
-
-    @copy(str_decode)
-    def decode(self, encoding, errors="strict"):
-        # need to allow bytes here
-        result = str_decode(self._parent, encoding, errors)
-        # TODO: Not sure how to handle this.
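-        # (Presumably: wrapping with returns_string=False below keeps the
-        # decoded result as object dtype rather than casting it to "string".)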
- return self._wrap_result(result, returns_string=False) - - @copy(str_encode) - @forbid_nonstring_types(["bytes"]) - def encode(self, encoding, errors="strict"): - result = str_encode(self._parent, encoding, errors) - return self._wrap_result(result, returns_string=False) - - _shared_docs[ - "str_strip" - ] = r""" - Remove %(position)s characters. - - Strip whitespaces (including newlines) or a set of specified characters - from each string in the Series/Index from %(side)s. - Equivalent to :meth:`str.%(method)s`. - - Parameters - ---------- - to_strip : str or None, default None - Specifying the set of characters to be removed. - All combinations of this set of characters will be stripped. - If None then whitespaces are removed. - - Returns - ------- - Series or Index of object - - See Also - -------- - Series.str.strip : Remove leading and trailing characters in Series/Index. - Series.str.lstrip : Remove leading characters in Series/Index. - Series.str.rstrip : Remove trailing characters in Series/Index. - - Examples - -------- - >>> s = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', np.nan]) - >>> s - 0 1. Ant. - 1 2. Bee!\n - 2 3. Cat?\t - 3 NaN - dtype: object - - >>> s.str.strip() - 0 1. Ant. - 1 2. Bee! - 2 3. Cat? - 3 NaN - dtype: object - - >>> s.str.lstrip('123.') - 0 Ant. - 1 Bee!\n - 2 Cat?\t - 3 NaN - dtype: object - - >>> s.str.rstrip('.!? \n\t') - 0 1. Ant - 1 2. Bee - 2 3. Cat - 3 NaN - dtype: object - - >>> s.str.strip('123.!? \n\t') - 0 Ant - 1 Bee - 2 Cat - 3 NaN - dtype: object - """ - - @Appender( - _shared_docs["str_strip"] - % dict( - side="left and right sides", method="strip", position="leading and trailing" - ) - ) - @forbid_nonstring_types(["bytes"]) - def strip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="both") - return self._wrap_result(result) - - @Appender( - _shared_docs["str_strip"] - % dict(side="left side", method="lstrip", position="leading") - ) - @forbid_nonstring_types(["bytes"]) - def lstrip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="left") - return self._wrap_result(result) - - @Appender( - _shared_docs["str_strip"] - % dict(side="right side", method="rstrip", position="trailing") - ) - @forbid_nonstring_types(["bytes"]) - def rstrip(self, to_strip=None): - result = str_strip(self._parent, to_strip, side="right") - return self._wrap_result(result) - - @copy(str_wrap) - @forbid_nonstring_types(["bytes"]) - def wrap(self, width, **kwargs): - result = str_wrap(self._parent, width, **kwargs) - return self._wrap_result(result) - - @copy(str_get_dummies) - @forbid_nonstring_types(["bytes"]) - def get_dummies(self, sep="|"): - # we need to cast to Series of strings as only that has all - # methods available for making the dummies... 
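-        # Illustrative example: pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()
-        # would produce integer indicator columns 'a', 'b' and 'c', with the
-        # missing row encoded as all zeros.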
-        data = self._orig.astype(str) if self._is_categorical else self._parent
-        result, name = str_get_dummies(data, sep)
-        return self._wrap_result(
-            result,
-            use_codes=(not self._is_categorical),
-            name=name,
-            expand=True,
-            returns_string=False,
-        )
-
-    @copy(str_translate)
-    @forbid_nonstring_types(["bytes"])
-    def translate(self, table):
-        result = str_translate(self._parent, table)
-        return self._wrap_result(result)
-
-    count = _pat_wrapper(str_count, flags=True, name="count", returns_string=False)
-    startswith = _pat_wrapper(
-        str_startswith, na=True, name="startswith", returns_string=False
-    )
-    endswith = _pat_wrapper(
-        str_endswith, na=True, name="endswith", returns_string=False
-    )
-    findall = _pat_wrapper(
-        str_findall, flags=True, name="findall", returns_string=False
-    )
-
-    @copy(str_extract)
-    @forbid_nonstring_types(["bytes"])
-    def extract(self, pat, flags=0, expand=True):
-        return str_extract(self, pat, flags=flags, expand=expand)
-
-    @copy(str_extractall)
-    @forbid_nonstring_types(["bytes"])
-    def extractall(self, pat, flags=0):
-        return str_extractall(self._orig, pat, flags=flags)
-
-    _shared_docs[
-        "find"
-    ] = """
-    Return %(side)s indexes in each string in the Series/Index.
-
-    Each of the returned indexes corresponds to the position where the
-    substring is fully contained between [start:end]. Return -1 on
-    failure. Equivalent to standard :meth:`str.%(method)s`.
-
-    Parameters
-    ----------
-    sub : str
-        Substring being searched.
-    start : int
-        Left edge index.
-    end : int
-        Right edge index.
-
-    Returns
-    -------
-    Series or Index of int.
-
-    See Also
-    --------
-    %(also)s
-    """
-
-    @Appender(
-        _shared_docs["find"]
-        % dict(
-            side="lowest",
-            method="find",
-            also="rfind : Return highest indexes in each string.",
-        )
-    )
-    @forbid_nonstring_types(["bytes"])
-    def find(self, sub, start=0, end=None):
-        result = str_find(self._parent, sub, start=start, end=end, side="left")
-        return self._wrap_result(result, returns_string=False)
-
-    @Appender(
-        _shared_docs["find"]
-        % dict(
-            side="highest",
-            method="rfind",
-            also="find : Return lowest indexes in each string.",
-        )
-    )
-    @forbid_nonstring_types(["bytes"])
-    def rfind(self, sub, start=0, end=None):
-        result = str_find(self._parent, sub, start=start, end=end, side="right")
-        return self._wrap_result(result, returns_string=False)
-
-    @forbid_nonstring_types(["bytes"])
-    def normalize(self, form):
-        """
-        Return the Unicode normal form for the strings in the Series/Index.
-
-        For more information on the forms, see
-        :func:`unicodedata.normalize`.
-
-        Parameters
-        ----------
-        form : {'NFC', 'NFKC', 'NFD', 'NFKD'}
-            Unicode form.
-
-        Returns
-        -------
-        normalized : Series/Index of objects
-        """
-        import unicodedata
-
-        f = lambda x: unicodedata.normalize(form, x)
-        result = _na_map(f, self._parent, dtype=str)
-        return self._wrap_result(result)
-
-    _shared_docs[
-        "index"
-    ] = """
-    Return %(side)s indexes in each string in Series/Index.
-
-    Each of the returned indexes corresponds to the position where the
-    substring is fully contained between [start:end]. This is the same
-    as ``str.%(similar)s`` except instead of returning -1, it raises a
-    ValueError when the substring is not found. Equivalent to standard
-    ``str.%(method)s``.
-
-    Parameters
-    ----------
-    sub : str
-        Substring being searched.
-    start : int
-        Left edge index.
-    end : int
-        Right edge index.
- - Returns - ------- - Series or Index of object - - See Also - -------- - %(also)s - """ - - @Appender( - _shared_docs["index"] - % dict( - side="lowest", - similar="find", - method="index", - also="rindex : Return highest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def index(self, sub, start=0, end=None): - result = str_index(self._parent, sub, start=start, end=end, side="left") - return self._wrap_result(result, returns_string=False) - - @Appender( - _shared_docs["index"] - % dict( - side="highest", - similar="rfind", - method="rindex", - also="index : Return lowest indexes in each strings.", - ) - ) - @forbid_nonstring_types(["bytes"]) - def rindex(self, sub, start=0, end=None): - result = str_index(self._parent, sub, start=start, end=end, side="right") - return self._wrap_result(result, returns_string=False) - - _shared_docs[ - "len" - ] = """ - Compute the length of each element in the Series/Index. - - The element may be a sequence (such as a string, tuple or list) or a collection - (such as a dictionary). - - Returns - ------- - Series or Index of int - A Series or Index of integer values indicating the length of each - element in the Series or Index. - - See Also - -------- - str.len : Python built-in function returning the length of an object. - Series.size : Returns the length of the Series. - - Examples - -------- - Returns the length (number of characters) in a string. Returns the - number of entries for dictionaries, lists or tuples. - - >>> s = pd.Series(['dog', - ... '', - ... 5, - ... {'foo' : 'bar'}, - ... [2, 3, 5, 7], - ... ('one', 'two', 'three')]) - >>> s - 0 dog - 1 - 2 5 - 3 {'foo': 'bar'} - 4 [2, 3, 5, 7] - 5 (one, two, three) - dtype: object - >>> s.str.len() - 0 3.0 - 1 0.0 - 2 NaN - 3 1.0 - 4 4.0 - 5 3.0 - dtype: float64 - """ - len = _noarg_wrapper( - len, - docstring=_shared_docs["len"], - forbidden_types=None, - dtype=np.dtype("int64"), - returns_string=False, - ) - - _shared_docs[ - "casemethods" - ] = """ - Convert strings in the Series/Index to %(type)s. - %(version)s - Equivalent to :meth:`str.%(method)s`. - - Returns - ------- - Series or Index of object - - See Also - -------- - Series.str.lower : Converts all characters to lowercase. - Series.str.upper : Converts all characters to uppercase. - Series.str.title : Converts first character of each word to uppercase and - remaining to lowercase. - Series.str.capitalize : Converts first character to uppercase and - remaining to lowercase. - Series.str.swapcase : Converts uppercase to lowercase and lowercase to - uppercase. - Series.str.casefold: Removes all case distinctions in the string. 
- - Examples - -------- - >>> s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe']) - >>> s - 0 lower - 1 CAPITALS - 2 this is a sentence - 3 SwApCaSe - dtype: object - - >>> s.str.lower() - 0 lower - 1 capitals - 2 this is a sentence - 3 swapcase - dtype: object - - >>> s.str.upper() - 0 LOWER - 1 CAPITALS - 2 THIS IS A SENTENCE - 3 SWAPCASE - dtype: object - - >>> s.str.title() - 0 Lower - 1 Capitals - 2 This Is A Sentence - 3 Swapcase - dtype: object - - >>> s.str.capitalize() - 0 Lower - 1 Capitals - 2 This is a sentence - 3 Swapcase - dtype: object - - >>> s.str.swapcase() - 0 LOWER - 1 capitals - 2 THIS IS A SENTENCE - 3 sWaPcAsE - dtype: object - """ - - # _doc_args holds dict of strings to use in substituting casemethod docs - _doc_args: Dict[str, Dict[str, str]] = {} - _doc_args["lower"] = dict(type="lowercase", method="lower", version="") - _doc_args["upper"] = dict(type="uppercase", method="upper", version="") - _doc_args["title"] = dict(type="titlecase", method="title", version="") - _doc_args["capitalize"] = dict( - type="be capitalized", method="capitalize", version="" - ) - _doc_args["swapcase"] = dict(type="be swapcased", method="swapcase", version="") - _doc_args["casefold"] = dict( - type="be casefolded", - method="casefold", - version="\n .. versionadded:: 0.25.0\n", - ) - lower = _noarg_wrapper( - lambda x: x.lower(), - name="lower", - docstring=_shared_docs["casemethods"] % _doc_args["lower"], - dtype=str, - ) - upper = _noarg_wrapper( - lambda x: x.upper(), - name="upper", - docstring=_shared_docs["casemethods"] % _doc_args["upper"], - dtype=str, - ) - title = _noarg_wrapper( - lambda x: x.title(), - name="title", - docstring=_shared_docs["casemethods"] % _doc_args["title"], - dtype=str, - ) - capitalize = _noarg_wrapper( - lambda x: x.capitalize(), - name="capitalize", - docstring=_shared_docs["casemethods"] % _doc_args["capitalize"], - dtype=str, - ) - swapcase = _noarg_wrapper( - lambda x: x.swapcase(), - name="swapcase", - docstring=_shared_docs["casemethods"] % _doc_args["swapcase"], - dtype=str, - ) - casefold = _noarg_wrapper( - lambda x: x.casefold(), - name="casefold", - docstring=_shared_docs["casemethods"] % _doc_args["casefold"], - dtype=str, - ) - - _shared_docs[ - "ismethods" - ] = """ - Check whether all characters in each string are %(type)s. - - This is equivalent to running the Python string method - :meth:`str.%(method)s` for each element of the Series/Index. If a string - has zero characters, ``False`` is returned for that check. - - Returns - ------- - Series or Index of bool - Series or Index of boolean values with the same length as the original - Series/Index. - - See Also - -------- - Series.str.isalpha : Check whether all characters are alphabetic. - Series.str.isnumeric : Check whether all characters are numeric. - Series.str.isalnum : Check whether all characters are alphanumeric. - Series.str.isdigit : Check whether all characters are digits. - Series.str.isdecimal : Check whether all characters are decimal. - Series.str.isspace : Check whether all characters are whitespace. - Series.str.islower : Check whether all characters are lowercase. - Series.str.isupper : Check whether all characters are uppercase. - Series.str.istitle : Check whether all characters are titlecase. 
-
-    Examples
-    --------
-    **Checks for Alphabetic and Numeric Characters**
-
-    >>> s1 = pd.Series(['one', 'one1', '1', ''])
-
-    >>> s1.str.isalpha()
-    0     True
-    1    False
-    2    False
-    3    False
-    dtype: bool
-
-    >>> s1.str.isnumeric()
-    0    False
-    1    False
-    2     True
-    3    False
-    dtype: bool
-
-    >>> s1.str.isalnum()
-    0     True
-    1     True
-    2     True
-    3    False
-    dtype: bool
-
-    Note that checks against characters mixed with any additional punctuation
-    or whitespace will evaluate to ``False`` for an alphanumeric check.
-
-    >>> s2 = pd.Series(['A B', '1.5', '3,000'])
-    >>> s2.str.isalnum()
-    0    False
-    1    False
-    2    False
-    dtype: bool
-
-    **More Detailed Checks for Numeric Characters**
-
-    There are several different but overlapping sets of numeric characters that
-    can be checked for.
-
-    >>> s3 = pd.Series(['23', '³', '⅕', ''])
-
-    The ``s3.str.isdecimal`` method checks for characters used to form numbers
-    in base 10.
-
-    >>> s3.str.isdecimal()
-    0     True
-    1    False
-    2    False
-    3    False
-    dtype: bool
-
-    The ``s3.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also
-    includes special digits, like superscripted and subscripted digits in
-    unicode.
-
-    >>> s3.str.isdigit()
-    0     True
-    1     True
-    2    False
-    3    False
-    dtype: bool
-
-    The ``s3.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also
-    includes other characters that can represent quantities such as unicode
-    fractions.
-
-    >>> s3.str.isnumeric()
-    0     True
-    1     True
-    2     True
-    3    False
-    dtype: bool
-
-    **Checks for Whitespace**
-
-    >>> s4 = pd.Series([' ', '\\t\\r\\n ', ''])
-    >>> s4.str.isspace()
-    0     True
-    1     True
-    2    False
-    dtype: bool
-
-    **Checks for Character Case**
-
-    >>> s5 = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
-
-    >>> s5.str.islower()
-    0     True
-    1    False
-    2    False
-    3    False
-    dtype: bool
-
-    >>> s5.str.isupper()
-    0    False
-    1    False
-    2     True
-    3    False
-    dtype: bool
-
-    The ``s5.str.istitle`` method checks for whether all words are in title
-    case (whether only the first letter of each word is capitalized). Words are
-    assumed to be any sequence of non-numeric characters separated by
-    whitespace characters.
- - >>> s5.str.istitle() - 0 False - 1 True - 2 False - 3 False - dtype: bool - """ - _doc_args["isalnum"] = dict(type="alphanumeric", method="isalnum") - _doc_args["isalpha"] = dict(type="alphabetic", method="isalpha") - _doc_args["isdigit"] = dict(type="digits", method="isdigit") - _doc_args["isspace"] = dict(type="whitespace", method="isspace") - _doc_args["islower"] = dict(type="lowercase", method="islower") - _doc_args["isupper"] = dict(type="uppercase", method="isupper") - _doc_args["istitle"] = dict(type="titlecase", method="istitle") - _doc_args["isnumeric"] = dict(type="numeric", method="isnumeric") - _doc_args["isdecimal"] = dict(type="decimal", method="isdecimal") - # force _noarg_wrapper return type with dtype=np.dtype(bool) (GH 29624) - isalnum = _noarg_wrapper( - lambda x: x.isalnum(), - name="isalnum", - docstring=_shared_docs["ismethods"] % _doc_args["isalnum"], - returns_string=False, - dtype=np.dtype(bool), - ) - isalpha = _noarg_wrapper( - lambda x: x.isalpha(), - name="isalpha", - docstring=_shared_docs["ismethods"] % _doc_args["isalpha"], - returns_string=False, - dtype=np.dtype(bool), - ) - isdigit = _noarg_wrapper( - lambda x: x.isdigit(), - name="isdigit", - docstring=_shared_docs["ismethods"] % _doc_args["isdigit"], - returns_string=False, - dtype=np.dtype(bool), - ) - isspace = _noarg_wrapper( - lambda x: x.isspace(), - name="isspace", - docstring=_shared_docs["ismethods"] % _doc_args["isspace"], - returns_string=False, - dtype=np.dtype(bool), - ) - islower = _noarg_wrapper( - lambda x: x.islower(), - name="islower", - docstring=_shared_docs["ismethods"] % _doc_args["islower"], - returns_string=False, - dtype=np.dtype(bool), - ) - isupper = _noarg_wrapper( - lambda x: x.isupper(), - name="isupper", - docstring=_shared_docs["ismethods"] % _doc_args["isupper"], - returns_string=False, - dtype=np.dtype(bool), - ) - istitle = _noarg_wrapper( - lambda x: x.istitle(), - name="istitle", - docstring=_shared_docs["ismethods"] % _doc_args["istitle"], - returns_string=False, - dtype=np.dtype(bool), - ) - isnumeric = _noarg_wrapper( - lambda x: x.isnumeric(), - name="isnumeric", - docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"], - returns_string=False, - dtype=np.dtype(bool), - ) - isdecimal = _noarg_wrapper( - lambda x: x.isdecimal(), - name="isdecimal", - docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"], - returns_string=False, - dtype=np.dtype(bool), - ) - - @classmethod - def _make_accessor(cls, data): - cls._validate(data) - return cls(data) From cb2fb24439c69a040d2f1eec144c514cdf27d1f2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Sep 2020 15:44:49 -0500 Subject: [PATCH 24/24] simplify inheritance --- pandas/core/arrays/string_.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 0db5fadce614e..fb126b3725237 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -22,7 +22,6 @@ from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna -from pandas.core.strings.object_array import ObjectStringArrayMixin if TYPE_CHECKING: import pyarrow @@ -103,7 +102,7 @@ def __from_arrow__( return StringArray._concat_same_type(results) -class StringArray(PandasArray, ObjectStringArrayMixin): +class StringArray(PandasArray): """ Extension array for string data.