From f7c10294cceff11539529eed7a5845c2ecd2b9cb Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Thu, 31 Mar 2022 21:33:14 -0700
Subject: [PATCH 1/2] REF: Create pandas/core/arrays/arrow

---
 pandas/core/arrays/_mixins.py                 | 424 +----------------
 pandas/core/arrays/arrow/__init__.py          |   3 +
 .../core/arrays/{ => arrow}/_arrow_utils.py   |   0
 pandas/core/arrays/arrow/array.py             | 439 ++++++++++++++++++
 pandas/core/arrays/interval.py                |   2 +-
 pandas/core/arrays/numeric.py                 |   4 +-
 pandas/core/arrays/period.py                  |   2 +-
 pandas/core/arrays/string_arrow.py            |   2 +-
 pandas/core/dtypes/dtypes.py                  |   4 +-
 pandas/io/parquet.py                          |   2 +-
 pandas/tests/arrays/interval/test_interval.py |   8 +-
 .../tests/arrays/masked/test_arrow_compat.py  |   2 +-
 .../tests/arrays/period/test_arrow_compat.py  |  10 +-
 13 files changed, 464 insertions(+), 438 deletions(-)
 create mode 100644 pandas/core/arrays/arrow/__init__.py
 rename pandas/core/arrays/{ => arrow}/_arrow_utils.py (100%)
 create mode 100644 pandas/core/arrays/arrow/array.py

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index eb0ebd8d08340..4563759c63a36 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -28,11 +28,6 @@
     npt,
     type_t,
 )
-from pandas.compat import (
-    pa_version_under1p01,
-    pa_version_under2p0,
-    pa_version_under5p0,
-)
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import doc
 from pandas.util._validators import (
@@ -42,11 +37,7 @@
 )
 
 from pandas.core.dtypes.common import (
-    is_array_like,
-    is_bool_dtype,
     is_dtype_equal,
-    is_integer,
-    is_scalar,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
@@ -54,10 +45,7 @@
     ExtensionDtype,
     PeriodDtype,
 )
-from pandas.core.dtypes.missing import (
-    array_equivalent,
-    isna,
-)
+from pandas.core.dtypes.missing import array_equivalent
 
 from pandas.core import missing
 from pandas.core.algorithms import (
@@ -69,28 +57,19 @@
 from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.construction import extract_array
-from pandas.core.indexers import (
-    check_array_indexer,
-    validate_indices,
-)
+from pandas.core.indexers import check_array_indexer
 from pandas.core.sorting import nargminmax
 
 NDArrayBackedExtensionArrayT = TypeVar(
     "NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray"
 )
 
-if not pa_version_under1p01:
-    import pyarrow as pa
-    import pyarrow.compute as pc
-
 if TYPE_CHECKING:
     from pandas._typing import (
         NumpySorter,
         NumpyValueArrayLike,
     )
 
-    from pandas import Series
-
 
 def ravel_compat(meth: F) -> F:
     """
@@ -538,402 +517,3 @@ def _empty(
         arr = cls._from_sequence([], dtype=dtype)
         backing = np.empty(shape, dtype=arr._ndarray.dtype)
         return arr._from_backing_data(backing)
-
-
-ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray")
-
-
-class ArrowExtensionArray(ExtensionArray):
-    """
-    Base class for ExtensionArray backed by Arrow array.
-    """
-
-    _data: pa.ChunkedArray
-
-    def __init__(self, values: pa.ChunkedArray) -> None:
-        self._data = values
-
-    def __arrow_array__(self, type=None):
-        """Convert myself to a pyarrow Array or ChunkedArray."""
-        return self._data
-
-    def equals(self, other) -> bool:
-        if not isinstance(other, ArrowExtensionArray):
-            return False
-        # I'm told that pyarrow makes __eq__ behave like pandas' equals;
-        #  TODO: is this documented somewhere?
-        return self._data == other._data
-
-    @property
-    def nbytes(self) -> int:
-        """
-        The number of bytes needed to store this object in memory.
-        """
-        return self._data.nbytes
-
-    def __len__(self) -> int:
-        """
-        Length of this array.
-
-        Returns
-        -------
-        length : int
-        """
-        return len(self._data)
-
-    def isna(self) -> npt.NDArray[np.bool_]:
-        """
-        Boolean NumPy array indicating if each value is missing.
-
-        This should return a 1-D array the same length as 'self'.
-        """
-        if pa_version_under2p0:
-            return self._data.is_null().to_pandas().values
-        else:
-            return self._data.is_null().to_numpy()
-
-    def copy(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
-        """
-        Return a shallow copy of the array.
-
-        Underlying ChunkedArray is immutable, so a deep copy is unnecessary.
-
-        Returns
-        -------
-        type(self)
-        """
-        return type(self)(self._data)
-
-    @doc(ExtensionArray.factorize)
-    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
-        encoded = self._data.dictionary_encode()
-        indices = pa.chunked_array(
-            [c.indices for c in encoded.chunks], type=encoded.type.index_type
-        ).to_pandas()
-        if indices.dtype.kind == "f":
-            indices[np.isnan(indices)] = na_sentinel
-        indices = indices.astype(np.int64, copy=False)
-
-        if encoded.num_chunks:
-            uniques = type(self)(encoded.chunk(0).dictionary)
-        else:
-            uniques = type(self)(pa.array([], type=encoded.type.value_type))
-
-        return indices.values, uniques
-
-    def take(
-        self,
-        indices: TakeIndexer,
-        allow_fill: bool = False,
-        fill_value: Any = None,
-    ):
-        """
-        Take elements from an array.
-
-        Parameters
-        ----------
-        indices : sequence of int or one-dimensional np.ndarray of int
-            Indices to be taken.
-        allow_fill : bool, default False
-            How to handle negative values in `indices`.
-
-            * False: negative values in `indices` indicate positional indices
-              from the right (the default). This is similar to
-              :func:`numpy.take`.
-
-            * True: negative values in `indices` indicate
-              missing values. These values are set to `fill_value`. Any other
-              other negative values raise a ``ValueError``.
-
-        fill_value : any, optional
-            Fill value to use for NA-indices when `allow_fill` is True.
-            This may be ``None``, in which case the default NA value for
-            the type, ``self.dtype.na_value``, is used.
-
-            For many ExtensionArrays, there will be two representations of
-            `fill_value`: a user-facing "boxed" scalar, and a low-level
-            physical NA value. `fill_value` should be the user-facing version,
-            and the implementation should handle translating that to the
-            physical version for processing the take if necessary.
-
-        Returns
-        -------
-        ExtensionArray
-
-        Raises
-        ------
-        IndexError
-            When the indices are out of bounds for the array.
-        ValueError
-            When `indices` contains negative values other than ``-1``
-            and `allow_fill` is True.
-
-        See Also
-        --------
-        numpy.take
-        api.extensions.take
-
-        Notes
-        -----
-        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
-        ``iloc``, when `indices` is a sequence of values. Additionally,
-        it's called by :meth:`Series.reindex`, or any other method
-        that causes realignment, with a `fill_value`.
-        """
-        # TODO: Remove once we got rid of the (indices < 0) check
-        if not is_array_like(indices):
-            indices_array = np.asanyarray(indices)
-        else:
-            # error: Incompatible types in assignment (expression has type
-            # "Sequence[int]", variable has type "ndarray")
-            indices_array = indices  # type: ignore[assignment]
-
-        if len(self._data) == 0 and (indices_array >= 0).any():
-            raise IndexError("cannot do a non-empty take")
-        if indices_array.size > 0 and indices_array.max() >= len(self._data):
-            raise IndexError("out of bounds value in 'indices'.")
-
-        if allow_fill:
-            fill_mask = indices_array < 0
-            if fill_mask.any():
-                validate_indices(indices_array, len(self._data))
-                # TODO(ARROW-9433): Treat negative indices as NULL
-                indices_array = pa.array(indices_array, mask=fill_mask)
-                result = self._data.take(indices_array)
-                if isna(fill_value):
-                    return type(self)(result)
-                # TODO: ArrowNotImplementedError: Function fill_null has no
-                # kernel matching input types (array[string], scalar[string])
-                result = type(self)(result)
-                result[fill_mask] = fill_value
-                return result
-                # return type(self)(pc.fill_null(result, pa.scalar(fill_value)))
-            else:
-                # Nothing to fill
-                return type(self)(self._data.take(indices))
-        else:  # allow_fill=False
-            # TODO(ARROW-9432): Treat negative indices as indices from the right.
-            if (indices_array < 0).any():
-                # Don't modify in-place
-                indices_array = np.copy(indices_array)
-                indices_array[indices_array < 0] += len(self._data)
-            return type(self)(self._data.take(indices_array))
-
-    def value_counts(self, dropna: bool = True) -> Series:
-        """
-        Return a Series containing counts of each unique value.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            Don't include counts of missing values.
-
-        Returns
-        -------
-        counts : Series
-
-        See Also
-        --------
-        Series.value_counts
-        """
-        from pandas import (
-            Index,
-            Series,
-        )
-
-        vc = self._data.value_counts()
-
-        values = vc.field(0)
-        counts = vc.field(1)
-        if dropna and self._data.null_count > 0:
-            mask = values.is_valid()
-            values = values.filter(mask)
-            counts = counts.filter(mask)
-
-        # No missing values so we can adhere to the interface and return a numpy array.
-        counts = np.array(counts)
-
-        index = Index(type(self)(values))
-
-        return Series(counts, index=index).astype("Int64")
-
-    @classmethod
-    def _concat_same_type(
-        cls: type[ArrowExtensionArrayT], to_concat
-    ) -> ArrowExtensionArrayT:
-        """
-        Concatenate multiple ArrowExtensionArrays.
-
-        Parameters
-        ----------
-        to_concat : sequence of ArrowExtensionArrays
-
-        Returns
-        -------
-        ArrowExtensionArray
-        """
-        import pyarrow as pa
-
-        chunks = [array for ea in to_concat for array in ea._data.iterchunks()]
-        arr = pa.chunked_array(chunks)
-        return cls(arr)
-
-    def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
-        """Set one or more values inplace.
-
-        Parameters
-        ----------
-        key : int, ndarray, or slice
-            When called from, e.g. ``Series.__setitem__``, ``key`` will be
-            one of
-
-            * scalar int
-            * ndarray of integers.
-            * boolean ndarray
-            * slice object
-
-        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
-            value or values to be set of ``key``.
-
-        Returns
-        -------
-        None
-        """
-        key = check_array_indexer(self, key)
-        indices = self._indexing_key_to_indices(key)
-        value = self._maybe_convert_setitem_value(value)
-
-        argsort = np.argsort(indices)
-        indices = indices[argsort]
-
-        if is_scalar(value):
-            value = np.broadcast_to(value, len(self))
-        elif len(indices) != len(value):
-            raise ValueError("Length of indexer and values mismatch")
-        else:
-            value = np.asarray(value)[argsort]
-
-        self._data = self._set_via_chunk_iteration(indices=indices, value=value)
-
-    def _indexing_key_to_indices(
-        self, key: int | slice | np.ndarray
-    ) -> npt.NDArray[np.intp]:
-        """
-        Convert indexing key for self into positional indices.
-
-        Parameters
-        ----------
-        key : int | slice | np.ndarray
-
-        Returns
-        -------
-        npt.NDArray[np.intp]
-        """
-        n = len(self)
-        if isinstance(key, slice):
-            indices = np.arange(n)[key]
-        elif is_integer(key):
-            indices = np.arange(n)[[key]]  # type: ignore[index]
-        elif is_bool_dtype(key):
-            key = np.asarray(key)
-            if len(key) != n:
-                raise ValueError("Length of indexer and values mismatch")
-            indices = key.nonzero()[0]
-        else:
-            key = np.asarray(key)
-            indices = np.arange(n)[key]
-        return indices
-
-    def _maybe_convert_setitem_value(self, value):
-        """Maybe convert value to be pyarrow compatible."""
-        raise NotImplementedError()
-
-    def _set_via_chunk_iteration(
-        self, indices: npt.NDArray[np.intp], value: npt.NDArray[Any]
-    ) -> pa.ChunkedArray:
-        """
-        Loop through the array chunks and set the new values while
-        leaving the chunking layout unchanged.
-
-        Parameters
-        ----------
-        indices : npt.NDArray[np.intp]
-            Position indices for the underlying ChunkedArray.
-
-        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
-            value or values to be set of ``key``.
-
-        Notes
-        -----
-        Assumes that indices is sorted. Caller is responsible for sorting.
-        """
-        new_data = []
-        stop = 0
-        for chunk in self._data.iterchunks():
-            start, stop = stop, stop + len(chunk)
-            if len(indices) == 0 or stop <= indices[0]:
-                new_data.append(chunk)
-            else:
-                n = int(np.searchsorted(indices, stop, side="left"))
-                c_ind = indices[:n] - start
-                indices = indices[n:]
-                n = len(c_ind)
-                c_value, value = value[:n], value[n:]
-                new_data.append(self._replace_with_indices(chunk, c_ind, c_value))
-        return pa.chunked_array(new_data)
-
-    @classmethod
-    def _replace_with_indices(
-        cls,
-        chunk: pa.Array,
-        indices: npt.NDArray[np.intp],
-        value: npt.NDArray[Any],
-    ) -> pa.Array:
-        """
-        Replace items selected with a set of positional indices.
-
-        Analogous to pyarrow.compute.replace_with_mask, except that replacement
-        positions are identified via indices rather than a mask.
-
-        Parameters
-        ----------
-        chunk : pa.Array
-        indices : npt.NDArray[np.intp]
-        value : npt.NDArray[Any]
-            Replacement value(s).
-
-        Returns
-        -------
-        pa.Array
-        """
-        n = len(indices)
-
-        if n == 0:
-            return chunk
-
-        start, stop = indices[[0, -1]]
-
-        if (stop - start) == (n - 1):
-            # fast path for a contiguous set of indices
-            arrays = [
-                chunk[:start],
-                pa.array(value, type=chunk.type),
-                chunk[stop + 1 :],
-            ]
-            arrays = [arr for arr in arrays if len(arr)]
-            if len(arrays) == 1:
-                return arrays[0]
-            return pa.concat_arrays(arrays)
-
-        mask = np.zeros(len(chunk), dtype=np.bool_)
-        mask[indices] = True
-
-        if pa_version_under5p0:
-            arr = chunk.to_numpy(zero_copy_only=False)
-            arr[mask] = value
-            return pa.array(arr, type=chunk.type)
-
-        if isna(value).all():
-            return pc.if_else(mask, None, chunk)
-
-        return pc.replace_with_mask(chunk, mask, value)
diff --git a/pandas/core/arrays/arrow/__init__.py b/pandas/core/arrays/arrow/__init__.py
new file mode 100644
index 0000000000000..6bdf29e38ac62
--- /dev/null
+++ b/pandas/core/arrays/arrow/__init__.py
@@ -0,0 +1,3 @@
+# flake8: noqa: F401
+
+from pandas.core.arrays.arrow.array import ArrowExtensionArray
diff --git a/pandas/core/arrays/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py
similarity index 100%
rename from pandas/core/arrays/_arrow_utils.py
rename to pandas/core/arrays/arrow/_arrow_utils.py
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
new file mode 100644
index 0000000000000..0a48638f5cf05
--- /dev/null
+++ b/pandas/core/arrays/arrow/array.py
@@ -0,0 +1,439 @@
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    TypeVar,
+)
+
+import numpy as np
+
+from pandas._typing import (
+    TakeIndexer,
+    npt,
+)
+from pandas.compat import (
+    pa_version_under1p01,
+    pa_version_under2p0,
+    pa_version_under5p0,
+)
+from pandas.util._decorators import doc
+
+from pandas.core.dtypes.common import (
+    is_array_like,
+    is_bool_dtype,
+    is_integer,
+    is_scalar,
+)
+from pandas.core.dtypes.missing import isna
+
+from pandas.core.arrays.base import ExtensionArray
+from pandas.core.indexers import (
+    check_array_indexer,
+    validate_indices,
+)
+
+if not pa_version_under1p01:
+    import pyarrow as pa
+    import pyarrow.compute as pc
+
+if TYPE_CHECKING:
+    from pandas import Series
+
+ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray")
+
+
+class ArrowExtensionArray(ExtensionArray):
+    """
+    Base class for ExtensionArray backed by Arrow array.
+    """
+
+    _data: pa.ChunkedArray
+
+    def __init__(self, values: pa.ChunkedArray) -> None:
+        self._data = values
+
+    def __arrow_array__(self, type=None):
+        """Convert myself to a pyarrow Array or ChunkedArray."""
+        return self._data
+
+    def equals(self, other) -> bool:
+        if not isinstance(other, ArrowExtensionArray):
+            return False
+        # I'm told that pyarrow makes __eq__ behave like pandas' equals;
+        #  TODO: is this documented somewhere?
+        return self._data == other._data
+
+    @property
+    def nbytes(self) -> int:
+        """
+        The number of bytes needed to store this object in memory.
+        """
+        return self._data.nbytes
+
+    def __len__(self) -> int:
+        """
+        Length of this array.
+
+        Returns
+        -------
+        length : int
+        """
+        return len(self._data)
+
+    def isna(self) -> npt.NDArray[np.bool_]:
+        """
+        Boolean NumPy array indicating if each value is missing.
+
+        This should return a 1-D array the same length as 'self'.
+        """
+        if pa_version_under2p0:
+            return self._data.is_null().to_pandas().values
+        else:
+            return self._data.is_null().to_numpy()
+
+    def copy(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
+        """
+        Return a shallow copy of the array.
+
+        Underlying ChunkedArray is immutable, so a deep copy is unnecessary.
+
+        Returns
+        -------
+        type(self)
+        """
+        return type(self)(self._data)
+
+    @doc(ExtensionArray.factorize)
+    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
+        encoded = self._data.dictionary_encode()
+        indices = pa.chunked_array(
+            [c.indices for c in encoded.chunks], type=encoded.type.index_type
+        ).to_pandas()
+        if indices.dtype.kind == "f":
+            indices[np.isnan(indices)] = na_sentinel
+        indices = indices.astype(np.int64, copy=False)
+
+        if encoded.num_chunks:
+            uniques = type(self)(encoded.chunk(0).dictionary)
+        else:
+            uniques = type(self)(pa.array([], type=encoded.type.value_type))
+
+        return indices.values, uniques
+
+    def take(
+        self,
+        indices: TakeIndexer,
+        allow_fill: bool = False,
+        fill_value: Any = None,
+    ):
+        """
+        Take elements from an array.
+
+        Parameters
+        ----------
+        indices : sequence of int or one-dimensional np.ndarray of int
+            Indices to be taken.
+        allow_fill : bool, default False
+            How to handle negative values in `indices`.
+
+            * False: negative values in `indices` indicate positional indices
+              from the right (the default). This is similar to
+              :func:`numpy.take`.
+
+            * True: negative values in `indices` indicate
+              missing values. These values are set to `fill_value`. Any other
+              other negative values raise a ``ValueError``.
+
+        fill_value : any, optional
+            Fill value to use for NA-indices when `allow_fill` is True.
+            This may be ``None``, in which case the default NA value for
+            the type, ``self.dtype.na_value``, is used.
+
+            For many ExtensionArrays, there will be two representations of
+            `fill_value`: a user-facing "boxed" scalar, and a low-level
+            physical NA value. `fill_value` should be the user-facing version,
+            and the implementation should handle translating that to the
+            physical version for processing the take if necessary.
+
+        Returns
+        -------
+        ExtensionArray
+
+        Raises
+        ------
+        IndexError
+            When the indices are out of bounds for the array.
+        ValueError
+            When `indices` contains negative values other than ``-1``
+            and `allow_fill` is True.
+
+        See Also
+        --------
+        numpy.take
+        api.extensions.take
+
+        Notes
+        -----
+        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
+        ``iloc``, when `indices` is a sequence of values. Additionally,
+        it's called by :meth:`Series.reindex`, or any other method
+        that causes realignment, with a `fill_value`.
+        """
+        # TODO: Remove once we got rid of the (indices < 0) check
+        if not is_array_like(indices):
+            indices_array = np.asanyarray(indices)
+        else:
+            # error: Incompatible types in assignment (expression has type
+            # "Sequence[int]", variable has type "ndarray")
+            indices_array = indices  # type: ignore[assignment]
+
+        if len(self._data) == 0 and (indices_array >= 0).any():
+            raise IndexError("cannot do a non-empty take")
+        if indices_array.size > 0 and indices_array.max() >= len(self._data):
+            raise IndexError("out of bounds value in 'indices'.")
+
+        if allow_fill:
+            fill_mask = indices_array < 0
+            if fill_mask.any():
+                validate_indices(indices_array, len(self._data))
+                # TODO(ARROW-9433): Treat negative indices as NULL
+                indices_array = pa.array(indices_array, mask=fill_mask)
+                result = self._data.take(indices_array)
+                if isna(fill_value):
+                    return type(self)(result)
+                # TODO: ArrowNotImplementedError: Function fill_null has no
+                # kernel matching input types (array[string], scalar[string])
+                result = type(self)(result)
+                result[fill_mask] = fill_value
+                return result
+                # return type(self)(pc.fill_null(result, pa.scalar(fill_value)))
+            else:
+                # Nothing to fill
+                return type(self)(self._data.take(indices))
+        else:  # allow_fill=False
+            # TODO(ARROW-9432): Treat negative indices as indices from the right.
+            if (indices_array < 0).any():
+                # Don't modify in-place
+                indices_array = np.copy(indices_array)
+                indices_array[indices_array < 0] += len(self._data)
+            return type(self)(self._data.take(indices_array))
+
+    def value_counts(self, dropna: bool = True) -> Series:
+        """
+        Return a Series containing counts of each unique value.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            Don't include counts of missing values.
+
+        Returns
+        -------
+        counts : Series
+
+        See Also
+        --------
+        Series.value_counts
+        """
+        from pandas import (
+            Index,
+            Series,
+        )
+
+        vc = self._data.value_counts()
+
+        values = vc.field(0)
+        counts = vc.field(1)
+        if dropna and self._data.null_count > 0:
+            mask = values.is_valid()
+            values = values.filter(mask)
+            counts = counts.filter(mask)
+
+        # No missing values so we can adhere to the interface and return a numpy array.
+        counts = np.array(counts)
+
+        index = Index(type(self)(values))
+
+        return Series(counts, index=index).astype("Int64")
+
+    @classmethod
+    def _concat_same_type(
+        cls: type[ArrowExtensionArrayT], to_concat
+    ) -> ArrowExtensionArrayT:
+        """
+        Concatenate multiple ArrowExtensionArrays.
+
+        Parameters
+        ----------
+        to_concat : sequence of ArrowExtensionArrays
+
+        Returns
+        -------
+        ArrowExtensionArray
+        """
+        import pyarrow as pa
+
+        chunks = [array for ea in to_concat for array in ea._data.iterchunks()]
+        arr = pa.chunked_array(chunks)
+        return cls(arr)
+
+    def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
+        """Set one or more values inplace.
+
+        Parameters
+        ----------
+        key : int, ndarray, or slice
+            When called from, e.g. ``Series.__setitem__``, ``key`` will be
+            one of
+
+            * scalar int
+            * ndarray of integers.
+            * boolean ndarray
+            * slice object
+
+        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
+            value or values to be set of ``key``.
+
+        Returns
+        -------
+        None
+        """
+        key = check_array_indexer(self, key)
+        indices = self._indexing_key_to_indices(key)
+        value = self._maybe_convert_setitem_value(value)
+
+        argsort = np.argsort(indices)
+        indices = indices[argsort]
+
+        if is_scalar(value):
+            value = np.broadcast_to(value, len(self))
+        elif len(indices) != len(value):
+            raise ValueError("Length of indexer and values mismatch")
+        else:
+            value = np.asarray(value)[argsort]
+
+        self._data = self._set_via_chunk_iteration(indices=indices, value=value)
+
+    def _indexing_key_to_indices(
+        self, key: int | slice | np.ndarray
+    ) -> npt.NDArray[np.intp]:
+        """
+        Convert indexing key for self into positional indices.
+
+        Parameters
+        ----------
+        key : int | slice | np.ndarray
+
+        Returns
+        -------
+        npt.NDArray[np.intp]
+        """
+        n = len(self)
+        if isinstance(key, slice):
+            indices = np.arange(n)[key]
+        elif is_integer(key):
+            indices = np.arange(n)[[key]]  # type: ignore[index]
+        elif is_bool_dtype(key):
+            key = np.asarray(key)
+            if len(key) != n:
+                raise ValueError("Length of indexer and values mismatch")
+            indices = key.nonzero()[0]
+        else:
+            key = np.asarray(key)
+            indices = np.arange(n)[key]
+        return indices
+
+    def _maybe_convert_setitem_value(self, value):
+        """Maybe convert value to be pyarrow compatible."""
+        raise NotImplementedError()
+
+    def _set_via_chunk_iteration(
+        self, indices: npt.NDArray[np.intp], value: npt.NDArray[Any]
+    ) -> pa.ChunkedArray:
+        """
+        Loop through the array chunks and set the new values while
+        leaving the chunking layout unchanged.
+
+        Parameters
+        ----------
+        indices : npt.NDArray[np.intp]
+            Position indices for the underlying ChunkedArray.
+
+        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
+            value or values to be set of ``key``.
+
+        Notes
+        -----
+        Assumes that indices is sorted. Caller is responsible for sorting.
+        """
+        new_data = []
+        stop = 0
+        for chunk in self._data.iterchunks():
+            start, stop = stop, stop + len(chunk)
+            if len(indices) == 0 or stop <= indices[0]:
+                new_data.append(chunk)
+            else:
+                n = int(np.searchsorted(indices, stop, side="left"))
+                c_ind = indices[:n] - start
+                indices = indices[n:]
+                n = len(c_ind)
+                c_value, value = value[:n], value[n:]
+                new_data.append(self._replace_with_indices(chunk, c_ind, c_value))
+        return pa.chunked_array(new_data)
+
+    @classmethod
+    def _replace_with_indices(
+        cls,
+        chunk: pa.Array,
+        indices: npt.NDArray[np.intp],
+        value: npt.NDArray[Any],
+    ) -> pa.Array:
+        """
+        Replace items selected with a set of positional indices.
+
+        Analogous to pyarrow.compute.replace_with_mask, except that replacement
+        positions are identified via indices rather than a mask.
+
+        Parameters
+        ----------
+        chunk : pa.Array
+        indices : npt.NDArray[np.intp]
+        value : npt.NDArray[Any]
+            Replacement value(s).
+
+        Returns
+        -------
+        pa.Array
+        """
+        n = len(indices)
+
+        if n == 0:
+            return chunk
+
+        start, stop = indices[[0, -1]]
+
+        if (stop - start) == (n - 1):
+            # fast path for a contiguous set of indices
+            arrays = [
+                chunk[:start],
+                pa.array(value, type=chunk.type),
+                chunk[stop + 1 :],
+            ]
+            arrays = [arr for arr in arrays if len(arr)]
+            if len(arrays) == 1:
+                return arrays[0]
+            return pa.concat_arrays(arrays)
+
+        mask = np.zeros(len(chunk), dtype=np.bool_)
+        mask[indices] = True
+
+        if pa_version_under5p0:
+            arr = chunk.to_numpy(zero_copy_only=False)
+            arr[mask] = value
+            return pa.array(arr, type=chunk.type)
+
+        if isna(value).all():
+            return pc.if_else(mask, None, chunk)
+
+        return pc.replace_with_mask(chunk, mask, value)
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index e14eec419377c..679feaca71024 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -1445,7 +1445,7 @@ def __arrow_array__(self, type=None):
         """
         import pyarrow
 
-        from pandas.core.arrays._arrow_utils import ArrowIntervalType
+        from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType
 
         try:
             subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index 98d43db9904c8..cdffd57df9a84 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -70,7 +70,9 @@ def __from_arrow__(
         """
         import pyarrow
 
-        from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
+        from pandas.core.arrays.arrow._arrow_utils import (
+            pyarrow_array_to_numpy_and_mask,
+        )
 
         array_class = self.construct_array_type()
 
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index 7d0b30a1abb60..065e597537be9 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -366,7 +366,7 @@ def __arrow_array__(self, type=None):
         """
         import pyarrow
 
-        from pandas.core.arrays._arrow_utils import ArrowPeriodType
+        from pandas.core.arrays.arrow._arrow_utils import ArrowPeriodType
 
         if type is not None:
             if pyarrow.types.is_integer(type):
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 154c143ac89df..b8136402b00e6 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -42,7 +42,7 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.arraylike import OpsMixin
-from pandas.core.arrays._mixins import ArrowExtensionArray
+from pandas.core.arrays.arrow import ArrowExtensionArray
 from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.arrays.integer import Int64Dtype
 from pandas.core.arrays.numeric import NumericDtype
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 66eed0a75fa19..58e91f46dff43 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -997,7 +997,9 @@ def __from_arrow__(
         import pyarrow
 
         from pandas.core.arrays import PeriodArray
-        from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
+        from pandas.core.arrays.arrow._arrow_utils import (
+            pyarrow_array_to_numpy_and_mask,
+        )
 
         if isinstance(array, pyarrow.Array):
             chunks = [array]
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 27b0b3d08ad53..cbf3bcc9278d5 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -151,7 +151,7 @@ def __init__(self) -> None:
         import pyarrow.parquet
 
         # import utils to register the pyarrow extension types
-        import pandas.core.arrays._arrow_utils  # noqa:F401
+        import pandas.core.arrays.arrow._arrow_utils  # noqa:F401
 
         self.api = pyarrow
 
diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
index 2b5712e76e8cc..eaf86f5d521ae 100644
--- a/pandas/tests/arrays/interval/test_interval.py
+++ b/pandas/tests/arrays/interval/test_interval.py
@@ -248,7 +248,7 @@ def test_min_max(self, left_right_dtypes, index_or_series_or_array):
 def test_arrow_extension_type():
     import pyarrow as pa
 
-    from pandas.core.arrays._arrow_utils import ArrowIntervalType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType
 
     p1 = ArrowIntervalType(pa.int64(), "left")
     p2 = ArrowIntervalType(pa.int64(), "left")
@@ -265,7 +265,7 @@ def test_arrow_extension_type():
 def test_arrow_array():
     import pyarrow as pa
 
-    from pandas.core.arrays._arrow_utils import ArrowIntervalType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType
 
     intervals = pd.interval_range(1, 5, freq=1).array
 
@@ -295,7 +295,7 @@ def test_arrow_array():
 def test_arrow_array_missing():
     import pyarrow as pa
 
-    from pandas.core.arrays._arrow_utils import ArrowIntervalType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType
 
     arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
     arr[1] = None
@@ -330,7 +330,7 @@ def test_arrow_array_missing():
 def test_arrow_table_roundtrip(breaks):
     import pyarrow as pa
 
-    from pandas.core.arrays._arrow_utils import ArrowIntervalType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType
 
     arr = IntervalArray.from_breaks(breaks)
     arr[1] = None
diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py
index 051762511a6ca..4ccc54636eaee 100644
--- a/pandas/tests/arrays/masked/test_arrow_compat.py
+++ b/pandas/tests/arrays/masked/test_arrow_compat.py
@@ -6,7 +6,7 @@
 
 pa = pytest.importorskip("pyarrow", minversion="1.0.1")
 
-from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
+from pandas.core.arrays.arrow._arrow_utils import pyarrow_array_to_numpy_and_mask
 
 arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES]
 arrays += [pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES]
diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py
index 560299a4a47f5..7d2d2daed3497 100644
--- a/pandas/tests/arrays/period/test_arrow_compat.py
+++ b/pandas/tests/arrays/period/test_arrow_compat.py
@@ -13,7 +13,7 @@
 
 
 def test_arrow_extension_type():
-    from pandas.core.arrays._arrow_utils import ArrowPeriodType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowPeriodType
 
     p1 = ArrowPeriodType("D")
     p2 = ArrowPeriodType("D")
@@ -34,7 +34,7 @@ def test_arrow_extension_type():
     ],
 )
 def test_arrow_array(data, freq):
-    from pandas.core.arrays._arrow_utils import ArrowPeriodType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowPeriodType
 
     periods = period_array(data, freq=freq)
     result = pa.array(periods)
@@ -57,7 +57,7 @@ def test_arrow_array(data, freq):
 
 
 def test_arrow_array_missing():
-    from pandas.core.arrays._arrow_utils import ArrowPeriodType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowPeriodType
 
     arr = PeriodArray([1, 2, 3], freq="D")
     arr[1] = pd.NaT
@@ -70,7 +70,7 @@ def test_arrow_array_missing():
 
 
 def test_arrow_table_roundtrip():
-    from pandas.core.arrays._arrow_utils import ArrowPeriodType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowPeriodType
 
     arr = PeriodArray([1, 2, 3], freq="D")
     arr[1] = pd.NaT
@@ -91,7 +91,7 @@ def test_arrow_table_roundtrip():
 def test_arrow_load_from_zero_chunks():
     # GH-41040
 
-    from pandas.core.arrays._arrow_utils import ArrowPeriodType
+    from pandas.core.arrays.arrow._arrow_utils import ArrowPeriodType
 
     arr = PeriodArray([], freq="D")
     df = pd.DataFrame({"a": arr})

From 2f8978ba8b2c42fc0247b69bf4897a18c8397f07 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Fri, 1 Apr 2022 08:55:44 -0700
Subject: [PATCH 2/2] Fix test

---
 pandas/tests/extension/arrow/arrays.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py
index 33eef35153bce..d19a6245809be 100644
--- a/pandas/tests/extension/arrow/arrays.py
+++ b/pandas/tests/extension/arrow/arrays.py
@@ -24,7 +24,7 @@
 )
 from pandas.api.types import is_scalar
 from pandas.core.arraylike import OpsMixin
-from pandas.core.arrays._mixins import ArrowExtensionArray as _ArrowExtensionArray
+from pandas.core.arrays.arrow import ArrowExtensionArray as _ArrowExtensionArray
 from pandas.core.construction import extract_array