
ENH: add NDArrayBackedExtensionArray to public API #56755

Closed
wants to merge 47 commits into from
47 commits
1f93779
ENH: add NDArrayBackedExtensionArray to public API
tswast Jan 21, 2022
522b548
add whatsnew
tswast Jan 21, 2022
ee4e23d
Merge branch 'main' into python-db-dtypes-pandas-issue28
jreback Jan 23, 2022
945f840
add NDArrayBackedExtensionArray to pandas.core.arrays.__init__
tswast Jan 24, 2022
721ae11
add tests for extensions api
tswast Jan 24, 2022
ae68f9d
add docs
tswast Jan 24, 2022
05d0e08
Merge remote-tracking branch 'upstream/main' into python-db-dtypes-pa…
tswast Jan 24, 2022
1ad0338
Merge remote-tracking branch 'origin/python-db-dtypes-pandas-issue28'…
tswast Jan 24, 2022
38113c8
add autosummary for methods and attributes
tswast Jan 24, 2022
18ec784
remove unreferenced methods from docs
tswast Jan 24, 2022
2919f60
fix docstrings
tswast Jan 25, 2022
0c52366
Merge remote-tracking branch 'upstream/main' into python-db-dtypes-pa…
tswast Jan 25, 2022
319ac2b
use doc decorator
tswast Jan 26, 2022
8513863
add code samples and reference to test suite
tswast Jan 26, 2022
5309895
Merge remote-tracking branch 'upstream/main' into python-db-dtypes-pa…
tswast Jan 26, 2022
827f483
Merge branch 'main' into python-db-dtypes-pandas-issue28
jreback Mar 19, 2022
2cd9b31
Merge remote-tracking branch 'upstream/main' into python-db-dtypes-pa…
tswast Apr 6, 2022
cc75eda
add missing methods to extension docs
tswast Apr 6, 2022
ca323bb
Merge remote-tracking branch 'origin/python-db-dtypes-pandas-issue28'…
tswast Apr 6, 2022
bfd31f0
Merge branch 'main' into python-db-dtypes-pandas-issue28
tswast Apr 7, 2022
396da54
Merge branch 'main' into python-db-dtypes-pandas-issue28
jreback Apr 10, 2022
27cf80e
Merge branch 'main' into python-db-dtypes-pandas-issue28
tswast May 20, 2022
c716826
Merge branch 'main' into python-db-dtypes-pandas-issue28
tswast Jun 7, 2022
f4df0e9
Merge branch 'main' into python-db-dtypes-pandas-issue28
tswast Aug 25, 2022
8876b9a
clarify _validate_searchsorted_value and 2d backing array
tswast Aug 26, 2022
1bdd1cd
Merge branch 'main' into python-db-dtypes-pandas-issue28
tswast Aug 29, 2022
4b0a948
Merge remote-tracking branch 'upstream/main' into python-db-dtypes-pa…
tswast Nov 22, 2022
5920778
Merge branch 'python-db-dtypes-pandas-issue28' of github.com:tswast/p…
tswast Nov 22, 2022
38018e6
DOC: make insert docstring have single line summary
tswast Nov 23, 2022
9277cf5
Merge remote-tracking branch 'upstream/main' into python-db-dtypes-pa…
tswast Nov 23, 2022
0b86bd5
Merge branch 'main' into python-db-dtypes-pandas-issue28
tswast Nov 28, 2022
a5ac8ba
datearray changes
andrewgsavage Jan 6, 2024
8a621c5
Merge branch 'main' into ndbacked
andrewgsavage Jan 6, 2024
0e674d4
whatsnew
andrewgsavage Jan 6, 2024
f7e353a
test ndbacked is part of api
andrewgsavage Jan 6, 2024
01191d1
docstrings, extending docs
andrewgsavage Jan 7, 2024
ce4eeef
whatsnew
andrewgsavage Jan 7, 2024
7019bc7
docstring
andrewgsavage Jan 13, 2024
5f99f57
aggressive docstring
andrewgsavage Jan 13, 2024
47f8917
lint
andrewgsavage Jan 13, 2024
4f8c055
lint
andrewgsavage Jan 13, 2024
1aaaa9a
docstrings
andrewgsavage Jan 13, 2024
8a66d3a
to_numpy example
andrewgsavage Jan 13, 2024
a8fe040
to_numpy example
andrewgsavage Jan 13, 2024
f2cbd4b
remove ndarray example
andrewgsavage Jan 13, 2024
552f7a3
value_counts example
andrewgsavage Jan 13, 2024
6ae423d
use base.extensiontests in exmaple
andrewgsavage Jan 28, 2024
87 changes: 87 additions & 0 deletions doc/source/development/extending.rst
@@ -135,6 +135,93 @@ by some other storage type, like Python lists.
See the `extension array source`_ for the interface definition. The docstrings
and comments contain guidance for properly implementing the interface.

:class:`~pandas.api.extensions.NDArrayBackedExtensionArray`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

For ExtensionArrays backed by a single NumPy array, the
:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you
some effort. It contains a private property ``_ndarray`` with the backing NumPy
array and implements the extension array interface.

Implement the following:

``_box_func``
    Convert from array values to the type you wish to expose to users.

``_internal_fill_value``
    Scalar used to denote the ``NA`` value inside ``self._ndarray``, e.g. ``-1``
    for ``Categorical``, ``iNaT`` for ``Period``.

``_validate_scalar``
    Convert an object to a value that can be stored in the backing NumPy array.

``_validate_setitem_value``
    Convert a value or sequence of values into a form that can be assigned into
    the backing NumPy array.

.. code-block:: python

    class DateArray(NDArrayBackedExtensionArray):
        _internal_fill_value = numpy.datetime64("NaT")

        def __init__(self, values):
            backing_array_dtype = "<M8[ns]"
            super().__init__(values=values, dtype=backing_array_dtype)

        def _box_func(self, value):
            if pandas.isna(value):
                return pandas.NaT
            return value.astype("datetime64[us]").item().date()

        def _validate_scalar(self, scalar):
            if pandas.isna(scalar):
                return numpy.datetime64("NaT")
            elif isinstance(scalar, datetime.date):
                return pandas.Timestamp(
                    year=scalar.year, month=scalar.month, day=scalar.day
                ).to_datetime64()
            else:
                raise TypeError("Invalid value type", scalar)

        def _validate_setitem_value(self, value):
            if pandas.api.types.is_list_like(value):
                return numpy.array(
                    [self._validate_scalar(v) for v in value], dtype=self._ndarray.dtype
                )
            return self._validate_scalar(value)
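A standalone sketch of this scalar-validation pattern, runnable without pandas (the helper name and the NaN handling here are illustrative, not pandas API):

```python
import datetime

import numpy as np

# Illustrative stand-in for DateArray._validate_scalar: convert a user-facing
# scalar into something storable in a datetime64[ns] backing array.
def validate_scalar(scalar):
    # None and float("nan") both count as missing and map to NaT.
    if scalar is None or scalar != scalar:
        return np.datetime64("NaT")
    if isinstance(scalar, datetime.date):
        return np.datetime64(scalar).astype("datetime64[ns]")
    raise TypeError("Invalid value type", scalar)

print(validate_scalar(datetime.date(2024, 1, 15)))  # 2024-01-15T00:00:00.000000000
print(validate_scalar(None))                        # NaT
```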


To support 2D arrays, use the ``_from_backing_data`` helper method to wrap
results that are NumPy arrays of the same dtype as ``_ndarray``, as in the
reduction below.

.. code-block:: python

    class CustomArray(NDArrayBackedExtensionArray):

        ...

        def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs):
            pandas.compat.numpy.function.validate_min((), kwargs)
            result = pandas.core.nanops.nanmin(
                values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
            )
            if axis is None or self.ndim == 1:
                return self._box_func(result)
            return self._from_backing_data(result)

Review thread on this example:

Member: this is implicitly assuming that the ordering of self matches the ordering of self._ndarray

Author: Is that an issue? If someone wants control over that, they could use ExtensionArray instead of NDArrayBackedExtensionArray.

Contributor: In a conversation with @jorisvandenbossche, two ideas were clarified for me:

  1. In the case of ordering, examples include IP addresses (https://cyberpandas.readthedocs.io/en/latest/usage.html#pandas-integration), which can be ordered in a number of ways.
  2. On the topic of composing multiple extension arrays (e.g. PintArray and UncertaintyArray), we might look at how the composition of PintArray and Arrow arrays (https://arrow.apache.org/docs/python/pandas.html) might work, and then argue either how we can follow those patterns or why new patterns/assumptions are needed.

If I've misunderstood either the problem or the proposed next steps, happy to edit the above to correct.

@MichaelTiemannOSC (Mar 27, 2024): Regarding the ordering question, both NumPy and pandas select min and max (and also sort) based only on the real component of complex numbers:

    import pandas as pd
    xx = pd.Series([4+3j, 3+10j, 2+4j])
    xx.min()
    # (2+4j)
    xx.max()
    # (4+3j)
    xx.map(lambda x: abs(x))
    # 0     5.000000
    # 1    10.440307
    # 2     4.472136
    xx.sort_values()
    # 2    2.0+ 4.0j
    # 1    3.0+10.0j
    # 0    4.0+ 3.0j
    # dtype: complex128

Thus it is up to us to decide what sorting behavior applies to uncertainties (I propose ordering based on magnitude only, not on error terms, except when the error is NaN, in which case the value is treated as NaN).

When we look at the composition question (2, above), we will look at having the subclasses deal with this entirely (meaning we can implement an EA of units which might also have uncertain values). And of course update the documentation...
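As a side note on the ordering discussion above: NumPy's comparison of complex values is lexicographic (real part first, imaginary part breaks ties), which can be checked without pandas:

```python
import numpy as np

# Complex comparison in NumPy is lexicographic: real parts are compared
# first, and imaginary parts only break ties.
a = np.array([4 + 3j, 3 + 10j, 2 + 4j, 2 + 1j])

print(a.min())     # (2+1j) -- tie on real part 2 broken by imaginary part
print(np.sort(a))  # 2+1j, 2+4j, 3+10j, 4+3j
```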


Subclass the tests in :mod:`pandas.tests.extension.base` in your test suite to
validate your implementation.

.. code-block:: python

    @pytest.fixture
    def data():
        return CustomArray(numpy.arange(-10, 10, 1))


    class TestCustomArray(base.ExtensionTests):
        pass


.. _extending.extension.operator:

:class:`~pandas.api.extensions.ExtensionArray` operator support
21 changes: 21 additions & 0 deletions doc/source/reference/extensions.rst
@@ -24,6 +24,7 @@ objects.
   :template: autosummary/class_without_autosummary.rst

   api.extensions.ExtensionArray
   api.extensions.NDArrayBackedExtensionArray
   arrays.NumpyExtensionArray

.. We need this autosummary so that methods and attributes are generated.
@@ -68,6 +69,26 @@ objects.
   api.extensions.ExtensionArray.ndim
   api.extensions.ExtensionArray.shape
   api.extensions.ExtensionArray.tolist
   api.extensions.NDArrayBackedExtensionArray.dtype
   api.extensions.NDArrayBackedExtensionArray.argmax
   api.extensions.NDArrayBackedExtensionArray.argmin
   api.extensions.NDArrayBackedExtensionArray.argsort
   api.extensions.NDArrayBackedExtensionArray.astype
   api.extensions.NDArrayBackedExtensionArray.dropna
   api.extensions.NDArrayBackedExtensionArray.equals
   api.extensions.NDArrayBackedExtensionArray.factorize
   api.extensions.NDArrayBackedExtensionArray.fillna
   api.extensions.NDArrayBackedExtensionArray.insert
   api.extensions.NDArrayBackedExtensionArray.isin
   api.extensions.NDArrayBackedExtensionArray.isna
   api.extensions.NDArrayBackedExtensionArray.searchsorted
   api.extensions.NDArrayBackedExtensionArray.shift
   api.extensions.NDArrayBackedExtensionArray.take
   api.extensions.NDArrayBackedExtensionArray.to_numpy
   api.extensions.NDArrayBackedExtensionArray.tolist
   api.extensions.NDArrayBackedExtensionArray.unique
   api.extensions.NDArrayBackedExtensionArray.value_counts
   api.extensions.NDArrayBackedExtensionArray.view

Additionally, we have some utility methods for ensuring your object
behaves correctly.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -313,6 +313,7 @@ Other enhancements

- :meth:`~DataFrame.to_sql` with method parameter set to ``multi`` works with Oracle on the backend
- :attr:`Series.attrs` / :attr:`DataFrame.attrs` now uses a deepcopy for propagating ``attrs`` (:issue:`54134`).
- :class:`NDArrayBackedExtensionArray` now exposed in the public API (:issue:`45544`)
- :func:`get_dummies` now returning extension dtypes ``boolean`` or ``bool[pyarrow]`` that are compatible with the input dtype (:issue:`56273`)
- :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"`` (:issue:`54480`)
- :func:`read_sas` returns ``datetime64`` dtypes with resolutions better matching those stored natively in SAS, and avoids returning object-dtype in cases that cannot be stored with ``datetime64[ns]`` dtype (:issue:`56127`)
2 changes: 2 additions & 0 deletions pandas/api/extensions/__init__.py
@@ -18,6 +18,7 @@
from pandas.core.arrays import (
    ExtensionArray,
    ExtensionScalarOpsMixin,
    NDArrayBackedExtensionArray,
)

__all__ = [
@@ -30,4 +31,5 @@
    "take",
    "ExtensionArray",
    "ExtensionScalarOpsMixin",
    "NDArrayBackedExtensionArray",
]
2 changes: 2 additions & 0 deletions pandas/core/arrays/__init__.py
@@ -1,3 +1,4 @@
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.arrays.arrow import ArrowExtensionArray
from pandas.core.arrays.base import (
    ExtensionArray,
@@ -34,6 +35,7 @@
    "FloatingArray",
    "IntegerArray",
    "IntervalArray",
    "NDArrayBackedExtensionArray",
    "NumpyExtensionArray",
    "PeriodArray",
    "period_array",
39 changes: 24 additions & 15 deletions pandas/core/arrays/_mixins.py
@@ -92,6 +92,17 @@ def method(self, *args, **kwargs):
class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray):
    """
    ExtensionArray that is backed by a single NumPy ndarray.

    Notes
    -----
    This class is part of the public API, but may be adjusted in non-user-facing
    ways more aggressively than the regular API.

    Examples
    --------
    Please see the following:

    https://pandas.pydata.org/docs/development/extending.html#NDArrayBackedExtensionArray
    """

    _ndarray: np.ndarray
@@ -114,6 +125,7 @@ def _validate_scalar(self, value):

    # ------------------------------------------------------------------------

    @doc(ExtensionArray.view)
    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        # We handle datetime64, datetime64tz, timedelta64, and period
        # dtypes here. Everything else we pass through to the underlying
@@ -154,6 +166,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
        # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
        return arr.view(dtype=dtype)  # type: ignore[arg-type]

    @doc(ExtensionArray.take)
    def take(
        self,
        indices: TakeIndexer,
@@ -440,20 +453,8 @@ def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self:
    # ------------------------------------------------------------------------
    # Index compat methods

    @doc(ExtensionArray.insert)
    def insert(self, loc: int, item) -> Self:
        """
        Make new ExtensionArray inserting new item at location. Follows
        Python list.append semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        type(self)
        """
        loc = validate_insert_loc(loc, len(self))

        code = self._validate_scalar(item)
@@ -474,16 +475,24 @@ def insert(self, loc: int, item) -> Self:

    def value_counts(self, dropna: bool = True) -> Series:
        """
-       Return a Series containing counts of unique values.
+       Return a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
-           Don't include counts of NA values.
+           Don't include counts of missing values.

        Returns
        -------
        Series

        Examples
        --------
        >>> arr = pd.array([4, 5])
        >>> arr.value_counts()
        4    1
        5    1
        Name: count, dtype: Int64
        """
        if self.ndim != 1:
            raise NotImplementedError

Review thread on this example:

Member: this won't give a NDArrayBackedEA

Author: The docstrings for the other methods are the EA docstrings, so I've used an example similar to those here. Is there a better way of going about this? I can't see how to write an example using a NDArrayBackedEA without several lines initialising an ExtensionDtype and NDArrayBackedEA.
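For intuition, the counting that ``value_counts`` performs over the backing ndarray can be sketched with ``numpy.unique`` alone (illustrative only; the actual implementation uses pandas' hashtable-based algorithms and handles ``dropna``):

```python
import numpy as np

# Count occurrences of each unique value in a backing ndarray, mimicking
# the totals that value_counts reports (NA handling omitted).
values = np.array([4, 5, 4, 4])
uniques, counts = np.unique(values, return_counts=True)

for u, c in zip(uniques, counts):
    print(u, c)
# 4 3
# 5 1
```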
1 change: 1 addition & 0 deletions pandas/tests/api/test_api.py
@@ -336,6 +336,7 @@ class TestApi(Base):
        "take",
        "ExtensionArray",
        "ExtensionScalarOpsMixin",
        "NDArrayBackedExtensionArray",
    ]

    def test_api(self):