diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index e67829b8805eb..d3651a271002b 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -135,6 +135,93 @@ by some other storage type, like Python lists. See the `extension array source`_ for the interface definition. The docstrings and comments contain guidance for properly implementing the interface. +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For ExtensionArrays backed by a single NumPy array, the +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you +some effort. It contains a private property ``_ndarray`` with the backing NumPy +array and implements the extension array interface. + +Implement the following: + +``_box_func`` + Convert from array values to the type you wish to expose to users. + +``_internal_fill_value`` + Scalar used to denote an ``NA`` value inside ``self._ndarray``, e.g. ``-1`` + for ``Categorical``, ``iNaT`` for ``Period``. + +``_validate_scalar`` + Convert from an object to a value which can be stored in the NumPy array. + +``_validate_setitem_value`` + Convert a value or values for use in setting a value or values in the backing + NumPy array. + +.. code-block:: python + + class DateArray(NDArrayBackedExtensionArray): + _internal_fill_value = numpy.datetime64("NaT") + + def __init__(self, values): + backing_array_dtype = " ArrayLike: # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. 
Everything else we pass through to the underlying @@ -154,6 +166,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" return arr.view(dtype=dtype) # type: ignore[arg-type] + @doc(ExtensionArray.take) def take( self, indices: TakeIndexer, @@ -440,20 +453,8 @@ def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self: # ------------------------------------------------------------------------ # Index compat methods + @doc(ExtensionArray.insert) def insert(self, loc: int, item) -> Self: - """ - Make new ExtensionArray inserting new item at location. Follows - Python list.append semantics for negative values. - - Parameters - ---------- - loc : int - item : object - - Returns - ------- - type(self) - """ loc = validate_insert_loc(loc, len(self)) code = self._validate_scalar(item) @@ -474,16 +475,24 @@ def insert(self, loc: int, item) -> Self: def value_counts(self, dropna: bool = True) -> Series: """ - Return a Series containing counts of unique values. + Return a Series containing counts of each unique value. Parameters ---------- dropna : bool, default True - Don't include counts of NA values. + Don't include counts of missing values. Returns ------- Series + + Examples + -------- + >>> arr = pd.array([4, 5]) + >>> arr.value_counts() + 4 1 + 5 1 + Name: count, dtype: Int64 """ if self.ndim != 1: raise NotImplementedError diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 60bcb97aaa364..d80d9f564c478 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -336,6 +336,7 @@ class TestApi(Base): "take", "ExtensionArray", "ExtensionScalarOpsMixin", + "NDArrayBackedExtensionArray", ] def test_api(self):