From dbb10480dae39d5ab65cb2d3798db61660840e4c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 15 Apr 2018 14:18:38 -0500 Subject: [PATCH 1/2] DOC: Various EA docs Closes https://github.com/pandas-dev/pandas/issues/20668 --- doc/source/extending.rst | 25 +++++++++++++++++++++++++ doc/source/install.rst | 2 +- doc/source/whatsnew/v0.23.0.txt | 6 +++--- pandas/core/arrays/base.py | 16 ++++++++++++---- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/doc/source/extending.rst b/doc/source/extending.rst index 25c4ba4a4a2a3..cbca8f2ad29e9 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -57,6 +57,13 @@ If you write a custom accessor, make a pull request adding it to our Extension Types --------------- +.. versionadded:: 0.23.0 + +.. warning:: + + The ``ExtensionDtype`` and ``ExtensionArray`` APIs are new and + experimental. They may change between versions without warning. + Pandas defines an interface for implementing data types and arrays that *extend* NumPy's type system. Pandas itself uses the extension system for some types that aren't built into NumPy (categorical, period, interval, datetime with @@ -106,6 +113,24 @@ by some other storage type, like Python lists. See the `extension array source`_ for the interface definition. The docstrings and comments contain guidance for properly implementing the interface. +We provide a test suite for ensuring that your extension arrays satisfy the expected +behavior. To use the test-suite, you must provide several pytest fixtures and inherit +from the base test class. The required fixtures are found in +https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/conftest.py. + +To use a test, subclass it + +.. code-block:: python + + from pandas.tests.extension import base + + class TestConstructors(base.BaseConstructorsTests): + pass + + +See https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/base/__init__.py +for a list of all the tests available. 
+ .. _extension dtype source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py .. _extension array source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py diff --git a/doc/source/install.rst b/doc/source/install.rst index fdb22a8dc3380..ce825cefafae4 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -15,7 +15,7 @@ Instructions for installing from source, `PyPI `__, `ActivePython `__, various Linux distributions, or a `development version `__ are also provided. -.. _install.dropping_27: +.. _install.dropping-27: Plan for dropping Python 2.7 ---------------------------- diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c708b477e42f4..43dce44183e3f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -11,7 +11,7 @@ version. .. warning:: Starting January 1, 2019, pandas feature releases will support Python 3 only. - See :ref:`here <install.dropping_27>` for more. + See :ref:`install.dropping-27` for more. .. _whatsnew_0230.enhancements: @@ -306,8 +306,8 @@ Supplying a ``CategoricalDtype`` will make the categories in each column consist .. _whatsnew_023.enhancements.extension: -Extending Pandas with Custom Types -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Extending Pandas with Custom Types (Experimental) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Pandas now supports storing array-like objects that aren't necessarily 1-D NumPy arrays as columns in a DataFrame or values in a Series. This allows third-party diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c281bd80cb274..e5418217b1565 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -14,12 +14,15 @@ class ExtensionArray(object): with a custom type and will not attempt to coerce them to objects. They may be stored directly inside a :class:`DataFrame` or :class:`Series`. + .. 
versionadded:: 0.23.0 + Notes ----- The interface includes the following abstract methods that must be implemented by subclasses: * _constructor_from_sequence + * _from_factorized * __getitem__ * __len__ * dtype @@ -35,6 +38,15 @@ class ExtensionArray(object): * _can_hold_na * _formatting_values + Some methods require casting the ExtensionArray to an ndarray of Python + objects, which may be expensive. When performance is a concern, we highly + recommend overriding the following methods. + + * fillna + * unique + * factorize / _values_for_factorize + * argsort / _values_for_argsort + This class does not inherit from 'abc.ABCMeta' for performance reasons. Methods and properties required by the interface raise ``pandas.errors.AbstractMethodError`` and no ``register`` method is @@ -50,10 +62,6 @@ class ExtensionArray(object): by some other storage type, like Python lists. Pandas makes no assumptions on how the data are stored, just that it can be converted to a NumPy array. - - Extension arrays should be able to be constructed with instances of - the class, i.e. ``ExtensionArray(extension_array)`` should return - an instance, not error. """ # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. From f425ad913676469f6337f58ce497285e29abf096 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 21 Apr 2018 06:35:40 -0500 Subject: [PATCH 2/2] Updated --- doc/source/extending.rst | 4 ++-- pandas/core/arrays/base.py | 15 +++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/doc/source/extending.rst b/doc/source/extending.rst index cbca8f2ad29e9..b94a43480ed93 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -114,11 +114,11 @@ See the `extension array source`_ for the interface definition. The docstrings and comments contain guidance for properly implementing the interface. We provide a test suite for ensuring that your extension arrays satisfy the expected -behavior. 
To use the test-suite, you must provide several pytest fixtures and inherit +behavior. To use the test suite, you must provide several pytest fixtures and inherit from the base test class. The required fixtures are found in https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/conftest.py. -To use a test, subclass it +To use a test, subclass it: .. code-block:: python diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e5418217b1565..2eaad3980cf08 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,10 @@ -"""An interface for extending pandas with custom arrays.""" +"""An interface for extending pandas with custom arrays. + +.. warning:: + + This is an experimental API and subject to breaking changes + without warning. +""" import numpy as np from pandas.errors import AbstractMethodError @@ -33,14 +39,15 @@ class ExtensionArray(object): * _concat_same_type Some additional methods are available to satisfy pandas' internal, private - block API. + block API: * _can_hold_na * _formatting_values Some methods require casting the ExtensionArray to an ndarray of Python - objects, which may be expensive. When performance is a concern, we highly - recommend overriding the following methods. + objects with ``self.astype(object)``, which may be expensive. When + performance is a concern, we highly recommend overriding the following + methods: * fillna * unique