From 3c29114b1d8d72a4cf6d564bd40406ee70c37c7d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 25 Oct 2019 21:14:38 +0200
Subject: [PATCH 1/3] API/DOC: an ExtensionDtype.__from_arrow__ method to convert pyarrow.Array into ExtensionArray

---
 pandas/core/dtypes/base.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 59ef17e3d121f..7ed1002949621 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -63,6 +63,11 @@ class property**.
     Added ``_metadata``, ``__hash__``, and changed the default definition
     of ``__eq__``.
 
+    For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method
+    can be implemented: this method receives a pyarrow Array as only argument
+    and is expected to return the appropriate pandas ExtensionArray for this
+    dtype and the passed values.
+
     This class does not inherit from 'abc.ABCMeta' for performance reasons.
     Methods and properties required by the interface raise
     ``pandas.errors.AbstractMethodError`` and no ``register`` method is

From be334143cd92f44ab9a7e09e8c155f9ff5e4235f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 29 Oct 2019 14:28:24 +0100
Subject: [PATCH 2/3] add signature

---
 pandas/core/dtypes/base.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 7ed1002949621..f018876a0ac7a 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -66,7 +66,12 @@ class property**.
     For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method
     can be implemented: this method receives a pyarrow Array as only argument
     and is expected to return the appropriate pandas ExtensionArray for this
-    dtype and the passed values.
+    dtype and the passed values::
+
+        class ExtensionDtype:
+
+            def __from_arrow__(self, array: pyarrow.Array) -> ExtensionArray:
+                ...
 
     This class does not inherit from 'abc.ABCMeta' for performance reasons.
     Methods and properties required by the interface raise

From 9ddfb20236efa068d3b0e286bf561cb5f9eb2d56 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 8 Nov 2019 14:05:43 +0100
Subject: [PATCH 3/3] add chunked array as option

---
 pandas/core/dtypes/base.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index f018876a0ac7a..cd5064522bf4a 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -64,13 +64,15 @@ class property**.
     of ``__eq__``.
 
     For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method
-    can be implemented: this method receives a pyarrow Array as only argument
-    and is expected to return the appropriate pandas ExtensionArray for this
-    dtype and the passed values::
+    can be implemented: this method receives a pyarrow Array or ChunkedArray
+    as only argument and is expected to return the appropriate pandas
+    ExtensionArray for this dtype and the passed values::
 
         class ExtensionDtype:
 
-            def __from_arrow__(self, array: pyarrow.Array) -> ExtensionArray:
+            def __from_arrow__(
+                self, array: pyarrow.Array/ChunkedArray
+            ) -> ExtensionArray:
                 ...
 
     This class does not inherit from 'abc.ABCMeta' for performance reasons.