From 5222d75d8957258346c4ac3ca9e439a224022f75 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 5 Sep 2020 05:56:39 -0500 Subject: [PATCH 1/2] API: Make ExtensionDtype.construct_array_type a method This allows a single dtype to support multiple array classes. For arrow-backed strings, we'll likely want a separate array class for ease of implementation and clarity, while still exposing a single parametrized dtype. ```python class StringDtype: def __init__(self, storage="python"): self.storage = storage def construct_array_type(self): # regular method if self.storage == "python": return StringArray else: return ArrowStringArray ``` Closes #36126 --- doc/source/whatsnew/v1.2.0.rst | 7 +++++++ pandas/core/arrays/boolean.py | 3 +-- pandas/core/arrays/integer.py | 3 +-- pandas/core/arrays/masked.py | 3 +-- pandas/core/arrays/numpy_.py | 3 +-- pandas/core/arrays/sparse/dtype.py | 3 +-- pandas/core/dtypes/base.py | 3 +-- pandas/core/dtypes/dtypes.py | 12 ++++-------- pandas/tests/arrays/test_array.py | 3 +-- pandas/tests/extension/arrow/arrays.py | 6 ++---- pandas/tests/extension/decimal/array.py | 3 +-- pandas/tests/extension/json/array.py | 3 +-- pandas/tests/extension/list/array.py | 3 +-- 13 files changed, 23 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 1617bf66c4f04..a98134e547630 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -57,6 +57,13 @@ Other enhancements - - +.. _whatsnew_120.api_breaking.experimental: + +Changes to experimental APIs +---------------------------- + +- :meth:`pandas.api.extensions.ExtensionDtype.construct_array_type` has changed from a classmethod to a regular method to support one dtype being used for multiple arrays. To migrate, change your definition to a regular method and ensure that your method is called on instances rather than the class (:issue:`36126`). + .. 
_whatsnew_120.api_breaking.python: Increased minimum version for Python diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index bd4bdc5ecb46f..807bf0f6ddf88 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -70,8 +70,7 @@ def kind(self) -> str: def numpy_dtype(self) -> np.dtype: return np.dtype("bool") - @classmethod - def construct_array_type(cls) -> Type["BooleanArray"]: + def construct_array_type(self) -> Type["BooleanArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d83ff91a1315f..bf6e0163d0b3b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -80,8 +80,7 @@ def itemsize(self) -> int: """ Return the number of bytes in this dtype """ return self.numpy_dtype.itemsize - @classmethod - def construct_array_type(cls) -> Type["IntegerArray"]: + def construct_array_type(self) -> Type["IntegerArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 1237dea5c1a64..6e7d3dbcceeaf 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -40,8 +40,7 @@ class BaseMaskedDtype(ExtensionDtype): def numpy_dtype(self) -> np.dtype: raise AbstractMethodError - @classmethod - def construct_array_type(cls) -> Type["BaseMaskedArray"]: + def construct_array_type(self) -> Type["BaseMaskedArray"]: """ Return the array type associated with this dtype. 
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 23a4a70734c81..4b3db24e343b3 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -94,8 +94,7 @@ def construct_from_string(cls, string: str) -> "PandasDtype": raise TypeError(msg) from err return cls(dtype) - @classmethod - def construct_array_type(cls) -> Type["PandasArray"]: + def construct_array_type(self) -> Type["PandasArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index ccf2825162f51..fef995884d781 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -171,8 +171,7 @@ def name(self): def __repr__(self) -> str: return self.name - @classmethod - def construct_array_type(cls) -> Type["SparseArray"]: + def construct_array_type(self) -> Type["SparseArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 07c73876954d0..a9cc8c9f25d2e 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -187,8 +187,7 @@ def names(self) -> Optional[List[str]]: """ return None - @classmethod - def construct_array_type(cls) -> Type["ExtensionArray"]: + def construct_array_type(self) -> Type["ExtensionArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 8dc500dddeafa..a8d8501eb86f8 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -430,8 +430,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int: hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) return np.bitwise_xor.reduce(hashed) - @classmethod - def construct_array_type(cls) -> Type["Categorical"]: + def construct_array_type(self) -> Type["Categorical"]: """ Return the array type associated with this dtype. 
@@ -679,8 +678,7 @@ def tz(self): """ return self._tz - @classmethod - def construct_array_type(cls) -> Type["DatetimeArray"]: + def construct_array_type(self) -> Type["DatetimeArray"]: """ Return the array type associated with this dtype. @@ -922,8 +920,7 @@ def is_dtype(cls, dtype: object) -> bool: return False return super().is_dtype(dtype) - @classmethod - def construct_array_type(cls) -> Type["PeriodArray"]: + def construct_array_type(self) -> Type["PeriodArray"]: """ Return the array type associated with this dtype. @@ -1047,8 +1044,7 @@ def subtype(self): """ return self._subtype - @classmethod - def construct_array_type(cls) -> Type["IntervalArray"]: + def construct_array_type(self) -> Type["IntervalArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index a0525aa511ee2..da771edd42608 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -277,8 +277,7 @@ def test_scalar_raises(): class DecimalDtype2(DecimalDtype): name = "decimal2" - @classmethod - def construct_array_type(cls): + def construct_array_type(self): """ Return the array type associated with this dtype. diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 8a18f505058bc..6a837f94ae746 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -31,8 +31,7 @@ class ArrowBoolDtype(ExtensionDtype): name = "arrow_bool" na_value = pa.NULL - @classmethod - def construct_array_type(cls) -> Type["ArrowBoolArray"]: + def construct_array_type(self) -> Type["ArrowBoolArray"]: """ Return the array type associated with this dtype. 
@@ -55,8 +54,7 @@ class ArrowStringDtype(ExtensionDtype): name = "arrow_string" na_value = pa.NULL - @classmethod - def construct_array_type(cls) -> Type["ArrowStringArray"]: + def construct_array_type(self) -> Type["ArrowStringArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2fbeec8dd8378..4626b4a30a60b 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -28,8 +28,7 @@ def __init__(self, context=None): def __repr__(self) -> str: return f"DecimalDtype(context={self.context})" - @classmethod - def construct_array_type(cls) -> Type["DecimalArray"]: + def construct_array_type(self) -> Type["DecimalArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 447a6108fc3c7..4094a2d9bd28c 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -32,8 +32,7 @@ class JSONDtype(ExtensionDtype): name = "json" na_value: Mapping[str, Any] = UserDict() - @classmethod - def construct_array_type(cls) -> Type["JSONArray"]: + def construct_array_type(self) -> Type["JSONArray"]: """ Return the array type associated with this dtype. diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index d86f90e58d897..8cca8b00614c1 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -21,8 +21,7 @@ class ListDtype(ExtensionDtype): name = "list" na_value = np.nan - @classmethod - def construct_array_type(cls) -> Type["ListArray"]: + def construct_array_type(self) -> Type["ListArray"]: """ Return the array type associated with this dtype. 
From ee9300092492a88a793fe35998c2399b58bd0406 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Sep 2020 15:13:45 -0700 Subject: [PATCH 2/2] Fix test_array_not_registered --- pandas/core/construction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f145e76046bee..c15440014c33c 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -280,6 +280,9 @@ def array( # this returns None for not-found dtypes. if isinstance(dtype, str): dtype = registry.find(dtype) or dtype + if isinstance(dtype, type) and issubclass(dtype, ExtensionDtype): + # Needed for test_array_not_registered GH#36136 + dtype = dtype() if is_extension_array_dtype(dtype): cls = cast(ExtensionDtype, dtype).construct_array_type()