From 0522cecda2095ce53911643ef958aa95b1005807 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 27 Mar 2022 16:39:36 -0700 Subject: [PATCH 1/2] REF: Create StorageExtensionDtype --- pandas/core/arrays/string_.py | 39 +++-------------------- pandas/core/dtypes/base.py | 58 ++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 35 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f3db5598e306c..932951d01d405 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1,9 +1,6 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, -) +from typing import TYPE_CHECKING import numpy as np @@ -24,6 +21,7 @@ from pandas.core.dtypes.base import ( ExtensionDtype, + StorageExtensionDtype, register_extension_dtype, ) from pandas.core.dtypes.common import ( @@ -55,7 +53,7 @@ @register_extension_dtype -class StringDtype(ExtensionDtype): +class StringDtype(StorageExtensionDtype): """ Extension dtype for string data. @@ -67,7 +65,7 @@ class StringDtype(ExtensionDtype): parts of the API may change without warning. In particular, StringDtype.na_value may change to no longer be - ``numpy.nan``. + ``pd.NA``. Parameters ---------- @@ -141,29 +139,8 @@ def construct_from_string(cls, string): ----- TypeError If the string is not a valid option. - """ - if not isinstance(string, str): - raise TypeError( - f"'construct_from_string' expects a string, got {type(string)}" - ) - if string == "string": - return cls() - elif string == "string[python]": - return cls(storage="python") - elif string == "string[pyarrow]": - return cls(storage="pyarrow") - else: - raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") - - def __eq__(self, other: Any) -> bool: - if isinstance(other, str) and other == "string": - return True - return super().__eq__(other) - - def __hash__(self) -> int: - # custom __eq__ so have to override __hash__ - return super().__hash__() + return super().construct_from_string(string) # https://github.com/pandas-dev/pandas/issues/36126 # error: Signature of "construct_array_type" incompatible with supertype @@ -185,12 +162,6 @@ def construct_array_type( # type: ignore[override] else: return ArrowStringArray - def __repr__(self): - return f"string[{self.storage}]" - - def __str__(self): - return self.name - def __from_arrow__( self, array: pyarrow.Array | pyarrow.ChunkedArray ) -> BaseStringArray: diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index eb5d1ccc5ed84..3497670612734 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,9 +1,9 @@ """ Extend pandas with custom array types. """ - from __future__ import annotations +import re from typing import ( TYPE_CHECKING, Any, @@ -14,6 +14,7 @@ import numpy as np +from pandas._libs import missing as libmissing from pandas._libs.hashtable import object_hash from pandas._typing import ( DtypeObj, @@ -391,6 +392,61 @@ def _can_hold_na(self) -> bool: return True +class StorageExtensionDtype(ExtensionDtype): + """ExtensionDtype that may be backed by more than one implementation.""" + + name: str + na_value = libmissing.NA + _metadata = ("storage",) + + def __init__(self, storage=None) -> None: + self.storage = storage + + @classmethod + def construct_from_string(cls, string: str): + """ + Construct a StorageExtensionDtype from a string. + + Parameters + ---------- + string : str + The type of StorageExtensionDtype to construct. String is assumed to match: + * self.name, where the default storage will be selected + * __repr__, where the storage option in the brackets will be selected + + Raise + ----- + TypeError + If the string is not a valid option. + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + if string == cls.name: + return cls() + storage_search = re.search(r"\[.*?]", string) + if storage_search: + return cls(storage=storage_search.group(0)[1:-1]) + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + def __repr__(self): + return f"{self.name}[{self.storage}]" + + def __str__(self): + return self.name + + def __eq__(self, other: Any) -> bool: + if isinstance(other, self.type) and other == self.name: + return True + return super().__eq__(other) + + def __hash__(self) -> int: + # custom __eq__ so have to override __hash__ + return super().__hash__() + + def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]: """ Register an ExtensionType with pandas as class decorator. From ad08fca90d3ad65857062a85d320f424254bb5cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 27 Mar 2022 18:10:28 -0700 Subject: [PATCH 2/2] Move construct_from_string later --- pandas/core/arrays/string_.py | 13 ++++++++++++- pandas/core/dtypes/base.py | 30 ------------------------------ 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 932951d01d405..21b5dc625956e 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -140,7 +140,18 @@ def construct_from_string(cls, string): TypeError If the string is not a valid option. """ - return super().construct_from_string(string) + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + if string == "string": + return cls() + elif string == "string[python]": + return cls(storage="python") + elif string == "string[pyarrow]": + return cls(storage="pyarrow") + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") # https://github.com/pandas-dev/pandas/issues/36126 # error: Signature of "construct_array_type" incompatible with supertype diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 3497670612734..9762b779477e4 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -3,7 +3,6 @@ """ from __future__ import annotations -import re from typing import ( TYPE_CHECKING, Any, @@ -402,35 +401,6 @@ class StorageExtensionDtype(ExtensionDtype): def __init__(self, storage=None) -> None: self.storage = storage - @classmethod - def construct_from_string(cls, string: str): - """ - Construct a StorageExtensionDtype from a string. - - Parameters - ---------- - string : str - The type of StorageExtensionDtype to construct. String is assumed to match: - * self.name, where the default storage will be selected - * __repr__, where the storage option in the brackets will be selected - - Raise - ----- - TypeError - If the string is not a valid option. - """ - if not isinstance(string, str): - raise TypeError( - f"'construct_from_string' expects a string, got {type(string)}" - ) - if string == cls.name: - return cls() - storage_search = re.search(r"\[.*?]", string) - if storage_search: - return cls(storage=storage_search.group(0)[1:-1]) - else: - raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") - def __repr__(self): return f"{self.name}[{self.storage}]"