diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index f3db5598e306c..21b5dc625956e 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -1,9 +1,6 @@
 from __future__ import annotations
 
-from typing import (
-    TYPE_CHECKING,
-    Any,
-)
+from typing import TYPE_CHECKING
 
 import numpy as np
 
@@ -24,6 +21,7 @@
 
 from pandas.core.dtypes.base import (
     ExtensionDtype,
+    StorageExtensionDtype,
     register_extension_dtype,
 )
 from pandas.core.dtypes.common import (
@@ -55,7 +53,7 @@
 
 
 @register_extension_dtype
-class StringDtype(ExtensionDtype):
+class StringDtype(StorageExtensionDtype):
     """
     Extension dtype for string data.
 
@@ -67,7 +65,7 @@ class StringDtype(ExtensionDtype):
        parts of the API may change without warning.
 
        In particular, StringDtype.na_value may change to no longer be
-       ``numpy.nan``.
+       ``pd.NA``.
 
     Parameters
     ----------
@@ -141,7 +139,6 @@ def construct_from_string(cls, string):
         -----
         TypeError
             If the string is not a valid option.
-
         """
         if not isinstance(string, str):
             raise TypeError(
@@ -156,15 +153,6 @@ def construct_from_string(cls, string):
         else:
             raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
 
-    def __eq__(self, other: Any) -> bool:
-        if isinstance(other, str) and other == "string":
-            return True
-        return super().__eq__(other)
-
-    def __hash__(self) -> int:
-        # custom __eq__ so have to override __hash__
-        return super().__hash__()
-
     # https://github.com/pandas-dev/pandas/issues/36126
     # error: Signature of "construct_array_type" incompatible with supertype
     # "ExtensionDtype"
@@ -185,12 +173,6 @@ def construct_array_type(  # type: ignore[override]
         else:
             return ArrowStringArray
 
-    def __repr__(self):
-        return f"string[{self.storage}]"
-
-    def __str__(self):
-        return self.name
-
     def __from_arrow__(
         self, array: pyarrow.Array | pyarrow.ChunkedArray
     ) -> BaseStringArray:
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index eb5d1ccc5ed84..9762b779477e4 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -1,7 +1,6 @@
 """
 Extend pandas with custom array types.
 """
-
 from __future__ import annotations
 
 from typing import (
@@ -14,6 +13,7 @@
 
 import numpy as np
 
+from pandas._libs import missing as libmissing
 from pandas._libs.hashtable import object_hash
 from pandas._typing import (
     DtypeObj,
@@ -391,6 +391,41 @@ def _can_hold_na(self) -> bool:
         return True
 
 
+class StorageExtensionDtype(ExtensionDtype):
+    """
+    ExtensionDtype that may be backed by more than one implementation.
+
+    Subclasses provide ``name``; the chosen backend is kept in ``storage``,
+    which is listed in ``_metadata``.
+
+    Parameters
+    ----------
+    storage : str, optional
+        Identifier of the backing implementation.
+    """
+
+    name: str
+    na_value = libmissing.NA
+    _metadata = ("storage",)
+
+    def __init__(self, storage: str | None = None) -> None:
+        self.storage = storage
+
+    def __repr__(self) -> str:
+        return f"{self.name}[{self.storage}]"
+
+    def __str__(self) -> str:
+        return self.name
+
+    def __eq__(self, other: Any) -> bool:
+        # ``name`` is a str, so test for ``str`` rather than ``self.type``.
+        if isinstance(other, str) and other == self.name:
+            return True
+        return super().__eq__(other)
+
+    def __hash__(self) -> int:
+        # custom __eq__ so have to override __hash__
+        return super().__hash__()
+
+
 def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:
     """
     Register an ExtensionType with pandas as class decorator.