Skip to content

Commit 583c8da

Browse files
authored
REF: Create StorageExtensionDtype (#46537)
1 parent 58a2069 commit 583c8da

File tree

2 files changed

+31
-23
lines changed

2 files changed

+31
-23
lines changed

pandas/core/arrays/string_.py

+4-22
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import (
4-
TYPE_CHECKING,
5-
Any,
6-
)
3+
from typing import TYPE_CHECKING
74

85
import numpy as np
96

@@ -24,6 +21,7 @@
2421

2522
from pandas.core.dtypes.base import (
2623
ExtensionDtype,
24+
StorageExtensionDtype,
2725
register_extension_dtype,
2826
)
2927
from pandas.core.dtypes.common import (
@@ -55,7 +53,7 @@
5553

5654

5755
@register_extension_dtype
58-
class StringDtype(ExtensionDtype):
56+
class StringDtype(StorageExtensionDtype):
5957
"""
6058
Extension dtype for string data.
6159
@@ -67,7 +65,7 @@ class StringDtype(ExtensionDtype):
6765
parts of the API may change without warning.
6866
6967
In particular, StringDtype.na_value may change to no longer be
70-
``numpy.nan``.
68+
``pd.NA``.
7169
7270
Parameters
7371
----------
@@ -141,7 +139,6 @@ def construct_from_string(cls, string):
141139
-----
142140
TypeError
143141
If the string is not a valid option.
144-
145142
"""
146143
if not isinstance(string, str):
147144
raise TypeError(
@@ -156,15 +153,6 @@ def construct_from_string(cls, string):
156153
else:
157154
raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
158155

159-
def __eq__(self, other: Any) -> bool:
160-
if isinstance(other, str) and other == "string":
161-
return True
162-
return super().__eq__(other)
163-
164-
def __hash__(self) -> int:
165-
# custom __eq__ so have to override __hash__
166-
return super().__hash__()
167-
168156
# https://github.com/pandas-dev/pandas/issues/36126
169157
# error: Signature of "construct_array_type" incompatible with supertype
170158
# "ExtensionDtype"
@@ -185,12 +173,6 @@ def construct_array_type( # type: ignore[override]
185173
else:
186174
return ArrowStringArray
187175

188-
def __repr__(self):
189-
return f"string[{self.storage}]"
190-
191-
def __str__(self):
192-
return self.name
193-
194176
def __from_arrow__(
195177
self, array: pyarrow.Array | pyarrow.ChunkedArray
196178
) -> BaseStringArray:

pandas/core/dtypes/base.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""
22
Extend pandas with custom array types.
33
"""
4-
54
from __future__ import annotations
65

76
from typing import (
@@ -14,6 +13,7 @@
1413

1514
import numpy as np
1615

16+
from pandas._libs import missing as libmissing
1717
from pandas._libs.hashtable import object_hash
1818
from pandas._typing import (
1919
DtypeObj,
@@ -391,6 +391,32 @@ def _can_hold_na(self) -> bool:
391391
return True
392392

393393

394+
class StorageExtensionDtype(ExtensionDtype):
    """
    ExtensionDtype that may be backed by more than one implementation.

    The backing implementation is recorded in the ``storage`` attribute,
    which is the sole entry of ``_metadata``.

    Parameters
    ----------
    storage : optional
        Identifier of the backing implementation. It is stored on the
        instance as-is; no validation is performed here.
    """

    # Subclasses are expected to provide the dtype's string name.
    name: str
    na_value = libmissing.NA
    _metadata = ("storage",)

    def __init__(self, storage=None) -> None:
        self.storage = storage

    def __repr__(self) -> str:
        """Return ``"<name>[<storage>]"``."""
        return f"{self.name}[{self.storage}]"

    def __str__(self) -> str:
        """Return the dtype name without the storage suffix."""
        return self.name

    def __eq__(self, other: Any) -> bool:
        """
        Compare with another object.

        The bare dtype name (a ``str`` equal to ``self.name``) compares
        equal regardless of ``storage``; every other comparison is
        delegated to the parent class.
        """
        # ``self.name`` is a str, so the shortcut must test for ``str`` rather
        # than ``self.type`` (the scalar type), which need not be ``str`` for
        # every storage-backed dtype.
        if isinstance(other, str) and other == self.name:
            return True
        return super().__eq__(other)

    def __hash__(self) -> int:
        # custom __eq__ so have to override __hash__
        return super().__hash__()
418+
419+
394420
def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:
395421
"""
396422
Register an ExtensionType with pandas as class decorator.

0 commit comments

Comments
 (0)