Skip to content

Commit d37643a

Browse files
authored
ENH: Create ArrowDtype (#46774)
1 parent 9465321 commit d37643a

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed

pandas/core/arrays/arrow/dtype.py

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from __future__ import annotations
2+
3+
import numpy as np
4+
import pyarrow as pa
5+
6+
from pandas._typing import DtypeObj
7+
from pandas.util._decorators import cache_readonly
8+
9+
from pandas.core.dtypes.base import StorageExtensionDtype
10+
11+
from pandas.core.arrays.arrow import ArrowExtensionArray
12+
13+
14+
class ArrowDtype(StorageExtensionDtype):
    """
    Base class for dtypes paired with ArrowExtensionArray.

    Modeled after BaseMaskedDtype.

    ``name`` and ``type`` are only annotated here, not assigned; presumably
    concrete subclasses supply them (``type`` being the pyarrow DataType the
    dtype wraps) -- confirm against the subclasses.
    """

    name: str
    base = None
    type: pa.DataType

    na_value = pa.NA

    def __init__(self, storage="pyarrow") -> None:
        super().__init__(storage)

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """
        Return an instance of the related numpy dtype.

        ``pyarrow.DataType.to_pandas_dtype`` can hand back a numpy scalar
        *class* (e.g. ``numpy.int64``) rather than a ``numpy.dtype``
        instance. Such classes are normalised to dtype instances so that
        ``kind`` and ``itemsize`` below work; any other return value is
        passed through unchanged.
        """
        converted = self.type.to_pandas_dtype()
        if isinstance(converted, type) and issubclass(converted, np.generic):
            converted = np.dtype(converted)
        return converted

    @cache_readonly
    def kind(self) -> str:
        """Return the ``kind`` character code of the related numpy dtype."""
        return self.numpy_dtype.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the number of bytes in this dtype."""
        return self.numpy_dtype.itemsize

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return ArrowExtensionArray

    @classmethod
    def construct_from_string(cls, string: str):
        """
        Construct this type from a string.

        Parameters
        ----------
        string : str
            Must be exactly ``f"{cls.name}[pyarrow]"``.

        Returns
        -------
        ArrowDtype
            An instance of ``cls`` with ``storage="pyarrow"``.

        Raises
        ------
        TypeError
            If ``string`` is not a ``str`` or does not match the expected
            ``"<name>[pyarrow]"`` form.
        """
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        if string == f"{cls.name}[pyarrow]":
            return cls(storage="pyarrow")
        raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")

    @classmethod
    def from_numpy_dtype(cls, dtype: np.dtype) -> ArrowDtype:
        """
        Construct the ArrowDtype corresponding to the given numpy dtype.

        Parameters
        ----------
        dtype : np.dtype

        Returns
        -------
        ArrowDtype
            A default-constructed instance of ``cls``.

        Raises
        ------
        NotImplementedError
            If the pyarrow type derived from ``dtype`` does not match
            ``cls.type``.
        """
        # TODO: This may be incomplete
        pa_dtype = pa.from_numpy_dtype(dtype)
        # Compare by value, not identity: nothing guarantees that
        # pa.from_numpy_dtype returns the very same object stored on the class.
        if pa_dtype == cls.type:
            return cls()
        raise NotImplementedError(dtype)

    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
        """
        Return the common dtype for ``dtypes``, or None if there is none.

        Every ArrowDtype in ``dtypes`` is replaced by its numpy dtype, the
        common numpy dtype is computed with ``find_common_type``, and the
        result is converted back through ``from_numpy_dtype``.
        """
        # Imported here rather than at module level (as in the original).
        from pandas.core.dtypes.cast import find_common_type

        new_dtype = find_common_type(
            [
                dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype
                for dtype in dtypes
            ]
        )
        if not isinstance(new_dtype, np.dtype):
            # The common type is not a plain numpy dtype, so it cannot be
            # mapped back through from_numpy_dtype.
            return None
        try:
            return type(self).from_numpy_dtype(new_dtype)
        except (KeyError, NotImplementedError):
            # No ArrowDtype of this class corresponds to the common dtype.
            return None

    def __from_arrow__(self, array: pa.Array | pa.ChunkedArray):
        """
        Construct the associated extension array from a pyarrow
        Array/ChunkedArray.

        The array class comes from ``construct_array_type`` and is called
        directly on ``array``.
        """
        array_class = self.construct_array_type()
        return array_class(array)

0 commit comments

Comments
 (0)