forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbase.py
89 lines (71 loc) · 2.5 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from __future__ import annotations
import numpy as np
import pyarrow as pa
from pandas._typing import DtypeObj
from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.base import StorageExtensionDtype
from pandas.core.arrays.arrow import ArrowExtensionArray
class ArrowDtype(StorageExtensionDtype):
    """
    Base class for dtypes of pyarrow-backed extension arrays.

    Modeled after BaseMaskedDtype.
    """

    name: str
    base = None
    # The pyarrow data type this dtype wraps. Only declared here, never
    # assigned; presumably concrete subclasses provide it -- TODO confirm.
    type: pa.DataType

    # Scalar used to represent a missing value.
    na_value = pa.NA

    def __init__(self, storage="pyarrow") -> None:
        super().__init__(storage)

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """
        Return an instance of the related numpy dtype.

        Returns
        -------
        np.dtype
            The numpy dtype equivalent to ``self.type``; ``object`` dtype when
            pyarrow's answer cannot be expressed as a numpy dtype.
        """
        pandas_dtype = self.type.to_pandas_dtype()
        try:
            # ``to_pandas_dtype`` hands back a numpy scalar *class* (e.g.
            # ``np.int64``), not a dtype instance; normalize it so that
            # ``.kind`` / ``.itemsize`` below work as annotated.
            return np.dtype(pandas_dtype)
        except TypeError:
            # Not something numpy understands (e.g. a pandas extension dtype).
            return np.dtype(object)

    @cache_readonly
    def kind(self) -> str:
        """Return the numpy kind code of the related numpy dtype."""
        return self.numpy_dtype.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the number of bytes in this dtype"""
        return self.numpy_dtype.itemsize

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return ArrowExtensionArray

    @classmethod
    def from_numpy_dtype(cls, dtype: np.dtype) -> ArrowDtype:
        """
        Construct the ArrowDtype corresponding to the given numpy dtype.

        Parameters
        ----------
        dtype : np.dtype
            The numpy dtype to convert.

        Returns
        -------
        ArrowDtype
            A new instance of ``cls``.

        Raises
        ------
        NotImplementedError
            If the pyarrow type equivalent to ``dtype`` is not ``cls.type``.
        """
        # TODO: This may be incomplete
        pa_dtype = pa.from_numpy_dtype(dtype)
        # Compare by value: two equal pyarrow types are not guaranteed to be
        # the same Python object, so an identity check can spuriously fail.
        if pa_dtype == cls.type:
            return cls()
        raise NotImplementedError(dtype)

    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
        """
        Return the common dtype for ``dtypes``, or None if there is none.

        Parameters
        ----------
        dtypes : list of DtypeObj
            The dtypes to reconcile.

        Returns
        -------
        DtypeObj or None
            ``type(self).from_numpy_dtype`` applied to the common numpy dtype,
            or None when no common numpy dtype exists or it cannot be
            converted back.
        """
        # We unwrap any arrow dtypes to their numpy equivalents, find the
        # common dtype we would use for those, then re-wrap the result.
        from pandas.core.dtypes.cast import find_common_type

        new_dtype = find_common_type(
            [
                dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype
                for dtype in dtypes
            ]
        )
        if not isinstance(new_dtype, np.dtype):
            # The common dtype is not a plain numpy dtype, so there is
            # nothing we can wrap.
            return None
        try:
            return type(self).from_numpy_dtype(new_dtype)
        except (KeyError, NotImplementedError):
            return None

    def __from_arrow__(self, array: pa.Array | pa.ChunkedArray):
        """
        Construct an extension array from a pyarrow Array/ChunkedArray.

        The result is an instance of the class returned by
        ``construct_array_type``, built directly from ``array``.
        """
        array_class = self.construct_array_type()
        return array_class(array)