|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pyarrow as pa |
| 5 | + |
| 6 | +from pandas._typing import DtypeObj |
| 7 | +from pandas.util._decorators import cache_readonly |
| 8 | + |
| 9 | +from pandas.core.dtypes.base import StorageExtensionDtype |
| 10 | + |
| 11 | +from pandas.core.arrays.arrow import ArrowExtensionArray |
| 12 | + |
| 13 | + |
class ArrowDtype(StorageExtensionDtype):
    """
    Base class for dtypes whose array type is ArrowExtensionArray.

    Modeled after BaseMaskedDtype. The ``name`` and ``type`` attributes are
    only declared here; concrete subclasses are expected to assign them.
    """

    name: str
    base = None
    # pyarrow data type represented by this dtype (assigned by subclasses).
    type: pa.DataType

    na_value = pa.NA

    def __init__(self, storage="pyarrow") -> None:
        super().__init__(storage)

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """Return an instance of the related numpy dtype."""
        # ``to_pandas_dtype`` can hand back a numpy scalar *class* (e.g.
        # ``numpy.int64``) rather than a dtype instance. Normalize through
        # ``np.dtype`` so that ``kind`` and ``itemsize`` read real dtype
        # attributes; ``np.dtype`` is a no-op on an existing dtype instance.
        return np.dtype(self.type.to_pandas_dtype())

    @cache_readonly
    def kind(self) -> str:
        """Return the character code of the related numpy dtype's kind."""
        return self.numpy_dtype.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the number of bytes in this dtype."""
        return self.numpy_dtype.itemsize

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return ArrowExtensionArray

    @classmethod
    def construct_from_string(cls, string: str):
        """
        Construct this type from a string.

        Only the exact spelling ``"<name>[pyarrow]"`` is accepted, where
        ``<name>`` is the ``name`` attribute of the class.

        Parameters
        ----------
        string : str

        Raises
        ------
        TypeError
            If ``string`` is not a ``str`` or does not match the expected
            spelling.
        """
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        if string == f"{cls.name}[pyarrow]":
            return cls(storage="pyarrow")
        raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")

    @classmethod
    def from_numpy_dtype(cls, dtype: np.dtype) -> ArrowDtype:
        """
        Construct the ArrowDtype corresponding to the given numpy dtype.

        Raises
        ------
        NotImplementedError
            If the pyarrow type derived from ``dtype`` does not equal the
            ``type`` attribute of this class.
        """
        # TODO: This may be incomplete
        pa_dtype = pa.from_numpy_dtype(dtype)
        # Compare by value: two pyarrow DataType objects describing the same
        # type are equal but are not guaranteed to be the same object.
        if pa_dtype == cls.type:
            return cls()
        raise NotImplementedError(dtype)

    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
        """
        Return a common dtype for ``dtypes``, or None if there is none.

        ArrowDtype entries are replaced by their numpy dtype, a common type
        is looked up for the result, and that numpy dtype is converted back
        through ``from_numpy_dtype`` of this instance's class.
        """
        # Local import, kept as in the original code.
        from pandas.core.dtypes.cast import find_common_type

        new_dtype = find_common_type(
            [
                dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype
                for dtype in dtypes
            ]
        )
        if not isinstance(new_dtype, np.dtype):
            # The common type is not a plain numpy dtype, so it cannot be
            # converted back into an ArrowDtype.
            return None
        try:
            return type(self).from_numpy_dtype(new_dtype)
        except (KeyError, NotImplementedError):
            return None

    def __from_arrow__(self, array: pa.Array | pa.ChunkedArray):
        """
        Construct the associated extension array from a pyarrow
        Array/ChunkedArray.
        """
        array_class = self.construct_array_type()
        return array_class(array)
0 commit comments