|
23 | 23 | from pandas.core.arrays import ExtensionArray
|
24 | 24 |
|
25 | 25 | if TYPE_CHECKING:
|
26 |
| - from pandas._typing import type_t |
| 26 | + from pandas._typing import ( |
| 27 | + type_t, |
| 28 | + Shape, |
| 29 | + ) |
27 | 30 |
|
28 | 31 | import pyarrow as pa
|
29 | 32 |
|
@@ -82,8 +85,21 @@ def __init__(self, values: pa.Array | pa.ChunkedArray | list | ListArray) -> Non
|
82 | 85 | def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
|
83 | 86 | if isinstance(scalars, ListArray):
|
84 | 87 | return cls(scalars)
|
| 88 | + elif isinstance(scalars, pa.Scalar): |
| 89 | + scalars = [scalars] |
| 90 | + return cls(scalars) |
85 | 91 |
|
86 |
| - values = pa.array(scalars, from_pandas=True) |
| 92 | + try: |
| 93 | + values = pa.array(scalars, from_pandas=True) |
| 94 | + except TypeError: |
| 95 | + # TypeError: object of type 'NoneType' has no len() if you have |
| 96 | + # pa.ListScalar(None). Upstream issue in Arrow - see: |
| 97 | + # https://github.com/apache/arrow/issues/40319 |
| 98 | + for i in range(len(scalars)): |
| 99 | + if not scalars[i].is_valid: |
| 100 | + scalars[i] = None |
| 101 | + |
| 102 | + values = pa.array(scalars, from_pandas=True) |
87 | 103 | if values.type == "null":
|
88 | 104 | # TODO(wayd): this is a hack to get the tests to pass, but the overall issue
|
89 | 105 | # is that our extension types don't support parametrization but the pyarrow
|
@@ -113,8 +129,35 @@ def take(self, indexer, allow_fill=False, fill_value=None):
|
113 | 129 | # TODO: what do we need to do with allow_fill and fill_value here?
|
114 | 130 | return type(self)(self._pa_array.take(indexer))
|
115 | 131 |
|
@classmethod
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
    """
    Build an all-null array of the requested one-dimensional shape.

    Parameters
    ----------
    shape : int or tuple of int
        Requested length, given either bare or wrapped in a 1-tuple.
    dtype : ExtensionDtype
        Accepted for signature compatibility; it is not consulted here.
        ``_from_sequence`` is always called with ``pa.list_(pa.null())``.

    Returns
    -------
    The result of ``cls._from_sequence`` on ``length`` ``None`` values.

    Raises
    ------
    ValueError
        If ``shape`` is a tuple with more than one entry.

    See Also
    --------
    ExtensionDtype.empty
        ExtensionDtype.empty is the 'official' public version of this API.
    """
    # Implementer note: ExtensionDtype.empty is the public entry point,
    # but pandas also calls this private `_empty` hook internally, so it
    # has to be implemented as well.
    if not isinstance(shape, tuple):
        length = shape
    elif len(shape) > 1:
        raise ValueError("ListArray may only be 1-D")
    else:
        length = shape[0]

    nulls = [None] * length
    return cls._from_sequence(nulls, dtype=pa.list_(pa.null()))
| 153 | + |
def copy(self):
    """
    Return a new instance of this array's type wrapping a copy of the data.

    The underlying pyarrow data is first merged with ``combine_chunks`` and
    then copied to the device of pyarrow's default CPU memory manager.
    """
    cpu_manager = pa.default_cpu_memory_manager()

    # TODO(wayd): ChunkedArray does not implement copy_to so this
    # ends up creating an Array
    merged = self._pa_array.combine_chunks()
    return type(self)(merged.copy_to(cpu_manager.device))
118 | 161 |
|
119 | 162 | def astype(self, dtype, copy=True):
|
120 | 163 | if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
|
|
0 commit comments