48
48
from pandas .core .dtypes .missing import isna
49
49
50
50
from pandas .core .arraylike import OpsMixin
51
+ from pandas .core .arrays ._mixins import ArrowExtensionArray
51
52
from pandas .core .arrays .base import ExtensionArray
52
53
from pandas .core .arrays .boolean import BooleanDtype
53
54
from pandas .core .arrays .integer import Int64Dtype
@@ -94,7 +95,9 @@ def _chk_pyarrow_available() -> None:
94
95
# fallback for the ones that pyarrow doesn't yet support
95
96
96
97
97
- class ArrowStringArray (OpsMixin , BaseStringArray , ObjectStringArrayMixin ):
98
+ class ArrowStringArray (
99
+ OpsMixin , ArrowExtensionArray , BaseStringArray , ObjectStringArrayMixin
100
+ ):
98
101
"""
99
102
Extension array for string data in a ``pyarrow.ChunkedArray``.
100
103
@@ -191,10 +194,6 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
191
194
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
192
195
return self .to_numpy (dtype = dtype )
193
196
194
- def __arrow_array__ (self , type = None ):
195
- """Convert myself to a pyarrow Array or ChunkedArray."""
196
- return self ._data
197
-
198
197
def to_numpy (
199
198
self ,
200
199
dtype : npt .DTypeLike | None = None ,
@@ -216,16 +215,6 @@ def to_numpy(
216
215
result [mask ] = na_value
217
216
return result
218
217
219
- def __len__ (self ) -> int :
220
- """
221
- Length of this array.
222
-
223
- Returns
224
- -------
225
- length : int
226
- """
227
- return len (self ._data )
228
-
229
218
@doc (ExtensionArray .factorize )
230
219
def factorize (self , na_sentinel : int = - 1 ) -> tuple [np .ndarray , ExtensionArray ]:
231
220
encoded = self ._data .dictionary_encode ()
@@ -243,25 +232,6 @@ def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
243
232
244
233
return indices .values , uniques
245
234
246
- @classmethod
247
- def _concat_same_type (cls , to_concat ) -> ArrowStringArray :
248
- """
249
- Concatenate multiple ArrowStringArray.
250
-
251
- Parameters
252
- ----------
253
- to_concat : sequence of ArrowStringArray
254
-
255
- Returns
256
- -------
257
- ArrowStringArray
258
- """
259
- return cls (
260
- pa .chunked_array (
261
- [array for ea in to_concat for array in ea ._data .iterchunks ()]
262
- )
263
- )
264
-
265
235
@overload
266
236
def __getitem__ (self , item : ScalarIndexer ) -> ArrowStringScalarOrNAT :
267
237
...
@@ -342,34 +312,6 @@ def _as_pandas_scalar(self, arrow_scalar: pa.Scalar):
342
312
else :
343
313
return scalar
344
314
345
- @property
346
- def nbytes (self ) -> int :
347
- """
348
- The number of bytes needed to store this object in memory.
349
- """
350
- return self ._data .nbytes
351
-
352
- def isna (self ) -> np .ndarray :
353
- """
354
- Boolean NumPy array indicating if each value is missing.
355
-
356
- This should return a 1-D array the same length as 'self'.
357
- """
358
- # TODO: Implement .to_numpy for ChunkedArray
359
- return self ._data .is_null ().to_pandas ().values
360
-
361
- def copy (self ) -> ArrowStringArray :
362
- """
363
- Return a shallow copy of the array.
364
-
365
- Underlying ChunkedArray is immutable, so a deep copy is unnecessary.
366
-
367
- Returns
368
- -------
369
- ArrowStringArray
370
- """
371
- return type (self )(self ._data )
372
-
373
315
def _cmp_method (self , other , op ):
374
316
from pandas .arrays import BooleanArray
375
317
0 commit comments