Skip to content

Commit f5b6e99

Browse files
authored
REF: mix NDArrayBackedExtensionArray into PandasArray (#33797)
1 parent afb0464 commit f5b6e99

File tree

4 files changed

+70
-126
lines changed

4 files changed

+70
-126
lines changed

pandas/core/arrays/_mixins.py

+59-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
from typing import Any, Sequence, TypeVar
1+
from typing import Any, Sequence, Tuple, TypeVar
22

33
import numpy as np
44

5+
from pandas.compat.numpy import function as nv
56
from pandas.errors import AbstractMethodError
67

7-
from pandas.core.algorithms import take
8+
from pandas.core.algorithms import take, unique
89
from pandas.core.arrays.base import ExtensionArray
910

1011
_T = TypeVar("_T", bound="NDArrayBackedExtensionArray")
@@ -60,3 +61,59 @@ def _validate_fill_value(self, fill_value):
6061
ValueError
6162
"""
6263
raise AbstractMethodError(self)
64+
65+
# ------------------------------------------------------------------------
66+
67+
@property
68+
def shape(self) -> Tuple[int, ...]:
69+
return self._ndarray.shape
70+
71+
def __len__(self) -> int:
72+
return self.shape[0]
73+
74+
@property
75+
def ndim(self) -> int:
76+
return len(self.shape)
77+
78+
@property
79+
def size(self) -> int:
80+
return np.prod(self.shape)
81+
82+
@property
83+
def nbytes(self) -> int:
84+
return self._ndarray.nbytes
85+
86+
def reshape(self: _T, *args, **kwargs) -> _T:
87+
new_data = self._ndarray.reshape(*args, **kwargs)
88+
return self._from_backing_data(new_data)
89+
90+
def ravel(self: _T, *args, **kwargs) -> _T:
91+
new_data = self._ndarray.ravel(*args, **kwargs)
92+
return self._from_backing_data(new_data)
93+
94+
@property
95+
def T(self: _T) -> _T:
96+
new_data = self._ndarray.T
97+
return self._from_backing_data(new_data)
98+
99+
# ------------------------------------------------------------------------
100+
101+
def copy(self: _T) -> _T:
102+
new_data = self._ndarray.copy()
103+
return self._from_backing_data(new_data)
104+
105+
def repeat(self: _T, repeats, axis=None) -> _T:
106+
"""
107+
Repeat elements of an array.
108+
109+
See Also
110+
--------
111+
numpy.ndarray.repeat
112+
"""
113+
nv.validate_repeat(tuple(), dict(axis=axis))
114+
new_data = self._ndarray.repeat(repeats, axis=axis)
115+
return self._from_backing_data(new_data)
116+
117+
def unique(self: _T) -> _T:
118+
new_data = unique(self._ndarray)
119+
return self._from_backing_data(new_data)

pandas/core/arrays/categorical.py

+1-58
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,7 @@
99

1010
from pandas._libs import algos as libalgos, hashtable as htable
1111
from pandas._typing import ArrayLike, Dtype, Ordered, Scalar
12-
from pandas.compat.numpy import function as nv
13-
from pandas.util._decorators import (
14-
Appender,
15-
Substitution,
16-
cache_readonly,
17-
deprecate_kwarg,
18-
doc,
19-
)
12+
from pandas.util._decorators import cache_readonly, deprecate_kwarg, doc
2013
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
2114

2215
from pandas.core.dtypes.cast import (
@@ -52,7 +45,6 @@
5245
from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d
5346
from pandas.core.array_algos.transforms import shift
5447
from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
55-
from pandas.core.arrays.base import _extension_array_shared_docs
5648
from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
5749
import pandas.core.common as com
5850
from pandas.core.construction import array, extract_array, sanitize_array
@@ -449,14 +441,6 @@ def _formatter(self, boxed=False):
449441
# Defer to CategoricalFormatter's formatter.
450442
return None
451443

452-
def copy(self) -> "Categorical":
453-
"""
454-
Copy constructor.
455-
"""
456-
return self._constructor(
457-
values=self._codes.copy(), dtype=self.dtype, fastpath=True
458-
)
459-
460444
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
461445
"""
462446
Coerce this type to another dtype
@@ -484,13 +468,6 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
484468
raise ValueError("Cannot convert float NaN to integer")
485469
return np.array(self, dtype=dtype, copy=copy)
486470

487-
@cache_readonly
488-
def size(self) -> int:
489-
"""
490-
Return the len of myself.
491-
"""
492-
return self._codes.size
493-
494471
@cache_readonly
495472
def itemsize(self) -> int:
496473
"""
@@ -1194,20 +1171,6 @@ def map(self, mapper):
11941171
__le__ = _cat_compare_op(operator.le)
11951172
__ge__ = _cat_compare_op(operator.ge)
11961173

1197-
# for Series/ndarray like compat
1198-
@property
1199-
def shape(self):
1200-
"""
1201-
Shape of the Categorical.
1202-
1203-
For internal compatibility with numpy arrays.
1204-
1205-
Returns
1206-
-------
1207-
shape : tuple
1208-
"""
1209-
return tuple([len(self._codes)])
1210-
12111174
def shift(self, periods, fill_value=None):
12121175
"""
12131176
Shift Categorical by desired number of periods.
@@ -1313,13 +1276,6 @@ def __setstate__(self, state):
13131276
for k, v in state.items():
13141277
setattr(self, k, v)
13151278

1316-
@property
1317-
def T(self) -> "Categorical":
1318-
"""
1319-
Return transposed numpy array.
1320-
"""
1321-
return self
1322-
13231279
@property
13241280
def nbytes(self):
13251281
return self._codes.nbytes + self.dtype.categories.values.nbytes
@@ -1865,12 +1821,6 @@ def take_nd(self, indexer, allow_fill: bool = False, fill_value=None):
18651821
)
18661822
return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value)
18671823

1868-
def __len__(self) -> int:
1869-
"""
1870-
The length of this Categorical.
1871-
"""
1872-
return len(self._codes)
1873-
18741824
def __iter__(self):
18751825
"""
18761826
Returns an Iterator over the values of this Categorical.
@@ -2337,13 +2287,6 @@ def describe(self):
23372287

23382288
return result
23392289

2340-
@Substitution(klass="Categorical")
2341-
@Appender(_extension_array_shared_docs["repeat"])
2342-
def repeat(self, repeats, axis=None):
2343-
nv.validate_repeat(tuple(), dict(axis=axis))
2344-
codes = self._codes.repeat(repeats)
2345-
return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
2346-
23472290
# Implement the ExtensionArray interface
23482291
@property
23492292
def _can_hold_na(self):

pandas/core/arrays/datetimelike.py

-46
Original file line numberDiff line numberDiff line change
@@ -465,24 +465,6 @@ def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
465465

466466
# ------------------------------------------------------------------
467467

468-
@property
469-
def ndim(self) -> int:
470-
return self._data.ndim
471-
472-
@property
473-
def shape(self):
474-
return self._data.shape
475-
476-
def reshape(self, *args, **kwargs):
477-
# Note: we drop any freq
478-
data = self._data.reshape(*args, **kwargs)
479-
return type(self)(data, dtype=self.dtype)
480-
481-
def ravel(self, *args, **kwargs):
482-
# Note: we drop any freq
483-
data = self._data.ravel(*args, **kwargs)
484-
return type(self)(data, dtype=self.dtype)
485-
486468
@property
487469
def _box_func(self):
488470
"""
@@ -532,24 +514,12 @@ def _formatter(self, boxed=False):
532514
# ----------------------------------------------------------------
533515
# Array-Like / EA-Interface Methods
534516

535-
@property
536-
def nbytes(self):
537-
return self._data.nbytes
538-
539517
def __array__(self, dtype=None) -> np.ndarray:
540518
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
541519
if is_object_dtype(dtype):
542520
return np.array(list(self), dtype=object)
543521
return self._data
544522

545-
@property
546-
def size(self) -> int:
547-
"""The number of elements in this array."""
548-
return np.prod(self.shape)
549-
550-
def __len__(self) -> int:
551-
return len(self._data)
552-
553523
def __getitem__(self, key):
554524
"""
555525
This getitem defers to the underlying array, which by-definition can
@@ -680,10 +650,6 @@ def view(self, dtype=None):
680650
# ------------------------------------------------------------------
681651
# ExtensionArray Interface
682652

683-
def unique(self):
684-
result = unique1d(self.asi8)
685-
return type(self)(result, dtype=self.dtype)
686-
687653
@classmethod
688654
def _concat_same_type(cls, to_concat, axis: int = 0):
689655

@@ -936,18 +902,6 @@ def searchsorted(self, value, side="left", sorter=None):
936902
# TODO: Use datetime64 semantics for sorting, xref GH#29844
937903
return self.asi8.searchsorted(value, side=side, sorter=sorter)
938904

939-
def repeat(self, repeats, *args, **kwargs):
940-
"""
941-
Repeat elements of an array.
942-
943-
See Also
944-
--------
945-
numpy.ndarray.repeat
946-
"""
947-
nv.validate_repeat(args, kwargs)
948-
values = self._data.repeat(repeats)
949-
return type(self)(values.view("i8"), dtype=self.dtype)
950-
951905
def value_counts(self, dropna=False):
952906
"""
953907
Return a Series containing counts of unique values.

pandas/core/arrays/numpy_.py

+10-20
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717

1818
from pandas import compat
1919
from pandas.core import nanops
20-
from pandas.core.algorithms import searchsorted, take, unique
20+
from pandas.core.algorithms import searchsorted
2121
from pandas.core.array_algos import masked_reductions
22+
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
2223
from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
2324
from pandas.core.construction import extract_array
2425
from pandas.core.indexers import check_array_indexer
@@ -120,7 +121,9 @@ def itemsize(self) -> int:
120121
return self._dtype.itemsize
121122

122123

123-
class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
124+
class PandasArray(
125+
NDArrayBackedExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin
126+
):
124127
"""
125128
A pandas ExtensionArray for NumPy data.
126129
@@ -191,6 +194,9 @@ def _from_factorized(cls, values, original) -> "PandasArray":
191194
def _concat_same_type(cls, to_concat) -> "PandasArray":
192195
return cls(np.concatenate(to_concat))
193196

197+
def _from_backing_data(self, arr: np.ndarray) -> "PandasArray":
198+
return type(self)(arr)
199+
194200
# ------------------------------------------------------------------------
195201
# Data
196202

@@ -272,13 +278,6 @@ def __setitem__(self, key, value) -> None:
272278

273279
self._ndarray[key] = value
274280

275-
def __len__(self) -> int:
276-
return len(self._ndarray)
277-
278-
@property
279-
def nbytes(self) -> int:
280-
return self._ndarray.nbytes
281-
282281
def isna(self) -> np.ndarray:
283282
return isna(self._ndarray)
284283

@@ -311,27 +310,18 @@ def fillna(
311310
new_values = self.copy()
312311
return new_values
313312

314-
def take(self, indices, allow_fill=False, fill_value=None) -> "PandasArray":
313+
def _validate_fill_value(self, fill_value):
315314
if fill_value is None:
316315
# Primarily for subclasses
317316
fill_value = self.dtype.na_value
318-
result = take(
319-
self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value
320-
)
321-
return type(self)(result)
322-
323-
def copy(self) -> "PandasArray":
324-
return type(self)(self._ndarray.copy())
317+
return fill_value
325318

326319
def _values_for_argsort(self) -> np.ndarray:
327320
return self._ndarray
328321

329322
def _values_for_factorize(self) -> Tuple[np.ndarray, int]:
330323
return self._ndarray, -1
331324

332-
def unique(self) -> "PandasArray":
333-
return type(self)(unique(self._ndarray))
334-
335325
# ------------------------------------------------------------------------
336326
# Reductions
337327

0 commit comments

Comments
 (0)