diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 0ed9de804c55e..d1f8957859337 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -1,10 +1,11 @@ -from typing import Any, Sequence, TypeVar +from typing import Any, Sequence, Tuple, TypeVar import numpy as np +from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.core.algorithms import take +from pandas.core.algorithms import take, unique from pandas.core.arrays.base import ExtensionArray _T = TypeVar("_T", bound="NDArrayBackedExtensionArray") @@ -60,3 +61,59 @@ def _validate_fill_value(self, fill_value): ValueError """ raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + + @property + def shape(self) -> Tuple[int, ...]: + return self._ndarray.shape + + def __len__(self) -> int: + return self.shape[0] + + @property + def ndim(self) -> int: + return len(self.shape) + + @property + def size(self) -> int: + return np.prod(self.shape) + + @property + def nbytes(self) -> int: + return self._ndarray.nbytes + + def reshape(self: _T, *args, **kwargs) -> _T: + new_data = self._ndarray.reshape(*args, **kwargs) + return self._from_backing_data(new_data) + + def ravel(self: _T, *args, **kwargs) -> _T: + new_data = self._ndarray.ravel(*args, **kwargs) + return self._from_backing_data(new_data) + + @property + def T(self: _T) -> _T: + new_data = self._ndarray.T + return self._from_backing_data(new_data) + + # ------------------------------------------------------------------------ + + def copy(self: _T) -> _T: + new_data = self._ndarray.copy() + return self._from_backing_data(new_data) + + def repeat(self: _T, repeats, axis=None) -> _T: + """ + Repeat elements of an array. + + See Also + -------- + numpy.ndarray.repeat + """ + nv.validate_repeat(tuple(), dict(axis=axis)) + new_data = self._ndarray.repeat(repeats, axis=axis) + return self._from_backing_data(new_data) + + def unique(self: _T) -> _T: + new_data = unique(self._ndarray) + return self._from_backing_data(new_data) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b5d9386aa62c3..bf14ed44e3a1c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -9,14 +9,7 @@ from pandas._libs import algos as libalgos, hashtable as htable from pandas._typing import ArrayLike, Dtype, Ordered, Scalar -from pandas.compat.numpy import function as nv -from pandas.util._decorators import ( - Appender, - Substitution, - cache_readonly, - deprecate_kwarg, - doc, -) +from pandas.util._decorators import cache_readonly, deprecate_kwarg, doc from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core.dtypes.cast import ( @@ -52,7 +45,6 @@ from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d from pandas.core.array_algos.transforms import shift from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray -from pandas.core.arrays.base import _extension_array_shared_docs from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com from pandas.core.construction import array, extract_array, sanitize_array @@ -449,14 +441,6 @@ def _formatter(self, boxed=False): # Defer to CategoricalFormatter's formatter. return None - def copy(self) -> "Categorical": - """ - Copy constructor. - """ - return self._constructor( - values=self._codes.copy(), dtype=self.dtype, fastpath=True - ) - def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Coerce this type to another dtype @@ -484,13 +468,6 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: raise ValueError("Cannot convert float NaN to integer") return np.array(self, dtype=dtype, copy=copy) - @cache_readonly - def size(self) -> int: - """ - Return the len of myself. - """ - return self._codes.size - @cache_readonly def itemsize(self) -> int: """ @@ -1194,20 +1171,6 @@ def map(self, mapper): __le__ = _cat_compare_op(operator.le) __ge__ = _cat_compare_op(operator.ge) - # for Series/ndarray like compat - @property - def shape(self): - """ - Shape of the Categorical. - - For internal compatibility with numpy arrays. - - Returns - ------- - shape : tuple - """ - return tuple([len(self._codes)]) - def shift(self, periods, fill_value=None): """ Shift Categorical by desired number of periods. @@ -1313,13 +1276,6 @@ def __setstate__(self, state): for k, v in state.items(): setattr(self, k, v) - @property - def T(self) -> "Categorical": - """ - Return transposed numpy array. - """ - return self - @property def nbytes(self): return self._codes.nbytes + self.dtype.categories.values.nbytes @@ -1865,12 +1821,6 @@ def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): ) return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value) - def __len__(self) -> int: - """ - The length of this Categorical. - """ - return len(self._codes) - def __iter__(self): """ Returns an Iterator over the values of this Categorical. @@ -2337,13 +2287,6 @@ def describe(self): return result - @Substitution(klass="Categorical") - @Appender(_extension_array_shared_docs["repeat"]) - def repeat(self, repeats, axis=None): - nv.validate_repeat(tuple(), dict(axis=axis)) - codes = self._codes.repeat(repeats) - return self._constructor(values=codes, dtype=self.dtype, fastpath=True) - # Implement the ExtensionArray interface @property def _can_hold_na(self): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index af5834f01c24c..145d6ffe4f078 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -465,24 +465,6 @@ def _from_backing_data(self: _T, arr: np.ndarray) -> _T: # ------------------------------------------------------------------ - @property - def ndim(self) -> int: - return self._data.ndim - - @property - def shape(self): - return self._data.shape - - def reshape(self, *args, **kwargs): - # Note: we drop any freq - data = self._data.reshape(*args, **kwargs) - return type(self)(data, dtype=self.dtype) - - def ravel(self, *args, **kwargs): - # Note: we drop any freq - data = self._data.ravel(*args, **kwargs) - return type(self)(data, dtype=self.dtype) - @property def _box_func(self): """ @@ -532,24 +514,12 @@ def _formatter(self, boxed=False): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods - @property - def nbytes(self): - return self._data.nbytes - def __array__(self, dtype=None) -> np.ndarray: # used for Timedelta/DatetimeArray, overwritten by PeriodArray if is_object_dtype(dtype): return np.array(list(self), dtype=object) return self._data - @property - def size(self) -> int: - """The number of elements in this array.""" - return np.prod(self.shape) - - def __len__(self) -> int: - return len(self._data) - def __getitem__(self, key): """ This getitem defers to the underlying array, which by-definition can @@ -680,10 +650,6 @@ def view(self, dtype=None): # ------------------------------------------------------------------ # ExtensionArray Interface - def unique(self): - result = unique1d(self.asi8) - return type(self)(result, dtype=self.dtype) - @classmethod def _concat_same_type(cls, to_concat, axis: int = 0): @@ -927,18 +893,6 @@ def searchsorted(self, value, side="left", sorter=None): # TODO: Use datetime64 semantics for sorting, xref GH#29844 return self.asi8.searchsorted(value, side=side, sorter=sorter) - def repeat(self, repeats, *args, **kwargs): - """ - Repeat elements of an array. - - See Also - -------- - numpy.ndarray.repeat - """ - nv.validate_repeat(args, kwargs) - values = self._data.repeat(repeats) - return type(self)(values.view("i8"), dtype=self.dtype) - def value_counts(self, dropna=False): """ Return a Series containing counts of unique values. diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 6806ed2afcf5c..b9384aa1bb092 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -17,8 +17,9 @@ from pandas import compat from pandas.core import nanops -from pandas.core.algorithms import searchsorted, take, unique +from pandas.core.algorithms import searchsorted from pandas.core.array_algos import masked_reductions +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer @@ -120,7 +121,9 @@ def itemsize(self) -> int: return self._dtype.itemsize -class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): +class PandasArray( + NDArrayBackedExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin +): """ A pandas ExtensionArray for NumPy data. @@ -191,6 +194,9 @@ def _from_factorized(cls, values, original) -> "PandasArray": def _concat_same_type(cls, to_concat) -> "PandasArray": return cls(np.concatenate(to_concat)) + def _from_backing_data(self, arr: np.ndarray) -> "PandasArray": + return type(self)(arr) + # ------------------------------------------------------------------------ # Data @@ -272,13 +278,6 @@ def __setitem__(self, key, value) -> None: self._ndarray[key] = value - def __len__(self) -> int: - return len(self._ndarray) - - @property - def nbytes(self) -> int: - return self._ndarray.nbytes - def isna(self) -> np.ndarray: return isna(self._ndarray) @@ -311,17 +310,11 @@ def fillna( new_values = self.copy() return new_values - def take(self, indices, allow_fill=False, fill_value=None) -> "PandasArray": + def _validate_fill_value(self, fill_value): if fill_value is None: # Primarily for subclasses fill_value = self.dtype.na_value - result = take( - self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value - ) - return type(self)(result) - - def copy(self) -> "PandasArray": - return type(self)(self._ndarray.copy()) + return fill_value def _values_for_argsort(self) -> np.ndarray: return self._ndarray @@ -329,9 +322,6 @@ def _values_for_argsort(self) -> np.ndarray: def _values_for_factorize(self) -> Tuple[np.ndarray, int]: return self._ndarray, -1 - def unique(self) -> "PandasArray": - return type(self)(unique(self._ndarray)) - # ------------------------------------------------------------------------ # Reductions