Skip to content

REF: mix NDArrayBackedExtensionArray into PandasArray #33797

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 59 additions & 2 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Any, Sequence, TypeVar
from typing import Any, Sequence, Tuple, TypeVar

import numpy as np

from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError

from pandas.core.algorithms import take
from pandas.core.algorithms import take, unique
from pandas.core.arrays.base import ExtensionArray

_T = TypeVar("_T", bound="NDArrayBackedExtensionArray")
Expand Down Expand Up @@ -60,3 +61,59 @@ def _validate_fill_value(self, fill_value):
ValueError
"""
raise AbstractMethodError(self)

# ------------------------------------------------------------------------

@property
def shape(self) -> Tuple[int, ...]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't these always 1-D, i.e. `Tuple[int]`?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No: DatetimeArray, TimedeltaArray and PandasArray (DTA/TDA/PA) are sometimes 2D.

return self._ndarray.shape

def __len__(self) -> int:
return self.shape[0]

@property
def ndim(self) -> int:
    """
    Number of dimensions, i.e. the number of entries in ``self.shape``.
    """
    dims = self.shape
    return len(dims)

@property
def size(self) -> int:
    """
    Number of elements in the array, counted across all dimensions.

    The value is read from the backing ndarray rather than computed with
    ``np.prod(self.shape)``: ``ndarray.size`` is a builtin ``int``, whereas
    ``np.prod`` returns a NumPy scalar (and a float for an empty shape),
    which does not match the annotated return type.
    """
    return self._ndarray.size

@property
def nbytes(self) -> int:
    """
    The ``nbytes`` value reported by the backing ndarray.
    """
    backing = self._ndarray
    return backing.nbytes

def reshape(self: _T, *args, **kwargs) -> _T:
    """
    Reshape the backing ndarray and wrap the result in a new array.

    All positional and keyword arguments are forwarded unchanged to the
    backing ndarray's ``reshape``.
    """
    return self._from_backing_data(self._ndarray.reshape(*args, **kwargs))

def ravel(self: _T, *args, **kwargs) -> _T:
    """
    Flatten the backing ndarray and wrap the result in a new array.

    All positional and keyword arguments are forwarded unchanged to the
    backing ndarray's ``ravel``.
    """
    return self._from_backing_data(self._ndarray.ravel(*args, **kwargs))

@property
def T(self: _T) -> _T:
    """
    Transpose of the array, built from the backing ndarray's ``T``.
    """
    return self._from_backing_data(self._ndarray.T)

# ------------------------------------------------------------------------

def copy(self: _T) -> _T:
    """
    Return a new array wrapping a copy of the backing ndarray.
    """
    duplicated = self._ndarray.copy()
    return self._from_backing_data(duplicated)

def repeat(self: _T, repeats, axis=None) -> _T:
    """
    Repeat elements of an array.

    See Also
    --------
    numpy.ndarray.repeat
    """
    # ``axis`` is run through the numpy-compat validator before the
    # backing ndarray is touched.
    nv.validate_repeat((), {"axis": axis})
    repeated = self._ndarray.repeat(repeats, axis=axis)
    return self._from_backing_data(repeated)

def unique(self: _T) -> _T:
    """
    Return a new array wrapping the result of applying the module-level
    ``unique`` helper to the backing ndarray.
    """
    return self._from_backing_data(unique(self._ndarray))
59 changes: 1 addition & 58 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,7 @@

from pandas._libs import algos as libalgos, hashtable as htable
from pandas._typing import ArrayLike, Dtype, Ordered, Scalar
from pandas.compat.numpy import function as nv
from pandas.util._decorators import (
Appender,
Substitution,
cache_readonly,
deprecate_kwarg,
doc,
)
from pandas.util._decorators import cache_readonly, deprecate_kwarg, doc
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -52,7 +45,6 @@
from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
from pandas.core.arrays.base import _extension_array_shared_docs
from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
import pandas.core.common as com
from pandas.core.construction import array, extract_array, sanitize_array
Expand Down Expand Up @@ -449,14 +441,6 @@ def _formatter(self, boxed=False):
# Defer to CategoricalFormatter's formatter.
return None

def copy(self) -> "Categorical":
"""
Copy constructor.
"""
return self._constructor(
values=self._codes.copy(), dtype=self.dtype, fastpath=True
)

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
"""
Coerce this type to another dtype
Expand Down Expand Up @@ -484,13 +468,6 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
raise ValueError("Cannot convert float NaN to integer")
return np.array(self, dtype=dtype, copy=copy)

@cache_readonly
def size(self) -> int:
"""
Return the len of myself.
"""
return self._codes.size

@cache_readonly
def itemsize(self) -> int:
"""
Expand Down Expand Up @@ -1194,20 +1171,6 @@ def map(self, mapper):
__le__ = _cat_compare_op(operator.le)
__ge__ = _cat_compare_op(operator.ge)

# for Series/ndarray like compat
@property
def shape(self):
"""
Shape of the Categorical.

For internal compatibility with numpy arrays.

Returns
-------
shape : tuple
"""
return tuple([len(self._codes)])

def shift(self, periods, fill_value=None):
"""
Shift Categorical by desired number of periods.
Expand Down Expand Up @@ -1313,13 +1276,6 @@ def __setstate__(self, state):
for k, v in state.items():
setattr(self, k, v)

@property
def T(self) -> "Categorical":
"""
Return transposed numpy array.
"""
return self

@property
def nbytes(self):
return self._codes.nbytes + self.dtype.categories.values.nbytes
Expand Down Expand Up @@ -1865,12 +1821,6 @@ def take_nd(self, indexer, allow_fill: bool = False, fill_value=None):
)
return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value)

def __len__(self) -> int:
"""
The length of this Categorical.
"""
return len(self._codes)

def __iter__(self):
"""
Returns an Iterator over the values of this Categorical.
Expand Down Expand Up @@ -2337,13 +2287,6 @@ def describe(self):

return result

@Substitution(klass="Categorical")
@Appender(_extension_array_shared_docs["repeat"])
def repeat(self, repeats, axis=None):
nv.validate_repeat(tuple(), dict(axis=axis))
codes = self._codes.repeat(repeats)
return self._constructor(values=codes, dtype=self.dtype, fastpath=True)

# Implement the ExtensionArray interface
@property
def _can_hold_na(self):
Expand Down
46 changes: 0 additions & 46 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,24 +465,6 @@ def _from_backing_data(self: _T, arr: np.ndarray) -> _T:

# ------------------------------------------------------------------

@property
def ndim(self) -> int:
return self._data.ndim

@property
def shape(self):
return self._data.shape

def reshape(self, *args, **kwargs):
# Note: we drop any freq
data = self._data.reshape(*args, **kwargs)
return type(self)(data, dtype=self.dtype)

def ravel(self, *args, **kwargs):
# Note: we drop any freq
data = self._data.ravel(*args, **kwargs)
return type(self)(data, dtype=self.dtype)

@property
def _box_func(self):
"""
Expand Down Expand Up @@ -532,24 +514,12 @@ def _formatter(self, boxed=False):
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

@property
def nbytes(self):
return self._data.nbytes

def __array__(self, dtype=None) -> np.ndarray:
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
if is_object_dtype(dtype):
return np.array(list(self), dtype=object)
return self._data

@property
def size(self) -> int:
"""The number of elements in this array."""
return np.prod(self.shape)

def __len__(self) -> int:
return len(self._data)

def __getitem__(self, key):
"""
This getitem defers to the underlying array, which by-definition can
Expand Down Expand Up @@ -680,10 +650,6 @@ def view(self, dtype=None):
# ------------------------------------------------------------------
# ExtensionArray Interface

def unique(self):
result = unique1d(self.asi8)
return type(self)(result, dtype=self.dtype)

@classmethod
def _concat_same_type(cls, to_concat, axis: int = 0):

Expand Down Expand Up @@ -927,18 +893,6 @@ def searchsorted(self, value, side="left", sorter=None):
# TODO: Use datetime64 semantics for sorting, xref GH#29844
return self.asi8.searchsorted(value, side=side, sorter=sorter)

def repeat(self, repeats, *args, **kwargs):
"""
Repeat elements of an array.

See Also
--------
numpy.ndarray.repeat
"""
nv.validate_repeat(args, kwargs)
values = self._data.repeat(repeats)
return type(self)(values.view("i8"), dtype=self.dtype)

def value_counts(self, dropna=False):
"""
Return a Series containing counts of unique values.
Expand Down
30 changes: 10 additions & 20 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@

from pandas import compat
from pandas.core import nanops
from pandas.core.algorithms import searchsorted, take, unique
from pandas.core.algorithms import searchsorted
from pandas.core.array_algos import masked_reductions
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
Expand Down Expand Up @@ -120,7 +121,9 @@ def itemsize(self) -> int:
return self._dtype.itemsize


class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
class PandasArray(
NDArrayBackedExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin
):
"""
A pandas ExtensionArray for NumPy data.

Expand Down Expand Up @@ -191,6 +194,9 @@ def _from_factorized(cls, values, original) -> "PandasArray":
def _concat_same_type(cls, to_concat) -> "PandasArray":
return cls(np.concatenate(to_concat))

def _from_backing_data(self, arr: np.ndarray) -> "PandasArray":
return type(self)(arr)

# ------------------------------------------------------------------------
# Data

Expand Down Expand Up @@ -272,13 +278,6 @@ def __setitem__(self, key, value) -> None:

self._ndarray[key] = value

def __len__(self) -> int:
return len(self._ndarray)

@property
def nbytes(self) -> int:
return self._ndarray.nbytes

def isna(self) -> np.ndarray:
return isna(self._ndarray)

Expand Down Expand Up @@ -311,27 +310,18 @@ def fillna(
new_values = self.copy()
return new_values

def take(self, indices, allow_fill=False, fill_value=None) -> "PandasArray":
def _validate_fill_value(self, fill_value):
if fill_value is None:
# Primarily for subclasses
fill_value = self.dtype.na_value
result = take(
self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value
)
return type(self)(result)

def copy(self) -> "PandasArray":
return type(self)(self._ndarray.copy())
return fill_value

def _values_for_argsort(self) -> np.ndarray:
return self._ndarray

def _values_for_factorize(self) -> Tuple[np.ndarray, int]:
return self._ndarray, -1

def unique(self) -> "PandasArray":
return type(self)(unique(self._ndarray))

# ------------------------------------------------------------------------
# Reductions

Expand Down