Skip to content

REF: share delete, putmask, insert between ndarray-backed EA indexes #37529

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 31, 2020
4 changes: 4 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def _box_func(self, x):
"""
return x

def _validate_insert_value(self, value):
    """
    Validate ``value`` for insertion into this array.

    This base implementation always raises ``AbstractMethodError``, so a
    subclass has to provide its own version. The result is spliced into the
    backing ndarray by ``NDArrayBackedExtensionIndex.insert``.
    """
    # used by NDArrayBackedExtensionIndex.insert
    raise AbstractMethodError(self)

# ------------------------------------------------------------------------

def take(
Expand Down
53 changes: 2 additions & 51 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from pandas.core.construction import extract_array
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
from pandas.core.indexes.extension import ExtensionIndex, inherit_names
from pandas.core.indexes.extension import NDArrayBackedExtensionIndex, inherit_names
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name

Expand Down Expand Up @@ -66,7 +66,7 @@
typ="method",
overwrite=True,
)
class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate):
class CategoricalIndex(NDArrayBackedExtensionIndex, accessor.PandasDelegate):
"""
Index based on an underlying :class:`Categorical`.

Expand Down Expand Up @@ -425,17 +425,6 @@ def where(self, cond, other=None):
cat = Categorical(values, dtype=self.dtype)
return type(self)._simple_new(cat, name=self.name)

def putmask(self, mask, value):
try:
code_value = self._data._validate_where_value(value)
except (TypeError, ValueError):
return self.astype(object).putmask(mask, value)

codes = self._data._ndarray.copy()
np.putmask(codes, mask, code_value)
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=self.name)

def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
"""
Create index with target's values (move/add/delete values as necessary)
Expand Down Expand Up @@ -665,44 +654,6 @@ def map(self, mapper):
mapped = self._values.map(mapper)
return Index(mapped, name=self.name)

def delete(self, loc):
"""
Make new Index with passed location(-s) deleted

Returns
-------
new_index : Index
"""
codes = np.delete(self.codes, loc)
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=self.name)

def insert(self, loc: int, item):
"""
Make new Index inserting new item at location. Follows
Python list.append semantics for negative values

Parameters
----------
loc : int
item : object

Returns
-------
new_index : Index

Raises
------
ValueError if the item is not in the categories

"""
code = self._data._validate_insert_value(item)

codes = self.codes
codes = np.concatenate((codes[:loc], [code], codes[loc:]))
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=self.name)

def _concat(self, to_concat, name):
# if calling index is category, don't check dtype of others
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
Expand Down
80 changes: 32 additions & 48 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.extension import (
ExtensionIndex,
NDArrayBackedExtensionIndex,
inherit_names,
make_wrapped_arith_op,
)
Expand Down Expand Up @@ -82,7 +82,7 @@ def wrapper(left, right):
cache=True,
)
@inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin)
class DatetimeIndexOpsMixin(ExtensionIndex):
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
"""
Common ops mixin to support a unified interface datetimelike Index.
"""
Expand Down Expand Up @@ -191,7 +191,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):

maybe_slice = lib.maybe_indices_to_slice(indices, len(self))

result = ExtensionIndex.take(
result = NDArrayBackedExtensionIndex.take(
self, indices, axis, allow_fill, fill_value, **kwargs
)
if isinstance(maybe_slice, slice):
Expand Down Expand Up @@ -496,17 +496,6 @@ def where(self, cond, other=None):
arr = self._data._from_backing_data(result)
return type(self)._simple_new(arr, name=self.name)

def putmask(self, mask, value):
try:
value = self._data._validate_where_value(value)
except (TypeError, ValueError):
return self.astype(object).putmask(mask, value)

result = self._data._ndarray.copy()
np.putmask(result, mask, value)
arr = self._data._from_backing_data(result)
return type(self)._simple_new(arr, name=self.name)

def _summary(self, name=None) -> str:
"""
Return a summarized representation.
Expand Down Expand Up @@ -575,41 +564,30 @@ def shift(self, periods=1, freq=None):
# --------------------------------------------------------------------
# List-like Methods

def delete(self, loc):
new_i8s = np.delete(self.asi8, loc)

def _get_delete_freq(self, loc: int):
"""
Find the `freq` for self.delete(loc).
"""
freq = None
if is_period_dtype(self.dtype):
freq = self.freq
elif is_integer(loc):
if loc in (0, -len(self), -1, len(self) - 1):
freq = self.freq
else:
if is_list_like(loc):
loc = lib.maybe_indices_to_slice(
np.asarray(loc, dtype=np.intp), len(self)
)
if isinstance(loc, slice) and loc.step in (1, None):
if loc.start in (0, None) or loc.stop in (len(self), None):
elif self.freq is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure that these specific freq routines are really worth it given the extra code. Can't we just re-infer the freq? (Sure, if we do this constantly it's not as performant, but this is not likely a common case.)

ok for here, but if you can have a look at some point

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a reasonable thought. I recently found a PITA bug in infer_freq though, so definitely not until that is sorted out.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kk, maybe worthwhile to create an issue to see what the implications are and address this later.

if is_integer(loc):
if loc in (0, -len(self), -1, len(self) - 1):
freq = self.freq
else:
if is_list_like(loc):
loc = lib.maybe_indices_to_slice(
np.asarray(loc, dtype=np.intp), len(self)
)
if isinstance(loc, slice) and loc.step in (1, None):
if loc.start in (0, None) or loc.stop in (len(self), None):
freq = self.freq
return freq

arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
return type(self)._simple_new(arr, name=self.name)

def insert(self, loc: int, item):
def _get_insert_freq(self, loc, item):
"""
Make new Index inserting new item at location

Parameters
----------
loc : int
item : object
if not either a Python datetime or a numpy integer-like, returned
Index dtype will be object rather than datetime.

Returns
-------
new_index : Index
Find the `freq` for self.insert(loc, item).
"""
value = self._data._validate_insert_value(item)
item = self._data._box_func(value)
Expand All @@ -630,14 +608,20 @@ def insert(self, loc: int, item):
# Adding a single item to an empty index may preserve freq
if self.freq.is_on_offset(item):
freq = self.freq
return freq

arr = self._data
@doc(NDArrayBackedExtensionIndex.delete)
def delete(self, loc):
result = super().delete(loc)
result._data._freq = self._get_delete_freq(loc)
return result

new_values = np.concatenate([arr._ndarray[:loc], [value], arr._ndarray[loc:]])
new_arr = self._data._from_backing_data(new_values)
new_arr._freq = freq
@doc(NDArrayBackedExtensionIndex.insert)
def insert(self, loc: int, item):
result = super().insert(loc, item)

return type(self)._simple_new(new_arr, name=self.name)
result._data._freq = self._get_insert_freq(loc, item)
return result

# --------------------------------------------------------------------
# Join/Set Methods
Expand Down
57 changes: 57 additions & 0 deletions pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

from pandas.core.arrays import ExtensionArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.indexers import deprecate_ndim_indexing
from pandas.core.indexes.base import Index
from pandas.core.ops import get_op_result_name
Expand Down Expand Up @@ -281,3 +282,59 @@ def astype(self, dtype, copy=True):
@cache_readonly
def _isnan(self) -> np.ndarray:
return self._data.isna()


class NDArrayBackedExtensionIndex(ExtensionIndex):
    """
    ExtensionIndex whose ``_data`` is an NDArrayBackedExtensionArray.

    ``delete``, ``insert`` and ``putmask`` are implemented once here: each
    operates on the array's backing ndarray (``_data._ndarray``) and wraps
    the result again with ``_data._from_backing_data``.
    """

    _data: NDArrayBackedExtensionArray

    def delete(self, loc):
        """
        Make new Index with passed location(-s) deleted.

        Returns
        -------
        new_index : Index
        """
        remaining = np.delete(self._data._ndarray, loc)
        trimmed = self._data._from_backing_data(remaining)
        return type(self)._simple_new(trimmed, name=self.name)

    def insert(self, loc: int, item):
        """
        Make new Index inserting new item at location.

        Follows Python list.append semantics for negative values; ``loc`` is
        applied as a slice bound on the backing ndarray.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        new_index : Index

        Raises
        ------
        ValueError if the item is not valid for this dtype.
        """
        data = self._data
        # Validate before touching the backing values so that an invalid item
        # raises without any array work having been done.
        validated = data._validate_insert_value(item)

        head = data._ndarray[:loc]
        tail = data._ndarray[loc:]
        combined = np.concatenate((head, [validated], tail))

        widened = data._from_backing_data(combined)
        return type(self)._simple_new(widened, name=self.name)

    def putmask(self, mask, value):
        """
        Return a new index with ``value`` written at the positions in ``mask``.

        When the array's ``_validate_where_value`` rejects ``value`` with a
        TypeError or ValueError, the work is delegated to ``putmask`` on the
        object-dtype conversion of this index.
        """
        try:
            converted = self._data._validate_where_value(value)
        except (TypeError, ValueError):
            # Fall back to the object-dtype index with the original value.
            return self.astype(object).putmask(mask, value)

        # np.putmask writes in place, so operate on a copy of the backing data.
        filled = self._data._ndarray.copy()
        np.putmask(filled, mask, converted)

        patched = self._data._from_backing_data(filled)
        return type(self)._simple_new(patched, name=self.name)
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ def where(self, cond, other=None):
other = self._na_value
values = np.where(cond, self._values, other)
result = IntervalArray(values)
return self._shallow_copy(result)
return type(self)._simple_new(result, name=self.name)

def delete(self, loc):
"""
Expand Down