Skip to content

REF: move sharable methods to ExtensionIndex #30717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jan 9, 2020
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
95eed25
implement ExtensionIndex
jbrockmendel Jan 3, 2020
3e39309
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 3, 2020
3dbf7f6
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 4, 2020
f9d6027
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 4, 2020
40ca860
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 4, 2020
785a27c
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 4, 2020
90c2657
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 4, 2020
c48e2d3
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 5, 2020
9fc5c31
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 5, 2020
45f0c46
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 5, 2020
be7eef6
remove comment
jbrockmendel Jan 5, 2020
0ea90b9
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 6, 2020
6264814
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 6, 2020
1a5387a
troubleshoot perf
jbrockmendel Jan 6, 2020
f8832ed
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 7, 2020
b0f52a5
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 8, 2020
ef38914
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jan 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ def astype(self, dtype, copy=True):
if dtype == self.dtype:
return self.copy() if copy else self

return super().astype(dtype=dtype, copy=copy)
return Index.astype(self, dtype=dtype, copy=copy)

@cache_readonly
def _isnan(self):
Expand Down
23 changes: 0 additions & 23 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,6 @@ class DatetimeIndexOpsMixin(ExtensionIndex, ExtensionOpsMixin):
def is_all_dates(self) -> bool:
return True

def unique(self, level=None):
if level is not None:
self._validate_index_level(level)

result = self._data.unique()

# Note: if `self` is already unique, then self.unique() should share
# a `freq` with self. If not already unique, then self.freq must be
# None, so again sharing freq is correct.
return self._shallow_copy(result._data)

@classmethod
def _create_comparison_method(cls, op):
"""
Expand Down Expand Up @@ -549,18 +538,6 @@ def _concat_same_dtype(self, to_concat, name):

return self._simple_new(new_data, **attribs)

@Appender(_index_shared_docs["astype"])
def astype(self, dtype, copy=True):
if is_dtype_equal(self.dtype, dtype) and copy is False:
# Ensure that self.astype(self.dtype) is self
return self

new_values = self._data.astype(dtype, copy=copy)

# pass copy=False because any copying will be done in the
# _data.astype call above
return Index(new_values, dtype=new_values.dtype, name=self.name, copy=False)

def shift(self, periods=1, freq=None):
"""
Shift index by desired number of time frequency increments.
Expand Down
14 changes: 0 additions & 14 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,20 +891,6 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
else:
raise

# --------------------------------------------------------------------
# Wrapping DatetimeArray

def __getitem__(self, key):
result = self._data.__getitem__(key)
if is_scalar(result):
return result
elif result.ndim > 1:
# To support MPL which performs slicing with 2 dim
# even though it only has 1 dim by definition
assert isinstance(result, np.ndarray), result
return result
return type(self)(result, name=self.name)

# --------------------------------------------------------------------

@Substitution(klass="DatetimeIndex")
Expand Down
59 changes: 56 additions & 3 deletions pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@
"""
from typing import List

import numpy as np

from pandas.compat.numpy import function as nv
from pandas.util._decorators import cache_readonly
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import ensure_platform_int
from pandas.core.dtypes.common import ensure_platform_int, is_dtype_equal
from pandas.core.dtypes.generic import ABCSeries

from pandas.core.arrays import ExtensionArray
from pandas.core.ops import get_op_result_name

from .base import Index
from .base import Index, _index_shared_docs


def inherit_from_data(name: str, delegate, cache: bool = False):
Expand Down Expand Up @@ -164,6 +166,57 @@ class ExtensionIndex(Index):

_data: ExtensionArray

def __getitem__(self, key):
result = self._data[key]
if isinstance(result, type(self._data)):
return type(self)(result, name=self.name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use a faster constructor (simple_new ?) when we just want to wrap the correct type of ExtensionArray in the index?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that'd work. IIRC there were some corner cases involving CategoricalIndex.dtype, not sure if those are relevant here


# Includes cases where we get a 2D ndarray back for MPL compat
return result

def __iter__(self):
return self._data.__iter__()

@property
def _ndarray_values(self) -> np.ndarray:
return self._data._ndarray_values

@Appender(_index_shared_docs["astype"])
def astype(self, dtype, copy=True):
if is_dtype_equal(self.dtype, dtype) and copy is False:
# Ensure that self.astype(self.dtype) is self
return self

new_values = self._data.astype(dtype, copy=copy)

# pass copy=False because any copying will be done in the
# _data.astype call above
return Index(new_values, dtype=new_values.dtype, name=self.name, copy=False)

def dropna(self, how="any"):
if how not in ("any", "all"):
raise ValueError(f"invalid how option: {how}")

if self.hasnans:
return self._shallow_copy(self._data[~self._isnan])
return self._shallow_copy()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you overwriting the base Index one?

Also, this dropped the docstring.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the base class uses ._values, where we want ._data here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But _values and _data is the same?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess. Past-me must have thought it not-obvious that this would always hold. If it can be removed, go for it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you also see my docstring comment?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did. In this case I think removing the method makes sense. More generally I wonder if we can use a metaclass or something to automatically inherit docstrings and remove a lot of boilerplate (cc @bashtage IIRC you do something like this in arch)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe open a new issue to see if we can do this smarter?

But for 1.0.0, I would just add back the docstring


def _get_unique_index(self, dropna=False):
if self.is_unique and not dropna:
return self

result = self._data.unique()
if dropna and self.hasnans:
result = result[~result.isna()]
return self._shallow_copy(result)

def unique(self, level=None):
if level is not None:
self._validate_index_level(level)

result = self._data.unique()
return self._shallow_copy(result)

def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
nv.validate_take(tuple(), kwargs)
indices = ensure_platform_int(indices)
Expand Down
10 changes: 1 addition & 9 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def astype(self, dtype, copy=True):
new_values = self.values.astype(dtype, copy=copy)
if is_interval_dtype(new_values):
return self._shallow_copy(new_values.left, new_values.right)
return super().astype(dtype, copy=copy)
return Index.astype(self, dtype, copy=copy)

@property
def inferred_type(self) -> str:
Expand Down Expand Up @@ -1003,14 +1003,6 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
)
return self._shallow_copy(result)

def __getitem__(self, value):
result = self._data[value]
if isinstance(result, IntervalArray):
return self._shallow_copy(result)
else:
# scalar
return result

# --------------------------------------------------------------------
# Rendering Methods
# __repr__ associated methods are based on MultiIndex
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,15 +588,6 @@ def get_indexer_non_unique(self, target):
indexer, missing = self._int64index.get_indexer_non_unique(target)
return ensure_platform_int(indexer), missing

def _get_unique_index(self, dropna=False):
if self.is_unique and not dropna:
return self

result = self._data.unique()
if dropna and self.hasnans:
result = result[~result.isna()]
return self._shallow_copy(result)

def get_loc(self, key, method=None, tolerance=None):
"""
Get integer location for requested label
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,15 +222,6 @@ def _formatter_func(self):

return _get_format_timedelta64(self, box=True)

# -------------------------------------------------------------------
# Wrapping TimedeltaArray

def __getitem__(self, key):
result = self._data.__getitem__(key)
if is_scalar(result):
return result
return type(self)(result, name=self.name)

# -------------------------------------------------------------------

@Appender(_index_shared_docs["astype"])
Expand Down