Skip to content

REF: implement Index._can_use_libjoin #43692

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3059,6 +3059,7 @@ def _union(self, other: Index, sort):
and self.is_monotonic
and other.is_monotonic
and not (self.has_duplicates and other.has_duplicates)
and self._can_use_libjoin
):
# Both are unique and monotonic, so can use outer join
try:
Expand Down Expand Up @@ -3189,13 +3190,7 @@ def _intersection(self, other: Index, sort=False):
"""
intersection specialized to the case with matching dtypes.
"""
if (
self.is_monotonic
and other.is_monotonic
and not is_interval_dtype(self.dtype)
):
# For IntervalIndex _inner_indexer is not more performant than get_indexer,
# so don't take this fastpath
if self.is_monotonic and other.is_monotonic and self._can_use_libjoin:
try:
result = self._inner_indexer(other)[0]
except TypeError:
Expand Down Expand Up @@ -4178,12 +4173,15 @@ def join(
return self._join_non_unique(other, how=how)
elif not self.is_unique or not other.is_unique:
if self.is_monotonic and other.is_monotonic:
return self._join_monotonic(other, how=how)
if self._can_use_libjoin:
# otherwise we will fall through to _join_via_get_indexer
return self._join_monotonic(other, how=how)
else:
return self._join_non_unique(other, how=how)
elif (
self.is_monotonic
and other.is_monotonic
and self._can_use_libjoin
and (
not isinstance(self, ABCMultiIndex)
or not any(is_categorical_dtype(dtype) for dtype in self.dtypes)
Expand Down Expand Up @@ -4545,6 +4543,15 @@ def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _Ind
name = get_op_result_name(self, other)
return self._constructor._with_infer(joined, name=name)

@cache_readonly
def _can_use_libjoin(self) -> bool:
"""
Whether we can use the fastpaths implemented in _libs.join.

Returns False when ``self.dtype`` is an interval dtype and True otherwise.
"""
# Note: this will need to be updated when e.g. Nullable dtypes
# are supported in Indexes.
return not is_interval_dtype(self.dtype)

# --------------------------------------------------------------------
# Uncategorized Methods

Expand Down
5 changes: 0 additions & 5 deletions pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,6 @@ def searchsorted(
def _get_engine_target(self) -> np.ndarray:
# Expose the backing data (self._data) as a NumPy ndarray via np.asarray.
return np.asarray(self._data)

def _from_join_target(self, result: np.ndarray) -> ArrayLike:
# ATM this is only for IntervalIndex, implicit assumption
# about _get_engine_target
return type(self._data)._from_sequence(result, dtype=self.dtype)

def delete(self, loc):
"""
Make new Index with passed location(-s) deleted
Expand Down
15 changes: 5 additions & 10 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
from pandas.util._exceptions import rewrite_exception

from pandas.core.dtypes.cast import (
construct_1d_object_array_from_listlike,
find_common_type,
infer_dtype_from_scalar,
maybe_box_datetimelike,
Expand Down Expand Up @@ -894,17 +893,13 @@ def _is_all_dates(self) -> bool:
return False

def _get_join_target(self) -> np.ndarray:
# constructing tuples is much faster than constructing Intervals
tups = list(zip(self.left, self.right))
target = construct_1d_object_array_from_listlike(tups)
return target
# Note: we _could_ use libjoin functions by either casting to object
# dtype or constructing tuples (faster than constructing Intervals)
# but the libjoin fastpaths are no longer fast in these cases.
raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")

def _from_join_target(self, result):
left, right = list(zip(*result))
arr = type(self._data).from_arrays(
left, right, dtype=self.dtype, closed=self.closed
)
return type(self)._simple_new(arr, name=self.name)
raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")

# TODO: arithmetic operations

Expand Down