Skip to content

Commit b490507

Browse files
authored
REF: implement Index._can_use_libjoin (#43692)
1 parent aad2cba commit b490507

File tree

3 files changed

+20
-23
lines changed

3 files changed

+20
-23
lines changed

pandas/core/indexes/base.py

+15-8
Original file line number | Diff line number | Diff line change
@@ -3059,6 +3059,7 @@ def _union(self, other: Index, sort):
30593059
and self.is_monotonic
30603060
and other.is_monotonic
30613061
and not (self.has_duplicates and other.has_duplicates)
3062+
and self._can_use_libjoin
30623063
):
30633064
# Both are unique and monotonic, so can use outer join
30643065
try:
@@ -3189,13 +3190,7 @@ def _intersection(self, other: Index, sort=False):
31893190
"""
31903191
intersection specialized to the case with matching dtypes.
31913192
"""
3192-
if (
3193-
self.is_monotonic
3194-
and other.is_monotonic
3195-
and not is_interval_dtype(self.dtype)
3196-
):
3197-
# For IntervalIndex _inner_indexer is not more performant than get_indexer,
3198-
# so don't take this fastpath
3193+
if self.is_monotonic and other.is_monotonic and self._can_use_libjoin:
31993194
try:
32003195
result = self._inner_indexer(other)[0]
32013196
except TypeError:
@@ -4178,12 +4173,15 @@ def join(
41784173
return self._join_non_unique(other, how=how)
41794174
elif not self.is_unique or not other.is_unique:
41804175
if self.is_monotonic and other.is_monotonic:
4181-
return self._join_monotonic(other, how=how)
4176+
if self._can_use_libjoin:
4177+
# otherwise we will fall through to _join_via_get_indexer
4178+
return self._join_monotonic(other, how=how)
41824179
else:
41834180
return self._join_non_unique(other, how=how)
41844181
elif (
41854182
self.is_monotonic
41864183
and other.is_monotonic
4184+
and self._can_use_libjoin
41874185
and (
41884186
not isinstance(self, ABCMultiIndex)
41894187
or not any(is_categorical_dtype(dtype) for dtype in self.dtypes)
@@ -4545,6 +4543,15 @@ def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _Ind
45454543
name = get_op_result_name(self, other)
45464544
return self._constructor._with_infer(joined, name=name)
45474545

4546+
@cache_readonly
4547+
def _can_use_libjoin(self) -> bool:
4548+
"""
4549+
Whether we can use the fastpaths implemented in _libs.join
4550+
"""
4551+
# Note: this will need to be updated when e.g. Nullable dtypes
4552+
# are supported in Indexes.
4553+
return not is_interval_dtype(self.dtype)
4554+
45484555
# --------------------------------------------------------------------
45494556
# Uncategorized Methods
45504557

pandas/core/indexes/extension.py

-5
Original file line number | Diff line number | Diff line change
@@ -246,11 +246,6 @@ def searchsorted(
246246
def _get_engine_target(self) -> np.ndarray:
247247
return np.asarray(self._data)
248248

249-
def _from_join_target(self, result: np.ndarray) -> ArrayLike:
250-
# ATM this is only for IntervalIndex, implicit assumption
251-
# about _get_engine_target
252-
return type(self._data)._from_sequence(result, dtype=self.dtype)
253-
254249
def delete(self, loc):
255250
"""
256251
Make new Index with passed location(-s) deleted

pandas/core/indexes/interval.py

+5-10
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@
3838
from pandas.util._exceptions import rewrite_exception
3939

4040
from pandas.core.dtypes.cast import (
41-
construct_1d_object_array_from_listlike,
4241
find_common_type,
4342
infer_dtype_from_scalar,
4443
maybe_box_datetimelike,
@@ -894,17 +893,13 @@ def _is_all_dates(self) -> bool:
894893
return False
895894

896895
def _get_join_target(self) -> np.ndarray:
897-
# constructing tuples is much faster than constructing Intervals
898-
tups = list(zip(self.left, self.right))
899-
target = construct_1d_object_array_from_listlike(tups)
900-
return target
896+
# Note: we _could_ use libjoin functions by either casting to object
897+
# dtype or constructing tuples (faster than constructing Intervals)
898+
# but the libjoin fastpaths are no longer fast in these cases.
899+
raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")
901900

902901
def _from_join_target(self, result):
903-
left, right = list(zip(*result))
904-
arr = type(self._data).from_arrays(
905-
left, right, dtype=self.dtype, closed=self.closed
906-
)
907-
return type(self)._simple_new(arr, name=self.name)
902+
raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")
908903

909904
# TODO: arithmetic operations
910905

0 commit comments

Comments
 (0)