diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 95387c9557b1a..e8b68d41983d5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3059,6 +3059,7 @@ def _union(self, other: Index, sort): and self.is_monotonic and other.is_monotonic and not (self.has_duplicates and other.has_duplicates) + and self._can_use_libjoin ): # Both are unique and monotonic, so can use outer join try: @@ -3189,13 +3190,7 @@ def _intersection(self, other: Index, sort=False): """ intersection specialized to the case with matching dtypes. """ - if ( - self.is_monotonic - and other.is_monotonic - and not is_interval_dtype(self.dtype) - ): - # For IntervalIndex _inner_indexer is not more performant than get_indexer, - # so don't take this fastpath + if self.is_monotonic and other.is_monotonic and self._can_use_libjoin: try: result = self._inner_indexer(other)[0] except TypeError: @@ -4178,12 +4173,15 @@ def join( return self._join_non_unique(other, how=how) elif not self.is_unique or not other.is_unique: if self.is_monotonic and other.is_monotonic: - return self._join_monotonic(other, how=how) + if self._can_use_libjoin: + # otherwise we will fall through to _join_via_get_indexer + return self._join_monotonic(other, how=how) else: return self._join_non_unique(other, how=how) elif ( self.is_monotonic and other.is_monotonic + and self._can_use_libjoin and ( not isinstance(self, ABCMultiIndex) or not any(is_categorical_dtype(dtype) for dtype in self.dtypes) @@ -4545,6 +4543,15 @@ def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _Ind name = get_op_result_name(self, other) return self._constructor._with_infer(joined, name=name) + @cache_readonly + def _can_use_libjoin(self) -> bool: + """ + Whether we can use the fastpaths implemented in _libs.join + """ + # Note: this will need to be updated when e.g. Nullable dtypes + # are supported in Indexes. 
+ return not is_interval_dtype(self.dtype) + # -------------------------------------------------------------------- # Uncategorized Methods diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 44267895a989e..a7930ac83a96a 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -246,11 +246,6 @@ def searchsorted( def _get_engine_target(self) -> np.ndarray: return np.asarray(self._data) - def _from_join_target(self, result: np.ndarray) -> ArrayLike: - # ATM this is only for IntervalIndex, implicit assumption - # about _get_engine_target - return type(self._data)._from_sequence(result, dtype=self.dtype) - def delete(self, loc): """ Make new Index with passed location(-s) deleted diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 0b9b818b960a0..c74a1b1ea60fd 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -38,7 +38,6 @@ from pandas.util._exceptions import rewrite_exception from pandas.core.dtypes.cast import ( - construct_1d_object_array_from_listlike, find_common_type, infer_dtype_from_scalar, maybe_box_datetimelike, @@ -894,17 +893,13 @@ def _is_all_dates(self) -> bool: return False def _get_join_target(self) -> np.ndarray: - # constructing tuples is much faster than constructing Intervals - tups = list(zip(self.left, self.right)) - target = construct_1d_object_array_from_listlike(tups) - return target + # Note: we _could_ use libjoin functions by either casting to object + # dtype or constructing tuples (faster than constructing Intervals) + # but the libjoin fastpaths are no longer fast in these cases. 
+ raise NotImplementedError("IntervalIndex does not use libjoin fastpaths") def _from_join_target(self, result): - left, right = list(zip(*result)) - arr = type(self._data).from_arrays( - left, right, dtype=self.dtype, closed=self.closed - ) - return type(self)._simple_new(arr, name=self.name) + raise NotImplementedError("IntervalIndex does not use libjoin fastpaths") # TODO: arithmetic operations