From 3661e63daf1b5837e71ea9328e541ccc0827753e Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 26 Aug 2021 13:27:43 -0700 Subject: [PATCH 1/2] REF: avoid _get_engine_target in _get_nearest_indexer --- pandas/core/indexes/base.py | 39 ++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2615bfffb3cd9..29d2507cab80a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -128,6 +128,7 @@ from pandas.core.arrays import ( Categorical, ExtensionArray, + PeriodArray, ) from pandas.core.arrays.datetimes import ( tz_to_dtype, @@ -3695,19 +3696,18 @@ def _get_fill_indexer( target=target._values, values=self._values, method=method, limit=limit ) - target_values = target._get_engine_target() - if self.is_monotonic_increasing and target.is_monotonic_increasing: engine_method = ( self._engine.get_pad_indexer if method == "pad" else self._engine.get_backfill_indexer ) + target_values = target._get_engine_target() indexer = engine_method(target_values, limit) else: indexer = self._get_fill_indexer_searchsorted(target, method, limit) if tolerance is not None and len(self): - indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance) + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) return indexer @final @@ -3759,10 +3759,8 @@ def _get_nearest_indexer( left_indexer = self.get_indexer(target, "pad", limit=limit) right_indexer = self.get_indexer(target, "backfill", limit=limit) - target_values = target._get_engine_target() - own_values = self._get_engine_target() - left_distances = np.abs(own_values[left_indexer] - target_values) - right_distances = np.abs(own_values[right_indexer] - target_values) + left_distances = self._difference_compat(target, left_indexer) + right_distances = self._difference_compat(target, right_indexer) op = operator.lt if self.is_monotonic_increasing else operator.le indexer = np.where( @@ -3771,20 +3769,39 @@ def _get_nearest_indexer( right_indexer, ) if tolerance is not None: - indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance) + indexer = self._filter_indexer_tolerance(target, indexer, tolerance) return indexer @final def _filter_indexer_tolerance( self, - target: np.ndarray, + target: Index, indexer: npt.NDArray[np.intp], tolerance, ) -> npt.NDArray[np.intp]: - own_values = self._get_engine_target() - distance = abs(own_values[indexer] - target) + + distance = self._difference_compat(target, indexer) + return np.where(distance <= tolerance, indexer, -1) + @final + def _difference_compat( + self, target: Index, indexer: npt.NDArray[np.intp] + ) -> ArrayLike: + # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object] + # of DateOffset objects, which do not support __abs__ (and would be slow + # if they did) + + if isinstance(self.dtype, PeriodDtype): + # Note: we only get here with matching dtypes + own_values = cast(PeriodArray, self._data)._ndarray + target_values = cast(PeriodArray, target._data)._ndarray + diff = own_values[indexer] - target_values + else: + # error: Unsupported left operand type for - ("ExtensionArray") + diff = self._values[indexer] - target._values # type: ignore[operator] + return abs(diff) + # -------------------------------------------------------------------- # Indexer Conversion Methods From 4c55f198133065c480cd7c612b89c706785d4375 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 26 Aug 2021 17:55:30 -0700 Subject: [PATCH 2/2] fix circular import --- pandas/core/indexes/base.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 29d2507cab80a..fd2d5658584d3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -128,7 +128,6 @@ from pandas.core.arrays import ( Categorical, ExtensionArray, - PeriodArray, ) from pandas.core.arrays.datetimes import ( tz_to_dtype, @@ -164,7 +163,6 @@ ) if TYPE_CHECKING: - from pandas import ( CategoricalIndex, DataFrame, @@ -173,6 +171,7 @@ RangeIndex, Series, ) + from pandas.core.arrays import PeriodArray __all__ = ["Index"] @@ -3794,8 +3793,8 @@ def _difference_compat( if isinstance(self.dtype, PeriodDtype): # Note: we only get here with matching dtypes - own_values = cast(PeriodArray, self._data)._ndarray - target_values = cast(PeriodArray, target._data)._ndarray + own_values = cast("PeriodArray", self._data)._ndarray + target_values = cast("PeriodArray", target._data)._ndarray diff = own_values[indexer] - target_values else: # error: Unsupported left operand type for - ("ExtensionArray")