diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 77e013e1e4fb0..3070765b3db22 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -229,6 +229,10 @@ def time_getitem_list(self, monotonic): def time_loc_list(self, monotonic): monotonic.loc[80000:] + def time_get_loc_exact(self, monotonic): + interval = monotonic.index[80000] + monotonic.index.get_loc_exact(interval) + class PanelIndexing(object): diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index acab9d0bbebf8..bc0d92ad0a385 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -323,6 +323,7 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- New :meth:`IntervalIndex.get_loc_exact` has been added to find exact Interval matches only (:issue:`19349`) .. _whatsnew_0230.api_breaking: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 15df77bf772dc..2a762ef4317d0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2536,6 +2536,21 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) return loc + def get_loc_exact(self, key, method=None): + """Get integer location, slice or boolean mask for exact + matches only. + + This method dispatches to :meth:`get_loc`. The use for + ``get_loc_exact`` is mainly in :class:`IntervalIndex`, + when an exact match is needed. + + See Also + -------- + get_loc + pandas.IntervalIndex.get_loc_exact + """ + return self.get_loc(key, method=method) + def get_value(self, series, key): """ Fast lookup of value from 1-dimensional ndarray. 
Only use this if you diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3bf783b5a2faa..2118afa374f90 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -899,15 +899,11 @@ def _searchsorted_monotonic(self, label, side, exclude_label=False): def _get_loc_only_exact_matches(self, key): if isinstance(key, Interval): - if not self.is_unique: raise ValueError("cannot index with a slice Interval" " and a non-unique index") - - # TODO: this expands to a tuple index, see if we can - # do better - return Index(self._multiindex.values).get_loc(key) - raise KeyError + return self.get_loc_exact(key) + raise KeyError(key) def _find_non_overlapping_monotonic_bounds(self, key): if isinstance(key, IntervalMixin): @@ -970,6 +966,10 @@ def get_loc(self, key, method=None): >>> overlapping_index = pd.IntervalIndex([i2, i3]) >>> overlapping_index.get_loc(1.5) array([0, 1], dtype=int64) + + See Also + -------- + get_loc_exact : Exact matches only """ self._check_method(method) @@ -1003,6 +1003,59 @@ def get_loc(self, key, method=None): else: return self._engine.get_loc(key) + def get_loc_exact(self, key, method=None): + """Get integer location, slice or boolean mask for exact + Interval matches only. + + Parameters + ---------- + key : Interval + The label we want to find locations for. Must have type + :class:`Interval` + method : {None}, optional + * default: matches where the label exactly matches a given + :class:`Interval`. 
+ + Returns + ------- + loc : int if unique index, slice if monotonic index, else mask + + Raises + ------ + KeyError if key is not found + + Examples + -------- + >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) + >>> index = pd.IntervalIndex([i1, i2]) + >>> index.get_loc_exact(i1) + 0 + + If an exact match is not found, a KeyError is raised + + >>> index.get_loc_exact(pd.Interval(0.5, 1.5)) + KeyError: Interval(0.5, 1.5, closed='right') + + If a label is in several locations, you get all the relevant + locations. + + >>> index = pd.IntervalIndex([i1, i2, i1]) + >>> index.get_loc_exact(i1) + array([0, 2], dtype=int64) + + See Also + -------- + get_loc + """ + + all_matches = self.get_loc(key, method=method) + exact_matches = self[all_matches] == key + if np.all(exact_matches): + return all_matches + elif np.any(exact_matches): + return all_matches[exact_matches] + raise KeyError(key) + def get_value(self, series, key): if com.is_bool_indexer(key): loc = key diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 71a6f78125004..77181c39aa665 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -497,6 +497,24 @@ def test_get_loc_interval(self): pytest.raises(KeyError, self.index.get_loc, Interval(-1, 0, 'left')) + def test_get_loc_exact(self): + # GH19353 + assert self.index.get_loc_exact(Interval(0, 1)) == 0 + with pytest.raises(KeyError): + self.index.get_loc_exact(1) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(0, 1, 'left')) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(0, 0.5)) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(2, 3)) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(-1, 0, 'left')) + + # The below tests if get_loc_exact interferes with caching + # used for index.get_loc. 
See #19353#issuecomment-364295029 + assert self.index.get_loc(Interval(0, 1)) == 0 + # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_indexer(self): actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3])