From 44f34718324ccea70d4a5ead7daef6a81b436294 Mon Sep 17 00:00:00 2001 From: tp Date: Thu, 8 Feb 2018 19:02:43 +0000 Subject: [PATCH 1/3] add IntervalIndex.get_loc_exact --- asv_bench/benchmarks/indexing.py | 4 ++ doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/indexes/base.py | 15 ++++ pandas/core/indexes/interval.py | 68 +++++++++++++++++-- .../tests/indexes/interval/test_interval.py | 18 +++++ 5 files changed, 100 insertions(+), 6 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 77e013e1e4fb0..3070765b3db22 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -229,6 +229,10 @@ def time_getitem_list(self, monotonic): def time_loc_list(self, monotonic): monotonic.loc[80000:] + def time_get_loc_exact(self, monotonic): + interval = monotonic.index[80000] + monotonic.index.get_loc_exact(interval) + class PanelIndexing(object): diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 5e94b9c15fa57..af712ceed03d7 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -283,6 +283,7 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- New :meth:`IntervalIndex.get_loc_exact` has been added to find exact Interval matches only (:issue:`19349`) .. _whatsnew_0230.api_breaking: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 15df77bf772dc..2a762ef4317d0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2536,6 +2536,21 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) return loc + def get_loc_exact(self, key, method=None): + """Get integer location, slice or boolean mask for exact + matches only. 
+ + This method dispatches to :meth:`get_loc`. The use for + ``get_loc_exact`` is mainly in :class:`IntervalIndex`, + when an exact match is needed. + + See Also + -------- + get_loc + pandas.IntervalIndex.get_loc_exact + """ + return self.get_loc(key, method=method) + def get_value(self, series, key): """ Fast lookup of value from 1-dimensional ndarray. Only use this if you diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3bf783b5a2faa..c15dc47e29817 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -899,15 +899,11 @@ def _searchsorted_monotonic(self, label, side, exclude_label=False): def _get_loc_only_exact_matches(self, key): if isinstance(key, Interval): - if not self.is_unique: raise ValueError("cannot index with a slice Interval" " and a non-unique index") - - # TODO: this expands to a tuple index, see if we can - # do better - return Index(self._multiindex.values).get_loc(key) - raise KeyError + return self.get_loc_exact(key) + raise KeyError(key) def _find_non_overlapping_monotonic_bounds(self, key): if isinstance(key, IntervalMixin): @@ -970,6 +966,10 @@ def get_loc(self, key, method=None): >>> overlapping_index = pd.IntervalIndex([i2, i3]) >>> overlapping_index.get_loc(1.5) array([0, 1], dtype=int64) + + See Also + -------- + get_loc_exact : Exact matches only """ self._check_method(method) @@ -1003,6 +1003,62 @@ def get_loc(self, key, method=None): else: return self._engine.get_loc(key) + def get_loc_exact(self, key, method=None): + """Get integer location, slice or boolean mask for exact + Interval matches only. + + Parameters + ---------- + key : Interval + The label we want to find locations for. Must have type + :class:`Interval` + method : {None}, optional + * default: matches where the label exactly matches a given + :class:`Interval`. 
+ + Returns + ------- + loc : int if unique index, slice if monotonic index, else mask + + Raises + ------ + KeyError if key is not found + + Examples + --------- + >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) + >>> index = pd.IntervalIndex([i1, i2]) + >>> index.get_loc_exact(i1) + 0 + + If an exact match is not found, a KeyError is raised + + >>> index.get_loc_exact(pd.Interval(0.5, 1.5)) + KeyError: Interval(0.5, 1.5, closed='right') + + If a label is in several locations, you get all the relevant + locations. + + >>> index = pd.IntervalIndex([i1, i2, i1]) + >>> index.get_loc_exact(i1) + array([0, 2], dtype=int64) + + See Also + -------- + get_loc + """ + all_matches = self.get_loc(key, method=method) + exact_matches = self[all_matches] == key + if is_scalar(all_matches): + if exact_matches: + return all_matches + else: + if (exact_matches).all(): + return all_matches + elif (exact_matches).any(): + return all_matches[exact_matches] + raise KeyError(key) + def get_value(self, series, key): if com.is_bool_indexer(key): loc = key diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 71a6f78125004..77181c39aa665 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -497,6 +497,24 @@ def test_get_loc_interval(self): pytest.raises(KeyError, self.index.get_loc, Interval(-1, 0, 'left')) + def test_get_loc_exact(self): + # GH19353 + assert self.index.get_loc_exact(Interval(0, 1)) == 0 + with pytest.raises(KeyError): + self.index.get_loc_exact(1) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(0, 1, 'left')) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(0, 0.5)) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(2, 3)) + with pytest.raises(KeyError): + self.index.get_loc_exact(Interval(-1, 0, 'left')) + + # The below tests if get_loc_exact interferes with caching + # used for 
index.get_loc. See #19353#issuecomment-364295029 + assert self.index.get_loc(Interval(0, 1)) == 0 + # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_indexer(self): actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) From 613d6f5f6e702b7a44d737249e3b3a76067f3f56 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Feb 2018 10:10:53 -0500 Subject: [PATCH 2/3] clean --- pandas/core/indexes/interval.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index c15dc47e29817..432be2ed80f50 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1047,16 +1047,13 @@ def get_loc_exact(self, key, method=None): -------- get_loc """ + all_matches = self.get_loc(key, method=method) exact_matches = self[all_matches] == key - if is_scalar(all_matches): - if exact_matches: - return all_matches - else: - if (exact_matches).all(): - return all_matches - elif (exact_matches).any(): - return all_matches[exact_matches] + if np.all(all_matches): + return all_matches + elif np.any(exact_matches): + return all_matches[exact_matches] raise KeyError(key) def get_value(self, series, key): From 9045cc915020b06a3a5a892a41bf9ff68d35faf4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 11 Feb 2018 11:12:10 -0500 Subject: [PATCH 3/3] typo --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 432be2ed80f50..2118afa374f90 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1050,7 +1050,7 @@ def get_loc_exact(self, key, method=None): all_matches = self.get_loc(key, method=method) exact_matches = self[all_matches] == key - if np.all(all_matches): + if np.all(exact_matches): return all_matches elif np.any(exact_matches): return all_matches[exact_matches]