Skip to content

Commit 44f3471

Browse files
author
tp
committed
add IntervalIndex.get_loc_exact
1 parent b835127 commit 44f3471

File tree

5 files changed

+100
-6
lines changed

5 files changed

+100
-6
lines changed

asv_bench/benchmarks/indexing.py

+4
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ def time_getitem_list(self, monotonic):
229229
def time_loc_list(self, monotonic):
230230
monotonic.loc[80000:]
231231

232+
def time_get_loc_exact(self, monotonic):
    # Benchmark: look up the label stored at position 80000 through the
    # exact-match lookup on the same index.
    idx = monotonic.index
    label = idx[80000]
    idx.get_loc_exact(label)
235+
232236

233237
class PanelIndexing(object):
234238

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ Other Enhancements
283283

284284
- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
285285
- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`)
286+
- New :meth:`IntervalIndex.get_loc_exact` has been added to find exact Interval matches only (:issue:`19349`)
286287

287288
.. _whatsnew_0230.api_breaking:
288289

pandas/core/indexes/base.py

+15
Original file line numberDiff line numberDiff line change
@@ -2536,6 +2536,21 @@ def get_loc(self, key, method=None, tolerance=None):
25362536
raise KeyError(key)
25372537
return loc
25382538

2539+
def get_loc_exact(self, key, method=None):
    """Get integer location, slice or boolean mask for exact
    matches only.

    This implementation forwards ``key`` and ``method`` unchanged to
    :meth:`get_loc` and returns its result. ``get_loc_exact`` is mainly
    of use in :class:`IntervalIndex`, when an exact match is needed.

    See Also
    --------
    get_loc
    pandas.IntervalIndex.get_loc_exact
    """
    loc = self.get_loc(key, method=method)
    return loc
2553+
25392554
def get_value(self, series, key):
25402555
"""
25412556
Fast lookup of value from 1-dimensional ndarray. Only use this if you

pandas/core/indexes/interval.py

+62-6
Original file line numberDiff line numberDiff line change
@@ -899,15 +899,11 @@ def _searchsorted_monotonic(self, label, side, exclude_label=False):
899899

900900
def _get_loc_only_exact_matches(self, key):
    # Only Interval keys can match exactly; anything else is a miss.
    if not isinstance(key, Interval):
        raise KeyError(key)

    # A unique index delegates to the exact-match lookup.
    if self.is_unique:
        return self.get_loc_exact(key)

    raise ValueError("cannot index with a slice Interval"
                     " and a non-unique index")
911907

912908
def _find_non_overlapping_monotonic_bounds(self, key):
913909
if isinstance(key, IntervalMixin):
@@ -970,6 +966,10 @@ def get_loc(self, key, method=None):
970966
>>> overlapping_index = pd.IntervalIndex([i2, i3])
971967
>>> overlapping_index.get_loc(1.5)
972968
array([0, 1], dtype=int64)
969+
970+
See Also
971+
--------
972+
get_loc_exact : Exact matches only
973973
"""
974974
self._check_method(method)
975975

@@ -1003,6 +1003,62 @@ def get_loc(self, key, method=None):
10031003
else:
10041004
return self._engine.get_loc(key)
10051005

1006+
def get_loc_exact(self, key, method=None):
    """Get integer location, slice or boolean mask for exact
    Interval matches only.

    Parameters
    ----------
    key : Interval
        The label we want to find locations for. Must have type
        :class:`Interval`
    method : {None}, optional
        * default: matches where the label exactly matches a given
          :class:`Interval`.

    Returns
    -------
    loc : int if unique index, slice if monotonic index, else mask
        When only some of the locations found by :meth:`get_loc` are
        exact matches, just those locations are returned.

    Raises
    ------
    KeyError if key is not found

    Examples
    --------
    >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
    >>> index = pd.IntervalIndex([i1, i2])
    >>> index.get_loc_exact(i1)
    0

    If an exact match is not found, a KeyError is raised

    >>> index.get_loc_exact(pd.Interval(0.5, 1.5))
    KeyError: Interval(0.5, 1.5, closed='right')

    If a label is in several locations, you get all the relevant
    locations.

    >>> index = pd.IntervalIndex([i1, i2, i1])
    >>> index.get_loc_exact(i1)
    array([0, 2], dtype=int64)

    See Also
    --------
    get_loc
    """
    import numpy as np

    # get_loc finds every candidate location; the comparison below
    # keeps only the entries that are equal to ``key``.
    all_matches = self.get_loc(key, method=method)
    exact_matches = self[all_matches] == key

    if is_scalar(all_matches):
        if exact_matches:
            return all_matches
        raise KeyError(key)

    exact_matches = np.asarray(exact_matches, dtype=bool)
    # ``any`` is checked first so that an empty candidate set raises
    # KeyError instead of vacuously satisfying ``all``.
    if not exact_matches.any():
        raise KeyError(key)
    if exact_matches.all():
        return all_matches
    if isinstance(all_matches, slice):
        # A slice cannot be indexed with a boolean mask; expand it to
        # the integer positions it selects before filtering.
        all_matches = np.arange(len(self))[all_matches]
    return all_matches[exact_matches]
1061+
10061062
def get_value(self, series, key):
10071063
if com.is_bool_indexer(key):
10081064
loc = key

pandas/tests/indexes/interval/test_interval.py

+18
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,24 @@ def test_get_loc_interval(self):
497497
pytest.raises(KeyError, self.index.get_loc,
498498
Interval(-1, 0, 'left'))
499499

500+
def test_get_loc_exact(self):
    # GH19353
    index = self.index
    assert index.get_loc_exact(Interval(0, 1)) == 0

    # Each of these keys is expected to raise KeyError.
    non_matching_keys = [
        1,
        Interval(0, 1, 'left'),
        Interval(0, 0.5),
        Interval(2, 3),
        Interval(-1, 0, 'left'),
    ]
    for key in non_matching_keys:
        with pytest.raises(KeyError):
            index.get_loc_exact(key)

    # The below tests if get_loc_exact interferes with caching
    # used for index.get_loc. See #19353#issuecomment-364295029
    assert index.get_loc(Interval(0, 1)) == 0
517+
500518
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
501519
def test_get_indexer(self):
502520
actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3])

0 commit comments

Comments
 (0)