Skip to content

Commit eff6ec6

Browse files
author
tp
committed
add IntervalIndex.get_loc_exact
1 parent b835127 commit eff6ec6

File tree

6 files changed

+102
-6
lines changed

6 files changed

+102
-6
lines changed

asv_bench/benchmarks/indexing.py

+4
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ def time_getitem_list(self, monotonic):
229229
def time_loc_list(self, monotonic):
230230
monotonic.loc[80000:]
231231

232+
def time_get_loc_exact(self, monotonic):
233+
interval = monotonic.index[80000]
234+
monotonic.index.get_loc_exact(interval)
235+
232236

233237
class PanelIndexing(object):
234238

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ Other Enhancements
283283

284284
- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
285285
- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`)
286+
- New :meth:`IntervalIndex.get_loc_exact` has been added to find exact Interval matches only (:issue:`19349`)
286287

287288
.. _whatsnew_0230.api_breaking:
288289

pandas/core/indexes/base.py

+15
Original file line numberDiff line numberDiff line change
@@ -2536,6 +2536,21 @@ def get_loc(self, key, method=None, tolerance=None):
25362536
raise KeyError(key)
25372537
return loc
25382538

2539+
def get_loc_exact(self, key, method=None):
2540+
"""Get integer location, slice or boolean mask for exact
2541+
matches only.
2542+
2543+
This method dispatches to :meth:`get_loc`. The use for
2544+
``get_loc_exact`` is mainly in :class:`IntervalIndex`,
2545+
when an exact match is needed.
2546+
2547+
See Also
2548+
--------
2549+
get_loc
2550+
pandas.IntervalIndex.get_loc_exact
2551+
"""
2552+
return self.get_loc(key, method=method)
2553+
25392554
def get_value(self, series, key):
25402555
"""
25412556
Fast lookup of value from 1-dimensional ndarray. Only use this if you

pandas/core/indexes/interval.py

+63-5
Original file line numberDiff line numberDiff line change
@@ -903,11 +903,8 @@ def _get_loc_only_exact_matches(self, key):
903903
if not self.is_unique:
904904
raise ValueError("cannot index with a slice Interval"
905905
" and a non-unique index")
906-
907-
# TODO: this expands to a tuple index, see if we can
908-
# do better
909-
return Index(self._multiindex.values).get_loc(key)
910-
raise KeyError
906+
return super(IntervalIndex, self)._engine.get_loc(key)
907+
raise KeyError(key)
911908

912909
def _find_non_overlapping_monotonic_bounds(self, key):
913910
if isinstance(key, IntervalMixin):
@@ -970,6 +967,10 @@ def get_loc(self, key, method=None):
970967
>>> overlapping_index = pd.IntervalIndex([i2, i3])
971968
>>> overlapping_index.get_loc(1.5)
972969
array([0, 1], dtype=int64)
970+
971+
See Also
972+
--------
973+
get_loc_exact : Exact matches only
973974
"""
974975
self._check_method(method)
975976

@@ -1003,6 +1004,63 @@ def get_loc(self, key, method=None):
10031004
else:
10041005
return self._engine.get_loc(key)
10051006

1007+
def get_loc_exact(self, key, method=None):
1008+
"""Get integer location, slice or boolean mask for exact
1009+
Interval matches only.
1010+
1011+
Parameters
1012+
----------
1013+
key : Interval
1014+
The label we want to find locations for. Must have type
1015+
:class:`Interval`
1016+
method : {None}, optional
1017+
* default: matches where the label exactly matches a given
1018+
:class:`Interval`.
1019+
1020+
Returns
1021+
-------
1022+
loc : int if unique index, slice if monotonic index, else mask
1023+
1024+
Raises
1025+
------
1026+
KeyError if key is not found
1027+
1028+
Examples
1029+
--------
1030+
>>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
1031+
>>> index = pd.IntervalIndex([i1, i2])
1032+
>>> index.get_loc_exact(i1)
1033+
0
1034+
1035+
If an exact match is not found, a KeyError is raised
1036+
1037+
>>> index.get_loc_exact(pd.Interval(0.5, 1.5))
1038+
KeyError: Interval(0.5, 1.5, closed='right')
1039+
1040+
If a label is in several locations, you get all the relevant
1041+
locations.
1042+
1043+
>>> index = pd.IntervalIndex([i1, i2, i1])
1044+
>>> index.get_loc_exact(i1)
1045+
array([0, 2], dtype=int64)
1046+
1047+
See Also
1048+
--------
1049+
get_loc
1050+
"""
1051+
all_matches = self.get_loc(key, method=method)
1052+
if is_scalar(all_matches):
1053+
if self[all_matches] == key:
1054+
return all_matches
1055+
raise KeyError(key)
1056+
else:
1057+
exact_matches = self[all_matches] == key
1058+
if (exact_matches).all():
1059+
return all_matches
1060+
elif (exact_matches).any():
1061+
return all_matches[exact_matches]
1062+
raise KeyError(key)
1063+
10061064
def get_value(self, series, key):
10071065
if com.is_bool_indexer(key):
10081066
loc = key

pandas/tests/indexes/interval/test_interval.py

+18
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,24 @@ def test_get_loc_interval(self):
497497
pytest.raises(KeyError, self.index.get_loc,
498498
Interval(-1, 0, 'left'))
499499

500+
def test_get_loc_exact(self):
501+
# GH19353
502+
assert self.index.get_loc_exact(Interval(0, 1)) == 0
503+
with pytest.raises(KeyError):
504+
self.index.get_loc_exact(1)
505+
with pytest.raises(KeyError):
506+
self.index.get_loc_exact(Interval(0, 1, 'left'))
507+
with pytest.raises(KeyError):
508+
self.index.get_loc_exact(Interval(0, 0.5))
509+
with pytest.raises(KeyError):
510+
self.index.get_loc_exact(Interval(2, 3))
511+
with pytest.raises(KeyError):
512+
self.index.get_loc_exact(Interval(-1, 0, 'left'))
513+
514+
# The below tests if get_loc_exact interferes with caching
515+
# used for index.get_loc. See #19353#issuecomment-364295029
516+
self.index.get_loc(Interval(0, 1))
517+
500518
# To be removed, replaced by test_interval_new.py (see #16316, #16386)
501519
def test_get_indexer(self):
502520
actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3])

pandas/tests/indexing/interval/test_interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def test_with_slices(self):
136136
s[Interval(3, 6):]
137137

138138
expected = s.iloc[3:5]
139-
result = s[[Interval(3, 6)]]
139+
result = s[Interval(3, 6)]
140140
tm.assert_series_equal(expected, result)
141141

142142
# slice of scalar with step != 1

0 commit comments

Comments
 (0)