Skip to content

ENH: add IntervalIndex.get_loc_exact to look for exact matches only #19353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ def time_getitem_list(self, monotonic):
def time_loc_list(self, monotonic):
monotonic.loc[80000:]

def time_get_loc_exact(self, monotonic):
# Benchmark: take the label stored at position 80000 of the index and
# look that same label up again through get_loc_exact.
# NOTE(review): PR feedback asks that this only run for IntervalIndex
# inputs — confirm how `monotonic` is parametrized.
interval = monotonic.index[80000]
monotonic.index.get_loc_exact(interval)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should only run on II



class PanelIndexing(object):

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ Other Enhancements

- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`)
- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`)
- Added :meth:`IntervalIndex.get_loc_exact` to find exact Interval matches only (:issue:`19349`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prob needs to be added to api.rst


.. _whatsnew_0230.api_breaking:

Expand Down
15 changes: 15 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2536,6 +2536,21 @@ def get_loc(self, key, method=None, tolerance=None):
raise KeyError(key)
return loc

def get_loc_exact(self, key, method=None):
"""Get integer location, slice or boolean mask for exact
matches only.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah could remove this

This method dispatches to :meth:`get_loc`. The use for
``get_loc_exact`` is mainly in :class:`IntervalIndex`,
when an exact match is needed.

Parameters
----------
key : label
The label to look up.
method : optional
Forwarded unchanged to :meth:`get_loc`.

See Also
--------
get_loc
pandas.IntervalIndex.get_loc_exact
"""
# Plain delegation: the result (and any exception) is whatever get_loc
# produces for the same key and method.
return self.get_loc(key, method=method)

def get_value(self, series, key):
"""
Fast lookup of value from 1-dimensional ndarray. Only use this if you
Expand Down
65 changes: 59 additions & 6 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,15 +899,11 @@ def _searchsorted_monotonic(self, label, side, exclude_label=False):

def _get_loc_only_exact_matches(self, key):
# Only Interval keys are looked up; the visible code raises KeyError
# for anything else.
# NOTE(review): this hunk appears to show removed and added diff lines
# together (two returns and two raises in a row) — confirm which lines
# belong to the final version before relying on the order below.
if isinstance(key, Interval):

if not self.is_unique:
raise ValueError("cannot index with a slice Interval"
" and a non-unique index")

# TODO: this expands to a tuple index, see if we can
# do better
return Index(self._multiindex.values).get_loc(key)
raise KeyError
# Presumably the replacement lines: delegate to get_loc_exact and include
# the key in the KeyError — verify against the final diff.
return self.get_loc_exact(key)
raise KeyError(key)

def _find_non_overlapping_monotonic_bounds(self, key):
if isinstance(key, IntervalMixin):
Expand Down Expand Up @@ -970,6 +966,10 @@ def get_loc(self, key, method=None):
>>> overlapping_index = pd.IntervalIndex([i2, i3])
>>> overlapping_index.get_loc(1.5)
array([0, 1], dtype=int64)

See Also
--------
get_loc_exact : Exact matches only
"""
self._check_method(method)

Expand Down Expand Up @@ -1003,6 +1003,59 @@ def get_loc(self, key, method=None):
else:
return self._engine.get_loc(key)

def get_loc_exact(self, key, method=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the same method in Index, dispatch to .get_loc

"""Get integer location, slice or boolean mask for exact
Interval matches only.

Parameters
----------
key : Interval
The label we want to find locations for. Must have type
:class:`Interval`
method : {None}, optional
* default: matches where the label exactly matches a given
:class:`Interval`.

Returns
-------
loc : int if unique index, slice if monotonic index, else mask

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add Raises section

Raises
------
KeyError if key is not found

Examples
--------
>>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
>>> index = pd.IntervalIndex([i1, i2])
>>> index.get_loc_exact(i1)
0

If an exact match is not found, a KeyError is raised

>>> index.get_loc_exact(pd.Interval(0.5, 1.5))
KeyError: Interval(0.5, 1.5, closed='right')

If a label is in several locations, you get all the relevant
locations.

>>> index = pd.IntervalIndex([i1, i2, i1])
>>> index.get_loc_exact(i1)
array([0, 2], dtype=int64)

See Also
--------
get_loc
"""

# Start from every location get_loc reports for the key, then keep only
# the entries that compare equal to the key itself.
all_matches = self.get_loc(key, method=method)
# Compare the selected entry/entries against the key (scalar or
# element-wise, depending on what get_loc returned).
exact_matches = self[all_matches] == key
# Every candidate equals the key: hand back get_loc's result unchanged.
if np.all(exact_matches):
return all_matches
# Only some candidates equal the key: keep just those positions.
# NOTE(review): indexing all_matches assumes it is an array here; a slice
# returned by get_loc would not support this — confirm it cannot occur.
elif np.any(exact_matches):
return all_matches[exact_matches]
# No candidate equals the key exactly.
raise KeyError(key)

def get_value(self, series, key):
if com.is_bool_indexer(key):
loc = key
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,24 @@ def test_get_loc_interval(self):
pytest.raises(KeyError, self.index.get_loc,
Interval(-1, 0, 'left'))

def test_get_loc_exact(self):
# GH19353
# NOTE(review): the self.index fixture is not visible here; the comments
# below describe only what each assertion checks — confirm against the
# fixture definition.
# An Interval(0, 1) key is expected to resolve to integer position 0.
assert self.index.get_loc_exact(Interval(0, 1)) == 0
# A scalar key must raise KeyError.
with pytest.raises(KeyError):
self.index.get_loc_exact(1)
# Same endpoints but closed on the left must raise KeyError.
with pytest.raises(KeyError):
self.index.get_loc_exact(Interval(0, 1, 'left'))
# Same left endpoint but a different right endpoint must raise KeyError.
with pytest.raises(KeyError):
self.index.get_loc_exact(Interval(0, 0.5))
# Interval(2, 3) must raise KeyError.
with pytest.raises(KeyError):
self.index.get_loc_exact(Interval(2, 3))
# Interval(-1, 0, 'left') must raise KeyError.
with pytest.raises(KeyError):
self.index.get_loc_exact(Interval(-1, 0, 'left'))

# The below tests if get_loc_exact interferes with caching
# used for index.get_loc. See #19353#issuecomment-364295029
assert self.index.get_loc(Interval(0, 1)) == 0

# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_get_indexer(self):
actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3])
Expand Down