Skip to content

API: change IntervalIndex.contains to work elementwise #17753

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 1, 2019
47 changes: 47 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
from_arrays
from_tuples
from_breaks
contains
overlaps
set_closed
to_tuples
Expand Down Expand Up @@ -1017,6 +1018,52 @@ def repeat(self, repeats, axis=None):
right_repeat = self.right.repeat(repeats)
return self._shallow_copy(left=left_repeat, right=right_repeat)

_interval_shared_docs['contains'] = """
Check elementwise if the Intervals contain the value.

Return a boolean mask whether the value is contained in the Intervals
of the %(klass)s.

.. versionadded:: 0.25.0

Parameters
----------
other : scalar
The value to check whether it is contained in the Intervals.

Returns
-------
boolean array

See Also
--------
Interval.contains : Check whether Interval object contains value.
%(klass)s.overlaps : Check if an Interval overlaps the values in the
%(klass)s.

Examples
--------
>>> intervals = pd.%(qualname)s.from_tuples([(0, 1), (1, 3), (2, 4)])
>>> intervals
%(klass)s([(0, 1], (1, 3], (2, 4]],
closed='right',
dtype='interval[int64]')
>>> intervals.contains(0.5)
array([ True, False, False])
"""

@Appender(_interval_shared_docs['contains'] % _shared_docs_kwargs)
def contains(self, other):
if isinstance(other, Interval):
raise NotImplementedError(
'contains not implemented for two intervals'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should be able to implement this in the next day or two if we want to get it in for 0.25.0; it should be relatively straight-forward and we have good testing infrastructure from overlaps that could be partially reused.

)

return (
(self.left < other if self.open_left else self.left <= other) &
(other < self.right if self.open_right else other <= self.right)
)

_interval_shared_docs['overlaps'] = """
Check elementwise if an Interval overlaps the values in the %(klass)s.

Expand Down
18 changes: 10 additions & 8 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4015,13 +4015,6 @@ def is_type_compatible(self, kind):
>>> idx
Int64Index([1, 2, 3, 4], dtype='int64')

>>> idx.contains(2)
True
>>> idx.contains(6)
False

This is equivalent to:

>>> 2 in idx
True
>>> 6 in idx
Expand All @@ -4036,8 +4029,17 @@ def __contains__(self, key):
except (OverflowError, TypeError, ValueError):
return False

@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
def contains(self, key):
    """
    Return a boolean indicating whether the provided key is in the index.

    .. deprecated:: 0.25.0
        Use ``key in index`` instead of ``index.contains(key)``.

    Parameters
    ----------
    key : label
        The key to check for membership in the index.

    Returns
    -------
    bool
        Result of ``key in self``.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this method, so users see where the deprecated call is made.
    warnings.warn(
        "The 'contains' method is deprecated and will be removed in a "
        "future version. Use 'key in index' instead of "
        "'index.contains(key)'", FutureWarning, stacklevel=2)
    return key in self

def __hash__(self):
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,10 +385,6 @@ def __contains__(self, key):

return contains(self, key, container=self._engine)

@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
def contains(self, key):
return key in self

def __array__(self, dtype=None):
""" the array interface, return my values """
return np.array(self._data, dtype=dtype)
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,6 @@ def __contains__(self, key):
except (KeyError, TypeError, ValueError):
return False

contains = __contains__

# Try to run function on index first, and then on elements of index
# Especially important for group-by functionality
def map(self, mapper, na_action=None):
Expand Down
27 changes: 5 additions & 22 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def func(intvidx_self, other, sort=False):
name=_index_doc_kwargs['name'],
versionadded="0.20.0",
extra_attributes="is_overlapping\nvalues\n",
extra_methods="contains\n",
extra_methods="",
examples=textwrap.dedent("""\
Examples
--------
Expand Down Expand Up @@ -291,27 +291,6 @@ def __contains__(self, key):
except KeyError:
return False

def contains(self, key):
"""
Return a boolean indicating if the key is IN the index

We accept / allow keys to be not *just* actual
objects.

Parameters
----------
key : int, float, Interval

Returns
-------
boolean
"""
try:
self.get_loc(key)
return True
except KeyError:
return False

@Appender(_interval_shared_docs['to_tuples'] % dict(
return_type="Index",
examples="""
Expand Down Expand Up @@ -1137,6 +1116,10 @@ def equals(self, other):
self.right.equals(other.right) and
self.closed == other.closed)

@Appender(_interval_shared_docs['contains'] % _index_doc_kwargs)
def contains(self, other):
return self._data.contains(other)

@Appender(_interval_shared_docs['overlaps'] % _index_doc_kwargs)
def overlaps(self, other):
return self._data.overlaps(other)
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -922,8 +922,6 @@ def __contains__(self, key):
except (LookupError, TypeError, ValueError):
return False

contains = __contains__

@Appender(_index_shared_docs['_shallow_copy'])
def _shallow_copy(self, values=None, **kwargs):
if values is not None:
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,6 @@ def __contains__(self, key):
except Exception:
return False

contains = __contains__

@cache_readonly
def _int64index(self):
return Int64Index._simple_new(self.asi8, name=self.name)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2393,7 +2393,7 @@ def convert_to_index_sliceable(obj, key):
elif isinstance(key, str):

# we are an actual column
if obj._data.items.contains(key):
if key in obj._data.items:
return None

# We might have a datetimelike string that we can translate to a
Expand Down
37 changes: 20 additions & 17 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,23 +753,28 @@ def test_contains(self):
assert Interval(3, 5) not in i
assert Interval(-1, 0, closed='left') not in i

# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def testcontains(self):
def test_contains_method(self):
# can select values that are IN the range of a value
i = IntervalIndex.from_arrays([0, 1], [1, 2])

assert i.contains(0.1)
assert i.contains(0.5)
assert i.contains(1)
assert i.contains(Interval(0, 1))
assert i.contains(Interval(0, 2))
expected = np.array([False, False], dtype='bool')
actual = i.contains(0)
tm.assert_numpy_array_equal(actual, expected)
actual = i.contains(3)
tm.assert_numpy_array_equal(actual, expected)

# these overlaps completely
assert i.contains(Interval(0, 3))
assert i.contains(Interval(1, 3))
expected = np.array([True, False], dtype='bool')
actual = i.contains(0.5)
tm.assert_numpy_array_equal(actual, expected)
actual = i.contains(1)
tm.assert_numpy_array_equal(actual, expected)

assert not i.contains(20)
assert not i.contains(-20)
# __contains__ not implemented for "interval in interval", follow
# that for the contains method for now
with pytest.raises(
NotImplementedError,
match='contains not implemented for two'):
i.contains(Interval(0, 1))

def test_dropna(self, closed):

Expand Down Expand Up @@ -939,11 +944,9 @@ def test_datetime(self, tz):
assert iv_false not in index

# .contains does check individual points
assert not index.contains(Timestamp('2000-01-01', tz=tz))
assert index.contains(Timestamp('2000-01-01T12', tz=tz))
assert index.contains(Timestamp('2000-01-02', tz=tz))
assert index.contains(iv_true)
assert not index.contains(iv_false)
assert not index.contains(Timestamp('2000-01-01', tz=tz)).any()
assert index.contains(Timestamp('2000-01-01T12', tz=tz)).any()
assert index.contains(Timestamp('2000-01-02', tz=tz)).any()

# test get_indexer
start = Timestamp('1999-12-31T12:00', tz=tz)
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2156,6 +2156,11 @@ def test_tab_complete_warning(self, ip):
with provisionalcompleter('ignore'):
list(ip.Completer.completions('idx.', 4))

def test_deprecated_contains(self):
for index in self.indices.values():
with tm.assert_produces_warning(FutureWarning):
index.contains(1)


class TestMixedIntIndex(Base):
# Mostly the tests from common.py for which the results differ
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,12 @@ def test_cached_data(self):
91 in idx
assert idx._cached_data is None

idx.contains(90)
with tm.assert_produces_warning(FutureWarning):
idx.contains(90)
assert idx._cached_data is None

idx.contains(91)
with tm.assert_produces_warning(FutureWarning):
idx.contains(91)
assert idx._cached_data is None

idx.all()
Expand Down