From 9c501ad3276a9278b1242320ba1d93630841b5fc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Oct 2017 23:02:33 +0200 Subject: [PATCH 1/9] API: change IntervalIndex.contains to work elementwise --- pandas/core/indexes/interval.py | 16 +++++--------- pandas/tests/indexes/test_interval.py | 30 +++++++++++++++++---------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 29699f664bbf3..eda28f1690710 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -312,24 +312,18 @@ def __contains__(self, key): def contains(self, key): """ - return a boolean if this key is IN the index - - We accept / allow keys to be not *just* actual - objects. + Return a boolean mask whether the key is contained in the Intervals + of the index. Parameters ---------- - key : int, float, Interval + key : scalar, Interval Returns ------- - boolean + boolean array """ - try: - self.get_loc(key) - return True - except KeyError: - return False + return np.array([key in interval for interval in self], dtype='bool') @classmethod def from_breaks(cls, breaks, closed='right', name=None, copy=False): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index b55bab3a210cc..8a668a4c533a8 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -564,22 +564,30 @@ def test_contains(self): assert Interval(3, 5) not in i assert Interval(-1, 0, closed='left') not in i - def testcontains(self): + def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) - assert i.contains(0.1) - assert i.contains(0.5) - assert i.contains(1) - assert i.contains(Interval(0, 1)) - assert i.contains(Interval(0, 2)) + expected = np.array([False, False], dtype='bool') + actual = i.contains(0) + tm.assert_numpy_array_equal(actual, expected) + actual = 
i.contains(3) + tm.assert_numpy_array_equal(actual, expected) + + expected = np.array([True, False], dtype='bool') + actual = i.contains(0.5) + tm.assert_numpy_array_equal(actual, expected) + actual = i.contains(1) + tm.assert_numpy_array_equal(actual, expected) - # these overlaps completely - assert i.contains(Interval(0, 3)) - assert i.contains(Interval(1, 3)) + # TODO what to do with intervals? + # assert i.contains(Interval(0, 1)) + # assert i.contains(Interval(0, 2)) + # + # # these overlaps completely + # assert i.contains(Interval(0, 3)) + # assert i.contains(Interval(1, 3)) - assert not i.contains(20) - assert not i.contains(-20) def test_dropna(self): From d8737e4aaaf9ca53ce22883604f20c4210f861b2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 29 Jun 2019 12:02:25 -0500 Subject: [PATCH 2/9] fixup merge --- .../tests/indexes/interval/test_interval.py | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index c82864e646f7c..9283e2f1d174f 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -736,6 +736,23 @@ def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) + # To be removed, replaced by test_interval_new.py (see #16316, #16386) + def test_contains(self): + # Only endpoints are valid. 
+ i = IntervalIndex.from_arrays([0, 1], [1, 2]) + + # Invalid + assert 0 not in i + assert 1 not in i + assert 2 not in i + + # Valid + assert Interval(0, 1) in i + assert Interval(0, 2) in i + assert Interval(0, 0.5) in i + assert Interval(3, 5) not in i + assert Interval(-1, 0, closed='left') not in i + def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) @@ -757,24 +774,6 @@ def test_contains_method(self): with pytest.raises(TypeError): i.contains(Interval(0, 1)) - # To be removed, replaced by test_interval_new.py (see #16316, #16386) - def test_contains_method(self): - # can select values that are IN the range of a value - i = IntervalIndex.from_arrays([0, 1], [1, 2]) - - assert i.contains(0.1) - assert i.contains(0.5) - assert i.contains(1) - assert i.contains(Interval(0, 1)) - assert i.contains(Interval(0, 2)) - - # these overlaps completely - assert i.contains(Interval(0, 3)) - assert i.contains(Interval(1, 3)) - - assert not i.contains(20) - assert not i.contains(-20) - def test_dropna(self, closed): expected = IntervalIndex.from_tuples( From 4acb536d17455614f9ef177a72e87e441ebe550f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 29 Jun 2019 12:04:56 -0500 Subject: [PATCH 3/9] fix usage of contains in tests --- pandas/tests/indexes/interval/test_interval.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 9283e2f1d174f..ba8e7bd4f1b87 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -770,7 +770,7 @@ def test_contains_method(self): tm.assert_numpy_array_equal(actual, expected) # __contains__ not implemented for "interval in interval", follow - # that for the contains method + # that for the contains method for now with pytest.raises(TypeError): i.contains(Interval(0, 1)) @@ 
-942,11 +942,9 @@ def test_datetime(self, tz): assert iv_false not in index # .contains does check individual points - assert not index.contains(Timestamp('2000-01-01', tz=tz)) - assert index.contains(Timestamp('2000-01-01T12', tz=tz)) - assert index.contains(Timestamp('2000-01-02', tz=tz)) - assert index.contains(iv_true) - assert not index.contains(iv_false) + assert not index.contains(Timestamp('2000-01-01', tz=tz)).any() + assert index.contains(Timestamp('2000-01-01T12', tz=tz)).any() + assert index.contains(Timestamp('2000-01-02', tz=tz)).any() # test get_indexer start = Timestamp('1999-12-31T12:00', tz=tz) From e960116e18cd6c019c7435a876b2a4964e02d94d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 29 Jun 2019 12:15:48 -0500 Subject: [PATCH 4/9] deprecate for other index types --- pandas/core/indexes/base.py | 14 ++++++-------- pandas/core/indexes/category.py | 4 ---- pandas/core/indexes/datetimelike.py | 2 -- pandas/core/indexes/multi.py | 2 -- pandas/core/indexes/period.py | 2 -- 5 files changed, 6 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4b7582fcf7cc0..f669a883156fe 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4015,13 +4015,6 @@ def is_type_compatible(self, kind): >>> idx Int64Index([1, 2, 3, 4], dtype='int64') - >>> idx.contains(2) - True - >>> idx.contains(6) - False - - This is equivalent to: - >>> 2 in idx True >>> 6 in idx @@ -4036,8 +4029,13 @@ def __contains__(self, key): except (OverflowError, TypeError, ValueError): return False - @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) def contains(self, key): + """ + Return a boolean indicating whether the provided key is in the index. + + .. deprecated:: 0.25.0 + Use ``key in index`` instead of ``index.contains(key)``. 
+ """ return key in self def __hash__(self): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 3d3774ce48e8b..4040d889b91eb 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -385,10 +385,6 @@ def __contains__(self, key): return contains(self, key, container=self._engine) - @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) - def contains(self, key): - return key in self - def __array__(self, dtype=None): """ the array interface, return my values """ return np.array(self._data, dtype=dtype) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 7c90fb11aa1bf..e141f7b5c5b23 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -221,8 +221,6 @@ def __contains__(self, key): except (KeyError, TypeError, ValueError): return False - contains = __contains__ - # Try to run function on index first, and then on elements of index # Especially important for group-by functionality def map(self, mapper, na_action=None): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a06d304fb5a22..628cf500621d8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -922,8 +922,6 @@ def __contains__(self, key): except (LookupError, TypeError, ValueError): return False - contains = __contains__ - @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, **kwargs): if values is not None: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index dc11099c3e903..f61b2e679f0c8 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -433,8 +433,6 @@ def __contains__(self, key): except Exception: return False - contains = __contains__ - @cache_readonly def _int64index(self): return Int64Index._simple_new(self.asi8, name=self.name) From 8e8b032054e200710e9cc604765b1c6e354f46a5 Mon Sep 17 00:00:00 2001 From: Joris Van 
den Bossche Date: Sat, 29 Jun 2019 12:21:10 -0500 Subject: [PATCH 5/9] vectorized implementation --- pandas/core/indexes/interval.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e2315f25d206e..498300a65e9ce 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -304,7 +304,11 @@ def contains(self, key): ------- boolean array """ - return np.array([key in interval for interval in self], dtype='bool') + if isinstance(key, Interval): + raise TypeError('contains not defined for two intervals') + + return ((self.left < key if self.open_left else self.left <= key) & + (key < self.right if self.open_right else key <= self.right)) @Appender(_interval_shared_docs['to_tuples'] % dict( return_type="Index", From 4c7575c7fe2fcd39629d9e505c6dc783b39ab698 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 30 Jun 2019 10:24:36 -0500 Subject: [PATCH 6/9] move implementation to the array --- pandas/core/arrays/interval.py | 45 +++++++++++++++++++++++++++++++++ pandas/core/indexes/interval.py | 25 ++++-------------- 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index aaa4124182598..5cab8e52387d4 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -79,6 +79,7 @@ from_arrays from_tuples from_breaks +contains overlaps set_closed to_tuples @@ -1017,6 +1018,50 @@ def repeat(self, repeats, axis=None): right_repeat = self.right.repeat(repeats) return self._shallow_copy(left=left_repeat, right=right_repeat) + _interval_shared_docs['contains'] = """ + Check elementwise if the Intervals contain the value. + + Return a boolean mask whether the value is contained in the Intervals + of the %(klass)s. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + other : scalar + The value to check whether it is contained in the Intervals. 
+ + Returns + ------- + boolean array + + See Also + -------- + Interval.contains : Check whether Interval object contains value. + %(klass)s.overlaps : Check if an Interval overlaps the values in the + %(klass)s. + + Examples + -------- + >>> intervals = pd.%(qualname)s.from_tuples([(0, 1), (1, 3), (2, 4)]) + >>> intervals + %(klass)s([(0, 1], (1, 3], (2, 4]], + closed='right', + dtype='interval[int64]') + >>> intervals.contains(0.5) + array([ True, False, False]) + """ + + @Appender(_interval_shared_docs['contains'] % _shared_docs_kwargs) + def contains(self, other): + if isinstance(other, Interval): + raise TypeError('contains not defined for two intervals') + + return ( + (self.left < other if self.open_left else self.left <= other) & + (other < self.right if self.open_right else other <= self.right) + ) + _interval_shared_docs['overlaps'] = """ Check elementwise if an Interval overlaps the values in the %(klass)s. diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 498300a65e9ce..9f9ebcf67cee6 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -139,7 +139,7 @@ def func(intvidx_self, other, sort=False): name=_index_doc_kwargs['name'], versionadded="0.20.0", extra_attributes="is_overlapping\nvalues\n", - extra_methods="contains\n", + extra_methods="", examples=textwrap.dedent("""\ Examples -------- @@ -291,25 +291,6 @@ def __contains__(self, key): except KeyError: return False - def contains(self, key): - """ - Return a boolean mask whether the key is contained in the Intervals - of the index. 
- - Parameters - ---------- - key : scalar, Interval - - Returns - ------- - boolean array - """ - if isinstance(key, Interval): - raise TypeError('contains not defined for two intervals') - - return ((self.left < key if self.open_left else self.left <= key) & - (key < self.right if self.open_right else key <= self.right)) - @Appender(_interval_shared_docs['to_tuples'] % dict( return_type="Index", examples=""" @@ -1135,6 +1116,10 @@ def equals(self, other): self.right.equals(other.right) and self.closed == other.closed) + @Appender(_interval_shared_docs['contains'] % _index_doc_kwargs) + def contains(self, other): + return self._data.contains(other) + @Appender(_interval_shared_docs['overlaps'] % _index_doc_kwargs) def overlaps(self, other): return self._data.overlaps(other) From 2d6b083aabb975d59631de113f57df47e944f837 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 30 Jun 2019 10:28:18 -0500 Subject: [PATCH 7/9] change TypeError to NotImplementedError --- pandas/core/arrays/interval.py | 4 +++- pandas/tests/indexes/interval/test_interval.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 5cab8e52387d4..8ed28065ee7aa 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1055,7 +1055,9 @@ def repeat(self, repeats, axis=None): @Appender(_interval_shared_docs['contains'] % _shared_docs_kwargs) def contains(self, other): if isinstance(other, Interval): - raise TypeError('contains not defined for two intervals') + raise NotImplementedError( + 'contains not implemented for two intervals' + ) return ( (self.left < other if self.open_left else self.left <= other) & diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index ba8e7bd4f1b87..a5e9f5902f565 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -771,7 +771,9
@@ def test_contains_method(self): # __contains__ not implemented for "interval in interval", follow # that for the contains method for now - with pytest.raises(TypeError): + with pytest.raises( + NotImplementedError, + match='contains not implemented for two'): i.contains(Interval(0, 1)) def test_dropna(self, closed): From f69ec4bbaf0605bb3f730cb1f402a1440a667054 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 30 Jun 2019 10:46:01 -0500 Subject: [PATCH 8/9] add actual deprecation warning --- pandas/core/indexes/base.py | 4 ++++ pandas/core/indexing.py | 2 +- pandas/tests/indexes/test_base.py | 5 +++++ pandas/tests/indexes/test_range.py | 6 ++++-- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f669a883156fe..36f0b46415a96 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4036,6 +4036,10 @@ def contains(self, key): .. deprecated:: 0.25.0 Use ``key in index`` instead of ``index.contains(key)``. """ + warnings.warn( + "The 'contains' method is deprecated and will be removed in a " + "future versions. 
Use 'key in index' instead of " + "'index.contains(key)", FutureWarning, stacklevel=2) return key in self def __hash__(self): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1539feb2e0856..7e199c6c9f66b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2393,7 +2393,7 @@ def convert_to_index_sliceable(obj, key): elif isinstance(key, str): # we are an actual column - if obj._data.items.contains(key): + if key in obj._data.items: return None # We might have a datetimelike string that we can translate to a diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 1de20dc765655..fc55887a933f8 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2156,6 +2156,11 @@ def test_tab_complete_warning(self, ip): with provisionalcompleter('ignore'): list(ip.Completer.completions('idx.', 4)) + def test_deprecated_contains(self): + for index in self.indices.values(): + with tm.assert_produces_warning(FutureWarning): + index.contains(1) + class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 5f7f10e881ced..de756bf720a81 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -271,10 +271,12 @@ def test_cached_data(self): 91 in idx assert idx._cached_data is None - idx.contains(90) + with tm.assert_produces_warning(FutureWarning): + idx.contains(90) assert idx._cached_data is None - idx.contains(91) + with tm.assert_produces_warning(FutureWarning): + idx.contains(91) assert idx._cached_data is None idx.all() From 30c77f0620344ed8bbc482ef8d19ee9030b48d9a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 30 Jun 2019 11:37:06 -0500 Subject: [PATCH 9/9] fixup linting, docs, test failures --- doc/source/reference/arrays.rst | 1 + doc/source/reference/indexing.rst | 2 +- doc/source/whatsnew/v0.25.0.rst 
| 1 + pandas/core/indexes/base.py | 10 +++++++--- pandas/tests/indexes/period/test_indexing.py | 6 ------ 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 77a87cafb9258..bf9520c54040d 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -335,6 +335,7 @@ A collection of intervals may be stored in an :class:`arrays.IntervalArray`. arrays.IntervalArray.from_arrays arrays.IntervalArray.from_tuples arrays.IntervalArray.from_breaks + arrays.IntervalArray.contains arrays.IntervalArray.overlaps arrays.IntervalArray.set_closed arrays.IntervalArray.to_tuples diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index bbac964e8a201..65860eb5c2f51 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -248,7 +248,6 @@ IntervalIndex components IntervalIndex.from_arrays IntervalIndex.from_tuples IntervalIndex.from_breaks - IntervalIndex.contains IntervalIndex.left IntervalIndex.right IntervalIndex.mid @@ -260,6 +259,7 @@ IntervalIndex components IntervalIndex.get_loc IntervalIndex.get_indexer IntervalIndex.set_closed + IntervalIndex.contains IntervalIndex.overlaps IntervalIndex.to_tuples diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 008f6f0b8643e..48d0675c86d0c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -615,6 +615,7 @@ Other deprecations - :attr:`Series.imag` and :attr:`Series.real` are deprecated. (:issue:`18262`) - :meth:`Series.put` is deprecated. (:issue:`18262`) - :meth:`Index.item` and :meth:`Series.item` is deprecated. (:issue:`18262`) +- :meth:`Index.contains` is deprecated. Use ``key in index`` (``__contains__``) instead (:issue:`17753`). .. 
_whatsnew_0250.prior_deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 36f0b46415a96..f122810a2fe21 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4032,14 +4032,18 @@ def __contains__(self, key): def contains(self, key): """ Return a boolean indicating whether the provided key is in the index. - + .. deprecated:: 0.25.0 Use ``key in index`` instead of ``index.contains(key)``. + + Returns + ------- + bool """ warnings.warn( "The 'contains' method is deprecated and will be removed in a " - "future versions. Use 'key in index' instead of " - "'index.contains(key)", FutureWarning, stacklevel=2) + "future version. Use 'key in index' instead of " + "'index.contains(key)'", FutureWarning, stacklevel=2) return key in self def __hash__(self): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 0801b36402870..27a690e58b70f 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -464,19 +464,13 @@ def test_contains(self): idx0 = pd.PeriodIndex(ps0) for p in ps0: - assert idx0.contains(p) assert p in idx0 - - assert idx0.contains(str(p)) assert str(p) in idx0 - assert idx0.contains('2017-09-01 00:00:01') assert '2017-09-01 00:00:01' in idx0 - assert idx0.contains('2017-09') assert '2017-09' in idx0 - assert not idx0.contains(p3) assert p3 not in idx0 def test_get_value(self):