From 4ae2e607e4cd100a3d07d511a47a9b00712f5b78 Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Wed, 15 Aug 2018 16:20:13 +0900 Subject: [PATCH 01/15] BUG: Check types in Index.__contains__ (#22085) --- pandas/core/indexes/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bfa669a0ca164..620559d4bb70d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1947,6 +1947,8 @@ def __nonzero__(self): @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) def __contains__(self, key): + if is_float(key) and is_integer_dtype(self.dtype): + return False hash(key) try: return key in self._engine From b8f164e3be0d94c276b8e9a41d93105eedd91379 Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Thu, 16 Aug 2018 10:33:08 +0900 Subject: [PATCH 02/15] Update conditions in Index.__contains__ --- pandas/core/indexes/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 620559d4bb70d..5dc659a5204f4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1947,11 +1947,12 @@ def __nonzero__(self): @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) def __contains__(self, key): - if is_float(key) and is_integer_dtype(self.dtype): - return False hash(key) try: - return key in self._engine + if is_float(key) and is_integer_dtype(self.dtype) and int(key) != key: + return False + else: + return key in self._engine except (OverflowError, TypeError, ValueError): return False From 004c855e18926e1d140cbedbcefc3f0ff10f5337 Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Thu, 16 Aug 2018 13:24:30 +0900 Subject: [PATCH 03/15] Fix code to be PEP8 compliant --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5dc659a5204f4..d7a6e8ed87251 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1949,7 +1949,8 @@ def __nonzero__(self): def __contains__(self, key): hash(key) try: - if is_float(key) and is_integer_dtype(self.dtype) and int(key) != key: + if is_float(key) and is_integer_dtype(self.dtype) and\ + int(key) != key: return False else: return key in self._engine From c1409cbcf41e744eb8fd2a27ad3a114c6228c21e Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Thu, 16 Aug 2018 21:03:13 +0900 Subject: [PATCH 04/15] Add whatsnew entry #22085 --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index cf12759c051fc..88b15612d2af8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -653,6 +653,7 @@ Indexing - Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`) - Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`) - Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`) +- Bug in :func:`Index.__contains__` that returns True just in case val is float and dtype is integer, because float casted to integer (:issue:`22085`) Missing ^^^^^^^ From f979733b4881993969669656c73d45a69831d62a Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Thu, 16 Aug 2018 21:04:59 +0900 Subject: [PATCH 05/15] Add test code #22085 --- pandas/tests/indexes/test_base.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c858b4d86cf5e..a465bfbff08eb 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2483,6 +2483,14 @@ def test_comparison_tzawareness_compat(self, op): # TODO: implement _assert_tzawareness_compat for the reverse # comparison with the Series on the left-hand side + def test_contains_with_float_val(self): + # GH#22085 + index = pd.Index([0, 1, 2, 3]) + + assert not 1.1 in index + assert 1.0 in index + assert 1 in index + class TestIndexUtils(object): From a88497a2e963fc2684cadf930ac10298341874c6 Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Thu, 16 Aug 2018 21:10:13 +0900 Subject: [PATCH 06/15] Fix test code to be PEP8 compliant --- pandas/tests/indexes/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index a465bfbff08eb..7e3f729cdc539 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2487,7 +2487,7 @@ def test_contains_with_float_val(self): # GH#22085 index = pd.Index([0, 1, 2, 3]) - assert not 1.1 in index + assert 1.1 not in index assert 1.0 in index assert 1 in index From 511060ad720aa18a782e0ec265135d463881fbd4 Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Fri, 17 Aug 2018 20:49:29 +0900 Subject: [PATCH 07/15] Add a float index to test code --- pandas/tests/indexes/test_base.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7e3f729cdc539..2a4f83d9cf913 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2485,11 +2485,16 @@ def test_comparison_tzawareness_compat(self, op): def test_contains_with_float_val(self): # GH#22085 - index = pd.Index([0, 1, 2, 3]) + index1 = pd.Index([0, 1, 2, 3]) + index2 = pd.Index([0.1, 1.1, 2.2, 3.3]) - assert 1.1 not in index - assert 1.0 in index - assert 1 in index + assert 1.1 not in index1 + assert 1.0 in index1 + assert 1 in index1 + + assert 1.1 in index2 + assert 1.0 not in index2 + assert 1 not in index2 class TestIndexUtils(object): From a5f2388ffece49502dc112a938a3588e68f93ccd Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Fri, 17 Aug 2018 20:50:45 +0900 Subject: [PATCH 08/15] Update whatsnew entry --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 88b15612d2af8..6c5b998e6e4b9 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -653,7 +653,7 @@ Indexing - Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`) - Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`) - Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`) -- Bug in :func:`Index.__contains__` that returns True just in case val is float and dtype is integer, because float casted to integer (:issue:`22085`) +- Bug in `scalar in Index` if scalar is a float while the Index is of integer dtype (:issue:`22085`) Missing ^^^^^^^ From d61db2ccb3129ebb1f5627fa1a7bfebb7191906d Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Fri, 17 Aug 2018 20:51:31 +0900 Subject: [PATCH 09/15] Fix the code using parens --- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d7a6e8ed87251..8c74beaddcb6a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1949,8 +1949,8 @@ def __nonzero__(self): def __contains__(self, key): hash(key) try: - if is_float(key) and is_integer_dtype(self.dtype) and\ - int(key) != key: + if (is_float(key) and is_integer_dtype(self.dtype) and + int(key) != key): return False else: return key in self._engine From 8384a3c600bfa31be9bb75c25d56f724312130a4 Mon Sep 17 00:00:00 2001 From: Yeojin Kim <38222260+yeojin-dev@users.noreply.github.com> Date: Fri, 24 Aug 2018 11:54:39 +0900 Subject: [PATCH 10/15] Update whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5c0915b459a97..56c40785ae3aa 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -658,7 +658,7 @@ Indexing - Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`) - Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`) - ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`) -- Bug in `scalar in Index` if scalar is a float while the Index is of integer dtype (:issue:`22085`) +- Bug in ``scalar in Index`` if scalar is a float while the Index is of integer dtype (:issue:`22085`) Missing ^^^^^^^ From 4eec1b496d16604e080275b74701446f6c21e35d Mon Sep 17 00:00:00 2001 From: Yeojin Kim <38222260+yeojin-dev@users.noreply.github.com> Date: Fri, 24 Aug 2018 11:57:44 +0900 Subject: [PATCH 11/15] Update base.py --- pandas/core/indexes/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fc4c815029a37..41d5440394245 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1996,8 +1996,7 @@ def __contains__(self, key): if (is_float(key) and is_integer_dtype(self.dtype) and int(key) != key): return False - else: - return key in self._engine + return key in self._engine except (OverflowError, TypeError, ValueError): return False From 6093091debdd3ee782057ed41c641ebda5f942fd Mon Sep 17 00:00:00 2001 From: Yeojin Kim <38222260+yeojin-dev@users.noreply.github.com> Date: Fri, 24 Aug 2018 12:37:37 +0900 Subject: [PATCH 12/15] Fix whatsnew --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 56c40785ae3aa..3643023637d4c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -658,7 +658,7 @@ Indexing - Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`) - Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`) - ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`) -- Bug in ``scalar in Index`` if scalar is a float while the Index is of integer dtype (:issue:`22085`) +- Bug in `scalar in Index` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`) Missing ^^^^^^^ From 20a6bd69151b1839cdac4c6d08e6dffbecdfc268 Mon Sep 17 00:00:00 2001 From: Yeojin Kim Date: Thu, 30 Aug 2018 21:42:45 +0900 Subject: [PATCH 13/15] Move python code to cython --- pandas/_libs/index.pyx | 3 +++ pandas/core/indexes/base.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 293f067810f27..b460e62b42386 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -93,6 +93,9 @@ cdef class IndexEngine: def __contains__(self, object val): self._ensure_mapping_populated() hash(val) + if (util.is_float_object(val) and isinstance(self, Int64Engine) and + int(val) != val): + return False return val in self.mapping cpdef get_value(self, ndarray arr, object key, object tz=None): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 41d5440394245..34cfa0b23f082 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1993,9 +1993,6 @@ def __nonzero__(self): def __contains__(self, key): hash(key) try: - if (is_float(key) and is_integer_dtype(self.dtype) and - int(key) != key): - return False return key in self._engine except (OverflowError, TypeError, ValueError): return False From 6e9f5a5b7f08919477f9b0e38e1b777e67430f60 Mon Sep 17 00:00:00 2001 From: Yeojin Kim <38222260+yeojin-dev@users.noreply.github.com> Date: Thu, 30 Aug 2018 21:56:57 +0900 Subject: [PATCH 14/15] Update index.pyx --- pandas/_libs/index.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index b460e62b42386..df30ac83f412e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -94,9 +94,10 @@ cdef class IndexEngine: self._ensure_mapping_populated() hash(val) if (util.is_float_object(val) and isinstance(self, Int64Engine) and - int(val) != val): + int(val) != val): return False - return val in self.mapping + else: + return val in self.mapping cpdef get_value(self, ndarray arr, object key, object tz=None): """ From 0b8217e57f1b5b7618d78b08447e33eb86e47de9 Mon Sep 17 00:00:00 2001 From: Yeojin Kim <38222260+yeojin-dev@users.noreply.github.com> Date: Fri, 31 Aug 2018 00:29:09 +0900 Subject: [PATCH 15/15] Update index.pyx --- pandas/_libs/index.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index df30ac83f412e..7e5496179c580 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -96,8 +96,7 @@ cdef class IndexEngine: if (util.is_float_object(val) and isinstance(self, Int64Engine) and int(val) != val): return False - else: - return val in self.mapping + return val in self.mapping cpdef get_value(self, ndarray arr, object key, object tz=None): """