From addfd754b6cce89dcb3a851b3d3a2eccb62336c4 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 11 Nov 2020 23:01:12 +0100 Subject: [PATCH 1/7] [BUG]: Isin converted floats unnecessarily to int causing rounding issues --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/algorithms.py | 12 ++++++++++-- pandas/tests/series/methods/test_isin.py | 8 ++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..b4f932da1ad45 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -557,6 +557,7 @@ Reshaping - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) +- Bug in :meth:`Series.isin` cast ``float`` unnecessarily to ``int`` when :class:`Series` to look in was from dtype ``int`` (:issue:`19356`) Sparse ^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ec88eb817b3f8..29fa8000bbbb7 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,6 +19,7 @@ construct_1d_object_array_from_listlike, infer_dtype_from_array, maybe_promote, + maybe_upcast, ) from pandas.core.dtypes.common import ( ensure_float64, @@ -431,6 +432,13 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: return cast("Categorical", comps).isin(values) comps, dtype = _ensure_data(comps) + if is_numeric_dtype(comps): + try: + # Try finding a dtype which would not change our values + values, _ = maybe_upcast(values, dtype=dtype) + dtype = values.dtype + except (ValueError, TypeError): + pass values, _ = _ensure_data(values, dtype=dtype) # faster for larger cases to use np.in1d @@ -445,7 +453,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c)) else: f = np.in1d - elif is_integer_dtype(comps): + elif is_integer_dtype(comps) and is_integer_dtype(values): try: values = values.astype("int64", copy=False) comps = comps.astype("int64", copy=False) @@ -454,7 +462,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: values = values.astype(object) comps = comps.astype(object) - elif is_float_dtype(comps): + elif is_numeric_dtype(comps) or is_numeric_dtype(values): try: values = values.astype("float64", copy=False) comps = comps.astype("float64", copy=False) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 86ea2b2f02a4d..93794fe30ac30 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -90,6 +90,14 @@ def test_isin_read_only(self): expected = Series([True, True, True]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("values", [[-9., 0.], [-9, 0]]) + def test_isin_float_in_int_series(self, values): + # GH: 19356 + ser = Series(values) + result = ser.isin([-9, -0.5]) + expected = Series([True, False]) + tm.assert_series_equal(result, expected) + @pytest.mark.slow def test_isin_large_series_mixed_dtypes_and_nan(): From 63b1bc5e2137837d46483eba60063ad74ca682c1 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 11 Nov 2020 23:02:53 +0100 Subject: [PATCH 2/7] Run black --- pandas/tests/series/methods/test_isin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 93794fe30ac30..4603faf5267b4 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -90,7 +90,7 @@ def test_isin_read_only(self): expected = Series([True, True, True]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("values", [[-9., 0.], [-9, 0]]) + @pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]]) def test_isin_float_in_int_series(self, values): # GH: 19356 ser = Series(values) From 1e4d5726931d0a2e3a98a068b89a5140afdfab09 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 19:30:47 +0100 Subject: [PATCH 3/7] Add issue number --- pandas/tests/series/methods/test_isin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 4603faf5267b4..9f68ca6abcc90 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -92,7 +92,7 @@ def test_isin_read_only(self): @pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]]) def test_isin_float_in_int_series(self, values): - # GH: 19356 + # GH#19356 GH#21804 ser = Series(values) result = ser.isin([-9, -0.5]) expected = Series([True, False]) From d0344214130d8d8ad5f8ad4648c10ada56907b7d Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 26 Nov 2020 22:20:24 +0100 Subject: [PATCH 4/7] Revert "[BUG]: Isin converted floats unnecessarily to int causing rounding issues" This reverts commit addfd754 --- pandas/core/algorithms.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ffd89ee8d52ec..4c9817f3c3dc6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,7 +19,6 @@ construct_1d_object_array_from_listlike, infer_dtype_from_array, maybe_promote, - maybe_upcast, ) from pandas.core.dtypes.common import ( ensure_float64, @@ -450,13 +449,6 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: return np.zeros(comps.shape, dtype=bool) comps, dtype = _ensure_data(comps) - if is_numeric_dtype(comps): - try: - # Try finding a dtype which would not change our values - values, _ = maybe_upcast(values, dtype=dtype) - dtype = values.dtype - except (ValueError, TypeError): - pass values, _ = _ensure_data(values, dtype=dtype) f = htable.ismember_object @@ -472,7 +464,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c)) else: f = np.in1d - elif is_integer_dtype(comps.dtype) and is_integer_dtype(values): + elif is_integer_dtype(comps.dtype): try: values = values.astype("int64", copy=False) comps = comps.astype("int64", copy=False) @@ -481,7 +473,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: values = values.astype(object) comps = comps.astype(object) - elif is_numeric_dtype(comps.dtype) or is_numeric_dtype(values): + elif is_float_dtype(comps.dtype): try: values = values.astype("float64", copy=False) comps = comps.astype("float64", copy=False) From c5b31ceb217ae6722b08904e1baa3f599506c5ff Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 26 Nov 2020 22:23:13 +0100 Subject: [PATCH 5/7] Revert "Revert "[BUG]: Isin converted floats unnecessarily to int causing rounding issues"" This reverts commit d0344214 --- pandas/core/algorithms.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4c9817f3c3dc6..ffd89ee8d52ec 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,6 +19,7 @@ construct_1d_object_array_from_listlike, infer_dtype_from_array, maybe_promote, + maybe_upcast, ) from pandas.core.dtypes.common import ( ensure_float64, @@ -449,6 +450,13 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: return np.zeros(comps.shape, dtype=bool) comps, dtype = _ensure_data(comps) + if is_numeric_dtype(comps): + try: + # Try finding a dtype which would not change our values + values, _ = maybe_upcast(values, dtype=dtype) + dtype = values.dtype + except (ValueError, TypeError): + pass values, _ = _ensure_data(values, dtype=dtype) f = htable.ismember_object @@ -464,7 +472,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c)) else: f = np.in1d - elif is_integer_dtype(comps.dtype): + elif is_integer_dtype(comps.dtype) and is_integer_dtype(values): try: values = values.astype("int64", copy=False) comps = comps.astype("int64", copy=False) @@ -473,7 +481,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: values = values.astype(object) comps = comps.astype(object) - elif is_float_dtype(comps.dtype): + elif is_numeric_dtype(comps.dtype) or is_numeric_dtype(values): try: values = values.astype("float64", copy=False) comps = comps.astype("float64", copy=False) From b41b6892f7a0e1fd13a098e314f114df8f645af1 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 5 Dec 2020 20:16:57 +0100 Subject: [PATCH 6/7] Remove unused import --- pandas/core/algorithms.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 98fc3ee19bd1e..9749297efd004 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -19,7 +19,6 @@ construct_1d_object_array_from_listlike, infer_dtype_from_array, maybe_promote, - maybe_upcast, ) from pandas.core.dtypes.common import ( ensure_float64, From ef12c105df33b11f65225b4e943d45fd42c58162 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 9 Dec 2020 21:26:03 +0100 Subject: [PATCH 7/7] Remove whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 38862973f0950..ac930b3e77785 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -801,7 +801,6 @@ Reshaping - Bug in :func:`merge_ordered` returned wrong join result when length of ``left_by`` or ``right_by`` equals to the rows of ``left`` or ``right`` (:issue:`38166`) - Bug in :func:`merge_ordered` didn't raise when elements in ``left_by`` or ``right_by`` not exist in ``left`` columns or ``right`` columns (:issue:`38167`) - Bug in :func:`DataFrame.drop_duplicates` not validating bool dtype for ``ignore_index`` keyword (:issue:`38274`) -- Bug in :meth:`Series.isin` cast ``float`` unnecessarily to ``int`` when :class:`Series` to look in was from dtype ``int`` (:issue:`19356`) Sparse ^^^^^^